In [1]:
import numpy as np
import pandas as pd
import math
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.stats.mstats import winsorize
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [2]:
def generate_grids(n_grids):
    # shorting not allowed

    # Set a random seed for reproducibility
    np.random.seed(535)

    # Generate a DataFrame with 1000 rows and 4 columns
    rand_data1 = np.random.uniform(0, 1, size=(n_grids*10, 2))
    rand_data2 = np.random.uniform(-1, 0.75, size=(n_grids*10, 2))
    df_grid = pd.DataFrame(np.concatenate((rand_data1, rand_data2), axis=1), columns=['x1', 'x2', 'x3','x4'])

    # Calculate x4 as 1 minus the sum of x1 and x2
    df_grid['x5'] = 1 - df_grid['x1'] - df_grid['x2']

    # Drop rows based on the condition
    condition = (df_grid['x5'] >= -1) & (df_grid['x5'] <= 0) & (df_grid['x1']+df_grid['x2']+df_grid['x5']==1)
    df_grid = df_grid[condition]

    df_grid.reset_index(drop=True,inplace=True)
    df_grid = df_grid.iloc[:n_grids,:]

    # Display the sorted DataFrame
    return df_grid

In [3]:
from PathGen import *

def Experiments(MC,path_length,data,CAP,SAVE,g,ratio,year=60,tax=0):
    # MC simulation
    Simu_table = pd.DataFrame(index = range(MC),\
    columns = ['5Y_cap','5Y_sharpe','5Y_sortino','5Y_mdd'])
    year -= 1
    for mc in range(MC):
        mcdf = Path_Generator(path_length, data)
        temdf = MC_generate(mcdf,CAP,SAVE,g,ratio)
        temdf.reset_index(inplace=True,drop=True)
        # Loc out end of different years
        rst = temdf.loc[year,'cap_total']
        rst2 = temdf.loc[year,'cap_input'] 
        # Total Capital after tax for different dates
        Simu_table.iloc[mc,0] = rst - tax*(rst - rst2) # Tax Adjustment (only pay for capital gain)
        Simu_table.iloc[mc,1] = ret_annual_sharpe(temdf.iloc[:year,1])
        Simu_table.iloc[mc,2] = ret_annual_sortino(temdf.iloc[:year,1])
        Simu_table.iloc[mc,3] = max_drawdown2(temdf.iloc[:year,1])
        
    return Simu_table

In [4]:
def get_sim_stats(df_grid,n_grids,MC,path_length,data,CAP,SAVE,g,year,tax):
    # returns a df with each row corresponds to one MC sim, each col corresponds to one candidate objective function values
    df_compare_5Y_NS = pd.DataFrame(index=range(n_grids),columns = ['Utility','Sharpe','Sortino','Mdd','Prob'])

    for idx in tqdm(range(n_grids)):
        ratio = (df_grid.iloc[idx,:]).to_list()
        temp_df = Experiments(MC,path_length,data,CAP,SAVE,g,ratio,year=60,tax=tax)
        uiti_5Y = temp_df['5Y_cap'].apply(lambda x: np.log(x))
        df_compare_5Y_NS.iloc[idx,0] = np.nanmean(uiti_5Y)
        df_compare_5Y_NS.iloc[idx,1] = np.nanmean(temp_df['5Y_sharpe'])
        df_compare_5Y_NS.iloc[idx,2] = np.nanmean(temp_df['5Y_sortino'])
        df_compare_5Y_NS.iloc[idx,3] = np.nanmean(temp_df['5Y_mdd'])
        df_compare_5Y_NS.iloc[idx,2] = (temp_df['5Y_cap'] >= 500000).mean()
    df_compare_5Y_NS = df_compare_5Y_NS.fillna(0)
    df_compare_5Y_NS = df_compare_5Y_NS[df_compare_5Y_NS['Mdd']<=0.25] # putting constraint on the max dd upper bound
    return df_compare_5Y_NS

In [5]:
def get_sim_results(df_grid, df_compare_5Y_NS):
    # the optimal allocation for each of the objectives
    df_results = pd.DataFrame(index=['5Y_uti','5Y_sharpe','5Y_sortino','5Y_mdd','5Y_prob'],\
                            columns=['x1','x2','x3','x4','x5'])

    df_results.iloc[0,:] = df_grid.loc[df_compare_5Y_NS[['Utility']].idxmax(),:]
    df_results.iloc[1,:] = df_grid.loc[df_compare_5Y_NS[['Sharpe']].idxmax(),:]
    df_results.iloc[2,:] = df_grid.loc[df_compare_5Y_NS[['Sortino']].idxmax(),:]
    df_results.iloc[3,:] = df_grid.loc[df_compare_5Y_NS[['Mdd']].idxmin(),:]
    df_results.iloc[4,:] = df_grid.loc[df_compare_5Y_NS[['Prob']].idxmax(),:]

    # the corresponding objective values of the optimal allcoations above
    df_results_val = pd.DataFrame(index=['5Y_uti','5Y_sharpe','5Y_sortino','5Y_mdd','5Y_prob'],\
                            columns=['Utility','Sharpe','Sortino','Mdd','Prob'])

    df_results_val.iloc[0,:] = df_compare_5Y_NS.loc[df_compare_5Y_NS[['Utility']].idxmax(),:]
    df_results_val.iloc[1,:] = df_compare_5Y_NS.loc[df_compare_5Y_NS[['Sharpe']].idxmax(),:]
    df_results_val.iloc[2,:] = df_compare_5Y_NS.loc[df_compare_5Y_NS[['Sortino']].idxmax(),:]
    df_results_val.iloc[3,:] = df_compare_5Y_NS.loc[df_compare_5Y_NS[['Mdd']].idxmin(),:]
    df_results_val.iloc[4,:] = df_compare_5Y_NS.loc[df_compare_5Y_NS[['Prob']].idxmax(),:]
    
    return df_results, df_results_val

In [6]:
train_df = pd.read_csv('train.csv')
data = pd.read_csv('data.csv')

SAVE = 40000
CAP = 50000
g = 0.11
tax = 0
path_length=60
year = 60
MC = 200
n_grids = 2000

df_grid = generate_grids(n_grids)
df_compare_5Y_NS = get_sim_stats(df_grid,n_grids,MC,path_length,data,CAP,SAVE,g,year,tax)
df_results, df_results_val = get_sim_results(df_grid,df_compare_5Y_NS)

 68%|██████▊   | 1368/2000 [14:17<07:04,  1.49it/s]

In [None]:
df_results

Unnamed: 0,x1,x2,x3,x4,x5
5Y_uti,0.947698,0.474223,-0.057359,-0.473233,-0.421921
5Y_sharpe,0.36216,0.746986,0.161775,0.051957,-0.109146
5Y_sortino,0.36216,0.746986,0.161775,0.051957,-0.109146
5Y_mdd,0.762933,0.646803,0.313549,0.12346,-0.409736
5Y_prob,0.36216,0.746986,0.161775,0.051957,-0.109146


In [None]:
df_results_val

Unnamed: 0,Utility,Sharpe,Sortino,Mdd,Prob
5Y_uti,15.070165,1.016202,1.0,0.218201,0.0
5Y_sharpe,13.978372,1.228497,1.0,0.227623,0.0
5Y_sortino,13.978372,1.228497,1.0,0.227623,0.0
5Y_mdd,15.009202,1.137434,1.0,0.1713,0.0
5Y_prob,13.978372,1.228497,1.0,0.227623,0.0
