In [1]:
import numpy as np
import pandas as pd
import math
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.stats.mstats import winsorize
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [3]:
# Load the training dataset from the working directory; the grid-search cell
# passes it to Experiments() as the `data` argument used for path generation.
train_df = pd.read_csv('train.csv')

In [11]:
# Grid Generation here --------------------------------------------------------------------
# Builds `df_grid`: n_grids random candidate ratio vectors with columns x1..x5.
# Negative entries are permitted (shorting allowed).

# Seed NumPy's legacy global RNG so the same grid is produced on every run.
np.random.seed(535)

n_grids = 1000        # number of grid points kept for the search
n_candidates = 1400   # oversampling: some candidates are rejected by the x5 filter below

# x1, x2 ~ U(-1, 2) and x3, x4 ~ U(-1, 0.75), one candidate per row.
# The two draws must stay in this order to reproduce the same random stream.
rand_data1 = np.random.uniform(-1, 2, size=(n_candidates, 2))
rand_data2 = np.random.uniform(-1, 0.75, size=(n_candidates, 2))
df_grid = pd.DataFrame(np.concatenate((rand_data1, rand_data2), axis=1), columns=['x1', 'x2', 'x3', 'x4'])

# x5 is the residual 1 - x1 - x2 (x3 and x4 do not enter this constraint).
df_grid['x5'] = 1 - df_grid['x1'] - df_grid['x2']

# Keep only candidates whose x5 lies in [-1, 2], the range x1 and x2 were drawn from.
condition = (df_grid['x5'] >= -1) & (df_grid['x5'] <= 2)
df_grid = df_grid.loc[condition].reset_index(drop=True)

# Fail loudly here instead of with an IndexError deep inside the search loop,
# which iterates over range(n_grids).
if len(df_grid) < n_grids:
    raise ValueError(
        f"only {len(df_grid)} of {n_candidates} candidates passed the x5 filter; "
        f"need {n_grids} (increase n_candidates)"
    )
df_grid = df_grid.iloc[:n_grids, :]

# Display the final grid
print(df_grid)

           x1        x2        x3        x4        x5
0    0.086479  1.240959  0.193399 -0.289057 -0.327439
1   -0.509618  1.007246  0.000601  0.088736  0.502372
2    1.002345  0.317541 -0.300727 -0.985647 -0.319887
3    0.028712  0.443163  0.459251  0.585433  0.528125
4    0.659452 -0.474361  0.406316  0.719263  0.814909
..        ...       ...       ...       ...       ...
995  0.516797 -0.753181 -0.095590 -0.570499  1.236384
996 -0.563399 -0.285216 -0.257118 -0.335824  1.848615
997 -0.993534  1.168260 -0.143694  0.261402  0.825273
998  1.113387  0.311050  0.342542  0.380435 -0.424438
999  0.965484  0.825643 -0.913156  0.267354 -0.791127

[1000 rows x 5 columns]


In [12]:
from PathGen import *

def Experiments(MC,path_length,data,CAP,SAVE,g,ratio,year=60,tax=0):
    """Run ``MC`` Monte Carlo simulations and tabulate per-path summary metrics.

    Each iteration draws a path with ``Path_Generator(path_length, data)``,
    simulates it with ``MC_generate(mcdf, CAP, SAVE, g, ratio)`` and evaluates
    the simulated frame at row ``year``.

    Parameters
    ----------
    MC : int
        Number of simulated paths; one output row per path.
    path_length, data
        Forwarded unchanged to ``Path_Generator``.
    CAP, SAVE, g, ratio
        Forwarded unchanged to ``MC_generate``.
    year : int, default 60
        Row position (after the index is reset) at which capital is read, and
        the exclusive upper bound of the rows fed to the risk metrics.
        NOTE(review): the '5Y' column prefix together with the default of 60
        suggests one row per month -- confirm against MC_generate.
    tax : float, default 0
        Rate applied to the gain ``cap_total - cap_input`` only.

    Returns
    -------
    pandas.DataFrame
        ``MC`` rows indexed 0..MC-1 with float columns
        ``5Y_cap`` (after-tax capital at row ``year``), ``5Y_sharpe``,
        ``5Y_sortino`` and ``5Y_mdd``.

    Raises
    ------
    KeyError
        If the simulated frame has no row ``year`` or lacks the
        ``cap_total`` / ``cap_input`` columns.
    """
    columns = ['5Y_cap','5Y_sharpe','5Y_sortino','5Y_mdd']
    rows = []

    for _ in range(MC):
        mcdf = Path_Generator(path_length, data)
        # Reset to a 0..n-1 index so that label `year` equals row position `year`.
        temdf = MC_generate(mcdf,CAP,SAVE,g,ratio).reset_index(drop=True)

        rst = temdf.loc[year,'cap_total']
        rst2 = temdf.loc[year,'cap_input']

        rows.append((
            rst - tax*(rst - rst2),  # tax is charged on the capital gain only
            # The metrics read rows [0, year) of the 2nd and 3rd columns of the
            # simulated frame by position; which series those are is defined
            # by MC_generate.
            ret_annual_sharpe(temdf.iloc[:year,1]),
            ret_annual_sortino(temdf.iloc[:year,1]),
            max_drawdown(temdf.iloc[:year,2]),
        ))

    # Build the table once, as float64, instead of filling an untyped
    # (object-dtype) frame cell by cell.
    return pd.DataFrame(rows, columns=columns).astype(float)

In [13]:
# search here --- Original Dataset
# Q1: Not allow shorting
# NOTE(review): df_grid as generated above contains negative entries (shorting
# allowed), which does not match the "not allow shorting" label / `_NS` suffix -- confirm.

SAVE = 40000
CAP = 50000
g = 0.11
tax = 0
path_length=120

MC = 100               # simulated paths per grid point
TARGET_CAP = 500000    # capital threshold used for the 'Prob' score

# One row of scores per grid point; float-typed so later reductions (idxmax, mean) work.
df_compare_5Y_NS = pd.DataFrame(index=range(n_grids),
                                columns=['Utility','Sharpe','Sortino','Mdd','Prob'],
                                dtype=float)

for idx in tqdm(range(n_grids)):
    ratio = (df_grid.iloc[idx,:]).to_list()
    temp_df = Experiments(MC,path_length,train_df,CAP,SAVE,g,ratio,year=60,tax=tax)

    # Cast explicitly: Experiments may hand back object-dtype columns.
    final_cap = temp_df['5Y_cap'].astype(float)

    # Assign by column label. The previous positional writes stored the
    # probability in the 'Sortino' slot and left 'Prob' empty.
    df_compare_5Y_NS.loc[idx, 'Utility'] = np.nanmean(np.log(final_cap))  # mean log-utility of final capital
    df_compare_5Y_NS.loc[idx, 'Sharpe'] = np.nanmean(temp_df['5Y_sharpe'].astype(float))
    df_compare_5Y_NS.loc[idx, 'Sortino'] = np.nanmean(temp_df['5Y_sortino'].astype(float))
    df_compare_5Y_NS.loc[idx, 'Mdd'] = np.nanmean(temp_df['5Y_mdd'].astype(float))
    df_compare_5Y_NS.loc[idx, 'Prob'] = (final_cap >= TARGET_CAP).mean()  # share of paths reaching the target

# Scores that could not be computed (all-NaN) become 0.
# NOTE(review): a 0 can outrank a genuinely negative score in a later idxmax -- confirm this is intended.
df_compare_5Y_NS = df_compare_5Y_NS.fillna(0)
df_compare_5Y_NS

 26%|██▌       | 260/1000 [01:49<05:59,  2.06it/s]

In [None]:
# Best grid point (x1..x5) for each selection criterion and horizon.
# The table needs six rows (three criteria x two horizons); the previous
# four-row frame raised IndexError on the fifth assignment, and its row labels
# did not describe the criteria actually stored.
# NOTE(review): `df_grid2` and `df_compare_7Y_NS` are not defined in the cells
# shown above. They must come from elsewhere in the notebook, and `df_grid2`
# must be the grid whose row order matches the score tables -- confirm
# (the 5Y scores above were computed from `df_grid`).
df_results = pd.DataFrame(index=['5Y_sharpe','5Y_utility','5Y_prob',
                                 '7Y_sharpe','7Y_utility','7Y_prob'],
                          columns=['x1','x2','x3','x4','x5'])

for horizon, scores in (('5Y', df_compare_5Y_NS), ('7Y', df_compare_7Y_NS)):
    for metric in ('Sharpe', 'Utility', 'Prob'):
        # idxmax on a single column gives a scalar; the score tables use a
        # 0..n-1 index, so the label doubles as the row position in the grid.
        best = scores[metric].astype(float).idxmax()
        # Assigning a Series aligns its labels with the x1..x5 columns.
        df_results.loc[f'{horizon}_{metric.lower()}', :] = df_grid2.iloc[best, :]

df_results