In [1]:
import numpy as np
import pandas as pd
import math
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.stats.mstats import winsorize
import warnings
warnings.filterwarnings("ignore")

In [2]:
from tqdm import tqdm

In [3]:
# Load the dataset and index it by month.
data = pd.read_csv('data.csv')
# strptime leaves the day at its default of 1, so each timestamp is the first day of its month.
# NOTE(review): the original comment assumed month-end dates - confirm which one is intended.
data['month'] = [datetime.strptime(stamp, '%Y-%m') for stamp in data['month']]
data = data.set_index(['month'])
# NOTE(review): the author states no month is missing; nothing in this cell verifies it.

### Functions for Grid Search

- For each grid point, take the mean over all possible paths (119 MC)
- Grid Search - Three dimensions (10 * 10 * 10) 

In [4]:
from PathGen import *

def Experiments(MC,data,CAP,SAVE,g,ratio,years=(60,84),tax=0):
    """Simulate ``MC`` paths and tabulate after-tax capital and Sharpe ratio per path.

    Every path is produced by ``MC_generate(mc, data, CAP, SAVE, g, ratio)``.

    Parameters
    ----------
    MC : int
        Number of paths; path ids ``0 .. MC-1`` are handed to ``MC_generate``.
    data, CAP, SAVE, g, ratio
        Forwarded unchanged to ``MC_generate``.
    years : sequence of two ints, default ``(60, 84)``
        The two horizons, expressed as row counts of the generated path.
        Only ``years[0]`` and ``years[1]`` are read.
        NOTE(review): presumably months (60 -> 5Y, 84 -> 7Y) - confirm.
    tax : float, default 0
        Rate applied to ``cap_total - cap_input`` at each horizon.
        NOTE(review): a negative difference produces a credit rather than
        zero tax - confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        One row per path with columns ``5Y_cap``, ``7Y_cap``, ``5Y_sharpe``
        and ``7Y_sharpe``. The column names are fixed even when ``years`` is
        overridden: the first horizon fills the ``5Y_*`` columns, the second
        the ``7Y_*`` columns.
    """
    first_h, second_h = years[0], years[1]
    # Row labels (equal to positions after the index reset) of each horizon's last period.
    horizon_rows = [first_h - 1, second_h - 1]

    Simu_table = pd.DataFrame(index=range(MC),
                              columns=['5Y_cap','7Y_cap','5Y_sharpe','7Y_sharpe'])

    for mc in range(MC):
        # Re-index on a copy so the frame handed back by MC_generate is not mutated.
        temdf = MC_generate(mc,data,CAP,SAVE,g,ratio).reset_index(drop=True)

        total = temdf.loc[horizon_rows,'cap_total']
        paid_in = temdf.loc[horizon_rows,'cap_input']
        # Tax is charged on the difference between total and contributed capital only.
        after_tax = total - tax*(total - paid_in)

        # Assign a plain array: the Series is labelled by row number, not by the
        # target column names, so the write must be positional.
        Simu_table.iloc[mc,:2] = after_tax.to_numpy()
        # Column 1 of the generated path feeds the Sharpe helper.
        # NOTE(review): presumably the periodic return column - confirm against PathGen.
        Simu_table.iloc[mc,2] = ret_annual_sharpe(temdf.iloc[:first_h,1])
        Simu_table.iloc[mc,3] = ret_annual_sharpe(temdf.iloc[:second_h,1])

    return Simu_table

In [13]:
# Q1 candidates: shorting is not allowed.
# Draw 9000 x 3 uniform values on [0, 2); the fixed seed keeps the grid reproducible.
np.random.seed(535)
rand_data = np.random.uniform(0, 2, size=(9000, 3))

# x4 is whatever is left after x1 and x2, so x1 + x2 + x4 == 1.
candidates = pd.DataFrame(rand_data, columns=['x1', 'x2', 'x3']).assign(
    x4=lambda frame: 1 - frame['x1'] - frame['x2']
)

# Keep only draws whose x4 lies in [0, 2] (bounds inclusive).
condition = candidates['x4'].between(0, 2)

# Renumber the survivors from 0 and keep the first 500 of them.
df_grid2 = candidates.loc[condition].reset_index(drop=True).iloc[:500, :]

# Show the candidate grid
print(df_grid2)

           x1        x2        x3        x4
0    0.252218  0.389102  0.879787  0.358679
1    0.502337  0.319383  0.845133  0.178280
2    0.202873  0.378159  0.650015  0.418968
3    0.325748  0.632399  1.387910  0.041853
4    0.111440  0.710112  1.133961  0.178449
..        ...       ...       ...       ...
495  0.111281  0.331458  0.091367  0.557261
496  0.477581  0.387882  0.342385  0.134537
497  0.818098  0.125310  0.095685  0.056592
498  0.297952  0.162477  1.990411  0.539571
499  0.418443  0.154279  0.905400  0.427278

[500 rows x 4 columns]


In [14]:
# Q1 (shorting not allowed): score every candidate in df_grid2 on the original dataset.

SAVE = 40000
CAP = 50000
g = 0.05

MC = 119

# Capital level whose hit rate is recorded in the 'Prob' column.
TARGET_CAP = 500000

# Size the score tables from the grid itself, so a grid with fewer rows than
# expected cannot trigger an IndexError part-way through the long loop.
n_grid = len(df_grid2)
metric_cols = ['Sharpe','Utility','Prob']
df_compare_5Y_NS = pd.DataFrame(index=range(n_grid),columns = metric_cols)
df_compare_7Y_NS = pd.DataFrame(index=range(n_grid),columns = metric_cols)

for idx in tqdm(range(n_grid)):
    ratio = df_grid2.iloc[idx,:].to_list()
    temp_df = Experiments(MC,data,CAP,SAVE,g,ratio,years=[60,84],tax=0)
    # Same three scores for both horizons; row idx matches df_grid2's row idx.
    for horizon, scores in (('5Y', df_compare_5Y_NS), ('7Y', df_compare_7Y_NS)):
        final_cap = temp_df[horizon + '_cap']
        # Mean Sharpe ratio across paths (NaNs skipped).
        scores.iloc[idx,0] = np.nanmean(temp_df[horizon + '_sharpe'])
        # Mean log of the final capital across paths (NaNs skipped).
        # Element-wise apply, because the column is stored with object dtype.
        scores.iloc[idx,1] = np.nanmean(final_cap.apply(np.log))
        # Share of paths whose final capital reaches the target.
        scores.iloc[idx,2] = (final_cap >= TARGET_CAP).mean()

100%|██████████| 500/500 [11:05<00:00,  1.33s/it]


In [15]:
# The score tables were filled cell by cell; convert every metric column to a numeric dtype.
for score_table in (df_compare_5Y_NS, df_compare_7Y_NS):
    for metric in ('Sharpe', 'Utility', 'Prob'):
        score_table[metric] = pd.to_numeric(score_table[metric])

In [16]:
import pickle

# Serialize both Q1 score tables, one pickle file per horizon.
for out_path, score_table in (
    ('df_compare_5Y_Q1.pickle', df_compare_5Y_NS),
    ('df_compare_7Y_Q1.pickle', df_compare_7Y_NS),
):
    with open(out_path, 'wb') as handle:
        pickle.dump(score_table, handle, protocol=pickle.HIGHEST_PROTOCOL)
 

In [20]:
# Best candidate allocation for every (horizon, objective) pair.
df_results = pd.DataFrame(index=['5Y_sharpe','5Y_uti','5Y_prob',\
    '7Y_sharpe','7Y_uti','7Y_prob'],columns=['x1','x2','x3','x4'])

# (row of df_results, score table, metric column to maximise)
selections = [
    ('5Y_sharpe', df_compare_5Y_NS, 'Sharpe'),
    ('5Y_uti',    df_compare_5Y_NS, 'Utility'),
    ('5Y_prob',   df_compare_5Y_NS, 'Prob'),
    ('7Y_sharpe', df_compare_7Y_NS, 'Sharpe'),
    ('7Y_uti',    df_compare_7Y_NS, 'Utility'),
    ('7Y_prob',   df_compare_7Y_NS, 'Prob'),
]

for row_label, score_table, metric in selections:
    # Series.idxmax returns a single index label. The score tables are indexed
    # 0..n-1 in the same order as df_grid2's rows, so it is also a row position.
    best_row = score_table[metric].idxmax()
    # Write plain values so the assignment does not depend on how pandas
    # coerces a one-row DataFrame into a row.
    df_results.loc[row_label,:] = df_grid2.iloc[best_row,:].to_numpy()

In [21]:
# Serialize the table of best allocations for Q1.
with open('ratios_opt_Q1.pickle', 'wb') as out_file:
    pickle.Pickler(out_file, protocol=pickle.HIGHEST_PROTOCOL).dump(df_results)

In [22]:
# Display the selected weights (x1..x4), one row per horizon/objective pair.
df_results

Unnamed: 0,x1,x2,x3,x4
5Y_sharpe,0.1941282128300876,0.130897679253146,0.1717893011915889,0.6749741079167664
5Y_uti,0.7132951792394133,0.2484572146718702,1.8985635467053572,0.0382476060887164
5Y_prob,0.7132951792394133,0.2484572146718702,1.8985635467053572,0.0382476060887164
7Y_sharpe,0.1941282128300876,0.130897679253146,0.1717893011915889,0.6749741079167664
7Y_uti,0.7132951792394133,0.2484572146718702,1.8985635467053572,0.0382476060887164
7Y_prob,0.1534992420660981,0.8396523702258318,1.897892712032596,0.00684838770807
