In [83]:
import numpy as np
import pandas as pd
import os
import statsmodels as sm
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot as plt
from pmdarima.arima import auto_arima
import cvxpy as cp
import time
import datetime as dt
import matplotlib.dates as mdates
import yfinance as yf
import mosek

In [84]:
cp.installed_solvers()

['ECOS', 'ECOS_BB', 'MOSEK', 'OSQP', 'SCIPY', 'SCS']

In [85]:
#get data
all_files = os.listdir(r'C:\Users\xyyh\Desktop\Cornell\ORIE 5370\Project\ftse350_data_features')
dates = pd.read_csv(r'C:\Users\xyyh\Desktop\Cornell\ORIE 5370\Project\ftse350_data_features\{}'.format(all_files[0])).Date
dates = dates.drop([1265, 1266, 3705]).reset_index(drop=True)
data_dict = {}
for file in all_files:
    df = pd.read_csv(r'C:\Users\xyyh\Desktop\Cornell\ORIE 5370\Project\ftse350_data_features\{}'.format(file))
    df = df.drop([1265, 1266, 3705]).reset_index(drop=True)
    data_dict[file] = df


In [82]:
#get S&P500 data
yf.Ticker('^FTSE').history(start="2006-04-28", end="2022-03-19").to_csv(r'C:\Users\xyyh\Desktop\Cornell\ORIE 5370\Project\ftse350.csv')

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\xyyh\\Desktop\\Cornell\\ORIE 5370\\Project\\ftse350.csv'

In [86]:
ftse350_df = pd.read_csv(r'C:\Users\xyyh\Desktop\Cornell\ORIE 5370\Project\ftse350.csv')
ftse350_df = ftse350_df.drop([1265]).reset_index(drop=True)
ftse350_close = ftse350_df.Close.values[:]

In [87]:
dates == ftse350_df.Date

0       True
1       True
2       True
3       True
4       True
        ... 
4010    True
4011    True
4012    True
4013    True
4014    True
Name: Date, Length: 4015, dtype: bool

In [88]:
#get features
mu_vec = np.empty([len(dates), len(all_files)])
vw_mu = np.empty([len(dates), len(all_files)])
vw_rsi = np.empty([len(dates), len(all_files)])

for i in range(len(dates)):
    mu_daily = np.array([])
    vw_mu_daily = np.array([])
    vw_rsi_daily = np.array([])
    
    total_volume = 0
    for file in all_files:
        ret = data_dict[file].Return[i]
        volume = data_dict[file].Volume[i]
        rsi = data_dict[file].RSI[i]

        total_volume += volume
        mu_daily = np.append(mu_daily, ret)
        vw_mu_daily = np.append(vw_mu_daily, ret*volume)
        vw_rsi_daily = np.append(vw_rsi_daily, rsi*volume)
    
    if total_volume == 0:
        print(dates.loc[i])
    mu_vec[i] = mu_daily
    vw_mu[i] = vw_mu_daily / total_volume
    vw_rsi[i] = vw_rsi_daily / total_volume

In [90]:
mu_vec.shape

(4015, 200)

In [91]:
def opt_w(mu, Sigma, n, slvr):
    w = cp.Variable(n)
    gamma = cp.Parameter(nonneg=True)
    ret = mu.T@w 
    risk = cp.quad_form(w, Sigma)
    prob = cp.Problem(cp.Maximize(ret - gamma*risk), 
               [cp.sum(w) == 1, 
                w >= 0])
    
    SAMPLES = 10
    gamma_vals = np.logspace(-2, 3, num=SAMPLES)
    
    w_val = None
    prev_sharpe = -1000
    for i in range(SAMPLES):
        gamma.value = gamma_vals[i]
        prob.solve(solver=slvr)
        risk_data = cp.sqrt(risk).value
        ret_data = ret.value
        sharpe = ret_data/risk_data
        if sharpe > prev_sharpe:
            w_val = w.value
        prev_sharpe = sharpe 
    return w_val


def backtest(n_state, feature, train_len, freq):
    '''
    n_state: the number of market states (int);
    feature: the feature used to cluster data points;
    train_len: the time span of training set (in years);
    freq: days between each training set (int).
    
    returns the portfolio net asset value vector
    '''
    train_days = int(train_len * 253)
    n_iter = int((len(feature) - train_days)/freq)+1
    
    nav = [1]
    opt_failed_dates = []
    
    #create an initial equally weighted portfolio in case the optimization problem failed
    w = np.ones(mu_vec.shape[1])/mu_vec.shape[1] 
    
    for i in range(n_iter):
        #get the data and the historical states:
        feature_temp = feature[i*freq:i*freq+train_days]
        mu_temp = mu_vec[i*freq:i*freq+train_days,:]
        mu_test = mu_vec[i*freq+train_days:(i+1)*freq+train_days,:]
        gm = GaussianMixture(n_components = n_state, random_state=0).fit(feature_temp)
        all_states = gm.predict(feature_temp)
        #predict the state:
        predicted_state = int(np.round(auto_arima(all_states, start_p=0, start_q=0).predict(1)))
        #get the mu and Sigma, and then optimal weights:
        inds = np.where(all_states==predicted_state) #all the indices for this state

        #if this particular state occurred only once or none, pick the most dominant state (a rare case):
        if len(inds) <= 1:
            predicted_state = np.bincount(all_states).argmax()
            inds = np.where(all_states==predicted_state)

        past_ret = np.take(mu_temp, inds, axis=0)[0] #the returns for this state
        mu = np.mean(past_ret, axis=0)
        Sigma = np.cov(past_ret, rowvar=False)
        
        #get the optimal weights and try 4 solvers 
        try:
            w = opt_w(mu, Sigma, mu.shape[0], 'ECOS')
        except:
            try:
                w = opt_w(mu, Sigma, mu.shape[0], 'ECOS_BB')
            except:
                try:
                    w = opt_w(mu, Sigma, mu.shape[0], 'OSQP')
                except:
                    try:
                        w = opt_w(mu, Sigma, mu.shape[0], 'SCS')
                    except:
                        opt_failed_dates.append(dates[i*freq+train_days])
            
            
        #backtest:
        nav_temp = np.cumprod(np.dot(np.exp(mu_test),w)) #net asset value in the test set
        nav_append = nav_temp*nav[-1]
        nav = np.hstack((nav, nav_append))
        
    return nav, opt_failed_dates

In [92]:
#define the candidate configurations
cand_states = [2,3,4]
cand_mu = [vw_mu, vw_rsi]
mu_str = ['vw_mu', 'vw_rsi']
cand_train_len = [0.5, 1, 2, 3, 4] #training set length or lookback period
cand_freq = [21, 63, 252] #rebalancing 
tasks = []
for state in cand_states:
    for i in range(len(cand_mu)):
        for train_len in cand_train_len:
            for freq in cand_freq:
                if freq <= train_len * 253:
                    tasks.append([state, i, train_len, freq])

print(len(tasks))

84


In [93]:
count = 0
for i in range(len(tasks)):
    task = tasks[i]
    state = task[0]
    feature = cand_mu[task[1]]
    train_len = task[2]
    freq = task[3]
    print('State:'+str(state))
    print('Feature:'+mu_str[task[1]])
    print('Train_Len'+str(train_len))
    print('Frequency'+str(freq))
    nav, opt_failed_dates = backtest(state, feature, train_len, freq)
    nan_arr = np.empty(len(nav)-1)
    nan_arr_dates = np.empty(len(nav)-len(opt_failed_dates))
    nan_arr[:] = np.nan
    nan_arr_dates[:] = np.nan
    yearly_ret = np.hstack((np.power(nav[-1], 253/len(nav))-1, nan_arr))
    ftse350_nav = ftse350_close[-len(nav):]/ftse350_close[-len(nav)]
    ftse350_ret = np.hstack((np.power(ftse350_nav[-1], 253/len(ftse350_nav))-1, nan_arr))
    opt_failed_dates_output = np.hstack((opt_failed_dates, nan_arr_dates))
    df_output = pd.DataFrame({'Dates': dates[-len(nav):], 'NAV':nav, 'FTSE350 NAV':ftse350_nav, 'Yearly Ret':yearly_ret, 
                              'FTSE Yearly Ret':ftse350_ret, 'Opt Failed Dates':opt_failed_dates_output})
    df_output.to_csv('ftse350_backtest\FTSE350_{}_{}_{}_{}.csv'.format(str(state), mu_str[task[1]], str(train_len), str(freq)), index=False)
    count += 1
    print(str(count/len(tasks))+'completed')
    

# yearly_vol = np.hstack((np.std((nav[1:]/nav[:-1]-nav[:-1]))*np.sqrt(253), nan_arr))
# yearly_sr = np.hstack((yearly_ret[0]/yearly_vol[0], nan_arr))
# sp500_vol = np.hstack((np.std((sp500_nav[1:]/sp500_nav[:-1]-nav[:-1]))*np.sqrt(253), nan_arr))
# sp500_sr = np.hstack((sp500_ret[0]/sp500_vol[0], nan_arr))


State:2
Feature:vw_mu
Train_Len0.5
Frequency21
0.011904761904761904completed
State:2
Feature:vw_mu
Train_Len0.5
Frequency63
0.023809523809523808completed
State:2
Feature:vw_mu
Train_Len1
Frequency21




0.03571428571428571completed
State:2
Feature:vw_mu
Train_Len1
Frequency63




0.047619047619047616completed
State:2
Feature:vw_mu
Train_Len1
Frequency252
0.05952380952380952completed
State:2
Feature:vw_mu
Train_Len2
Frequency21
0.07142857142857142completed
State:2
Feature:vw_mu
Train_Len2
Frequency63
0.08333333333333333completed
State:2
Feature:vw_mu
Train_Len2
Frequency252
0.09523809523809523completed
State:2
Feature:vw_mu
Train_Len3
Frequency21
0.10714285714285714completed
State:2
Feature:vw_mu
Train_Len3
Frequency63
0.11904761904761904completed
State:2
Feature:vw_mu
Train_Len3
Frequency252
0.13095238095238096completed
State:2
Feature:vw_mu
Train_Len4
Frequency21
0.14285714285714285completed
State:2
Feature:vw_mu
Train_Len4
Frequency63
0.15476190476190477completed
State:2
Feature:vw_mu
Train_Len4
Frequency252
0.16666666666666666completed
State:2
Feature:vw_rsi
Train_Len0.5
Frequency21
0.17857142857142858completed
State:2
Feature:vw_rsi
Train_Len0.5
Frequency63
0.19047619047619047completed
State:2
Feature:vw_rsi
Train_Len1
Frequency21
0.20238095238095238complet



0.27380952380952384completed
State:2
Feature:vw_rsi
Train_Len3
Frequency63




0.2857142857142857completed
State:2
Feature:vw_rsi
Train_Len3
Frequency252
0.2976190476190476completed
State:2
Feature:vw_rsi
Train_Len4
Frequency21
0.30952380952380953completed
State:2
Feature:vw_rsi
Train_Len4
Frequency63
0.32142857142857145completed
State:2
Feature:vw_rsi
Train_Len4
Frequency252
0.3333333333333333completed
State:3
Feature:vw_mu
Train_Len0.5
Frequency21




0.34523809523809523completed
State:3
Feature:vw_mu
Train_Len0.5
Frequency63




0.35714285714285715completed
State:3
Feature:vw_mu
Train_Len1
Frequency21




0.36904761904761907completed
State:3
Feature:vw_mu
Train_Len1
Frequency63
0.38095238095238093completed
State:3
Feature:vw_mu
Train_Len1
Frequency252
0.39285714285714285completed
State:3
Feature:vw_mu
Train_Len2
Frequency21




0.40476190476190477completed
State:3
Feature:vw_mu
Train_Len2
Frequency63
0.4166666666666667completed
State:3
Feature:vw_mu
Train_Len2
Frequency252
0.42857142857142855completed
State:3
Feature:vw_mu
Train_Len3
Frequency21
0.44047619047619047completed
State:3
Feature:vw_mu
Train_Len3
Frequency63
0.4523809523809524completed
State:3
Feature:vw_mu
Train_Len3
Frequency252
0.4642857142857143completed
State:3
Feature:vw_mu
Train_Len4
Frequency21
0.47619047619047616completed
State:3
Feature:vw_mu
Train_Len4
Frequency63
0.4880952380952381completed
State:3
Feature:vw_mu
Train_Len4
Frequency252
0.5completed
State:3
Feature:vw_rsi
Train_Len0.5
Frequency21




0.5119047619047619completed
State:3
Feature:vw_rsi
Train_Len0.5
Frequency63
0.5238095238095238completed
State:3
Feature:vw_rsi
Train_Len1
Frequency21
0.5357142857142857completed
State:3
Feature:vw_rsi
Train_Len1
Frequency63
0.5476190476190477completed
State:3
Feature:vw_rsi
Train_Len1
Frequency252
0.5595238095238095completed
State:3
Feature:vw_rsi
Train_Len2
Frequency21
0.5714285714285714completed
State:3
Feature:vw_rsi
Train_Len2
Frequency63
0.5833333333333334completed
State:3
Feature:vw_rsi
Train_Len2
Frequency252
0.5952380952380952completed
State:3
Feature:vw_rsi
Train_Len3
Frequency21
0.6071428571428571completed
State:3
Feature:vw_rsi
Train_Len3
Frequency63
0.6190476190476191completed
State:3
Feature:vw_rsi
Train_Len3
Frequency252
0.6309523809523809completed
State:3
Feature:vw_rsi
Train_Len4
Frequency21
0.6428571428571429completed
State:3
Feature:vw_rsi
Train_Len4
Frequency63
0.6547619047619048completed
State:3
Feature:vw_rsi
Train_Len4
Frequency252
0.6666666666666666completed
Stat



0.7738095238095238completed
State:4
Feature:vw_mu
Train_Len3
Frequency63
0.7857142857142857completed
State:4
Feature:vw_mu
Train_Len3
Frequency252
0.7976190476190477completed
State:4
Feature:vw_mu
Train_Len4
Frequency21
0.8095238095238095completed
State:4
Feature:vw_mu
Train_Len4
Frequency63
0.8214285714285714completed
State:4
Feature:vw_mu
Train_Len4
Frequency252
0.8333333333333334completed
State:4
Feature:vw_rsi
Train_Len0.5
Frequency21
0.8452380952380952completed
State:4
Feature:vw_rsi
Train_Len0.5
Frequency63
0.8571428571428571completed
State:4
Feature:vw_rsi
Train_Len1
Frequency21




0.8690476190476191completed
State:4
Feature:vw_rsi
Train_Len1
Frequency63




0.8809523809523809completed
State:4
Feature:vw_rsi
Train_Len1
Frequency252




0.8928571428571429completed
State:4
Feature:vw_rsi
Train_Len2
Frequency21




0.9047619047619048completed
State:4
Feature:vw_rsi
Train_Len2
Frequency63




0.9166666666666666completed
State:4
Feature:vw_rsi
Train_Len2
Frequency252
0.9285714285714286completed
State:4
Feature:vw_rsi
Train_Len3
Frequency21
0.9404761904761905completed
State:4
Feature:vw_rsi
Train_Len3
Frequency63
0.9523809523809523completed
State:4
Feature:vw_rsi
Train_Len3
Frequency252
0.9642857142857143completed
State:4
Feature:vw_rsi
Train_Len4
Frequency21
0.9761904761904762completed
State:4
Feature:vw_rsi
Train_Len4
Frequency63
0.9880952380952381completed
State:4
Feature:vw_rsi
Train_Len4
Frequency252
1.0completed
