
# Running the Experiments

# 1. Read input files

In [1]:
import time
import math

import numpy as np
import pandas as pd
from itertools import product
from Database.services.database import Database
from sqlalchemy import inspect
from services.data_retreival import *
from scipy.stats import gmean
import matplotlib.pyplot as plt
from services.strategies import *
from services.optimization_layers import *
from services.optimization import *
from services.binary_optimization import *
from services.environment_functions import *
from services.big_m_strategies import *
from services.experiments import *
from services.class_weighted_bigM import *
from services.class_weighted_models import *

# Connect to the project's SQLite database and print its table names as a
# quick connectivity check. (For courses, replace this with a flat file.)
Database.initialize("sqlite:///Database//financial_db.sqlite")
inspector = inspect(Database.engine)
print(inspector.get_table_names())

# Experiment-wide choices: the asset universe selects which flat files are
# read, the imputation method selects the fundamentals file, and ticker_str
# names the reference ticker.
universe = 'ETF'
imputation_method = 'Univariate'
ticker_str = 'SPY'

# Monthly and daily price tables. The second row of each file supplies the
# column names (header=1) and the first row after it is discarded.
adjClose = pd.read_csv(f"flat_files/MonthlyAssetPrices{universe}.csv", index_col=0, header=1).iloc[1:]
daily_adjClose = pd.read_csv(f"flat_files/DailyAssetPrices{universe}.csv", index_col=0, header=1).iloc[1:]

# Technical-analysis features, indexed by the first two columns of the file.
TechnicalAnalysis = pd.read_csv(f"flat_files/TechnicalAnalysis{universe}.csv", index_col=[0, 1])

if universe != 'SP':
    # No fundamentals are merged for this universe: the technical features
    # are the only contextual information.
    TechnicalAnalysis.index.names = ['date', 'ticker']
    ContextualInfo = TechnicalAnalysis
else:
    # 'SP' universe: left-join the imputed WRDS ratios onto the technical
    # features through their shared two-level index.
    FinancialRatios = pd.read_csv(f"flat_files/{imputation_method}WRDS.csv", index_col=[0, 1])
    TechnicalAnalysis.index.names = FinancialRatios.index.names
    ContextualInfo = pd.merge(TechnicalAnalysis, FinancialRatios,
                              how='left', left_index=True, right_index=True)


['asset_id', 'currency_id', 'equities_series', 'equity_index_id', 'exchange_id', 'factor_id', 'factors', 'frequency_id', 'fundamentals', 'metric_id', 'source_id', 'ticker_id', 'ticker_index_membership', 'tiingo_tickers']


In [2]:
# Parse the date index of both price tables and strip any timezone so that
# every table is keyed by naive timestamps.
for _prices in (adjClose, daily_adjClose):
    _prices.index = pd.to_datetime(_prices.index, format='%Y-%m-%d').tz_localize(None)

# Same treatment for the first (date) level of the contextual-info index;
# the second level is left untouched.
ContextualInfo.index = ContextualInfo.index.set_levels(
    pd.to_datetime(ContextualInfo.index.levels[0]).tz_localize(None),
    level=0,
)

In [3]:
# A backtest cannot start before every table has data, so the binding
# constraint is the latest of the three first-available dates.
print("The earliest possible start date")
max(
    date_index.unique()[0]
    for date_index in (
        daily_adjClose.index,
        ContextualInfo.index.get_level_values('date'),
        adjClose.index,
    )
)

The earliest possible start date


Timestamp('2006-01-31 00:00:00')

In [4]:
# A backtest can only run while every table still has data, so the binding
# constraint is the earliest of the three last-available dates. This value
# bounds the END of the sample window (the label previously said "start").
print("The latest possible end date")
min(daily_adjClose.index.unique()[-1], ContextualInfo.index.get_level_values('date').unique()[-1], adjClose.index.unique()[-1])

The latest possible start date


Timestamp('2023-04-21 00:00:00')

In [5]:
# Sample window for the experiment.
# Alternative window kept for reference: 1996-11-30 to 2022-12-31.
start_date = pd.to_datetime('2006-05-31', format='%Y-%m-%d')
end_date = pd.to_datetime('2022-12-31', format='%Y-%m-%d')

# Factor series requested from the database.
factors = ('Mkt-RF', 'RF')

# Fetch the monthly factors over the window, discard the outer column level,
# and strip stray whitespace from the remaining column names.
factorRet = get_monthly_factors(factors, start_date, end_date, Database).droplevel(0, axis=1)
factorRet.columns = list(map(str.strip, factorRet.columns))

In [6]:
# Restrict every table to the [start_date, end_date] window using
# label-based slicing on the date index.
adjClose = adjClose.loc[start_date:end_date]
daily_adjClose = daily_adjClose.loc[start_date:end_date]

# The contextual info has a two-level index: slice the date level and keep
# every entry of the second level.
idx = pd.IndexSlice
ContextualInfo = ContextualInfo.loc[idx[start_date:end_date, :], :]

In [7]:
# Starting capital for the backtest ($100,000).
initialVal = 100000

# Length of one investment period, nominally in months (strictly: in rows
# of the data).
investPeriod = 6

# Divide the factor returns by 100 (presumably percent -> decimal; confirm
# against the data source), then drop the first observation so the factor
# table lines up with the asset returns that are computed from price changes.
factorRet = (factorRet / 100).iloc[1:, :]

# Separate the risk-free series from the remaining factor returns.
riskFree = factorRet['RF']
factorRet = factorRet.drop(columns='RF')

In [8]:
# Asset identifiers come from the price table's columns; the date axis comes
# from the (already trimmed) factor-return table.
tickers, dates = adjClose.columns, factorRet.index

In [9]:
# Asset returns per period: percentage change of the adjusted close, with the
# first (undefined) observation dropped.
returns = adjClose.pct_change(1).iloc[1:, :]

# Excess returns: subtract each period's risk-free rate from every asset in
# that row. The subtraction is positional (raw values, no index alignment),
# exactly as before, but broadcasts the rate down the rows directly instead
# of materialising a T x T diagonal matrix and multiplying it by a matrix of
# ones.
returns = returns.sub(riskFree.values, axis=0)

# Align the price table to the asset and factor returns tables by discarding
# the first observation.
# NOTE: this cell reassigns adjClose, so re-running it on its own drops a
# further row each time.
adjClose = adjClose.iloc[1:, :]

In [10]:
# Sanity check: the price, asset-return and factor-return tables must all
# start on the same date after the trimming done above.
assert all(
    adjClose.index[0] == table.index[0]
    for table in (returns, factorRet)
)

# 2. Run your program

This section will run your Project1_Function in a loop. The data will be loaded progressively as a growing window of historical observations.
Rebalancing will take place at the end of every loop iteration.

### 2.1 Some Standard Allocation Strategies

In [11]:
# Investor preferences and solver settings (static throughout the backtest).
#
# A strategy pairs an estimator function with an optimizer function; the only
# constraint is that the estimator's outputs are the optimizer's inputs, e.g.
#     Strategy = general_strategy(sample_estimator, MVO, NumObs=48)

# ---- experiment identity and model choice ----
run = 'SunJan212024'
hyperparam_search = False  # True if model has hyperparams
optimizer = ClassWgtSVMMVO  # alternatives: CardMVO, MVO
estimator = exponential_weighted_estimator_shrinkage

# ---- estimation ----
EstNumObs = 750
k = 6*20  # estimation horizon
alpha = 1 - 0.990  # estimation decay
return_premium = 0.25  # optimization premium

# ---- portfolio constraints ----
turnover_constraints = True
turnover_limit = 0.75  # values considered: 0.75, 1, 1.25
cardinality_ratio = 0.1  # optimization

# ---- portfolio optimizer solve limits ----
MipGap = 0.05
limit_time = 20

# ---- return target used by the portfolio optimizer ----
# Available strategies: mean_target, premium_target, ticker_return_target
target_return_strategy = premium_target
target_return_strategy_args = ['premium']  # args for the return strategy

# ---- big-M computation ----
# Alternatives: objectiveBigMStrategy, objectiveBigMStrategyTightening
bigMStrategy = ClassWgtHyperparameterBigMStrategyV2

# Limits for the feasible solution used to calculate big M.
bigM_limit_time = 20
bigM_MipGap = 0.05
bigM_SolutionLimit = 10

SkipSOCP = True  # flag to skip the SOCP
tightening_iter_lim = 1  # number of iterations of tightening to do

# Limits for computing a better solution in the tightening strategy.
SVMMVO_bigM_time_limit = 5
SVMMVO_MipGap = 0.1
SVMMVO_SolutionLimit = 5

SOCP_limit_time = 10  # time limit for the SOCPs

# Column position of the reference ticker; it is the comparison target when
# the ticker_return_target strategy is used.
ticker_index = tickers.get_loc(ticker_str)

# ---- SVM settings ----
q = 20  # subset of features
C = 0.1  # separation importance
epsilon = 0.1  # regularization amount for SVM
separable = False  # do not restrict the hyperplane to be separable

# ---- logging ----
LogToConsole = False  # display optimization results in the console
Verbose = False  # print statements

# Grid for the hyperparameter search: the same geometric grid for both names.
hyperparams = {name: list(np.geomspace(0.1, 100, 4)) for name in ('C', 'epsilon')}

# alpha for Q(t, a) = (1 - q_alpha) Q(t-1, a) + R(t, a); 0.3187 was also tried
q_alpha = 0.6838

# Risk/turnover tolerance. Planned values: 0.1, 0.2, 0.4, 0.8, 1.6 (0.6, 1.0)
kappa = 1.6

# Bundle everything into the single mapping handed to the strategy. The key
# order is kept exactly as before.
investor_preferences = dict(
    k=k,  # horizon
    alpha=alpha,  # decay
    premium=return_premium,  # return premium preference
    EstNumObs=EstNumObs,
    ticker_index=ticker_index,  # benchmark used in place of the premium, depending on how r_min is set
    target_return_strategy=target_return_strategy,  # strategy for setting return
    target_return_strategy_args=target_return_strategy_args,  # args for return setting strategy
    turnover_limit=turnover_limit,  # turnover limit
    turnover_constraints=turnover_constraints,  # turnover limit choice (Y/N)
    cardinality_ratio=cardinality_ratio,  # pct of assets to invest in
    MipGap=MipGap,  # problem solving limits
    limit_time=limit_time,
    bigMStrategy=bigMStrategy,
    bigM_limit_time=bigM_limit_time,
    bigM_MipGap=bigM_MipGap,
    bigM_SolutionLimit=bigM_SolutionLimit,
    SkipSOCP=SkipSOCP,
    tightening_iter_lim=tightening_iter_lim,
    SVMMVO_bigM_time_limit=SVMMVO_bigM_time_limit,
    SVMMVO_MipGap=SVMMVO_MipGap,
    SVMMVO_SolutionLimit=SVMMVO_SolutionLimit,
    SOCP_limit_time=SOCP_limit_time,
    q=q,
    C=C,
    epsilon=epsilon,
    separable=separable,
    LogToConsole=LogToConsole,
    Verbose=Verbose,
    hyperparams=hyperparams,
    q_alpha=q_alpha,
    kappa=kappa,
)


In [12]:
# Only these optimizers have an experiment directory layout defined.
if optimizer.__name__ not in ('CardMVO', 'SVMMVO', 'ClassWgtSVMMVO'):
    raise Exception("Model Type not supported for these experiments")

# Results live under Experiments//<universe>//<optimizer>; the SVM-based
# optimizers get one further level named after the feature-subset size q.
path = "//".join(["Experiments", universe, optimizer.__name__])
if optimizer.__name__ != 'CardMVO':
    path = path + "//" + str(q)
print(path)

# Create the experiment data dictionary for this path and these preferences.
df = init_dataframe(investor_preferences, path)

Experiments//ETF//ClassWgtSVMMVO//20


In [17]:
from IPython.display import clear_output

# Parameter grid for the sweep: every combination is backtested once.
kappas = [0.05, 0.1]
cardinality_ratios = [0.05, 0.07] #[0.07, 0.1, 0.2, 0.4, 0.6, 0.8] # includes mvo
turnover_limits = [0.75, 1, 1.25]
return_premiums = [0.25, 0.5, 1, 1.5]

# The backtest calendar depends only on the data, not on the swept
# parameters, so it is computed once instead of on every iteration.

# Start of out-of-sample test period
testStart = returns.index[0] + pd.offsets.DateOffset(years=2)

# End of the first investment period
testEnd = testStart + pd.offsets.MonthBegin(investPeriod) - pd.offsets.DateOffset(days = 1)

# End of calibration period
calEnd = testStart - pd.offsets.DateOffset(days = 1)

# Total number of investment periods
NoPeriods = math.ceil((returns.index[-1].to_period('M') - testStart.to_period('M')).n / investPeriod)
n = len(tickers)

for cardinality_ratio, turnover_limit, kappa, premium in product(cardinality_ratios, turnover_limits, kappas, return_premiums):

    # Keep only the current run's log visible.
    clear_output(wait=True)

    # Overwrite the swept entries of the shared preferences mapping; all
    # other entries keep the values set in the configuration cell.
    investor_preferences['turnover_limit'] = turnover_limit # turnover limit
    investor_preferences['cardinality_ratio'] = cardinality_ratio
    investor_preferences['kappa'] = kappa
    investor_preferences['premium'] = premium

    # Report all four swept values: runs that differ only in kappa or premium
    # were previously indistinguishable in the log.
    configuration = (cardinality_ratio, turnover_limit, kappa, premium)

    # initialize strategy
    Strategy = general_strategy(estimator, optimizer,
                                investor_preferences = investor_preferences)

    Strategy.extract_estimation_info = populate_exponential_weighted_estimator_shrinkage
    Strategy.extract_optimization_info = populate_kwargs

    env = environment()

    print("Testing configuration ", configuration)
    print("Test Start", testStart)
    print("Test End", testEnd)
    print("Calibration End", calEnd)

    portfValue, elapsed_time, turnover, x, backtest_results, hyperparam_hist = execute_backtest(env, Strategy, tickers, returns, factorRet, ContextualInfo,
                     adjClose, daily_adjClose, NoPeriods, testStart, testEnd, calEnd, initialVal,
                     investPeriod, hyperparam_search = hyperparam_search)
    print("Completed configuration ", configuration)

    # Register the run in the data dictionary and get its unique id, then
    # write the results to disk under that id.
    df, uid = add_to_data_dict(path, df, run, estimator,
                 optimizer, universe,
                 imputation_method, ticker_str,
                 hyperparam_search, NoPeriods, investor_preferences)

    export_experimental_results(path, uid, portfValue, elapsed_time, x, turnover)
    export_dict(path, uid, backtest_results, 'backtest_results')
    export_dict(path, uid, hyperparam_hist, 'hyperparam_hist')

    # Drop the per-run objects before the next configuration starts.
    del env
    del Strategy

Testing configuration  (0.07, 1.25)
Test Start 2008-06-30 00:00:00
Test End 2008-11-30 00:00:00
Calibration End 2008-06-29 00:00:00
Set parameter TimeLimit to value 20
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter TimeLimit to value 20
Set parameter FeasibilityTol to value 1e-08
Set parameter MIPGap to value 0.05
Set parameter TimeLimit to value 20
Set parameter MIPGap to value 0.05

In [18]:
# Load the data dictionary already stored on disk for this experiment path.
# NOTE: read_pickle should only be used on files this project wrote itself.
curr_df = pd.read_pickle(f"{path}//data_dictionary.pkl")

# Append the stored records to this session's records, drop exact duplicate
# rows, and write the merged table back to the same file.
df = pd.concat([df, curr_df], axis=0).drop_duplicates()
df.to_pickle(f"{path}//data_dictionary.pkl")

In [19]:
# Distinct cardinality ratios recorded so far in the data dictionary.
df['cardinality_ratio'].unique()

array(['0.05', '0.07', '0.01', '0.03', '0.6', '0.8', '0.2', '0.4', '0.09',
       '0.1'], dtype=object)

In [20]:
# Display the merged experiment data dictionary (pandas truncates long
# tables when rendering them).
df

Unnamed: 0,uid,estimator,optimizer,universe,imputation_method,ticker_str,hyperparam_search,NoPeriods,run,C,...,premium,q,q_alpha,separable,target_return_strategy,target_return_strategy_args,ticker_index,tightening_iter_lim,turnover_constraints,turnover_limit
d5137f5ff88d4f9d79843651d8c159445e2aecde,d5137f5ff88d4f9d79843651d8c159445e2aecde,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,0.25,20,0.6838,False,premium_target,['premium'],250,1,True,0.75
157e0d4942e654688354e91bb47d6226641b6060,157e0d4942e654688354e91bb47d6226641b6060,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,0.5,20,0.6838,False,premium_target,['premium'],250,1,True,0.75
bf03ec6a54a2fd0e5a6f496ab8ae279f3a086869,bf03ec6a54a2fd0e5a6f496ab8ae279f3a086869,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,1,20,0.6838,False,premium_target,['premium'],250,1,True,0.75
81f0e92d514155bff3905dedffaf234ebd0862ea,81f0e92d514155bff3905dedffaf234ebd0862ea,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,1.5,20,0.6838,False,premium_target,['premium'],250,1,True,0.75
c255bdc5cca1bab5042aea5f5678363e732cc0d5,c255bdc5cca1bab5042aea5f5678363e732cc0d5,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,0.25,20,0.6838,False,premium_target,['premium'],250,1,True,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
a8a28da8816092ef262d86950c42559dad59d1ea,a8a28da8816092ef262d86950c42559dad59d1ea,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,1.5,20,0.6838,False,premium_target,['premium'],250,1,True,1.25
bedd90e47492837b5e0baf06e0731075cde6222e,bedd90e47492837b5e0baf06e0731075cde6222e,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,0.25,20,0.6838,False,premium_target,['premium'],250,1,True,1.25
e4d01350e28559e245084f74030b24a876e96b40,e4d01350e28559e245084f74030b24a876e96b40,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,0.5,20,0.6838,False,premium_target,['premium'],250,1,True,1.25
488ade0ae795b876fb7cf5d0a9a96a19671c365c,488ade0ae795b876fb7cf5d0a9a96a19671c365c,exponential_weighted_estimator_shrinkage,ClassWgtSVMMVO,ETF,Univariate,SPY,False,29,SunJan212024,0.1,...,1,20,0.6838,False,premium_target,['premium'],250,1,True,1.25
