In [1]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Plot defaults: seaborn "whitegrid" style first, then the matplotlib
# overrides (thin lines, black axes border) applied on top of it.
sns.set_style('whitegrid')
plt.rcParams.update({
    'lines.linewidth': 1,
    'axes.edgecolor': 'k',
})

In [2]:
# Read the master dataset and convert the 'Date' column to datetimes in a
# single chained expression.
df = (
    pd.read_csv('data/master.csv', parse_dates=True)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

In [3]:
def rolling_window_train_test_split(stock_data, start_date, end_date, train_len):
    """Split ``stock_data`` into a training window and a one-step test slice.

    The training rows are ``stock_data[end_date - train_len:end_date]`` and the
    test rows are ``stock_data[end_date:end_date + 1]``.

    Args:
        stock_data: DataFrame holding the 'Date', 'stock', 'q_log_return' and
            'q_price' columns; every other column is treated as a feature.
        start_date: Accepted for interface compatibility but not used -- the
            window start is always recomputed as ``end_date - train_len``.
        end_date: Slice bound that ends the training window and starts the
            test slice.
        train_len: Length of the training window, subtracted from ``end_date``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` where the ``X`` frames have
        the four non-feature columns removed and the ``y`` series are the
        'q_log_return' column of the matching rows.
    """
    non_feature_cols = ['Date', 'stock', 'q_log_return', 'q_price']

    window_start = end_date - train_len
    train_rows = stock_data[window_start:end_date]
    test_rows = stock_data[end_date:end_date + 1]

    def _features(rows):
        # Keep only the predictor columns.
        return rows.drop(non_feature_cols, axis=1)

    return (
        _features(train_rows),
        _features(test_rows),
        train_rows['q_log_return'],
        test_rows['q_log_return'],
    )

In [None]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error

# Both estimators are created once and re-fitted on every rolling window.
linear_model = LinearRegression()
ridge_model = Ridge(alpha=1.0)

# One record per (stock, window). Records are accumulated in a plain list and
# turned into a DataFrame once at the end: `DataFrame.append` was removed in
# pandas 2.0 and growing a frame row-by-row is quadratic in any version.
# NOTE: the stock is not stored, so rows from different stocks share 'Date' values.
mse_records = []

# Loop through each unique stock
for stock in tqdm(df['stock'].unique()):
    stock_data = df[df['stock'] == stock].sort_values('Date')

    # One window per quarter-end between the stock's first and last date
    quarter_ends = pd.date_range(
        start=stock_data['Date'].min(),
        end=stock_data['Date'].max(),
        freq='Q',
    )
    for end_date in quarter_ends:
        # Window covers the four years up to and including `end_date`
        start_date = end_date - pd.DateOffset(years=4)
        mask = (stock_data['Date'] >= start_date) & (stock_data['Date'] <= end_date)
        train_data = stock_data.loc[mask]

        # Extract features and target
        X_train = train_data.drop(['Date', 'stock', 'q_price', 'q_log_return'], axis=1)
        y_train = train_data['q_log_return']

        # Train models
        linear_model.fit(X_train, y_train)
        ridge_model.fit(X_train, y_train)

        # NOTE: the error is measured on the same rows the models were fitted
        # on, i.e. these are in-sample MSE values.
        mse_linear = mean_squared_error(y_train, linear_model.predict(X_train))
        mse_ridge = mean_squared_error(y_train, ridge_model.predict(X_train))

        mse_records.append({
            'Date': end_date,
            'MSE_Linear': mse_linear,
            'MSE_Ridge': mse_ridge,
        })

# Explicit `columns` keeps the expected layout even when no window was produced
mse_df = pd.DataFrame(mse_records, columns=['Date', 'MSE_Linear', 'MSE_Ridge'])

# Identify the model with the minimum MSE on each row
mse_df['Best_Model'] = mse_df[['MSE_Linear', 'MSE_Ridge']].idxmin(axis=1)

In [67]:
stocks = df.stock.unique()
x_cols_drop = ['Date', 'q_price', 'q_log_return', 'stock']
target = ['q_log_return']

len_train = 16   # 16 quarters, 4 years
len_test = 4     # 4 quarters, 1 year
len_trade = 1    # 1 quarter, 3 mos

# Rows consumed by one train/test/trade window; windows do not overlap.
chunk = len_train + len_test + len_trade

# {stock: {first test date as 'YYYY-MM-DD': predictions for the test rows}}
stock_preds = {stock: {} for stock in stocks}

for stock in tqdm(stocks):
    # Windows are cut by row position, so the rows must be in chronological
    # order first. `sort_values` returns a new frame, so `df` is not modified.
    stock_data = df[df.stock == stock].sort_values('Date')

    # `+ 1` makes the bound inclusive so a final window that ends exactly on
    # the last row is not dropped.
    for i in range(chunk, len(stock_data) + 1, chunk):
        data = stock_data.iloc[i - chunk:i]
        train = data.iloc[:len_train]
        test = data.iloc[len_train:len_train + len_test]

        # Features of row t are paired with the target of row t+1: the last
        # feature row and the first target row are dropped to line them up.
        X_train = train.drop(x_cols_drop, axis=1).iloc[:-1]
        y_train = train[target].iloc[1:]
        X_test = test.drop(x_cols_drop, axis=1)

        model = Ridge(alpha=1, random_state=42)
        model.fit(X_train, y_train)

        # `target` is a list, so y_train is a one-column frame and `preds`
        # comes back two-dimensional (one row per test row).
        preds = model.predict(X_test)
        stock_preds[stock][test['Date'].min().strftime('%Y-%m-%d')] = preds

  0%|          | 0/133 [00:00<?, ?it/s]

100%|██████████| 133/133 [00:05<00:00, 24.34it/s]


In [73]:
# Build a frame with one row per stock from the nested prediction mapping,
# then transpose it so the inner date keys become rows and stocks the columns.
preds_df = pd.DataFrame.from_dict(stock_preds, orient='index').transpose()
preds_df

Unnamed: 0,ABT,ADP,AEP,ALK,AMGN,AON,AOS,APA,APD,ATO,...,VRTX,VZ,WDC,WEC,WELL,WM,WMB,XEL,XOM,XRAY
2004-06-30,"[[0.05986199958084015], [0.055243785330769124]...","[[0.09357294229046872], [0.06951778182296228],...","[[0.23356085778374203], [0.04984212649134023],...","[[-0.9006338454964502], [0.6702327165678421], ...","[[-0.2920465006132731], [-0.098678028620653], ...","[[0.20698326454606986], [0.06771067610324799],...","[[0.219813318259942], [-0.16157024855111335], ...","[[0.1733592274966667], [0.16933048546466778], ...","[[0.10266069992780263], [0.10670657419039659],...","[[-0.1698364864334827], [-0.33302095237508256]...",...,"[[0.8380816890470317], [1.7468967191881846], [...","[[0.031287596274247924], [0.04186433892172661]...","[[0.2829780569137937], [0.3106134304434313], [...","[[0.12416794981570473], [0.44804655046446507],...","[[0.13981524924045163], [-0.04204691026710128]...","[[0.19119673913615134], [0.20761679673534697],...","[[0.6344802852233877], [-0.8905388745185925], ...","[[0.27498895021072145], [-0.6208212290140274],...","[[0.05894080869331575], [0.06007302878985121],...","[[0.09647842861462574], [0.0952743377936934], ..."
2009-09-30,"[[-0.04456590389224602], [-0.07888255547866158...","[[-0.06496008777985007], [-0.02231544195852208...","[[0.01988280191135247], [0.10389520048186507],...","[[0.011096720610650312], [7.238090917848858], ...","[[0.48596873600356716], [0.33427524945179465],...","[[0.07194716737416176], [0.08260543907739601],...","[[0.023215931845358362], [0.07755885916828426]...","[[0.26873108331387957], [0.31107701007266847],...","[[0.07222194595020202], [0.16171499268624653],...","[[-0.0014899614460223143], [0.0289695609479904...",...,"[[-0.13998527482913214], [0.1601117395541507],...","[[0.05764675569854691], [0.8392522925571048], ...","[[-0.04876242246205473], [-0.05574071730131472...","[[-0.004959487963473716], [0.05942806559348544...","[[-0.162850803067366], [0.07244043058544347], ...","[[-0.019090481506238766], [-0.0278173829101673...","[[-0.35170506296057247], [-0.24524263363783055...","[[0.06670594098729987], [0.04814790142305836],...","[[-0.1566633831326512], [-0.10368189275351361]...","[[-0.2162518491416295], [-0.17999535297335365]..."
2014-12-31,"[[0.11303290405999195], [-0.8560094463530423],...","[[0.22305214177858065], [0.12144448808129046],...","[[0.20555308350332757], [0.09356563796079886],...","[[0.4039752611175791], [0.08227009693881926], ...","[[0.0518726542545261], [0.05487583703405319], ...","[[-0.08517398419370084], [0.1272360682813154],...","[[0.05101291216325016], [0.0037162031876517565...","[[1.6835582118363415], [1.7068101565821818], [...","[[0.19975972187236485], [0.2833010017314531], ...","[[0.08779745451519548], [0.12117216994865783],...",...,"[[0.36396423384915866], [0.3061769833519216], ...","[[-0.008427834744515222], [-0.0311807379420999...","[[0.22477231322889013], [0.2440136541472251], ...","[[0.11608329586968986], [0.10996672961205992],...","[[0.21907617698508886], [0.26260658911559154],...","[[0.21980651959220787], [-1.0692655218081852],...","[[0.3076083637785383], [4.9649666164071355], [...","[[0.15724078895140187], [0.2597212741204163], ...","[[0.02634385001314825], [-0.04694054675385352]...","[[0.03928986425984937], [0.08281522266469654],..."
2020-03-31,"[[0.10956714103668042], [0.20755426339481378],...","[[-0.03001698591217311], [-0.06242856431050725...","[[0.07480670844540643], [0.07119435914633082],...","[[0.23321050387259962], [0.34340460469502704],...","[[0.011324934151294874], [0.12183183819255206]...","[[0.03239550111496438], [-0.07328071389838473]...","[[0.20759918492445362], [0.23523578641507037],...","[[-1.4566097050435785], [-1.162064568188478], ...","[[0.1287103284566942], [0.1770856770171868], [...","[[0.06313979654102202], [0.05200373306882761],...",...,"[[0.6483527056147782], [0.7210434853986862], [...","[[-0.016281868735047333], [0.08862741042524447...","[[0.0017940303918297928], [0.0057987983172616]...","[[0.20417490176484115], [0.1296626988291048], ...","[[-0.04653982670725074], [0.061929197220030485...","[[0.07858243081732474], [0.12556414491203904],...","[[-0.33162762873672524], [-0.23962550460232734...","[[0.06853805072708463], [0.11443472226116777],...","[[-1.1099146270712965], [-0.617145515115783], ...","[[-0.4996299641428317], [0.614831793253322], [..."
