In [15]:
import pandas as pd
import random
import numpy as np

from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

from tqdm.auto import tqdm

In [7]:
random.seed(42)

kwargs = {
    "index_col": 0,
    "parse_dates": True
}

# Load in prices
stock_prices = pd.read_csv("workflow/data/constituents.csv", **kwargs)
index_prices = pd.read_csv("workflow/data/indices.csv", **kwargs)

all_stocks = stock_prices.columns.to_list()
indices = index_prices.columns.to_list()

pct = .02
stocks = random.sample(all_stocks, int(len(all_stocks)*pct))
stock_prices = stock_prices[stocks]

stock_rets = np.log(stock_prices).diff()[1:]
index_rets = np.log(index_prices).diff()[1:]

print(stock_prices.shape, index_prices.shape)

(1232, 9) (1232, 4)


In [59]:
window = 60
dates = stock_rets.index[window:]
scaler = StandardScaler()
mses = pd.DataFrame(index=dates, columns=stocks)

for i in tqdm(range(window, len(dates))):
    X_train = scaler.fit_transform(index_rets.iloc[i-window:i])
    X_test = scaler.transform(index_rets.iloc[i:i+window])
    
    Rs_train = stock_rets.iloc[i-window:i]
    Rs_test = stock_rets.iloc[i:i+window]

    for stock in stocks:

        y_train = Rs_train[stock]
        y_test = Rs_test[stock]

        lr = LinearRegression().fit(X_train, y_train)
        y_hat = lr.predict(X_test)
        mse = mean_squared_error(y_test, y_hat)
        
        mses.loc[mses.index[i], stock] = mse

mses = mses.shift(window).dropna()

  0%|          | 0/1111 [00:00<?, ?it/s]

In [61]:
mses2 = pd.DataFrame(index=dates, columns=stocks)

for i in tqdm(range(window, len(dates))):
    X_train = scaler.fit_transform(index_rets["^GSPC"].iloc[i-window:i].values.reshape(-1, 1))
    X_test = scaler.transform(index_rets["^GSPC"].iloc[i:i+window].values.reshape(-1, 1))
    
    Rs_train = stock_rets.iloc[i-window:i]
    Rs_test = stock_rets.iloc[i:i+window]

    for stock in stocks:

        y_train = Rs_train[stock]
        y_test = Rs_test[stock]

        lr = LinearRegression().fit(X_train, y_train)
        y_hat = lr.predict(X_test)
        mse = mean_squared_error(y_test, y_hat)
        
        mses2.loc[mses2.index[i], stock] = mse

mses2 = mses2.shift(window).dropna()

  0%|          | 0/1111 [00:00<?, ?it/s]

In [68]:
mses.var()

NWS          0.0
BF-B         0.0
AEP          0.0
ROK          0.0
DXCM    0.000001
DFS     0.000001
CTAS         0.0
BX           0.0
RL      0.000001
dtype: object