In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf

from sklearn.linear_model import LinearRegression
from workflow.tools.utils import set_plot_style
from tqdm.auto import tqdm
from datetime import datetime as dt

from statsmodels.tsa.stattools import adfuller

from warnings import filterwarnings

filterwarnings("ignore")
set_plot_style()

In [3]:
stocks = [
    "AAPL",   # Apple Inc.
    "MSFT",   # Microsoft Corporation
    "AMZN",   # Amazon.com Inc.
    "GOOGL",  # Alphabet Inc. (Google) - Class A shares
    "META",   # Meta Platforms, Inc. (formerly Facebook)
    "BRK-B",  # Berkshire Hathaway Inc. - Class B shares
    "JNJ",    # Johnson & Johnson
    "V",      # Visa Inc.
    "WMT",    # Walmart Inc.
    "PG",     # Procter & Gamble Co.
    "JPM",    # JPMorgan Chase & Co.
    "NVDA",   # NVIDIA Corporation
    "DIS",    # The Walt Disney Company
    "HD",     # The Home Depot, Inc.
    "MA"      # Mastercard Incorporated
]

etfs = ["SPY"]

start = "2015-10-11"
end = "2017-10-11"

stock_prices = yf.download(stocks, start, end)["Adj Close"].dropna(axis=1)
etf_prices = yf.download(etfs, start, end)["Adj Close"].reindex(stock_prices.index)

stock_rets = np.log(stock_prices).diff()[1:]
etf_rets = np.log(etf_prices).diff()[1:]

for i in range(len(stock_rets.index)):
    assert stock_rets.index[i] == etf_rets.index[i]

stock_rets.shape, etf_rets.shape

stock_rets.to_csv("workflow/data/stock_rets.csv")
etf_rets.to_csv("workflow/data/etf_rets.csv")

[*********************100%%**********************]  15 of 15 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
stock_rets = pd.read_csv("workflow/data/stock_rets.csv", index_col=0, parse_dates=True)
etf_rets = pd.read_csv("workflow/data/etf_rets.csv", index_col=0, parse_dates=True)

stocks = stock_rets.columns
etfs = etf_rets.columns

In [5]:
window = 60
betas = pd.DataFrame(columns=stock_rets.columns, index=stock_rets.index[window:])

for date in tqdm(stock_rets.index[window:]):
    Rs = stock_rets.loc[:date][-window:]
    Rm = etf_rets.loc[:date][-window:].values.reshape(-1, 1)

    for stock in stocks:
        Ri = Rs[stock]
        market_model = LinearRegression().fit(Rm, Ri)
        betas.loc[date, stock] = market_model.coef_.item()

  0%|          | 0/443 [00:00<?, ?it/s]

In [59]:
trading_days = 252
reversion_window = 30

s_scores = {}

for date in tqdm(stock_rets.index[window:]):
    s_scores[date] = {}
    for i in range(len(stocks)):

        ou_params = pd.DataFrame(
            index=[stock for stock in stocks if stock != stocks[i]],
            columns=["a", "b", "var(z)", "sigma_eq", "m"]
        )

        for j in range(i + 1, len(stocks)):

            Ri = stock_rets[stocks[i]].loc[:date][-window:].values
            Rj = stock_rets[stocks[j]].loc[:date][-window:].values.reshape(-1, 1)

            lr1 = LinearRegression().fit(Rj, Ri)
            
            e = Ri - lr1.predict(Rj)
            B = lr1.coef_
            E = e.cumsum()

            E0 = E[:-1].reshape(-1, 1)
            E1 = E[1:]
            lr2 = LinearRegression().fit(E0, E1)

            a = lr2.intercept_
            b = lr2.coef_

            k = -np.log(b) * trading_days

            if k > trading_days / reversion_window:
                m = a / (1 - b)
                z = E1 = lr2.predict(E0)
                sigma_eq = np.sqrt(np.var(z) / (1 - b**2))
                
                s = (E[-1] - m) / sigma_eq

                s_scores[date][(stocks[i], stocks[j])] = s.item()

s_scores = pd.DataFrame(s_scores).T

  0%|          | 0/443 [00:00<?, ?it/s]

In [67]:
s_scores.loc[date][("AAPL", "AMZN")]

-0.38773653714577877

In [62]:
signals = pd.DataFrame(columns=stock_rets.columns, index=stock_rets.index[window:])

for date in tqdm(signals.index):

    positions = {stock: 0 for stock in stocks}
    pos = 0

    for i in range(len(stocks)):
        for j in range(i+1, len(stocks)):

            s = s_scores.loc[date, stock]
            
            if s > 1.25:
                pos = -1
                positions[stock] = pos
            elif s < -1.25:
                pos = 1
                positions[stock] = pos
            elif s > -0.5 and pos == 1:
                pos = 0
                positions[stock] = pos
            elif s < 0.75 and pos == -1:
                pos = 0
                positions[stock] = pos
            else:
                positions[stock] = pos
            
        signals.loc[date] = positions

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,...,MSFT,MSFT,MSFT,MSFT,NVDA,NVDA,NVDA,PG,PG,V
Unnamed: 0_level_1,AMZN,BRK-B,DIS,GOOGL,HD,JNJ,JPM,MA,META,MSFT,...,NVDA,PG,V,WMT,PG,V,WMT,V,WMT,WMT
2016-01-08,-1.129511,-0.820396,-1.094693,-1.138134,-0.913265,-0.905975,-0.736223,-0.671554,-0.928907,-0.929122,...,-1.007624,-0.661761,-1.005034,-1.349556,-1.151927,-1.371726,-1.099627,-0.069747,-1.279128,-2.019007
2016-01-11,-0.995575,-0.733444,-1.020770,-0.913000,-0.735393,-0.750351,-0.505437,-0.586453,-0.781695,-0.708107,...,-0.967073,-0.762911,-1.280608,-1.333783,-0.775450,-0.736233,-0.913911,-0.021400,-1.033462,-1.420406
2016-01-12,-0.773925,-0.664552,-0.924304,-0.844686,-0.724812,-0.669220,-0.360829,-0.551597,-0.742285,-0.567041,...,-0.920148,-0.664639,-1.279751,-1.180607,-0.723019,-0.779656,-0.833191,-0.159723,-0.871162,-1.085529
2016-01-13,-0.724643,-0.678963,-1.097392,-0.877375,-0.569776,-0.761579,-0.379938,-0.543709,-0.791451,-0.689568,...,-1.194988,-0.864542,-1.578611,-1.470776,-0.707392,-0.723726,-0.759801,-0.133892,-0.729733,-1.070212
2016-01-14,-0.359356,-0.504654,-0.657881,-0.467092,-0.250656,-0.559967,-0.173707,-0.374338,-0.525324,-0.464223,...,-0.621558,-0.465307,-0.925645,-1.072483,-0.744113,-0.810331,-0.784276,-0.203903,-0.771368,-0.988190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-10-04,-0.493214,-0.714916,-0.733358,-0.539651,-0.727454,-0.738377,-0.593275,-0.463713,-0.588872,-0.481537,...,-0.743591,-0.515216,-0.183459,-0.298595,0.085003,0.434300,0.444014,-0.715171,-0.779128,-1.212456
2017-10-05,-0.352380,-0.534531,-0.526856,-0.420640,-0.526564,-0.538534,-0.480299,-0.337081,-0.490406,-0.447251,...,-0.076370,-0.004698,0.320333,0.240514,0.533074,0.577572,0.726982,-0.927793,-1.083357,-0.812691
2017-10-06,-0.359003,-0.481835,-0.507317,-0.456696,-0.499908,-0.513730,-0.425133,-0.349761,-0.443355,-0.462083,...,0.127850,0.205767,0.643269,0.400018,0.400544,0.527802,0.648977,-0.762656,-0.977631,-0.605079
2017-10-09,-0.365358,-0.423800,-0.464063,-0.423421,-0.447606,-0.459885,-0.365986,-0.344541,-0.368430,-0.475160,...,0.512867,0.708561,1.069514,0.757163,0.957905,0.857355,0.933732,-0.839340,-1.198004,-0.608757


In [54]:
pd.DataFrame(s_scores).T.astype(float)

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,...,MSFT,MSFT,MSFT,MSFT,NVDA,NVDA,NVDA,PG,PG,V
Unnamed: 0_level_1,AMZN,BRK-B,DIS,GOOGL,HD,JNJ,JPM,MA,META,MSFT,...,NVDA,PG,V,WMT,PG,V,WMT,V,WMT,WMT
2016-01-08,-1.129511,-0.820396,-1.094693,-1.138134,-0.913265,-0.905975,-0.736223,-0.671554,-0.928907,-0.929122,...,-1.007624,-0.661761,-1.005034,-1.349556,-1.151927,-1.371726,-1.099627,-0.069747,-1.279128,-2.019007
2016-01-11,-0.995575,-0.733444,-1.020770,-0.913000,-0.735393,-0.750351,-0.505437,-0.586453,-0.781695,-0.708107,...,-0.967073,-0.762911,-1.280608,-1.333783,-0.775450,-0.736233,-0.913911,-0.021400,-1.033462,-1.420406
2016-01-12,-0.773925,-0.664552,-0.924304,-0.844686,-0.724812,-0.669220,-0.360829,-0.551597,-0.742285,-0.567041,...,-0.920148,-0.664639,-1.279751,-1.180607,-0.723019,-0.779656,-0.833191,-0.159723,-0.871162,-1.085529
2016-01-13,-0.724643,-0.678963,-1.097392,-0.877375,-0.569776,-0.761579,-0.379938,-0.543709,-0.791451,-0.689568,...,-1.194988,-0.864542,-1.578611,-1.470776,-0.707392,-0.723726,-0.759801,-0.133892,-0.729733,-1.070212
2016-01-14,-0.359356,-0.504654,-0.657881,-0.467092,-0.250656,-0.559967,-0.173707,-0.374338,-0.525324,-0.464223,...,-0.621558,-0.465307,-0.925645,-1.072483,-0.744113,-0.810331,-0.784276,-0.203903,-0.771368,-0.988190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-10-04,-0.493214,-0.714916,-0.733358,-0.539651,-0.727454,-0.738377,-0.593275,-0.463713,-0.588872,-0.481537,...,-0.743591,-0.515216,-0.183459,-0.298595,0.085003,0.434300,0.444014,-0.715171,-0.779128,-1.212456
2017-10-05,-0.352380,-0.534531,-0.526856,-0.420640,-0.526564,-0.538534,-0.480299,-0.337081,-0.490406,-0.447251,...,-0.076370,-0.004698,0.320333,0.240514,0.533074,0.577572,0.726982,-0.927793,-1.083357,-0.812691
2017-10-06,-0.359003,-0.481835,-0.507317,-0.456696,-0.499908,-0.513730,-0.425133,-0.349761,-0.443355,-0.462083,...,0.127850,0.205767,0.643269,0.400018,0.400544,0.527802,0.648977,-0.762656,-0.977631,-0.605079
2017-10-09,-0.365358,-0.423800,-0.464063,-0.423421,-0.447606,-0.459885,-0.365986,-0.344541,-0.368430,-0.475160,...,0.512867,0.708561,1.069514,0.757163,0.957905,0.857355,0.933732,-0.839340,-1.198004,-0.608757
