In [None]:
import pandas as pd

prices = pd.read_csv('../data/raw/stock_data.csv')
prices.set_index('Date', inplace=True)

price_window = prices.loc['2015-01-06':'2016-04-04'] # arbitrary approx. 1 year chosen
dates = price_window.index
ticker_A, ticker_B = 'KO', 'PEP' # Coca-Cola and PepsiCo

prices_A = price_window[ticker_A] # just price levels
prices_B = price_window[ticker_B]

Index(['2015-01-06', '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12',
       '2015-01-13', '2015-01-14', '2015-01-15', '2015-01-16', '2015-01-20',
       ...
       '2016-03-21', '2016-03-22', '2016-03-23', '2016-03-24', '2016-03-28',
       '2016-03-29', '2016-03-30', '2016-03-31', '2016-04-01', '2016-04-04'],
      dtype='object', name='Date', length=313)


In [102]:
# if linear combination of 2 price levels is stationary, likely good candidates
# so cointegration test
from statsmodels.tsa.stattools import coint
coint_t, p_value, _ = coint(prices_A, prices_B)
print(f'Cointegration test p-value: {p_value}')
# A p-value < 0.05 typically indicates cointegration

Cointegration test p-value: 0.008710306507991792


In [103]:
'''
priceA = alpha + beta * priceB + residuals
alpha is constant offset, beta is hedge ratio, residuals are spread
so spread = priceA - (alpha + beta * priceB)

We want to estimate alpha and beta using linear regression
Then we can compute spread and see if it's stationary
'''

import statsmodels.api as sm
X = sm.add_constant(prices_B) # add column of 1s for intercept
model = sm.OLS(prices_A, X).fit() # ordinary least squares regression
alpha, beta = model.params
print(f'Alpha: {alpha}, Beta: {beta}')
spread = model.resid
# print(spread)


Alpha: -7.6018041492785, Beta: 0.5254467300514323


In [104]:
# ADF test to check if spread actually mean-reverting
from statsmodels.tsa.stattools import adfuller
adf_result = adfuller(spread)
print(f"p-value: {adf_result[1]}")
# p-value < 0.05 indicates stationarity

p-value: 0.0017811801487220308


In [None]:
# compute z scores
z_scores = (spread - spread.mean()) / spread.std()
print(z_scores)

'''
z_scores = z_scores.values.tolist()
too_big = 0
too_small = 0
fine = 0
for i in z_scores:
    if abs(i) > 2:
        too_big += 1
    if abs(i) < 0.5:
        too_small += 1
    else:
        fine += 1
print(too_big, too_small, fine)
'''

Date
2015-01-06    3.564468
2015-01-07    2.497577
2015-01-08    2.022542
2015-01-09    1.878567
2015-01-12    1.677685
                ...   
2016-03-29    2.957954
2016-03-30    2.235037
2016-03-31    2.142186
2016-04-01    1.837961
2016-04-04    1.884064
Length: 313, dtype: float64


'\nz_scores = z_scores.values.tolist()\ntoo_big = 0\ntoo_small = 0\nfine = 0\nfor i in z_scores:\n    if abs(i) > 2:\n        too_big += 1\n    if abs(i) < 0.5:\n        too_small += 1\n    else:\n        fine += 1\nprint(too_big, too_small, fine)\n'

In [None]:
# everyday, look at today's z score
for date in dates:
    score = z_scores.loc[date]
    if score > 2: # A overpriced relative to B
        print(f"Day {date}: Short A, Long B")
        signal = 1
    elif score < -2: # B overpriced relative to A
        print(f"Day {date}: Long A, Short B")
        signal = -1
    elif abs(score) < 0.5: # spread has reverted to mean
        print(f"Day {date}: Close positions")
        signal = 0
    else:
        print(f"Day {date}: Hold positions")

Day 2015-01-06: Short A, Long B
Day 2015-01-07: Short A, Long B
Day 2015-01-08: Short A, Long B
Day 2015-01-09: Hold positions
Day 2015-01-12: Hold positions
Day 2015-01-13: Hold positions
Day 2015-01-14: Hold positions
Day 2015-01-15: Hold positions
Day 2015-01-16: Hold positions
Day 2015-01-20: Hold positions
Day 2015-01-21: Hold positions
Day 2015-01-22: Hold positions
Day 2015-01-23: Hold positions
Day 2015-01-26: Hold positions
Day 2015-01-27: Hold positions
Day 2015-01-28: Hold positions
Day 2015-01-29: Hold positions
Day 2015-01-30: Hold positions
Day 2015-02-02: Hold positions
Day 2015-02-03: Hold positions
Day 2015-02-04: Close positions
Day 2015-02-05: Hold positions
Day 2015-02-06: Close positions
Day 2015-02-09: Close positions
Day 2015-02-10: Close positions
Day 2015-02-11: Hold positions
Day 2015-02-12: Hold positions
Day 2015-02-13: Hold positions
Day 2015-02-17: Close positions
Day 2015-02-18: Hold positions
Day 2015-02-19: Close positions
Day 2015-02-20: Hold positions

In [None]:
# executing trades
cash = 100,000
price_A = prices_A.iloc[-1] # for now
price_B = prices_B.iloc[-1] # ""

# long/short in porportion to Beta

quantity_A = cash // (price_A + abs(beta) * price_B)
quantity_B = abs(beta) * quantity_A # else make it 0

print(quantity_A, quantity_B)
# something wrong

35.01972579956055 78.87175750732422
[1. 0.] [0.52544673 0.        ]


In [None]:
from collections import defaultdict
from src.portfolio import Portfolio

pf = Portfolio()
shortings = defaultdict(int)

# say we want to short A, long B
pf.cash += price_A * quantity_A
shortings[ticker_A] += quantity_A

pf.buy(ticker_B, price_B, quantity_B)

new_price_A = price_A - 1
new_price_B = price_B + 1

# to close deal:
shortings[ticker_A] -= quantity_A # are we always selling all?
pf.cash -= new_price_A * quantity_A 

pf.sell(ticker_B, new_price_B, quantity_B)

ModuleNotFoundError: No module named 'src'