In [1]:
import yfinance as yf
import pandas as pd
import time
from datetime import datetime
import numpy as np

stocks = ['AMD', 'NVDA', 'META', 'TSLA']
start = datetime(2019, 2, 1)
end = datetime(2020, 4, 30)

stock_prices         = yf.download(stocks, start , end, auto_adjust = False)
stock_prices = stock_prices.resample('ME').last()
stock_prices.index   = stock_prices.index.tz_localize(None)      # change yf date format to match pdr
stock_prices         = stock_prices.filter(like='Adj Close')     # reduce to just columns with this in the name
stock_prices.columns = stock_prices.columns.get_level_values(1)  # tickers as col names, works no matter order of tics

stock_prices = stock_prices.unstack(level=0).reset_index()
stock_prices.columns = ['Ticker', 'Date', 'Adj Close']
stock_prices['Return'] = stock_prices.groupby('Ticker')['Adj Close'].pct_change()
stock_prices.set_index('Date', inplace=True) 
print(stock_prices.head(10))
stock_prices.to_csv("outputs/returns.csv", index=True)

[*********************100%***********************]  4 of 4 completed

           Ticker  Adj Close    Return
Date                                  
2019-02-28    AMD  23.530001       NaN
2019-03-31    AMD  25.520000  0.084573
2019-04-30    AMD  27.629999  0.082680
2019-05-31    AMD  27.410000 -0.007962
2019-06-30    AMD  30.370001  0.107990
2019-07-31    AMD  30.450001  0.002634
2019-08-31    AMD  31.450001  0.032841
2019-09-30    AMD  28.990000 -0.078219
2019-10-31    AMD  33.930000  0.170404
2019-11-30    AMD  39.150002  0.153846





In [2]:
pivot_data = stock_prices.pivot(columns='Ticker', values='Adj Close')
print(pivot_data)

Ticker            AMD        META      NVDA       TSLA
Date                                                  
2019-02-28  23.530001  160.692657  3.829051  21.325333
2019-03-31  25.520000  165.908096  4.457048  18.657333
2019-04-30  27.629999  192.492783  4.492791  15.912667
2019-05-31  27.410000  176.637512  3.366233  12.344000
2019-06-30  30.370001  192.094650  4.081179  14.897333
2019-07-31  30.450001  193.318893  4.192758  16.107332
2019-08-31  31.450001  184.799042  4.166809  15.040667
2019-09-30  28.990000  177.244644  4.329989  16.058001
2019-10-31  33.930000  190.750992  5.000370  20.994667
2019-11-30  39.150002  200.694122  5.395382  21.996000
2019-12-31  45.860001  204.287201  5.857404  27.888666
2020-01-31  47.000000  200.962891  5.885533  43.371334
2020-02-29  45.480000  191.567139  6.726964  44.532665
2020-03-31  45.480000  166.017548  6.565811  34.933334
2020-04-30  53.660000  193.279068  7.434112  53.367332


In [3]:
pairs = [('AMD', 'NVDA'), ('AMD', 'META'), ('NVDA','TSLA')]

spread_list = []

for pair in pairs:
    ticker_1, ticker_2 = pair
    spread = np.log(pivot_data[ticker_1]) - np.log(pivot_data[ticker_2])

    spread_mean = spread.mean()
    spread_std = spread.std()
    z_spread = (spread - spread_mean) / spread_std
    
    pair_df = z_spread.reset_index()
    pair_df['Date'] = pd.to_datetime(pair_df['Date'])  # Convert Date column to datetime
    pair_df.set_index('Date', inplace=True)
    pair_df['Ticker Pair'] = f'{ticker_1}-{ticker_2}'
    pair_df.columns = ['Spread', 'Ticker Pair']
    spread_list.append(pair_df[['Ticker Pair', 'Spread']])

spread_df = pd.concat(spread_list)

In [4]:
pivot_data = stock_prices.pivot(columns='Ticker', values='Return')
print(pivot_data)

Ticker           AMD      META      NVDA      TSLA
Date                                              
2019-02-28       NaN       NaN       NaN       NaN
2019-03-31  0.084573  0.032456  0.164009 -0.125109
2019-04-30  0.082680  0.160237  0.008019 -0.147109
2019-05-31 -0.007962 -0.082368 -0.250748 -0.224266
2019-06-30  0.107990  0.087508  0.212387  0.206848
2019-07-31  0.002634  0.006373  0.027340  0.081223
2019-08-31  0.032841 -0.044071 -0.006189 -0.066222
2019-09-30 -0.078219 -0.040879  0.039162  0.067639
2019-10-31  0.170404  0.076202  0.154823  0.307427
2019-11-30  0.153846  0.052126  0.078997  0.047695
2019-12-31  0.171392  0.017903  0.085633  0.267897
2020-01-31  0.024858 -0.016273  0.004802  0.555160
2020-02-29 -0.032340 -0.046754  0.142966  0.026776
2020-03-31  0.000000 -0.133371 -0.023956 -0.215557
2020-04-30  0.179859  0.164209  0.132246  0.527691


In [5]:
pairs = [('AMD', 'NVDA'), ('AMD', 'META'), ('NVDA','TSLA')]

spread_list = []

for pair in pairs:
    ticker_1, ticker_2 = pair
    spread = pivot_data[ticker_1] - pivot_data[ticker_2]
    pair_df = spread.reset_index()
    pair_df['Date'] = pd.to_datetime(pair_df['Date'])  # Convert Date column to datetime
    pair_df.set_index('Date', inplace=True)
    pair_df['Ticker Pair'] = f'{ticker_1}-{ticker_2}'
    pair_df.columns = ['Return', 'Ticker Pair']
    spread_list.append(pair_df[['Ticker Pair', 'Return']])

return_spread_df = pd.concat(spread_list)

print(return_spread_df)

           Ticker Pair    Return
Date                            
2019-02-28    AMD-NVDA       NaN
2019-03-31    AMD-NVDA -0.079436
2019-04-30    AMD-NVDA  0.074661
2019-05-31    AMD-NVDA  0.242785
2019-06-30    AMD-NVDA -0.104398
2019-07-31    AMD-NVDA -0.024706
2019-08-31    AMD-NVDA  0.039030
2019-09-30    AMD-NVDA -0.117381
2019-10-31    AMD-NVDA  0.015581
2019-11-30    AMD-NVDA  0.074849
2019-12-31    AMD-NVDA  0.085759
2020-01-31    AMD-NVDA  0.020056
2020-02-29    AMD-NVDA -0.175307
2020-03-31    AMD-NVDA  0.023956
2020-04-30    AMD-NVDA  0.047613
2019-02-28    AMD-META       NaN
2019-03-31    AMD-META  0.052117
2019-04-30    AMD-META -0.077557
2019-05-31    AMD-META  0.074406
2019-06-30    AMD-META  0.020482
2019-07-31    AMD-META -0.003739
2019-08-31    AMD-META  0.076912
2019-09-30    AMD-META -0.037340
2019-10-31    AMD-META  0.094202
2019-11-30    AMD-META  0.101720
2019-12-31    AMD-META  0.153489
2020-01-31    AMD-META  0.041131
2020-02-29    AMD-META  0.014413
2020-03-31

In [6]:
final_df = spread_df.merge(return_spread_df, how = 'inner', left_on = ['Date', 'Ticker Pair'], right_on = ['Date', 'Ticker Pair'],validate = 'one_to_one')

In [7]:
final_df.to_csv("outputs/spreads.csv", index=True)