In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import os   
import sys
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

from scipy.optimize import minimize
from scipy import interpolate

from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
import warnings
from scipy.optimize import fsolve
from scipy.stats import norm

from sklearn.decomposition import PCA
import warnings
from scipy.optimize import fsolve
import numpy.polynomial.polynomial as poly
warnings.filterwarnings(action='ignore')

In [2]:
tickers = pd.read_csv('top_50_tickers.csv')

In [4]:
tickers.ticker.unique()

array(['TSLA', 'NVDA', 'SMCI', 'PLTR', 'AMZN', 'META', 'BABA', 'LCID',
       'NIO', 'IBIT', 'AMD', 'GOOGL', 'MSFT', 'APP', 'MSTR', 'HOOD',
       'HIMS', 'F', 'GOOG', 'NKLA', 'AVGO', 'INTC', 'SOFI', 'MU', 'AMC',
       'WMT', 'SAVE', 'GM', 'SQ', 'TSM', 'COIN', 'MARA', 'ZI', 'PYPL',
       'ASTS', 'FSLR', 'IREN', 'BAC', 'CAVA', 'DKNG', 'NLY', 'XPEV',
       'NYCB', 'SNOW', 'FCX', 'TEM', 'SOUN', 'VRT', 'NFLX', 'RIVN'],
      dtype=object)

In [42]:
vols = pd.read_csv('./daily_variance_series_wrds.csv')

In [43]:
vols.SYM_ROOT.nunique()

48

In [44]:
vols = vols[vols['SYM_ROOT'] == vols['symbol']]

In [45]:
vols['month'] = pd.to_datetime(vols['DATE']).dt.month
vols['year'] = pd.to_datetime(vols['DATE']).dt.year

In [46]:
vols['ivol_q_log'] = np.log(vols['ivol_q'])

In [47]:
tmp = vols.groupby(['SYM_ROOT', 'year', 'month'])['ivol_q_log']\
    .apply(lambda x: (((x - x.shift(1))**2).sum() / x.count()) ).to_frame('lag1_mom')

In [48]:
tmp.reset_index()

Unnamed: 0,SYM_ROOT,year,month,lag1_mom
0,AMC,2013,12,1.083376
1,AMC,2014,1,0.426377
2,AMC,2014,2,0.443239
3,AMC,2014,3,0.434471
4,AMC,2014,4,0.450979
...,...,...,...,...
4882,ZI,2023,4,0.129572
4883,ZI,2023,5,0.342768
4884,ZI,2023,6,0.097355
4885,ZI,2023,7,0.105272


In [49]:
def moments(df):
    final_dfs = []
    for lag in range(1, 11):
        tmp = df.groupby(['SYM_ROOT', 'year', 'month'])['ivol_q_log']\
            .apply(lambda x: (((x - x.shift(lag))**2).sum() / (x.count() - lag)) ).to_frame(f'lag{lag}_mom')
        final_dfs.append(tmp)
    final_df = pd.concat(final_dfs, axis=1)
    return final_df

In [50]:
final = moments(vols)

In [51]:
final.dropna(inplace=True)

In [63]:
betas = pd.DataFrame(columns=['Betas'], index=final.index)
for idx,_ in final.iterrows():
    y = final.loc[idx].values
    X = np.log(range(1, 11)).reshape(-1,1)
    print(idx)
    print(y)
    print(X)
    mod = LinearRegression()
    mod.fit(X,y)
    betas.loc[idx, 'Betas'] = mod.coef_


('AMC', 2014, 1)
[0.44769548 0.63671562 0.65189745 0.88418699 0.98757458 1.08267835
 1.11954784 1.51859969 1.49192285 1.85879175]
[[0.        ]
 [0.69314718]
 [1.09861229]
 [1.38629436]
 [1.60943791]
 [1.79175947]
 [1.94591015]
 [2.07944154]
 [2.19722458]
 [2.30258509]]
('AMC', 2014, 2)
[0.46786336 0.77605268 0.90571311 0.58504847 0.57692376 0.54623066
 0.59993779 0.27391067 0.38842499 0.6848729 ]
[[0.        ]
 [0.69314718]
 [1.09861229]
 [1.38629436]
 [1.60943791]
 [1.79175947]
 [1.94591015]
 [2.07944154]
 [2.19722458]
 [2.30258509]]
('AMC', 2014, 3)
[0.45619442 0.42900541 0.27040976 0.54042979 0.54825449 0.5638115
 0.81812751 0.84301174 1.12422519 1.34004594]
[[0.        ]
 [0.69314718]
 [1.09861229]
 [1.38629436]
 [1.60943791]
 [1.79175947]
 [1.94591015]
 [2.07944154]
 [2.19722458]
 [2.30258509]]
('AMC', 2014, 4)
[0.4735284  0.46442448 0.52590392 0.65111812 0.39567226 0.73347092
 0.61925833 0.67668456 0.82009699 0.86867288]
[[0.        ]
 [0.69314718]
 [1.09861229]
 [1.38629436]
 [

In [60]:
betas.reset_index(inplace=True)

In [61]:
betas[betas['SYM_ROOT'] == 'TSLA']

Unnamed: 0,SYM_ROOT,year,month,Betas
4221,TSLA,2010,7,0.005040552806035276
4222,TSLA,2010,8,0.044566420876930184
4223,TSLA,2010,9,0.19245853388156764
4224,TSLA,2010,10,-0.11727641014185236
4225,TSLA,2010,11,0.07370322228222803
...,...,...,...,...
4373,TSLA,2023,3,-0.07662925049547699
4374,TSLA,2023,4,0.0038797793494168903
4375,TSLA,2023,5,0.014522306763279712
4376,TSLA,2023,6,0.01672431437047438
