Replicating Idiosyncratic Volatility Result.

In [41]:
#import statements
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from finance_byu.summarize import summary
from finance_byu.regtables import Regtable
from finance_byu.rolling import roll_idio

In [42]:
#load daily CRSP price data
df = pd.read_csv("crsp_daily.csv",parse_dates=['caldt'])
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr
0,10000,1986-01-07,10,3,3990,-2.5625,,1000.0,3680.0
1,10000,1986-01-08,10,3,3990,-2.5000,-0.024390,12800.0,3680.0
2,10000,1986-01-09,10,3,3990,-2.5000,0.000000,1400.0,3680.0
3,10000,1986-01-10,10,3,3990,-2.5000,0.000000,8500.0,3680.0
4,10000,1986-01-13,10,3,3990,-2.6250,0.050000,5450.0,3680.0
...,...,...,...,...,...,...,...,...,...
105258375,93436,2023-12-22,11,3,9999,252.5400,-0.007701,93148500.0,3178920.0
105258376,93436,2023-12-26,11,3,9999,256.6100,0.016116,86700700.0,3178920.0
105258377,93436,2023-12-27,11,3,9999,261.4400,0.018822,106251000.0,3178920.0
105258378,93436,2023-12-28,11,3,9999,253.1800,-0.031594,113251000.0,3178920.0


In [43]:
df['prclag'] = df.groupby('permno')['prc'].shift()
df['logret'] = np.log(1 + df['ret'])
# df = df.query("caldt >= '1963-01-01'").reset_index(drop=True)
# df = df.query("caldt <= '2000-12-30'").reset_index(drop=True)
df = df.query("'1963-01-01' <= caldt and caldt <= '2000-12-30'").reset_index(drop=True)
df = df.query("prclag >= 5").reset_index(drop=True)
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,logret
0,10001,1986-09-03,11,3,4920,7.0000,0.037037,5350.0,985.0,6.7500,0.036368
1,10001,1986-09-04,11,3,4920,6.7500,-0.035714,7200.0,985.0,7.0000,-0.036367
2,10001,1986-09-05,11,3,4920,6.5000,-0.037037,3940.0,985.0,6.7500,-0.037740
3,10001,1986-09-08,11,3,4920,6.7500,0.054615,1610.0,985.0,6.5000,0.053176
4,10001,1986-09-09,11,3,4920,6.3750,-0.055556,1400.0,985.0,6.7500,-0.057159
...,...,...,...,...,...,...,...,...,...,...,...
30950690,93316,2000-03-10,11,3,5060,15.2500,0.008264,8555.0,3082.0,15.1250,0.008230
30950691,93316,2000-03-13,11,3,5060,15.3750,0.008197,10685.0,3082.0,15.2500,0.008164
30950692,93316,2000-03-14,11,3,5060,15.0625,-0.020325,1800.0,3082.0,15.3750,-0.020534
30950693,93316,2000-03-15,11,3,5060,15.0625,0.000000,2100.0,3082.0,15.0625,0.000000


In [44]:
#import daily market returns, both equal and value weighted
market_returns = pd.read_csv("dsi.csv",parse_dates=['caldt'])
market_returns

Unnamed: 0,caldt,mkt,ewmkt
0,1926-01-02,0.005689,0.009516
1,1926-01-04,0.000706,0.005780
2,1926-01-05,-0.004821,-0.001927
3,1926-01-06,-0.000423,0.001182
4,1926-01-07,0.004988,0.008453
...,...,...,...
25793,2023-12-22,0.002440,0.007676
25794,2023-12-26,0.005218,0.009674
25795,2023-12-27,0.001995,0.005276
25796,2023-12-28,-0.000108,0.002042


In [45]:
#merge market returns to overall stock dataframe
df = pd.merge(df, market_returns, on = ['caldt'], how = 'left')
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,logret,mkt,ewmkt
0,10001,1986-09-03,11,3,4920,7.0000,0.037037,5350.0,985.0,6.7500,0.036368,0.002953,-0.001926
1,10001,1986-09-04,11,3,4920,6.7500,-0.035714,7200.0,985.0,7.0000,-0.036367,0.012870,0.007118
2,10001,1986-09-05,11,3,4920,6.5000,-0.037037,3940.0,985.0,6.7500,-0.037740,-0.011584,-0.002484
3,10001,1986-09-08,11,3,4920,6.7500,0.054615,1610.0,985.0,6.5000,0.053176,-0.010801,-0.008489
4,10001,1986-09-09,11,3,4920,6.3750,-0.055556,1400.0,985.0,6.7500,-0.057159,-0.002734,-0.004689
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30950690,93316,2000-03-10,11,3,5060,15.2500,0.008264,8555.0,3082.0,15.1250,0.008230,-0.002837,0.000308
30950691,93316,2000-03-13,11,3,5060,15.3750,0.008197,10685.0,3082.0,15.2500,0.008164,-0.014556,-0.008059
30950692,93316,2000-03-14,11,3,5060,15.0625,-0.020325,1800.0,3082.0,15.3750,-0.020534,-0.022405,-0.012365
30950693,93316,2000-03-15,11,3,5060,15.0625,0.000000,2100.0,3082.0,15.0625,0.000000,0.006799,-0.005430


In [52]:
#should we use log ret? also how do we get excess market return? ddof = 0?

df['idio_vol'] = df.groupby('permno')[['ret', 'mkt']].apply(lambda x: roll_idio(x['ret'], x['mkt'], win = 30, minp =30, ddof = 0)).reset_index(drop=True)
df = df.query("idio_vol == idio_vol").reset_index(drop=True)
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,logret,mkt,ewmkt,idio_vol
0,10001,1986-10-17,11,3,4920,-6.5625,-0.027778,0.0,991.0,6.7500,-0.028171,-0.002519,-0.000335,0.031562
1,10001,1986-10-21,11,3,4920,6.7500,0.058824,100.0,991.0,6.3750,0.057159,-0.000263,-0.000121,0.032630
2,10001,1986-10-22,11,3,4920,7.0000,0.037037,100.0,991.0,6.7500,0.036368,0.001406,0.000868,0.032581
3,10001,1986-10-23,11,3,4920,7.0000,0.000000,2500.0,991.0,7.0000,0.000000,0.010694,0.004210,0.031726
4,10001,1986-10-24,11,3,4920,6.7500,-0.035714,6600.0,991.0,7.0000,-0.036367,-0.002450,0.001582,0.031073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30253556,93316,2000-03-10,11,3,5060,15.2500,0.008264,8555.0,3082.0,15.1250,0.008230,-0.002837,0.000308,0.008796
30253557,93316,2000-03-13,11,3,5060,15.3750,0.008197,10685.0,3082.0,15.2500,0.008164,-0.014556,-0.008059,0.008045
30253558,93316,2000-03-14,11,3,5060,15.0625,-0.020325,1800.0,3082.0,15.3750,-0.020534,-0.022405,-0.012365,0.008413
30253559,93316,2000-03-15,11,3,5060,15.0625,0.000000,2100.0,3082.0,15.0625,0.000000,0.006799,-0.005430,0.008282


In [None]:
factors = pd.read_csv('factors.csv',parse_dates=['caldt'])
factors

Unnamed: 0,caldt,exmkt,smb,hml,umd,rf
0,1927-01-31,-0.06,-0.37,4.54,0.36,0.25
1,1927-02-28,4.18,0.04,2.94,-2.14,0.26
2,1927-03-31,0.13,-1.65,-2.61,3.61,0.30
3,1927-04-30,0.46,0.30,0.81,4.30,0.25
4,1927-05-31,5.44,1.53,4.73,3.00,0.30
...,...,...,...,...,...,...
1156,2023-05-31,0.35,1.61,-7.72,-0.63,0.36
1157,2023-06-30,6.46,1.54,-0.26,-2.37,0.40
1158,2023-07-31,3.21,2.08,4.11,-3.98,0.45
1159,2023-08-31,-2.39,-3.16,-1.06,3.77,0.45
