In [1]:
#import the dataset and show the first 5 lines of CRSP 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Handle compustat data
compu = pd.read_csv("/content/drive/My Drive/FINA3327_Final/compu_uodated.csv")
compu = compu.rename(columns={'LPERMNO':'PERMNO','datadate':'date'},inplace = False)
compu = compu[['PERMNO','date','fyear','at','cogs','revt','seq']]
compu['date'] = compu['date']//100
compu.head()

Unnamed: 0,PERMNO,date,fyear,at,cogs,revt,seq
0,25881,197012,1970.0,33.45,30.529,45.335,10.544
1,25881,197112,1971.0,29.33,33.973,47.033,8.382
2,25881,197212,1972.0,19.907,22.702,34.362,7.021
3,25881,197312,1973.0,21.771,24.704,37.75,8.567
4,25881,197412,1974.0,25.638,36.646,50.325,10.257


In [4]:
#Handle CRSP data
crsp = pd.read_csv("/content/drive/My Drive/FINA3327_Final/crsp.csv")
crsp['fyear'] = crsp['date']//10000.0
crsp['date'] = crsp['date']//100
crsp['RET'] = crsp['RET'].replace('C',np.nan)
crsp['RET'] = crsp['RET'].replace('B',np.nan)
crsp['mkt_cap'] = crsp['PRC']*crsp['SHROUT']
crsp.head()

Unnamed: 0,PERMNO,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap
0,10000,198512,7952,,,,1985,
1,10000,198601,7952,-4.375,,3680.0,1986,-16100.0
2,10000,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0
3,10000,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0
4,10000,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0


In [5]:
#Merge compustat and crsp
whole_data = pd.merge(crsp, compu, 'left')
whole_data['LPERMNO'] = whole_data['PERMNO'] 
whole_data = whole_data.groupby(whole_data['PERMNO']).ffill()
whole_data = whole_data.rename({'LPERMNO':'PERMNO'})
whole_data.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
1,198601,7952,-4.375,,3680.0,1986,-16100.0,,,,,10000
2,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0,,,,,10000
3,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0,,,,,10000
4,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0,,,,,10000


In [6]:
#Drop rows where mkt_cap(ie. PRC) is negative because there is no trade that month
indexNames = whole_data[ whole_data['mkt_cap'] < 0 ].index
whole_data = whole_data.drop(indexNames, inplace=False)
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
19,198512,7953,,,,1985,,,,,,10001
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [7]:
#Drop data which has NAN in any coloums
whole_data = whole_data.dropna(axis=0,how='any')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [8]:
#Calculate profitability and BM_ratio and double sorting
whole_data['Profitability'] = (whole_data['revt']-whole_data['cogs'])/whole_data['at']
whole_data['BM_ratio'] = (whole_data['seq']/whole_data['mkt_cap'])
whole_data['rankPro'] = whole_data['Profitability'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rankBM'] = whole_data['BM_ratio'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rank'] = (whole_data['rankPro']*0+ whole_data['rankBM']*1).groupby(whole_data['date']).rank(ascending=True,method='dense')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,913.0
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,948.0
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1013.0
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,1310.0
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,944.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,4106.0
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,4093.0
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,4069.0
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,4076.0


In [9]:
#strategy
whole_data['strategy_rule'] = (whole_data['rank']<=50) & (whole_data['PRC'].notnull())
whole_data['strategy_rule'] = whole_data['strategy_rule'].astype(int) # convert T/F to 1/0
whole_data['market_value'] = whole_data.PRC * whole_data.SHROUT
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,913.0,0,6.317625e+03
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,948.0,0,6.565375e+03
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1013.0,0,6.937000e+03
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,1310.0,0,6.937000e+03
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,944.0,0,6.689250e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,4106.0,0,4.643391e+08
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,4093.0,0,4.067015e+08
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,4069.0,0,3.678235e+08
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,4076.0,0,5.380286e+08


In [10]:
#portfolio build
def eq_weight(df):
    stock_number = df['strategy_rule'].sum() 
    # we divided 12 because we need monthly statistics
    try:
        df['eq_weight'] = df['strategy_rule']/stock_number
    except:
        weights = np.nan
    return df
def val_weight(df):
    total_mv = df.loc[df['strategy_rule']==1,'market_value'].sum()
    try:
        df['val_weight']= df.loc[df['strategy_rule']==1,'market_value']/total_mv
    except:
        weights = np.nan
    return df

In [11]:
def sort_rank(df):
    sorted_list = df.sort_values(by=['rank','date'],ascending=[True,True])
    return sorted_list

In [12]:
whole_data_w = whole_data.groupby('date').apply(eq_weight)
whole_data_w = whole_data_w.groupby('date').apply(val_weight) 
whole_data_w.groupby('date').apply(sort_rank)

# we define a new dataframe "show" here to present the result of weighting
show = whole_data_w[(whole_data_w['strategy_rule']==1.0) 
          & (whole_data_w['date'] == 200703) 
          & (whole_data_w['fyear'] == 2007)]
show.sort_values(by=['rank'],ascending=[True]).head(10)

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
4096129,200703,44403,73.4,-0.040648,628.0,2007,46095.2,227954.115,6858.166,13800.795,12835.902,89856,0.030456,0.278465,4481.0,1.0,1.0,1,46095.2,0.02,0.002955
3893760,200703,18704,9.19,0.190414,821.0,2007,7544.99,3004.218,647.504,1573.609,1552.634,87773,0.308268,0.205783,1849.0,2.0,2.0,1,7544.99,0.02,0.000484
2671134,200703,1720,25.36,0.065546,2723.0,2007,69055.28,76942.469,55305.988,68402.69,12355.031,75456,0.170214,0.178915,2946.0,3.0,3.0,1,69055.28,0.02,0.004427
3872053,200703,17423,3.89,-0.185681,434.0,2007,1688.26,388.689,186.86,327.576,290.462,87458,0.362027,0.172048,1491.0,4.0,4.0,1,1688.26,0.02,0.000108
4007123,200703,42121,13.52,-0.008798,833.0,2007,11262.16,5513.707,5903.017,8322.028,1930.721,89151,0.438727,0.171434,1011.0,5.0,5.0,1,11262.16,0.02,0.000722
3854720,200703,36344,17.5,0.082251,248.0,2007,4340.0,1794.631,1206.506,1661.339,730.383,87272,0.253441,0.168291,2305.0,6.0,6.0,1,4340.0,0.02,0.000278
3971673,200703,41011,50.44,0.006385,455.0,2007,22950.2,9976.522,3856.67,6359.281,3726.194,88826,0.25085,0.16236,2335.0,7.0,7.0,1,22950.2,0.02,0.001471
4038480,200703,43153,65.28,0.076695,740.0,2007,48307.2,8367.235,1013.953,1733.703,7597.109,89402,0.08602,0.157267,3565.0,8.0,8.0,1,48307.2,0.02,0.003097
3516373,200703,14605,4.02,0.069149,1629.0,2007,6548.58,1233.761,329.187,721.503,957.708,83533,0.317984,0.146247,1784.0,9.0,9.0,1,6548.58,0.02,0.00042
4189665,200703,7282,18.0,0.094225,452.0,2007,8136.0,3455.379,2262.663,4124.599,907.737,90660,0.538851,0.11157,628.0,10.0,10.0,1,8136.0,0.02,0.000522


In [13]:
#calculate return
def calculate_return(df):
    df['RET'] = df['RET'].astype(float)
    df['eq_weighted_r'] = df['eq_weight'] * df['RET']
    df['val_weighted_r'] = df['val_weight'] * df['RET']
    eq_return = pd.pivot_table(df,index='date',values='eq_weighted_r',aggfunc=np.sum)
    val_return = pd.pivot_table(df,index='date',values='val_weighted_r',aggfunc=np.sum)
    return_dataset = pd.concat([eq_return,val_return],axis=1)
    return return_dataset

In [14]:
def shift_return(df):
    df['val_weight'] = df['val_weight'].shift(1)
    return df

In [15]:
# shift return
portfolio = whole_data_w[whole_data_w.columns].groupby('LPERMNO').apply(shift_return)
portfolio = portfolio.dropna(axis=0,how='any')
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
6312,198711,7988,1.125,-0.625,733.0,1987,824.625,54.939,344.501,347.746,12.863,10040,0.059066,0.015599,2907.0,24.0,24.0,1,824.625,0.02,0.000369
6313,198712,7988,1.375,0.222222,733.0,1987,1007.875,54.939,344.501,347.746,12.863,10040,0.059066,0.012762,3183.0,33.0,33.0,1,1007.875,0.02,0.000153
6314,198801,7988,2.0,0.454545,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,2907.0,37.0,37.0,1,1466.0,0.02,0.000218
6315,198802,7988,2.0,0.0,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,3003.0,38.0,38.0,1,1466.0,0.02,0.000307
6319,198806,7988,2.0,-0.2,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,2961.0,39.0,39.0,1,1466.0,0.02,0.000181


In [16]:
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
6312,198711,7988,1.125,-0.625,733.0,1987,824.625,54.939,344.501,347.746,12.863,10040,0.059066,0.015599,2907.0,24.0,24.0,1,824.625,0.02,0.000369
6313,198712,7988,1.375,0.222222,733.0,1987,1007.875,54.939,344.501,347.746,12.863,10040,0.059066,0.012762,3183.0,33.0,33.0,1,1007.875,0.02,0.000153
6314,198801,7988,2.0,0.454545,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,2907.0,37.0,37.0,1,1466.0,0.02,0.000218
6315,198802,7988,2.0,0.0,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,3003.0,38.0,38.0,1,1466.0,0.02,0.000307
6319,198806,7988,2.0,-0.2,733.0,1988,1466.0,54.939,344.501,347.746,12.863,10040,0.059066,0.008774,2961.0,39.0,39.0,1,1466.0,0.02,0.000181


In [17]:
#yearly return
return_dataset = calculate_return(portfolio)
return_dataset.head(5)

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r
date,Unnamed: 1_level_1,Unnamed: 2_level_1
196001,0.025,0.025
196002,-0.018018,-0.018018
196003,-0.050459,-0.050459
196004,0.009756,0.009756
196005,-0.057971,-0.057971


In [18]:
#cumulative return
final = return_dataset.copy()
final['eq_weighted_R'] = return_dataset['eq_weighted_r']+1
final['val_weighted_R'] = return_dataset['val_weighted_r']+1
final['eq_cum_R'] = final['eq_weighted_R'].cumprod()
final['val_cum_R'] = final['val_weighted_R'].cumprod()
final.head()

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r,eq_weighted_R,val_weighted_R,eq_cum_R,val_cum_R
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
196001,0.025,0.025,1.025,1.025,1.025,1.025
196002,-0.018018,-0.018018,0.981982,0.981982,1.006532,1.006532
196003,-0.050459,-0.050459,0.949541,0.949541,0.955743,0.955743
196004,0.009756,0.009756,1.009756,1.009756,0.965067,0.965067
196005,-0.057971,-0.057971,0.942029,0.942029,0.909121,0.909121


In [19]:
final.to_excel('3327 final Profit Double Sorting_BM only.xlsx', sheet_name = 'return')

In [20]:
#data analysis

In [21]:
reg_raw = pd.read_excel("/content/drive/My Drive/FINA3327_Final/stat_data_BM_only.xlsx")

In [22]:
reg_raw.head()

Unnamed: 0,Date,eq_weighted_r,val_weighted_r,eq_cum_r,val_cum_r,market_mon_r,market_mon_R,mkt_cum-r,eq_cum_r*,val_cum_r*,Mkt-rf,SMB,HML,Momentum,rf,eq_r-rf,val_r-rf
0,196001,0.025,0.025,1.025,1.025,-0.0665,0.9335,0.9335,1.025,1.025,-0.0698,0.0205,0.0269,-0.0349,0.0033,0.0217,0.0217
1,196002,-0.018018,-0.018018,1.006532,1.006532,0.0146,1.0146,0.947129,1.006532,1.006532,0.0117,0.0056,-0.0203,0.0386,0.0029,-0.020918,-0.020918
2,196003,-0.050459,-0.050459,0.955743,0.955743,-0.0128,0.9872,0.935006,0.955743,0.955743,-0.0163,-0.0047,-0.0284,0.0143,0.0035,-0.053959,-0.053959
3,196004,0.009756,0.009756,0.965067,0.965067,-0.0152,0.9848,0.920794,0.965067,0.965067,-0.0171,0.0039,-0.0237,0.0281,0.0019,0.007856,0.007856
4,196005,-0.057971,-0.057971,0.909121,0.909121,0.0339,1.0339,0.952009,0.909121,0.909121,0.0312,0.0127,-0.0372,0.0481,0.0027,-0.060671,-0.060671


In [23]:
#equal weight carhart

In [37]:
y_e_Ca = reg_raw[['eq_r-rf']]
x_Ca = reg_raw[['HML','SMB','Mkt-rf','Momentum']]
x_Ca.head()

Unnamed: 0,HML,SMB,Mkt-rf,Momentum
0,0.0269,0.0205,-0.0698,-0.0349
1,-0.0203,0.0056,0.0117,0.0386
2,-0.0284,-0.0047,-0.0163,0.0143
3,-0.0237,0.0039,-0.0171,0.0281
4,-0.0372,0.0127,0.0312,0.0481


In [38]:
y_e_Ca.head()

Unnamed: 0,eq_r-rf
0,0.0217
1,-0.020918
2,-0.053959
3,0.007856
4,-0.060671


In [39]:
import statsmodels.api as sm

In [40]:
x1 = sm.add_constant(x_Ca)
lm = sm.OLS(y_e_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.652
Model:                            OLS   Adj. R-squared:                  0.650
Method:                 Least Squares   F-statistic:                     318.7
Date:                Wed, 21 Apr 2021   Prob (F-statistic):          2.73e-154
Time:                        05:42:32   Log-Likelihood:                 1306.4
No. Observations:                 685   AIC:                            -2603.
Df Residuals:                     680   BIC:                            -2580.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0087      0.001     -6.018      0.0

In [41]:
#value weighted carhart

In [42]:
y_v_Ca = reg_raw[['val_r-rf']]

In [43]:
lm = sm.OLS(y_v_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.645
Model:                            OLS   Adj. R-squared:                  0.643
Method:                 Least Squares   F-statistic:                     309.3
Date:                Wed, 21 Apr 2021   Prob (F-statistic):          1.91e-151
Time:                        05:42:36   Log-Likelihood:                 1288.6
No. Observations:                 685   AIC:                            -2567.
Df Residuals:                     680   BIC:                            -2545.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0031      0.001      2.102      0.0

In [31]:
#equal weighted CAPM

In [44]:
y_e_CA = reg_raw[['eq_r-rf']]
x_CA = reg_raw[['Mkt-rf']]

In [45]:
x2 = sm.add_constant(x_CA)
lm = sm.OLS(y_e_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.500
Model:                            OLS   Adj. R-squared:                  0.499
Method:                 Least Squares   F-statistic:                     683.0
Date:                Wed, 21 Apr 2021   Prob (F-statistic):          6.85e-105
Time:                        05:42:40   Log-Likelihood:                 1182.1
No. Observations:                 685   AIC:                            -2360.
Df Residuals:                     683   BIC:                            -2351.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0086      0.002     -5.208      0.0

In [34]:
#value weighted CAPM

In [46]:
y_v_CA = reg_raw[['val_r-rf']]

In [47]:
lm = sm.OLS(y_v_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.554
Model:                            OLS   Adj. R-squared:                  0.554
Method:                 Least Squares   F-statistic:                     849.6
Date:                Wed, 21 Apr 2021   Prob (F-statistic):          5.51e-122
Time:                        05:42:44   Log-Likelihood:                 1210.4
No. Observations:                 685   AIC:                            -2417.
Df Residuals:                     683   BIC:                            -2408.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0033      0.002      2.057      0.0