In [None]:
#import the dataset and show the first 5 lines of CRSP 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Handle compustat data
compu = pd.read_csv("/content/drive/My Drive/FINA3327_Final/compu_uodated.csv")
compu = compu.rename(columns={'LPERMNO':'PERMNO','datadate':'date'},inplace = False)
compu = compu[['PERMNO','date','fyear','at','cogs','revt','seq']]
compu['date'] = compu['date']//100
compu.head()

Unnamed: 0,PERMNO,date,fyear,at,cogs,revt,seq
0,25881,197012,1970.0,33.45,30.529,45.335,10.544
1,25881,197112,1971.0,29.33,33.973,47.033,8.382
2,25881,197212,1972.0,19.907,22.702,34.362,7.021
3,25881,197312,1973.0,21.771,24.704,37.75,8.567
4,25881,197412,1974.0,25.638,36.646,50.325,10.257


In [None]:
#Handle CRSP data
crsp = pd.read_csv("/content/drive/My Drive/FINA3327_Final/crsp.csv")
crsp['fyear'] = crsp['date']//10000.0
crsp['date'] = crsp['date']//100
crsp['RET'] = crsp['RET'].replace('C',np.nan)
crsp['RET'] = crsp['RET'].replace('B',np.nan)
crsp['mkt_cap'] = crsp['PRC']*crsp['SHROUT']
crsp.head()

Unnamed: 0,PERMNO,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap
0,10000,198512,7952,,,,1985,
1,10000,198601,7952,-4.375,,3680.0,1986,-16100.0
2,10000,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0
3,10000,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0
4,10000,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0


In [None]:
#Merge compustat and crsp
whole_data = pd.merge(crsp, compu, 'left')
whole_data['LPERMNO'] = whole_data['PERMNO'] 
whole_data = whole_data.groupby(whole_data['PERMNO']).ffill()
whole_data = whole_data.rename({'LPERMNO':'PERMNO'})
whole_data.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
1,198601,7952,-4.375,,3680.0,1986,-16100.0,,,,,10000
2,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0,,,,,10000
3,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0,,,,,10000
4,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0,,,,,10000


In [None]:
#Drop rows where mkt_cap(ie. PRC) is negative because there is no trade that month
indexNames = whole_data[ whole_data['mkt_cap'] < 0 ].index
whole_data = whole_data.drop(indexNames, inplace=False)
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
19,198512,7953,,,,1985,,,,,,10001
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [None]:
#Drop data which has NAN in any coloums
whole_data = whole_data.dropna(axis=0,how='any')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [None]:
#Calculate profitability and BM_ratio and double sorting
whole_data['Profitability'] = (whole_data['revt']-whole_data['cogs'])/whole_data['at']
whole_data['BM_ratio'] = (whole_data['seq']/whole_data['mkt_cap'])
whole_data['rankPro'] = whole_data['Profitability'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rankBM'] = whole_data['BM_ratio'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rank'] = (whole_data['rankPro']*0.2 + whole_data['rankBM']*0.8).groupby(whole_data['date']).rank(ascending=True,method='dense')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,957.0
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,975.0
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1023.0
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,1312.0
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,945.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,3754.0
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,3693.0
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,3712.0
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,3696.0


In [None]:
#strategy
whole_data['strategy_rule'] = (whole_data['rank']<=50) & (whole_data['PRC'].notnull())
whole_data['strategy_rule'] = whole_data['strategy_rule'].astype(int) # convert T/F to 1/0
whole_data['market_value'] = whole_data.PRC * whole_data.SHROUT
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,957.0,0,6.317625e+03
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,975.0,0,6.565375e+03
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1023.0,0,6.937000e+03
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,1312.0,0,6.937000e+03
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,945.0,0,6.689250e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,3754.0,0,4.643391e+08
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,3693.0,0,4.067015e+08
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,3712.0,0,3.678235e+08
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,3696.0,0,5.380286e+08


In [None]:
#portfolio build
def eq_weight(df):
    stock_number = df['strategy_rule'].sum()
    # we divided 12 because we need monthly statistics
    try:
        df['eq_weight'] = df['strategy_rule']/stock_number
    except:
        weights = np.nan
    return df
def val_weight(df):
    total_mv = df.loc[df['strategy_rule']==1,'market_value'].sum()
    try:
        df['val_weight']= df.loc[df['strategy_rule']==1,'market_value']/total_mv
    except:
        weights = np.nan
    return df

In [None]:
def sort_rank(df):
    sorted_list = df.sort_values(by=['rank','date'],ascending=[True,True])
    return sorted_list

In [None]:
whole_data_w = whole_data.groupby('date').apply(eq_weight)
whole_data_w = whole_data_w.groupby('date').apply(val_weight) 
whole_data_w.groupby('date').apply(sort_rank)

# we define a new dataframe "show" here to present the result of weighting
show = whole_data_w[(whole_data_w['strategy_rule']==1.0) 
          & (whole_data_w['date'] == 200703) 
          & (whole_data_w['fyear'] == 2007)]
show.sort_values(by=['rank'],ascending=[True]).head(10)

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
4189665,200703,7282,18.0,0.094225,452.0,2007,8136.0,3455.379,2262.663,4124.599,907.737,90660,0.538851,0.11157,628.0,10.0,1.0,1,8136.0,0.019608,0.000218
44728,200703,2089,14.0,-0.030841,4195.0,2007,58730.0,469.754,411.283,861.924,291.923,10294,0.959313,0.004971,105.0,178.0,2.0,1,58730.0,0.019608,0.001571
3936807,200703,37906,22.58,0.004448,6540.0,2007,147673.2,3921.235,396.168,2586.351,2170.031,88410,0.558544,0.014695,557.0,97.0,3.0,1,147673.2,0.019608,0.003949
3528319,200703,14777,53.85,0.059205,1633.0,2007,87937.05,2449.415,219.935,1527.946,1464.927,83647,0.53401,0.016659,654.0,87.0,4.0,1,87937.05,0.019608,0.002352
4007123,200703,42121,13.52,-0.008798,833.0,2007,11262.16,5513.707,5903.017,8322.028,1930.721,89151,0.438727,0.171434,1011.0,5.0,5.0,1,11262.16,0.019608,0.000301
3544470,200703,31702,16.3494,0.08996,373.0,2007,6098.326,963.725,1108.118,1542.021,235.011,83817,0.450235,0.038537,975.0,29.0,6.0,1,6098.326,0.019608,0.000163
3761217,200703,16207,0.34,-0.276596,22480.0,2007,7643.2,55.603,41.969,76.03,41.869,86237,0.612575,0.005478,423.0,169.0,7.0,1,7643.2,0.019608,0.000204
2184885,200703,4373,1.22,-0.054264,36374.0,2007,44376.28,180.548,179.023,317.452,147.006,58800,0.766716,0.003313,212.0,224.0,8.0,1,44376.28,0.019608,0.001187
2328826,200703,3230,90.53,0.073191,14954.0,2007,1353786.0,7913.03,1349.528,6898.139,5333.31,63263,0.701199,0.00394,295.0,207.0,9.0,1,1353786.0,0.019608,0.036206
3987648,200703,41451,91.7,0.100576,9295.0,2007,852351.5,12267.139,18304.239,25473.641,4699.98,88958,0.58444,0.005514,484.0,167.0,10.0,1,852351.5,0.019608,0.022795


In [None]:
#calculate return
def calculate_return(df):
    df['RET'] = df['RET'].astype(float)
    df['eq_weighted_r'] = df['eq_weight'] * df['RET']
    df['val_weighted_r'] = df['val_weight'] * df['RET']
    eq_return = pd.pivot_table(df,index='date',values='eq_weighted_r',aggfunc=np.sum)
    val_return = pd.pivot_table(df,index='date',values='val_weighted_r',aggfunc=np.sum)
    return_dataset = pd.concat([eq_return,val_return],axis=1)
    return return_dataset

In [None]:
def shift_return(df):
    df['val_weight'] = df['val_weight'].shift(1)
    return df

In [None]:
# shift return
portfolio = whole_data_w[whole_data_w.columns].groupby('LPERMNO').apply(shift_return)
portfolio = portfolio.dropna(axis=0,how='any')
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
5680,199707,7983,5.625,0.0,13620.0,1997,76612.5,246.015,702.007,925.714,129.268,10035,0.909323,0.001687,206.0,437.0,43.0,1,76612.5,0.02,0.012177
5681,199708,7983,7.875,0.4,13620.0,1997,107257.5,246.015,702.007,925.714,129.268,10035,0.909323,0.001205,197.0,677.0,96.0,0,107257.5,0.0,0.011182
5700,199903,7983,3.875,0.078261,14483.0,1999,56121.625,250.858,689.213,928.49,112.007,10035,0.953834,0.001996,199.0,652.0,73.0,0,56121.625,0.0,0.005549
5702,199905,7983,3.125,-0.137931,14483.0,1999,45259.375,250.858,689.213,928.49,112.007,10035,0.953834,0.002475,184.0,404.0,21.0,1,45259.375,0.02,0.007414
5703,199906,7983,7.5,1.4,14483.0,1999,108622.5,250.858,689.213,928.49,112.007,10035,0.953834,0.001031,176.0,1407.0,600.0,0,108622.5,0.0,0.008821


In [None]:
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
5680,199707,7983,5.625,0.0,13620.0,1997,76612.5,246.015,702.007,925.714,129.268,10035,0.909323,0.001687,206.0,437.0,43.0,1,76612.5,0.02,0.012177
5681,199708,7983,7.875,0.4,13620.0,1997,107257.5,246.015,702.007,925.714,129.268,10035,0.909323,0.001205,197.0,677.0,96.0,0,107257.5,0.0,0.011182
5700,199903,7983,3.875,0.078261,14483.0,1999,56121.625,250.858,689.213,928.49,112.007,10035,0.953834,0.001996,199.0,652.0,73.0,0,56121.625,0.0,0.005549
5702,199905,7983,3.125,-0.137931,14483.0,1999,45259.375,250.858,689.213,928.49,112.007,10035,0.953834,0.002475,184.0,404.0,21.0,1,45259.375,0.02,0.007414
5703,199906,7983,7.5,1.4,14483.0,1999,108622.5,250.858,689.213,928.49,112.007,10035,0.953834,0.001031,176.0,1407.0,600.0,0,108622.5,0.0,0.008821


In [None]:
#yearly return
return_dataset = calculate_return(portfolio)
return_dataset.head(5)

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r
date,Unnamed: 1_level_1,Unnamed: 2_level_1
196001,0.025,0.025
196002,-0.018018,-0.018018
196003,-0.050459,-0.050459
196004,0.009756,0.009756
196005,-0.057971,-0.057971


In [None]:
#cumulative return
final = return_dataset.copy()
final['eq_weighted_R'] = return_dataset['eq_weighted_r']+1
final['val_weighted_R'] = return_dataset['val_weighted_r']+1
final['eq_cum_R'] = final['eq_weighted_R'].cumprod()
final['val_cum_R'] = final['val_weighted_R'].cumprod()
final.head()

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r,eq_weighted_R,val_weighted_R,eq_cum_R,val_cum_R
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
196001,0.025,0.025,1.025,1.025,1.025,1.025
196002,-0.018018,-0.018018,0.981982,0.981982,1.006532,1.006532
196003,-0.050459,-0.050459,0.949541,0.949541,0.955743,0.955743
196004,0.009756,0.009756,1.009756,1.009756,0.965067,0.965067
196005,-0.057971,-0.057971,0.942029,0.942029,0.909121,0.909121


In [None]:
final.to_excel('3327 final Profit Double Sorting_BM.xlsx', sheet_name = 'return')

In [None]:
#data analysis

In [None]:
reg_raw = pd.read_excel("/content/drive/My Drive/FINA3327_Final/stat_data_BM.xlsx").iloc[:,1:]

In [None]:
reg_raw.head()

Unnamed: 0,Unnamed: 1,eq_weighted_r,val_weighted_r,eq_cum_r,val_cum_r,market_mon_r,market_mon_R,mkt_cum-r,eq_cum_r*,val_cum_r*,Mkt-rf,SMB,HML,Momentum,rf,eq_r-rf,val_r-rf
0,196001,0.025,0.025,1.025,1.025,-0.0665,0.9335,0.9335,1.025,1.025,-0.0698,0.0205,0.0269,-0.0349,0.0033,0.0217,0.0217
1,196002,-0.018018,-0.018018,1.006532,1.006532,0.0146,1.0146,0.947129,1.006532,1.006532,0.0117,0.0056,-0.0203,0.0386,0.0029,-0.020918,-0.020918
2,196003,-0.050459,-0.050459,0.955743,0.955743,-0.0128,0.9872,0.935006,0.955743,0.955743,-0.0163,-0.0047,-0.0284,0.0143,0.0035,-0.053959,-0.053959
3,196004,0.009756,0.009756,0.965067,0.965067,-0.0152,0.9848,0.920794,0.965067,0.965067,-0.0171,0.0039,-0.0237,0.0281,0.0019,0.007856,0.007856
4,196005,-0.057971,-0.057971,0.909121,0.909121,0.0339,1.0339,0.952009,0.909121,0.909121,0.0312,0.0127,-0.0372,0.0481,0.0027,-0.060671,-0.060671


In [None]:
#equal weight carhart

In [32]:
y_e_Ca = reg_raw['eq_r-rf']
x_Ca = reg_raw[['HML','SMB','Mkt-rf','Momentum']]
x_Ca.head()

Unnamed: 0,HML,SMB,Mkt-rf,Momentum
0,0.0269,0.0205,-0.0698,-0.0349
1,-0.0203,0.0056,0.0117,0.0386
2,-0.0284,-0.0047,-0.0163,0.0143
3,-0.0237,0.0039,-0.0171,0.0281
4,-0.0372,0.0127,0.0312,0.0481


In [33]:
y_e_Ca.head()

0    0.021700
1   -0.020918
2   -0.053959
3    0.007856
4   -0.060671
Name: eq_r-rf, dtype: float64

In [34]:
import statsmodels.api as sm

In [35]:
x1 = sm.add_constant(x_Ca)
lm = sm.OLS(y_e_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.673
Model:                            OLS   Adj. R-squared:                  0.671
Method:                 Least Squares   F-statistic:                     350.4
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          1.49e-163
Time:                        06:48:54   Log-Likelihood:                 1420.7
No. Observations:                 685   AIC:                            -2831.
Df Residuals:                     680   BIC:                            -2809.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0116      0.001     -9.479      0.0

In [None]:
#value weighted carhart

In [37]:
y_v_Ca = reg_raw['val_r-rf']

In [38]:
lm = sm.OLS(y_v_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.621
Model:                            OLS   Adj. R-squared:                  0.619
Method:                 Least Squares   F-statistic:                     278.9
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          9.05e-142
Time:                        06:49:23   Log-Likelihood:                 1321.2
No. Observations:                 685   AIC:                            -2632.
Df Residuals:                     680   BIC:                            -2610.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0050      0.001      3.535      0.0

In [None]:
#equal weighted CAPM

In [39]:
y_e_CA = reg_raw[['eq_r-rf']]
x_CA = reg_raw[['Mkt-rf']]

In [40]:
x2 = sm.add_constant(x_CA)
lm = sm.OLS(y_e_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.490
Model:                            OLS   Adj. R-squared:                  0.489
Method:                 Least Squares   F-statistic:                     655.4
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          7.25e-102
Time:                        06:49:36   Log-Likelihood:                 1268.0
No. Observations:                 685   AIC:                            -2532.
Df Residuals:                     683   BIC:                            -2523.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0118      0.001     -8.046      0.0

In [None]:
#value weighted CAPM

In [41]:
y_v_CA = reg_raw[['val_r-rf']]

In [42]:
lm = sm.OLS(y_v_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.576
Model:                            OLS   Adj. R-squared:                  0.575
Method:                 Least Squares   F-statistic:                     927.1
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          2.64e-129
Time:                        06:49:49   Log-Likelihood:                 1282.4
No. Observations:                 685   AIC:                            -2561.
Df Residuals:                     683   BIC:                            -2552.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0051      0.001      3.550      0.0