In [1]:
#import the dataset and show the first 5 lines of CRSP 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Handle compustat data
compu = pd.read_csv("/content/drive/My Drive/FINA3327_Final/compu_uodated.csv")
compu = compu.rename(columns={'LPERMNO':'PERMNO','datadate':'date'},inplace = False)
compu = compu[['PERMNO','date','fyear','at','cogs','revt','seq']]
compu['date'] = compu['date']//100
compu.head()

Unnamed: 0,PERMNO,date,fyear,at,cogs,revt,seq
0,25881,197012,1970.0,33.45,30.529,45.335,10.544
1,25881,197112,1971.0,29.33,33.973,47.033,8.382
2,25881,197212,1972.0,19.907,22.702,34.362,7.021
3,25881,197312,1973.0,21.771,24.704,37.75,8.567
4,25881,197412,1974.0,25.638,36.646,50.325,10.257


In [4]:
#Handle CRSP data
crsp = pd.read_csv("/content/drive/My Drive/FINA3327_Final/crsp.csv")
crsp['fyear'] = crsp['date']//10000.0
crsp['date'] = crsp['date']//100
crsp['RET'] = crsp['RET'].replace('C',np.nan)
crsp['RET'] = crsp['RET'].replace('B',np.nan)
crsp['mkt_cap'] = crsp['PRC']*crsp['SHROUT']
crsp.head()

Unnamed: 0,PERMNO,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap
0,10000,198512,7952,,,,1985,
1,10000,198601,7952,-4.375,,3680.0,1986,-16100.0
2,10000,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0
3,10000,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0
4,10000,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0


In [5]:
#Merge compustat and crsp
whole_data = pd.merge(crsp, compu, 'left')
whole_data['LPERMNO'] = whole_data['PERMNO'] 
whole_data = whole_data.groupby(whole_data['PERMNO']).ffill()
whole_data = whole_data.rename({'LPERMNO':'PERMNO'})
whole_data.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
1,198601,7952,-4.375,,3680.0,1986,-16100.0,,,,,10000
2,198602,7952,-3.25,-0.257143,3680.0,1986,-11960.0,,,,,10000
3,198603,7952,-4.4375,0.365385,3680.0,1986,-16330.0,,,,,10000
4,198604,7952,-4.0,-0.098592,3793.0,1986,-15172.0,,,,,10000


In [6]:
#Drop rows where mkt_cap(ie. PRC) is negative because there is no trade that month
indexNames = whole_data[ whole_data['mkt_cap'] < 0 ].index
whole_data = whole_data.drop(indexNames, inplace=False)
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
0,198512,7952,,,,1985,,,,,,10000
19,198512,7953,,,,1985,,,,,,10001
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [7]:
#Drop data which has NAN in any coloums
whole_data = whole_data.dropna(axis=0,how='any')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001
...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436


In [8]:
#Calculate profitability and BM_ratio and double sorting
whole_data['Profitability'] = (whole_data['revt']-whole_data['cogs'])/whole_data['at']
whole_data['BM_ratio'] = (whole_data['seq']/whole_data['mkt_cap'])
whole_data['rankPro'] = whole_data['Profitability'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rankBM'] = whole_data['BM_ratio'].groupby(whole_data['date']).rank(ascending=False,method='dense')
whole_data['rank'] = (whole_data['rankPro']*0.8 + whole_data['rankBM']*0.2).groupby(whole_data['date']).rank(ascending=True,method='dense')
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,1845.0
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,1914.0
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1842.0
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,2067.0
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,1908.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,2308.0
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,2274.0
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,2269.0
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,2265.0


In [9]:
#strategy
whole_data['strategy_rule'] = (whole_data['rank']<=50) & (whole_data['PRC'].notnull())
whole_data['strategy_rule'] = whole_data['strategy_rule'].astype(int) # convert T/F to 1/0
whole_data['market_value'] = whole_data.PRC * whole_data.SHROUT
whole_data

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value
28,198609,7953,6.37500,-0.003077,991.0,1986,6.317625e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000860,2151.0,913.0,1845.0,0,6.317625e+03
29,198610,7953,6.62500,0.039216,991.0,1986,6.565375e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000827,2181.0,948.0,1914.0,0,6.565375e+03
30,198611,7953,7.00000,0.056604,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2114.0,1013.0,1842.0,0,6.937000e+03
31,198612,7953,7.00000,0.015000,991.0,1986,6.937000e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000783,2322.0,1310.0,2067.0,0,6.937000e+03
32,198701,7953,6.75000,-0.035714,991.0,1987,6.689250e+03,12.242,19.565,21.46,5.432,10001,0.154795,0.000812,2216.0,944.0,1908.0,0,6.689250e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477710,202008,53453,498.32001,0.741452,931809.0,2020,4.643391e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000014,1889.0,4106.0,2308.0,0,4.643391e+08
4477711,202009,53453,429.01001,-0.139087,948000.0,2020,4.067015e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000016,1880.0,4093.0,2274.0,0,4.067015e+08
4477712,202010,53453,388.04001,-0.095499,947901.0,2020,3.678235e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000018,1867.0,4069.0,2269.0,0,3.678235e+08
4477713,202011,53453,567.59998,0.462736,947901.0,2020,5.380286e+08,34309.000,18402.000,24578.00,6618.000,93436,0.180011,0.000012,1866.0,4076.0,2265.0,0,5.380286e+08


In [10]:
#portfolio build
def eq_weight(df):
    stock_number = df['strategy_rule'].sum() 
    try:
        df['eq_weight'] = df['strategy_rule']/stock_number
    except:
        weights = np.nan
    return df
def val_weight(df):
    total_mv = df.loc[df['strategy_rule']==1,'market_value'].sum()
    try:
        df['val_weight']= df.loc[df['strategy_rule']==1,'market_value']/total_mv
    except:
        weights = np.nan
    return df

In [11]:
def sort_rank(df):
    sorted_list = df.sort_values(by=['rank','date'],ascending=[True,True])
    return sorted_list

In [12]:
whole_data_w = whole_data.groupby('date').apply(eq_weight)
whole_data_w = whole_data_w.groupby('date').apply(val_weight) 
whole_data_w.groupby('date').apply(sort_rank)

# we define a new dataframe "show" here to present the result of weighting
show = whole_data_w[(whole_data_w['strategy_rule']==1.0) 
          & (whole_data_w['date'] == 200703) 
          & (whole_data_w['fyear'] == 2007)]
show.sort_values(by=['rank'],ascending=[True]).head(10)

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
3391170,200703,13796,1.85,-0.075,8200.0,2007,15170.0,48.585,31.756,133.428,26.975,81728,2.092662,0.001778,10.0,327.0,1.0,1,15170.0,0.019231,0.000323
3767510,200703,16212,1.62,0.109589,25564.0,2007,41413.68,258.573,430.553,775.287,68.156,86298,1.333217,0.001646,33.0,349.0,2.0,1,41413.68,0.019231,0.000882
4151698,200703,35220,6.0,-0.041534,72000.0,2007,432000.0,3137.2,1790.0,5522.2,742.4,90337,1.18966,0.001719,48.0,337.0,3.0,1,432000.0,0.019231,0.009196
44728,200703,2089,14.0,-0.030841,4195.0,2007,58730.0,469.754,411.283,861.924,291.923,10294,0.959313,0.004971,105.0,178.0,4.0,1,58730.0,0.019231,0.00125
3801036,200703,16421,0.53,0.325,12690.0,2007,6725.7,41.684,23.77,65.446,13.468,86718,0.999808,0.002002,95.0,295.0,5.0,1,6725.7,0.019231,0.000143
3277897,200703,13073,1.25,0.25,1409.0,2007,1761.25,6.802,3.889,10.056,3.071,80524,0.906645,0.001744,121.0,333.0,6.0,1,1761.25,0.019231,3.7e-05
3842190,200703,35220,6.44,-0.03012,120143.0,2007,773720.92,3137.2,1790.0,5522.2,742.4,87134,1.18966,0.00096,48.0,671.0,7.0,1,773720.92,0.019231,0.016471
1620911,200703,2089,14.0,-0.037132,18493.0,2007,258902.0,469.754,411.283,861.924,291.923,41217,0.959313,0.001128,105.0,520.0,8.0,1,258902.0,0.019231,0.005511
2957886,200703,11568,2.072,-0.013333,3978.0,2007,8242.416,15.45,48.713,63.597,8.329,77673,0.963366,0.001011,103.0,611.0,9.0,1,8242.416,0.019231,0.000175
3313560,200703,4815,95.49,0.226432,1647.0,2007,157272.03,271.475,747.32,1016.817,150.505,80857,0.992714,0.000957,99.0,676.0,10.0,1,157272.03,0.019231,0.003348


In [13]:
#calculate return
def calculate_return(df):
    df['RET'] = df['RET'].astype(float)
    df['eq_weighted_r'] = df['eq_weight'] * df['RET']
    df['val_weighted_r'] = df['val_weight'] * df['RET']
    eq_return = pd.pivot_table(df,index='date',values='eq_weighted_r',aggfunc=np.sum)
    val_return = pd.pivot_table(df,index='date',values='val_weighted_r',aggfunc=np.sum)
    return_dataset = pd.concat([eq_return,val_return],axis=1)
    return return_dataset

In [14]:
def shift_return(df):
    df['val_weight'] = df['val_weight'].shift(1)
    return df

In [15]:
# shift return
portfolio = whole_data_w[whole_data_w.columns].groupby('LPERMNO').apply(shift_return)
portfolio = portfolio.dropna(axis=0,how='any')
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
3905,199502,7976,11.5625,0.075581,9408.0,1995,108780.0,127.366,69.79,174.425,100.545,10026,0.82153,0.000924,276.0,1411.0,64.0,0,108780.0,0.0,0.042969
3907,199504,7976,11.875,0.144578,9359.0,1995,111138.125,127.366,69.79,174.425,100.545,10026,0.82153,0.000905,273.0,1399.0,60.0,0,111138.125,0.0,0.036953
3912,199509,7976,11.75,-0.040816,9126.0,1995,107230.5,123.309,78.694,185.362,96.084,10026,0.865046,0.000896,237.0,1153.0,46.0,1,107230.5,0.02,0.024179
3913,199510,7976,11.125,-0.053191,9126.0,1995,101526.75,123.309,78.694,185.362,96.084,10026,0.865046,0.000946,244.0,1145.0,50.0,1,101526.75,0.02,0.043122
3914,199511,7976,12.25,0.101124,9126.0,1995,111793.5,123.309,78.694,185.362,96.084,10026,0.865046,0.000859,246.0,1438.0,64.0,0,111793.5,0.0,0.034919


In [16]:
portfolio.head()

Unnamed: 0,date,PERMCO,PRC,RET,SHROUT,fyear,mkt_cap,at,cogs,revt,seq,LPERMNO,Profitability,BM_ratio,rankPro,rankBM,rank,strategy_rule,market_value,eq_weight,val_weight
3905,199502,7976,11.5625,0.075581,9408.0,1995,108780.0,127.366,69.79,174.425,100.545,10026,0.82153,0.000924,276.0,1411.0,64.0,0,108780.0,0.0,0.042969
3907,199504,7976,11.875,0.144578,9359.0,1995,111138.125,127.366,69.79,174.425,100.545,10026,0.82153,0.000905,273.0,1399.0,60.0,0,111138.125,0.0,0.036953
3912,199509,7976,11.75,-0.040816,9126.0,1995,107230.5,123.309,78.694,185.362,96.084,10026,0.865046,0.000896,237.0,1153.0,46.0,1,107230.5,0.02,0.024179
3913,199510,7976,11.125,-0.053191,9126.0,1995,101526.75,123.309,78.694,185.362,96.084,10026,0.865046,0.000946,244.0,1145.0,50.0,1,101526.75,0.02,0.043122
3914,199511,7976,12.25,0.101124,9126.0,1995,111793.5,123.309,78.694,185.362,96.084,10026,0.865046,0.000859,246.0,1438.0,64.0,0,111793.5,0.0,0.034919


In [17]:
#yearly return
return_dataset = calculate_return(portfolio)
return_dataset.head(5)

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r
date,Unnamed: 1_level_1,Unnamed: 2_level_1
196001,0.025,0.025
196002,-0.018018,-0.018018
196003,-0.050459,-0.050459
196004,0.009756,0.009756
196005,-0.057971,-0.057971


In [18]:
#cumulative return
final = return_dataset.copy()
final['eq_weighted_R'] = return_dataset['eq_weighted_r']+1
final['val_weighted_R'] = return_dataset['val_weighted_r']+1
final['eq_cum_R'] = final['eq_weighted_R'].cumprod()
final['val_cum_R'] = final['val_weighted_R'].cumprod()
final.head()

Unnamed: 0_level_0,eq_weighted_r,val_weighted_r,eq_weighted_R,val_weighted_R,eq_cum_R,val_cum_R
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
196001,0.025,0.025,1.025,1.025,1.025,1.025
196002,-0.018018,-0.018018,0.981982,0.981982,1.006532,1.006532
196003,-0.050459,-0.050459,0.949541,0.949541,0.955743,0.955743
196004,0.009756,0.009756,1.009756,1.009756,0.965067,0.965067
196005,-0.057971,-0.057971,0.942029,0.942029,0.909121,0.909121


In [19]:
final.to_excel('3327 final Profit Double Sorting.xlsx', sheet_name = 'return')

In [None]:
#data analysis

In [20]:
reg_raw = pd.read_excel("/content/drive/My Drive/FINA3327_Final/stat_data_Prof.xlsx").iloc[1:,:]

In [21]:
reg_raw.head()

Unnamed: 0,Date,eq_weighted_r,val_weighted_r,eq_cum_r,val_cum_r,market_mon_r,market_mon_R,mkt_cum-r,eq_cum_r*,val_cum_r*,Mkt-rf,SMB,HML,Momentum,rf,eq_r-rf,val_r-rf
1,196001.0,0.025,0.025,1.025,1.025,-0.0665,0.9335,0.9335,1.025,1.025,-0.0698,0.0205,0.0269,-0.0349,0.0033,0.0217,0.0217
2,196002.0,-0.018018,-0.018018,1.006532,1.006532,0.0146,1.0146,0.947129,1.006532,1.006532,0.0117,0.0056,-0.0203,0.0386,0.0029,-0.020918,-0.020918
3,196003.0,-0.050459,-0.050459,0.955743,0.955743,-0.0128,0.9872,0.935006,0.955743,0.955743,-0.0163,-0.0047,-0.0284,0.0143,0.0035,-0.053959,-0.053959
4,196004.0,0.009756,0.009756,0.965067,0.965067,-0.0152,0.9848,0.920794,0.965067,0.965067,-0.0171,0.0039,-0.0237,0.0281,0.0019,0.007856,0.007856
5,196005.0,-0.057971,-0.057971,0.909121,0.909121,0.0339,1.0339,0.952009,0.909121,0.909121,0.0312,0.0127,-0.0372,0.0481,0.0027,-0.060671,-0.060671


In [None]:
#equal weight carhart

In [22]:
y_e_Ca = reg_raw[['eq_r-rf']]
x_Ca = reg_raw[['HML','SMB','Mkt-rf','Momentum']]
x_Ca.head()

Unnamed: 0,HML,SMB,Mkt-rf,Momentum
1,0.0269,0.0205,-0.0698,-0.0349
2,-0.0203,0.0056,0.0117,0.0386
3,-0.0284,-0.0047,-0.0163,0.0143
4,-0.0237,0.0039,-0.0171,0.0281
5,-0.0372,0.0127,0.0312,0.0481


In [23]:
y_e_Ca.head()

Unnamed: 0,eq_r-rf
1,0.0217
2,-0.020918
3,-0.053959
4,0.007856
5,-0.060671


In [24]:
import statsmodels.api as sm

In [25]:
x1 = sm.add_constant(x_Ca)
lm = sm.OLS(y_e_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.718
Model:                            OLS   Adj. R-squared:                  0.716
Method:                 Least Squares   F-statistic:                     431.8
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          5.27e-185
Time:                        06:23:38   Log-Likelihood:                 1422.3
No. Observations:                 685   AIC:                            -2835.
Df Residuals:                     680   BIC:                            -2812.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0099      0.001     -8.131      0.0

In [None]:
#value weighted carhart

In [29]:
y_v_Ca = reg_raw['val_r-rf']

In [30]:
lm = sm.OLS(y_v_Ca.astype(float),x1.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.644
Model:                            OLS   Adj. R-squared:                  0.642
Method:                 Least Squares   F-statistic:                     307.7
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          5.98e-151
Time:                        06:24:46   Log-Likelihood:                 1305.8
No. Observations:                 685   AIC:                            -2602.
Df Residuals:                     680   BIC:                            -2579.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0074      0.001      5.133      0.0

In [None]:
#equal weighted CAPM

In [31]:
y_e_CA = reg_raw[['eq_r-rf']]
x_CA = reg_raw[['Mkt-rf']]

In [32]:
x2 = sm.add_constant(x_CA)
lm = sm.OLS(y_e_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:                eq_r-rf   R-squared:                       0.501
Model:                            OLS   Adj. R-squared:                  0.500
Method:                 Least Squares   F-statistic:                     685.8
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          3.39e-105
Time:                        06:25:21   Log-Likelihood:                 1227.5
No. Observations:                 685   AIC:                            -2451.
Df Residuals:                     683   BIC:                            -2442.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0098      0.002     -6.320      0.0

In [None]:
#value weighted CAPM

In [33]:
y_v_CA = reg_raw[['val_r-rf']]

In [34]:
lm = sm.OLS(y_v_CA.astype(float),x2.astype(float)).fit()
print(lm.summary())

                            OLS Regression Results                            
Dep. Variable:               val_r-rf   R-squared:                       0.523
Model:                            OLS   Adj. R-squared:                  0.523
Method:                 Least Squares   F-statistic:                     750.0
Date:                Tue, 20 Apr 2021   Prob (F-statistic):          5.22e-112
Time:                        06:25:31   Log-Likelihood:                 1205.7
No. Observations:                 685   AIC:                            -2407.
Df Residuals:                     683   BIC:                            -2398.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0067      0.002      4.175      0.0