# Libraries

In [1]:
import os
import pickle
from functools import reduce
from operator import mul

import pandas as pd
import numpy as np

from statsmodels.regression.linear_model import OLS
from sklearn import linear_model
from sklearn.decomposition import PCA

import holoviews as hv
import hvplot
import hvplot.pandas

In [2]:
np.random.seed(42)
hv.extension('bokeh')

In [3]:
# There is a compatilibility issue with this library \
#and newer versions of Pandas, this is short fix to the problem, \
#if you have issues at this chunk comment it out and you should be fine.  
pd.core.common.is_list_like = pd.api.types.is_list_like

from pandas_datareader.famafrench import get_available_datasets
import pandas_datareader.data as web

  from pandas.util.testing import assert_frame_equal


# Porfolio Data

In [4]:
portfolio_path = os.path.join("100_Portfolios_10x10_Daily.p")

In [5]:
# To download the data yourself
try:
    portfolios = pickle.load(open( portfolio_path, "rb" ))
except:
    portfolios = web.DataReader('100_Portfolios_10x10_Daily', 'famafrench')
    pickle.dump(portfolio_path, open(portfolio_path, "wb"))

In [6]:
print(portfolios['DESCR'])

100 Portfolios 10x10 Daily
--------------------------

This file was created by CMPT_ME_BEME_RETS_DAILY using the 202006 CRSP database. It contains value-weighted returns for the intersections of 10 ME portfolios and 10 BE/ME portfolios. The portfolios are constructed at the end of June. ME is market cap at the end of June. BE/ME is book equity at the last fiscal year end of the prior calendar year divided by ME at the end of December of the prior year. Missing data are indicated by -99.99 or -999. The break points use Compustat firms plus the firms hand-collected from the Moodys Industrial, Transportation, Utilities, and Financials Manuals. The portfolios use Compustat firms plus the firms hand-collected from the Moodys Industrial, Transportation, Utilities, and Financials Manuals. Copyright 2020 Kenneth R. French

  0 : Average Value Weighted Returns -- Daily (1235 rows x 100 cols)
  1 : Average Equal Weighted Returns -- Daily (1235 rows x 100 cols)
  2 : Number of Firms in Portfolio

![Fama/French Benchmark Portfolio](../Media/fama-french-benchmark-portfolios.png)

In [7]:
pd.melt(portfolios[1].head(100).divide(100).add(1).cumprod().reset_index(), 
        id_vars='Date').hvplot.line(x='Date', by='variable')

In [8]:
portfolios[1] = portfolios[1].divide(100)

# Factors Data

In [9]:
five_factor_path = os.path.join("F-F_Research_Data_5_Factors_2x3_daily.p")

In [10]:
# To download the data yourself
try:
    factors = pickle.load(open( five_factor_path, "rb" ))
except:
    factors = web.DataReader('F-F_Research_Data_5_Factors_2x3_daily', 'famafrench')
    pickle.dump(five_factor_path, open( five_factor_path, "wb"))

In [11]:
print(factors['DESCR'])

F-F Research Data 5 Factors 2x3 daily
-------------------------------------

This file was created by CMPT_ME_BEME_OP_INV_RETS_DAILY using the 202006 CRSP database. The 1-month TBill return is from Ibbotson and Associates, Inc.

  0 : (1235 rows x 6 cols)


In [12]:
pd.melt(factors[0].head(100).divide(100).add(1).cumprod().reset_index(), 
        id_vars='Date').hvplot.line(x='Date', by='variable')

## i. Covariance of Factors

In [13]:
factors[0] = factors[0].loc[portfolios[1].index,:].divide(100)
hvplot.scatter_matrix(factors[0])

In [14]:
pca_factors = PCA()
pca_factors.fit(factors[0].dropna())

pd.Series(pca_factors.explained_variance_ratio_,name='Variance_Explained').hvplot.line(label='Scree Plot of PCA Variance Explaned (%)').redim(Variance_Explained={'range': (0, 1)})

# Modeling

In [15]:
# We must make sure we have an overlapping dataset
dates = np.intersect1d(factors[0].index, portfolios[1].index)
factors[0] = factors[0].loc[dates,:]
portfolios[1] = portfolios[1].loc[dates,:]

## ii. Analyzing Weight Space

In [16]:
factors[0] = factors[0].loc[~factors[0].isna().any(1)&~portfolios[1].isna().any(1),:]
portfolios[1] = portfolios[1].loc[~factors[0].isna().any(1)&~portfolios[1].isna().any(1),:]


In [17]:
lm = linear_model.LinearRegression(normalize=True)
lm.fit(X=factors[0], y=portfolios[1])

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)

In [18]:
pca = PCA(n_components=2)

beta_comp = pca.fit_transform(lm.coef_)
beta_comp = pd.DataFrame(beta_comp, columns=['weight_comp1','weight_comp2'], index=portfolios[1].columns)
beta_comp = beta_comp.reset_index()

labels = pd.Series(portfolios[1].columns).str.split(' ', 1, expand=True)
labels.columns = ['market equity','two']

beta_comp = pd.concat([beta_comp,labels], axis=1)

print(f'This is the feature importance of our two components: \n\n{pca.explained_variance_ratio_}s')

This is the feature importance of our two components: 

[0.98974442 0.00462441]s


In [19]:
%%opts Scatter [tools=['hover'], height=400, width=600] (size=5 alpha=0.5)
hv.Scatter(beta_comp, kdims = ['weight_comp1'], vdims = ['weight_comp2', 'market equity', 'two']).options(color_index='market equity') + \
hv.Scatter(beta_comp, kdims = ['weight_comp1'], vdims = ['weight_comp2', 'market equity', 'two']).options(color_index='two')

## iii. Testing Significance of F-F 5-Factor

In [20]:
portfolio_returns = pd.melt(portfolios[1].reset_index(), id_vars='Date').drop(columns=['variable']).merge(factors[0].dropna(), how='left', on='Date').drop(columns=['Date'])

portfolio_returns.head()

Unnamed: 0,value,Mkt-RF,SMB,HML,RMW,CMA,RF
0,-0.0029,0.0036,-0.0007,-0.0045,0.0001,-0.0006,0.0
1,-0.0194,-0.0088,-0.0037,0.0199,0.0023,0.0074,0.0
2,-0.0153,-0.0036,-0.004,-0.0035,0.0032,0.0018,0.0
3,0.0041,0.0132,0.0019,0.0069,0.0022,0.0004,0.0
4,-0.0056,-0.0098,-0.0006,0.0043,0.001,0.0008,0.0


In [21]:
model = OLS(portfolio_returns.value-portfolio_returns.RF,portfolio_returns.drop(columns=['value','RF']))

results = model.fit()

results.summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.763
Model:,OLS,Adj. R-squared (uncentered):,0.763
Method:,Least Squares,F-statistic:,79660.0
Date:,"Mon, 03 Aug 2020",Prob (F-statistic):,0.0
Time:,16:06:06,Log-Likelihood:,416720.0
No. Observations:,123500,AIC:,-833400.0
Df Residuals:,123495,BIC:,-833400.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Mkt-RF,1.0132,0.002,489.480,0.000,1.009,1.017
SMB,0.6796,0.004,164.086,0.000,0.671,0.688
HML,0.3047,0.004,77.155,0.000,0.297,0.312
RMW,-0.0718,0.006,-11.185,0.000,-0.084,-0.059
CMA,-0.1088,0.008,-13.483,0.000,-0.125,-0.093

0,1,2,3
Omnibus:,86936.926,Durbin-Watson:,1.956
Prob(Omnibus):,0.0,Jarque-Bera (JB):,38757581.849
Skew:,2.191,Prob(JB):,0.0
Kurtosis:,89.676,Cond. No.,4.57
