In [55]:
from linearmodels.asset_pricing import LinearFactorModel
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas_datareader.data as web
import seaborn as sns
import pandas as pd
import os.path
import numpy as np

# Bounds of the sample window, kept as strings because they are substituted
# verbatim into the CSV file names used by the data-loading cells.
START, END = '2000', '2018'

In [56]:
# Read the factor CSV for the START-END window, using the parsed 'Date' column as index.
ff5_monthly = pd.read_csv(
    "regression/ff_factor_data-{}-{}.csv".format(START, END),
    index_col=['Date'],
    parse_dates=['Date'],
)

# Switch from a timestamp index to a monthly PeriodIndex; the raw frame stays
# available as `ff5_monthly`.
ff_factor_data = ff5_monthly.to_period('M')
ff_factor_data

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01,-4.74,4.45,-1.89,-6.29,4.74,0.41
2000-02,2.45,18.38,-9.81,-18.76,-0.35,0.43
2000-03,5.20,-15.39,8.23,11.82,-1.61,0.47
2000-04,-6.40,-4.96,7.25,7.67,5.62,0.46
2000-05,-4.42,-3.87,4.83,4.18,1.32,0.50
...,...,...,...,...,...,...
2018-08,3.44,0.63,-3.98,-0.29,-2.64,0.16
2018-09,0.06,-2.50,-1.70,0.67,1.28,0.15
2018-10,-7.68,-4.50,3.43,1.00,3.54,0.19
2018-11,1.69,-0.76,0.26,-0.62,0.40,0.18


In [57]:
# Read the portfolio-return CSV for the START-END window (one column per portfolio),
# using the parsed 'Date' column as index.
#
# `infer_datetime_format` is intentionally not passed: pandas 2.0 deprecated it
# (a strict form of the inference is now the default), so supplying it only
# triggers a warning. This also matches how the factor data is loaded.
ff_portfolio_data = pd.read_csv("regression/ff_portfolio_data-{}-{}.csv".format(START, END),
                                parse_dates=['Date'], index_col=['Date'])

# Convert the timestamp index to a monthly PeriodIndex, the same index type
# the factor data uses.
ff_portfolio_data = ff_portfolio_data.to_period('M')
ff_portfolio_data

Unnamed: 0_level_0,Food,Mines,Oil,Clths,Durbl,Chems,Cnsum,Cnstr,Steel,FabPr,Machn,Cars,Trans,Utils,Rtail,Finan,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2000-01,-5.62,19.54,1.38,-11.23,-9.01,-9.39,3.15,-15.29,-3.18,-9.91,-0.25,-0.76,-10.08,5.65,-13.39,-4.82,-7.18
2000-02,-9.75,-4.08,-5.63,-11.89,-4.57,-7.81,-10.56,-1.81,6.41,-12.04,19.64,-8.34,-6.57,-7.69,-6.54,-9.02,4.67
2000-03,4.47,-3.81,12.77,21.04,2.18,12.01,0.60,11.72,3.02,12.91,7.31,9.74,11.01,5.30,14.64,16.20,2.52
2000-04,-0.27,-11.79,-1.95,4.18,1.00,-4.84,8.39,-6.59,-1.30,1.71,-3.99,9.90,4.43,7.14,-3.74,-4.36,-11.56
2000-05,12.96,-8.05,9.95,-5.22,-1.64,0.01,6.13,-10.53,-5.42,-2.92,-10.00,-14.73,-2.57,3.40,-2.19,4.62,-8.48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-08,-0.58,-9.78,-3.07,5.00,2.17,-0.45,2.79,2.03,-3.39,2.68,3.48,1.00,0.49,0.75,9.25,2.33,5.56
2018-09,-0.50,-2.29,3.27,0.69,0.94,-3.25,2.64,0.07,-0.82,4.32,0.02,-4.08,3.29,-0.70,0.43,-2.15,0.06
2018-10,-0.16,-9.64,-12.10,-11.19,-12.69,-13.23,-2.54,-14.15,-11.81,-11.99,-11.32,-1.11,-10.21,-0.25,-8.28,-5.73,-8.05
2018-11,3.07,-2.02,-2.41,-0.99,2.77,4.24,4.34,2.81,-3.18,6.44,2.82,4.72,3.71,3.09,2.11,2.65,0.26


In [58]:
# Remove the 'RF' column so that only the factor columns remain as regressors.
# errors='ignore' keeps this cell idempotent: it reassigns `ff_factor_data` from
# itself, so without it a second execution raises KeyError ('RF' is already gone).
ff_factor_data = ff_factor_data.drop(columns='RF', errors='ignore')

In [59]:
# Fit the linear factor model of the portfolios on the factors.
# With risk_free=True the printed summary carries a `risk_free` row next to the
# per-factor risk-premia estimates.
mod = LinearFactorModel(
    factors=ff_factor_data,
    portfolios=ff_portfolio_data,
    risk_free=True,
)
res = mod.fit()

# Plain-text estimation summary.
print(res)

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                 17   R-squared:                      0.6667
No. Factors:                          5   J-statistic:                    9.8176
No. Observations:                   228   P-value                         0.5469
Date:                  Tue, Dec 28 2021   Distribution:                 chi2(11)
Time:                          11:47:31                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
risk_free      0.5968     0.5285     1.1293     0.2588     -0.4390      1.6327
Mkt-RF        -0.1117     0.5844    

In [60]:
# Print the extended summary of the fitted model `res`.
# NOTE(review): per the linearmodels docs this should add per-portfolio
# parameter tables after the risk-premia table; the saved output here is
# truncated, so confirm by re-running.
print(res.full_summary)

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                 17   R-squared:                      0.6667
No. Factors:                          5   J-statistic:                    9.8176
No. Observations:                   228   P-value                         0.5469
Date:                  Tue, Dec 28 2021   Distribution:                 chi2(11)
Time:                          11:47:31                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
risk_free      0.5968     0.5285     1.1293     0.2588     -0.4390      1.6327
Mkt-RF        -0.1117     0.5844    

In [61]:
# Regress each portfolio's returns on the columns of `ff_factor_data` and score
# the fit on a held-out subset of rows, one LinearRegression per portfolio.

# Strip spaces from the column labels. regex=False makes the literal-string
# replacement explicit, because the default for `regex` differs between pandas versions.
ff_portfolio_data.columns = ff_portfolio_data.columns.str.replace(' ', '', regex=False)

# train_test_split pairs feature rows and target rows by position, not by index
# label, so fail loudly if the two frames do not cover the same periods in the
# same order (otherwise the regressions would silently use misaligned data).
assert ff_factor_data.index.equals(ff_portfolio_data.index), \
    "ff_factor_data and ff_portfolio_data must share the same index"

TEST_SIZE = 0.2     # fraction of rows held out for evaluation
RANDOM_STATE = 5    # fixed seed, so every portfolio is evaluated on the same held-out rows

results = []

for col_name in ff_portfolio_data.columns:
    # Double brackets keep the target as a one-column DataFrame.
    target = ff_portfolio_data[[col_name]]

    X_train, X_test, Y_train, Y_test = train_test_split(
        ff_factor_data, target, test_size=TEST_SIZE, random_state=RANDOM_STATE)

    regression = LinearRegression()
    regression.fit(X_train, Y_train)
    Y_pred = regression.predict(X_test)

    # All three metrics are computed on the held-out rows only.
    score = regression.score(X_test, Y_test)  # R^2
    mse = mean_squared_error(Y_test, Y_pred)
    mae = mean_absolute_error(Y_test, Y_pred)

    results.append([col_name, score, mse, mae])

# Rank portfolios from highest to lowest held-out R^2 (element 1 of each record).
results = sorted(results, key=lambda x: x[1], reverse=True)

df = pd.DataFrame(data=results, columns=["Sector", "R-Squared", "Mean squared error (MSE)", "Mean absolute error (MAE)" ])
df

Unnamed: 0,Sector,R-Squared,Mean squared error (MSE),Mean absolute error (MAE)
0,Other,0.924045,1.29279,0.874296
1,Finan,0.802019,5.839867,1.853386
2,Trans,0.700142,6.379993,2.027491
3,Machn,0.690256,13.265022,2.6069
4,FabPr,0.616469,8.505158,2.423954
5,Durbl,0.589091,9.173339,2.42468
6,Rtail,0.539185,5.268665,1.92101
7,Chems,0.515866,12.660237,2.672733
8,Cnstr,0.48528,10.634028,2.630454
9,Steel,0.474561,36.3249,4.773309


In [62]:
# Show the first three rows of the results table; `df` is sorted by R-squared
# in descending order, so these are the three best-scoring sectors.
df.iloc[:3]

Unnamed: 0,Sector,R-Squared,Mean squared error (MSE),Mean absolute error (MAE)
0,Other,0.924045,1.29279,0.874296
1,Finan,0.802019,5.839867,1.853386
2,Trans,0.700142,6.379993,2.027491
