In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Series code -> human-readable description, used to relabel result tables.
tickers = {
    # Treasury constant-maturity rates
    "DGS1": "1-Year Treasury Constant Maturity Rate",
    "DGS5": "5-Year Treasury Constant Maturity Rate",
    "DGS10": "10-Year Treasury Constant Maturity Rate",
    "DGS20": "20-Year Treasury Constant Maturity Rate",
    # Trade-weighted dollar indices
    "DTWEXM": "Trade Weighted U.S. Dollar Index: Major Currencies, Goods (DISCONTINUED)",
    "DTWEXAFEGS": "Trade Weighted U.S. Dollar Index: Advanced Foreign Economies, Goods and Services",
    # Volatility, sentiment and financial stress
    "VIXCLS": "CBOE Volatility Index: VIX",
    "UMCSENT": "University of Michigan: Consumer Sentiment",
    "STLFSI2": "St. Louis Fed Financial Stress Index",
    # Labour market
    "UNRATE": "Unemployment Rate",
    "ICSA": "Initial Claims",
    # Prices and inflation expectations
    "CPIAUCSL": "Consumer Price Index for All Urban Consumers: All Items in U.S. City Average",
    "MICH": "University of Michigan: Inflation Expectation",
}

# Factor column code -> display label, used to relabel the result tables.
# The CMA label previously read "Aggresive Investomets"; the spelling is fixed
# here so every table header built from this mapping is correct.
factors_names = {
    "Mkt-RF": "Market",
    "SMB": "Size (Small - Big)",
    "HML": "Value",
    "RMW": "Profitability",
    "CMA": "Conservative Minus Aggressive Investments",
    "RF": "RiskFree",
    "Mom": "Momentum",
}

In [3]:
def _read_dated_csv(path):
    """Read a CSV, using its first column as the index and parsing that index as dates."""
    return pd.read_csv(path, index_col=0, parse_dates=True)


# S&P 500 data, from the regular and the monthly file.
sp500_data = _read_dated_csv('./data/sp500_data.csv')
m_sp500_data = _read_dated_csv('./data/monthly_sp500_data.csv')

# Factor data, from the regular and the monthly file.
factors = _read_dated_csv('./data/factor_data.csv')
m_factors = _read_dated_csv('./data/monthly_factor_data.csv')

# Predictor data, from the regular and the monthly file.
df = _read_dated_csv('./data/predictor_data.csv')
m_df = _read_dated_csv('./data/monthly_predictor_data.csv')

In [4]:
# Rebuild the monthly S&P 500 series from sp500_data (last 'adjclose' value in each
# month-end bin) instead of using the monthly CSV, whose dates are not wanted.
m_sp500_data = (
    sp500_data[['adjclose']]
    .resample('M')
    .last()
    .rename(columns={'adjclose': 'SP500'})
)

# Remove any 'SP500' column left behind by an earlier run of this cell before
# appending, so that re-running the cell does not create duplicate 'SP500' columns.
m_df = pd.concat(
    [m_df.drop(columns=['SP500'], errors='ignore'), m_sp500_data],
    axis=1,
)

In [5]:
# Remove the 'Mkt-RF' and 'RF' columns so they are not part of Y below.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
m_factors = m_factors.drop(columns=['Mkt-RF', 'RF'], errors='ignore')

In [6]:
# Scale the factor values by 1/100 (presumably percent -> decimal; confirm against the data source).
Y = m_factors.div(100)
# Period-over-period fractional change of every predictor column.
X = m_df.pct_change()

In [7]:
# Display X, the frame of predictor changes, as the cell output.
X

Unnamed: 0,DGS1,DGS5,DGS10,DGS20,VIXCLS,UMCSENT,STLFSI2,UNRATE,ICSA,CPIAUCSL,MICH,DTWEXAFEGS,SP500
1927-12-31,,,,,,,,,,,,,
1928-01-31,,,,,,,,,,,,,-0.005096
1928-02-29,,,,,,,,,,,,,-0.017644
1928-03-31,,,,,,,,,,,,,0.117034
1928-04-30,,,,,,,,,,,,,0.024378
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-28,-0.200000,0.666667,0.297297,0.238095,-0.155334,-0.027848,0.330810,-0.015873,-0.241753,0.003546,0.100000,0.000086,0.026091
2021-03-31,-0.125000,0.226667,0.208333,0.110577,-0.305903,0.105469,0.201345,-0.032258,-0.103170,0.006202,-0.060606,0.017900,0.042439
2021-04-30,-0.285714,-0.065217,-0.051724,-0.051948,-0.040722,0.040047,0.321136,0.016667,-0.139293,0.007700,0.096774,-0.019834,0.052425
2021-05-31,0.000000,-0.081395,-0.042424,-0.004566,-0.099409,0.000000,0.051808,-0.049180,-0.105475,0.006442,0.000000,-0.012928,0.005486


In [8]:
# Display Y, the scaled factor frame, as the cell output.
Y

Unnamed: 0_level_0,SMB,HML,RMW,CMA,Mom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1963-07-31,-0.004612,-0.009495,0.006694,-0.011679,0.013505
1963-08-31,-0.007805,0.016792,0.003688,-0.003824,0.005967
1963-09-30,-0.004244,0.000565,-0.007802,0.001468,-0.003260
1963-10-31,-0.013304,0.000675,0.026870,-0.020881,0.030637
1963-11-30,-0.008933,0.017980,-0.005045,0.022244,-0.014942
...,...,...,...,...,...
2020-12-31,0.045497,-0.013986,-0.020471,-0.000600,-0.019549
2021-01-31,0.067684,0.028036,-0.031854,0.044802,0.016720
2021-02-28,0.044378,0.067830,-0.002360,-0.019651,-0.057739
2021-03-31,-0.008101,0.069186,0.062830,0.033906,-0.086721


In [9]:
# Column labels of the predictor frame (X) and of the factor frame (Y) as plain lists.
Xs = X.columns.tolist()
Ys = Y.columns.tolist()

In [10]:
# Left-join the predictor changes onto the factor frame by index, so the rows
# of the result are exactly the rows of Y.
merged = Y.merge(X, how='left', left_index=True, right_index=True)
# Keep only the rows whose index is 1994-01-31 or later.
in_sample = merged.index >= '1994-01-31'
data = merged.loc[in_sample, :]

In [11]:
# Display the list of predictor column names.
Xs

['DGS1',
 'DGS5',
 'DGS10',
 'DGS20',
 'VIXCLS',
 'UMCSENT',
 'STLFSI2',
 'UNRATE',
 'ICSA',
 'CPIAUCSL',
 'MICH',
 'DTWEXAFEGS',
 'SP500']

In [12]:
# Result tables: one row per predictor (Xs), one column per factor (Ys).
# They are created as float frames (all NaN) rather than empty object-dtype
# frames, so the values written into them are stored as numbers and display
# and aggregate consistently.
beta_matrix = pd.DataFrame(index=Xs, columns=Ys, dtype=float)
t_stat_matrix = pd.DataFrame(index=Xs, columns=Ys, dtype=float)

In [13]:
# For every (predictor, factor) pair, fit an OLS regression of the factor column
# on a constant plus the predictor column, record the slope's t-statistic, and
# keep the slope itself only when it is significant.
T_STAT_THRESHOLD = 2  # a slope is reported only when |t-statistic| exceeds this cutoff

# Fill fresh float frames keyed by the raw column names. Building them here
# (instead of writing into frames that may already have been renamed by an
# earlier run of this cell) keeps the cell safe to re-run.
raw_betas = pd.DataFrame(np.nan, index=Xs, columns=Ys)
raw_t_stats = pd.DataFrame(np.nan, index=Xs, columns=Ys)

for xx in Xs:
    # The regressor matrix (constant first, then the predictor) is the same for
    # every factor, so it is built once per predictor.
    temp_X = sm.add_constant(data[xx], prepend=True)
    for yy in Ys:
        res = sm.OLS(data[yy], temp_X).fit()
        t_stat = res.tvalues[xx]
        raw_t_stats.loc[xx, yy] = t_stat
        # Insignificant slopes are left as NaN. (The frames are pre-filled with
        # np.nan; the np.NaN alias was removed in NumPy 2.0.)
        if abs(t_stat) > T_STAT_THRESHOLD:
            raw_betas.loc[xx, yy] = res.params[xx]

# Replace series codes / factor codes with their descriptive labels.
beta_matrix = raw_betas.rename(columns=factors_names, index=tickers)
t_stat_matrix = raw_t_stats.rename(columns=factors_names, index=tickers)

In [14]:
# Display the slope table; entries are NaN where |t-statistic| did not exceed 2.
beta_matrix

Unnamed: 0,Size (Small - Big),Value,Profitability,Conservative Minus Aggresive Investomets,Momentum
1-Year Treasury Constant Maturity Rate,0.030948,,,,
5-Year Treasury Constant Maturity Rate,0.064115,0.058987,-0.030842,,-0.083634
10-Year Treasury Constant Maturity Rate,0.092243,0.075105,-0.041708,,-0.138659
20-Year Treasury Constant Maturity Rate,0.105273,0.078409,-0.043289,,-0.159043
CBOE Volatility Index: VIX,-0.02176,,0.023066,0.015517,0.044168
University of Michigan: Consumer Sentiment,0.106403,0.097502,,,-0.197444
St. Louis Fed Financial Stress Index,,,,,
Unemployment Rate,,,,,-0.046857
Initial Claims,-0.010567,-0.014133,,,
Consumer Price Index for All Urban Consumers: All Items in U.S. City Average,,,,,


In [15]:
# Display the t-statistic of the slope for every (predictor, factor) regression.
t_stat_matrix

Unnamed: 0,Size (Small - Big),Value,Profitability,Conservative Minus Aggresive Investomets,Momentum
1-Year Treasury Constant Maturity Rate,2.83869,0.646879,-1.275138,-0.919497,0.497754
5-Year Treasury Constant Maturity Rate,4.865134,4.31762,-2.648884,0.19692,-3.952789
10-Year Treasury Constant Maturity Rate,4.858662,3.79265,-2.48352,0.096496,-4.585247
20-Year Treasury Constant Maturity Rate,4.336995,3.096338,-2.023761,-0.140297,-4.117715
CBOE Volatility Index: VIX,-2.848169,1.098699,3.523918,2.982539,3.672061
University of Michigan: Consumer Sentiment,3.014455,2.676149,-1.810337,0.452422,-3.542985
St. Louis Fed Financial Stress Index,0.835475,-0.697257,-0.292632,-0.556909,0.056529
Unemployment Rate,0.675182,-1.062392,0.543482,-0.511925,-2.337529
Initial Claims,-2.967086,-3.892107,-0.694732,1.544079,1.187594
Consumer Price Index for All Urban Consumers: All Items in U.S. City Average,1.386795,1.73532,-0.942772,-0.643711,1.216104
