In [2]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

  from pandas.core import datetools


In [3]:
# Read the two input tables from the working directory:
# the factor series and the stock price series.
factor, stock = (
    pd.read_csv(csv_name) for csv_name in ("factor.csv", "stock_price.csv")
)

In [24]:
# Expose the "man_order" column (manufacturers' new orders for non-defense
# capital goods) under the clearer name "cap_order" for the rest of the analysis.
# DataFrame.rename is the supported way to relabel columns: passing a mapping to
# rename_axis was deprecated and is rejected with a ValueError by current pandas.
# Rebinding the name (instead of inplace=True) keeps the cell safe to re-run.
factor = factor.rename(columns={"man_order": "cap_order"})
factor.head()

Unnamed: 0,year,month,hours,claim,con_order,cap_order,permit,M2,spread,umich,sp500
0,2000,1,0.002415,0.013724,0.055633,-0.022551,0.026144,0.006093,0.234694,0.062619,-0.002169
1,2000,2,0.0,0.030932,-0.044304,-0.085627,-0.020266,0.002856,-0.347107,-0.00625,-0.025752
2,2000,3,-0.00241,-0.049299,-0.006508,0.087953,-0.024232,0.006596,-0.481013,-0.037736,0.038405
3,2000,4,0.004831,-0.037997,0.015782,0.01213,-0.032707,0.011871,-1.073171,0.019608,0.013273
4,2000,5,-0.009615,0.050186,-0.03081,-0.010262,-0.033813,-0.002628,-6.666667,0.013736,-0.02934


In [25]:
# Regressor matrix for the OLS fits: the first 48 rows of every factor column
# after the two leading columns (year, month), with an intercept column added
# by add_constant. The row labelled 0 is then removed.
# NOTE(review): presumably row 0 is dropped because the first return observation
# is undefined (no previous price) - confirm against the return computation.
X = sm.add_constant(factor.iloc[:48, 2:]).drop(0)
X.head()

Unnamed: 0,const,hours,claim,con_order,cap_order,permit,M2,spread,umich,sp500
1,1.0,0.0,0.030932,-0.044304,-0.085627,-0.020266,0.002856,-0.347107,-0.00625,-0.025752
2,1.0,-0.00241,-0.049299,-0.006508,0.087953,-0.024232,0.006596,-0.481013,-0.037736,0.038405
3,1.0,0.004831,-0.037997,0.015782,0.01213,-0.032707,0.011871,-1.073171,0.019608,0.013273
4,1.0,-0.009615,0.050186,-0.03081,-0.010262,-0.033813,-0.002628,-6.666667,0.013736,-0.02934
5,1.0,0.002427,0.011504,-0.020719,0.071197,0.018795,0.003752,-3.529412,-0.038844,0.030652


In [26]:
# Simple period-over-period returns for every column:
#     (value_t - value_{t-1}) / value_{t-1}
# Rows containing any NaN are discarded (the first row has no predecessor),
# and the two leading columns are then sliced off, leaving the per-stock columns.
ret = stock.diff().div(stock.shift()).dropna().iloc[:, 2:]
ret.head()

Unnamed: 0,LPX,MTH,DHI,FINL,STC,RNR,FBC,RWT,CLI,HELE,...,T,PXD,DVN,CLGX,CAL,FCN,DLX,INGR,CMTL,ORCL
1,-0.071629,0.110409,-0.02893,0.043493,0.037917,-0.017358,-0.084445,-0.034993,-0.066338,-0.125,...,-0.119534,-0.029233,0.060494,-0.021059,0.012055,0.183675,-0.110254,0.024413,0.343279,0.486396
2,0.174597,-0.116014,0.161106,0.625,0.155248,0.079203,0.009709,0.258172,0.073685,-0.017857,...,0.115894,0.263151,0.305173,0.204291,0.15271,-0.068959,0.130665,0.025742,-0.180548,0.051348
3,-0.036036,0.1,-0.009542,0.083329,-0.098812,-0.100916,0.046551,-0.008408,0.033315,-0.036364,...,0.045785,-0.017806,-0.009009,0.107622,-0.145821,0.2037,-0.049528,-0.00263,-0.345772,0.024019
4,-0.150988,0.005673,0.012667,-0.337268,-0.114039,0.189435,-0.305556,-0.076618,0.048537,-0.009434,...,-0.002854,0.448484,0.242858,0.076909,0.16462,0.153859,-0.00556,0.023404,0.046636,-0.100861
5,-0.033333,-0.033881,0.038302,0.169635,0.158418,0.00577,-0.133333,0.062624,-0.048606,-0.147611,...,0.007157,-0.14539,-0.062949,-0.13577,0.098222,0.119999,-0.045572,0.083745,0.306933,0.169565


In [27]:
# For each stock, fit an OLS regression of its first 47 returns on the factor
# matrix X and keep the p-value of every coefficient (intercept included).
p_values = [
    sm.OLS(ret.iloc[:47, col], X).fit().pvalues
    for col in range(ret.shape[1])
]

# Assemble the p-values into one table: one row per regressor, one column per stock.
p = pd.concat(p_values, axis=1)
p.columns = ret.columns

# Average each regressor's p-value across all stocks, smallest mean first.
pvalue_mean = p.T.mean(axis=0).sort_values().to_frame("p-value mean")
pvalue_mean

Unnamed: 0,p-value mean
hours,0.32233
const,0.357615
sp500,0.405322
cap_order,0.41295
umich,0.46701
con_order,0.490015
spread,0.501685
claim,0.536206
M2,0.555731
permit,0.590011


In [63]:
# Stocks whose `hours` coefficient has a p-value below 0.1
p.loc["hours"].loc[lambda pvals: pvals < 0.1]

BMY     0.080279
NEE     0.062584
PXD     0.013965
DLX     0.096548
INGR    0.050528
Name: hours, dtype: float64

In [64]:
# Stocks whose `sp500` coefficient has a p-value below 0.1
p.loc["sp500"].loc[lambda pvals: pvals < 0.1]

LPX     0.003064
DHI     0.054356
FINL    0.002503
RWT     0.088831
CAL     0.047902
ORCL    0.075342
Name: sp500, dtype: float64

In [65]:
# Stocks whose `cap_order` coefficient has a p-value below 0.1
p.loc["cap_order"].loc[lambda pvals: pvals < 0.1]

TAP     0.088918
NEE     0.013357
PXD     0.053237
DVN     0.011080
INGR    0.027860
Name: cap_order, dtype: float64

In [66]:
# Stocks whose `umich` coefficient has a p-value below 0.1
p.loc["umich"].loc[lambda pvals: pvals < 0.1]

CLI    0.090953
UVV    0.022005
EIX    0.072139
Name: umich, dtype: float64

In [67]:
# Stocks whose `con_order` coefficient has a p-value below 0.1
p.loc["con_order"].loc[lambda pvals: pvals < 0.1]

RWT     0.012954
HELE    0.023937
UVV     0.034273
Name: con_order, dtype: float64

In [68]:
# Stocks whose `spread` coefficient has a p-value below 0.1
p.loc["spread"].loc[lambda pvals: pvals < 0.1]

FBC    0.022559
RWT    0.050051
UVV    0.078849
Name: spread, dtype: float64

In [69]:
# Stocks whose `claim` coefficient has a p-value below 0.1
p.loc["claim"].loc[lambda pvals: pvals < 0.1]

T    0.038852
Name: claim, dtype: float64

In [70]:
# Stocks whose `M2` coefficient has a p-value below 0.1
p.loc["M2"].loc[lambda pvals: pvals < 0.1]

Series([], Name: M2, dtype: float64)

In [71]:
# Stocks whose `permit` coefficient has a p-value below 0.1
p.loc["permit"].loc[lambda pvals: pvals < 0.1]

CLI    0.009602
NEE    0.084439
Name: permit, dtype: float64

factors:

 hours - Average weekly hours

 claim - Average weekly jobless claims for unemployment insurance

 con_order - Manufacturers' new orders for consumer goods/materials

 cap_order (named man_order in the raw data) - Manufacturers' new orders for non-defense capital goods

 permit - Building permits

 M2 - Money Supply (M2)

 spread - Interest rate spread (10-year Treasury vs. Federal Funds target)

 umich - Index of consumer expectations

 sp500 - S&P 500

### From the results of the regressions from 2000-1 to 2003-12, we can see that the four factors with the highest explanatory value (lowest mean p-value) are average weekly hours, the S&P 500, orders for capital goods and consumer expectations (umich); orders for consumer goods rank next.