In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy import stats

### A. Calculate historical factor monthly returns for the following factors based on APT.

In [2]:
# Monthly returns of the S&P 500 Index and the US Dollar Index.
# The first sheet column is parsed as dates and used as the row index.
df_SPD = pd.read_excel(
    'APT&BLM.xlsx',
    sheet_name='SPX_DXY_MonthlyReturns',
    index_col=[0],
    parse_dates=[0],
)
df_SPD

Unnamed: 0,S&P - 500 Index,US Dollar Index
2001-07-31,-0.984416,-1.916799
2001-08-31,-6.260163,-3.208739
2001-09-30,-8.075231,-0.008817
2001-10-31,1.906875,1.278547
2001-11-30,7.670629,1.105694
...,...,...
2018-08-31,3.258530,0.619752
2018-09-30,0.569193,-0.008409
2018-10-31,-6.835016,2.097086
2018-11-30,2.037839,0.149289


In [3]:
# Per-stock sheet: one row per company (indexed by the first column, NAME) holding
# TICKER, SECTOR, market cap and then one monthly-return column per month.
# skiprows=1 skips the first sheet row so that the following row is read as the header.
df_500 = pd.read_excel(
    'APT&BLM.xlsx',
    sheet_name='SP500Stocks',
    skiprows=1,
    index_col=[0],
)
df_500

Unnamed: 0_level_0,TICKER,SECTOR,Market Cap ($ Mil),2001-07-31 00:00:00,2001-08-31 00:00:00,2001-09-30 00:00:00,2001-10-31 00:00:00,2001-11-30 00:00:00,2001-12-31 00:00:00,2002-01-31 00:00:00,...,2018-03-31 00:00:00,2018-04-30 00:00:00,2018-05-31 00:00:00,2018-06-30 00:00:00,2018-07-31 00:00:00,2018-08-31 00:00:00,2018-09-30 00:00:00,2018-10-31 00:00:00,2018-11-30 00:00:00,2018-12-31 00:00:00
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3m Co,MMM,INDU,110949,-1.945100,-6.446533,-5.475902,6.077282,10.339559,3.168009,-6.098369,...,-6.789357,-11.447915,2.149656,-0.258595,7.930319,0.006810,-0.099670,-9.705245,10.030197,-8.358958
Abbott Labs,ABT,HLTH,127036,12.112976,-7.258951,4.326061,2.599966,3.812730,1.363484,3.886888,...,-0.679629,-2.525512,5.848923,-0.877646,7.939856,1.983455,9.754681,-5.645529,7.412211,-2.322711
Abbvie Inc,ABBV,HLTH,138674,,,,,,,,...,-18.285435,3.070457,2.475368,-6.357350,0.546199,4.065904,-1.458604,-16.817462,21.091854,-2.206395
Abiomed Inc,ABMD,HLTH,14640,-19.423240,-1.315789,-6.826667,24.441900,-12.741490,-16.605166,-6.131479,...,8.505481,3.422798,26.645622,7.322769,-13.328444,14.681409,10.617837,-24.135631,-2.497069,-2.296501
Accenture Plc Ireland,ACN,INFT,89887,,-0.401070,-14.429530,37.803922,28.628344,19.115044,-4.234770,...,-4.664319,-0.615572,3.002638,5.040437,-2.604036,6.113071,0.668382,-6.540393,4.377624,-14.290059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,XYL,INDU,11991,,,,,,,,...,3.137550,-5.226213,-3.145842,-4.289844,13.624241,-0.578087,5.216744,-17.628674,11.283937,-8.577682
Yum Brands Inc,YUM,DSCR,28707,4.214608,-6.842003,-7.977550,28.990256,-6.206153,3.688246,13.414138,...,4.607980,2.314109,-6.221202,-3.823932,1.367929,10.055781,4.626532,-0.549981,2.423112,-0.325260
Zimmer Hldgs Inc,ZBH,HLTH,21156,,-4.895105,2.022059,11.387387,4.367519,-5.331680,6.516045,...,-5.993991,5.621732,-3.177918,0.152802,12.634576,-1.505699,6.538274,-13.600042,3.019571,-11.156387
Zions Bancorp,ZION,FINA,7830,-0.932129,-1.698511,-6.287271,-10.696854,1.164570,8.906415,-4.260132,...,-4.074956,3.830840,0.525614,-3.867899,-1.878993,3.662039,-5.892335,-6.181357,4.055791,-16.276212


In [4]:
# Pre-allocate the time-series regression coefficients: 505 stocks (rows of df_500)
# by 3 parameters per stock (intercept plus the two factor betas).
Beta_mat1 = np.zeros((505, 3))

In [5]:
# Time-series regression of every stock's monthly returns on the equity-market
# (S&P 500) factor and the US Dollar factor. Row i of Beta_mat1 receives the fitted
# parameters in formula order: [Intercept, SP, USD].
formula1 = 'Stock ~ SP+USD'
for i in range(df_500.shape[0]):
    # The first three entries of a row are TICKER / SECTOR / market cap;
    # everything after them is the stock's monthly return series.
    df_stockret = df_500.iloc[i][3:].astype('float').to_frame()
    # Align stock and factor returns on their index and keep only complete months.
    df_step1 = pd.concat([df_stockret, df_SPD], axis=1).dropna()
    df_step1.columns = ['Stock', 'SP', 'USD']

    result1 = smf.ols(formula1, data=df_step1).fit()
    Beta_mat1[i] = result1.params.values

In [6]:
# Inspect the fitted coefficients: one row per stock; columns 0-2 hold the
# intercept, the S&P 500 beta and the US Dollar beta, in that order.
pd.DataFrame(Beta_mat1)

Unnamed: 0,0,1,2
0,0.411085,0.897279,0.137059
1,0.635259,0.501031,-0.096830
2,0.662926,1.450096,-0.556901
3,1.770226,1.263956,0.059849
4,0.772685,1.152855,0.193686
...,...,...,...
500,0.539917,1.037699,-0.747278
501,1.101420,0.657877,-0.258958
502,0.431365,0.721118,-0.503893
503,-0.190218,1.136268,-0.084878


In [7]:
# Sector factors: the sector code of every stock, indexed by company name.
sector = df_500['SECTOR']
sector

NAME
3m Co                    INDU
Abbott Labs              HLTH
Abbvie Inc               HLTH
Abiomed Inc              HLTH
Accenture Plc Ireland    INFT
                         ... 
Xylem Inc                INDU
Yum Brands Inc           DSCR
Zimmer Hldgs Inc         HLTH
Zions Bancorp            FINA
Zoetis Inc               HLTH
Name: SECTOR, Length: 505, dtype: object

In [8]:
# One-hot encode SECTOR into 0/1 dummy columns and leave out UTIL as the reference
# sector (keeping all sectors would make the dummies sum to the intercept column
# that the cross-sectional regression adds).
# dtype=int pins the dummies to integers: pandas >= 2.0 would otherwise emit bool
# columns, and a design matrix mixing bool and float columns is cast to object
# dtype, which statsmodels' OLS rejects.
sector1 = (
    pd.get_dummies(df_500, columns=['SECTOR'], dtype=int)
    .drop(columns='SECTOR_UTIL')
)
# Show the last return column next to the ten remaining sector dummies.
sector1.iloc[:, -11:]

Unnamed: 0_level_0,2018-12-31 00:00:00,SECTOR_DSCR,SECTOR_ENER,SECTOR_FINA,SECTOR_HLTH,SECTOR_INDU,SECTOR_INFT,SECTOR_MATS,SECTOR_REAL,SECTOR_STPL,SECTOR_TCOM
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3m Co,-8.358958,0,0,0,0,1,0,0,0,0,0
Abbott Labs,-2.322711,0,0,0,1,0,0,0,0,0,0
Abbvie Inc,-2.206395,0,0,0,1,0,0,0,0,0,0
Abiomed Inc,-2.296501,0,0,0,1,0,0,0,0,0,0
Accenture Plc Ireland,-14.290059,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,-8.577682,0,0,0,0,1,0,0,0,0,0
Yum Brands Inc,-0.325260,1,0,0,0,0,0,0,0,0,0
Zimmer Hldgs Inc,-11.156387,0,0,0,1,0,0,0,0,0,0
Zions Bancorp,-16.276212,0,0,1,0,0,0,0,0,0,0


In [9]:
# Sector exposures: the 0/1 dummy columns created by get_dummies.
# They are selected by their 'SECTOR_' prefix rather than by position, so the
# selection does not depend on how many columns precede them in sector1.
sector_expo = sector1.filter(like='SECTOR_')
sector_expo

Unnamed: 0_level_0,SECTOR_DSCR,SECTOR_ENER,SECTOR_FINA,SECTOR_HLTH,SECTOR_INDU,SECTOR_INFT,SECTOR_MATS,SECTOR_REAL,SECTOR_STPL,SECTOR_TCOM
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3m Co,0,0,0,0,1,0,0,0,0,0
Abbott Labs,0,0,0,1,0,0,0,0,0,0
Abbvie Inc,0,0,0,1,0,0,0,0,0,0
Abiomed Inc,0,0,0,1,0,0,0,0,0,0
Accenture Plc Ireland,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,0,0,0,0,1,0,0,0,0,0
Yum Brands Inc,1,0,0,0,0,0,0,0,0,0
Zimmer Hldgs Inc,0,0,0,1,0,0,0,0,0,0
Zions Bancorp,0,0,1,0,0,0,0,0,0,0


In [10]:
# Beta matrix 2 = time-series betas followed by the sector dummies, one row per stock.
# The regression intercept ('coef') is not a factor exposure, so it is dropped.
Beta_mat2 = pd.concat(
    [
        pd.DataFrame(
            Beta_mat1,
            columns=['coef', 'beta_SP', 'beta_USD'],
            index=sector_expo.index,
        ).drop(columns='coef'),
        sector_expo,
    ],
    axis=1,
)
Beta_mat2

Unnamed: 0_level_0,beta_SP,beta_USD,SECTOR_DSCR,SECTOR_ENER,SECTOR_FINA,SECTOR_HLTH,SECTOR_INDU,SECTOR_INFT,SECTOR_MATS,SECTOR_REAL,SECTOR_STPL,SECTOR_TCOM
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3m Co,0.897279,0.137059,0,0,0,0,1,0,0,0,0,0
Abbott Labs,0.501031,-0.096830,0,0,0,1,0,0,0,0,0,0
Abbvie Inc,1.450096,-0.556901,0,0,0,1,0,0,0,0,0,0
Abiomed Inc,1.263956,0.059849,0,0,0,1,0,0,0,0,0,0
Accenture Plc Ireland,1.152855,0.193686,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,1.037699,-0.747278,0,0,0,0,1,0,0,0,0,0
Yum Brands Inc,0.657877,-0.258958,1,0,0,0,0,0,0,0,0,0
Zimmer Hldgs Inc,0.721118,-0.503893,0,0,0,1,0,0,0,0,0,0
Zions Bancorp,1.136268,-0.084878,0,0,1,0,0,0,0,0,0,0


In [11]:
# Pre-allocate the factor returns: 13 factors (2 betas, 10 sector dummies, size z-score)
# by 210 monthly cross-sections; filled column by column in the cross-sectional regression.
factor_ret = np.zeros((13, 210))

In [12]:
# Size factor: log market cap of every stock, kept next to its sector.
# .copy() makes size_factor an independent frame, so the assignment below writes
# into it directly instead of triggering pandas' SettingWithCopyWarning.
size_factor = df_500.iloc[:, [1, 2]].copy()
size_factor['Market Cap ($ Mil)'] = np.log(size_factor['Market Cap ($ Mil)'])
size_factor

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,SECTOR,Market Cap ($ Mil)
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
3m Co,INDU,11.616826
Abbott Labs,HLTH,11.752226
Abbvie Inc,HLTH,11.839881
Abiomed Inc,HLTH,9.591513
Accenture Plc Ireland,INFT,11.406309
...,...,...
Xylem Inc,INDU,9.391912
Yum Brands Inc,DSCR,10.264896
Zimmer Hldgs Inc,HLTH,9.959679
Zions Bancorp,FINA,8.965718


In [13]:
# Mean of log market cap within each sector.
size_sec = (
    size_factor
    .groupby('SECTOR')
    .mean()
    .rename(columns={'Market Cap ($ Mil)': 'MarketCapMean'})
)
size_sec

Unnamed: 0_level_0,MarketCapMean
SECTOR,Unnamed: 1_level_1
DSCR,9.66822
ENER,9.922564
FINA,10.069093
HLTH,10.382486
INDU,9.887828
INFT,10.225671
MATS,9.72449
REAL,9.682977
STPL,10.415517
TCOM,10.488791


In [14]:
# Standard deviation of log market cap within each sector, aligned on the sector index.
size_sec['MarketCapStd'] = size_factor.groupby('SECTOR')['Market Cap ($ Mil)'].std()
size_sec

Unnamed: 0_level_0,MarketCapMean,MarketCapStd
SECTOR,Unnamed: 1_level_1,Unnamed: 2_level_1
DSCR,9.66822,1.016778
ENER,9.922564,1.018311
FINA,10.069093,1.008906
HLTH,10.382486,1.014239
INDU,9.887828,0.874319
INFT,10.225671,1.136336
MATS,9.72449,0.75901
REAL,9.682977,0.629647
STPL,10.415517,0.985399
TCOM,10.488791,1.519808


In [15]:
# Work on a copy so df_500 itself stays untouched.
df_500_1 = df_500.copy()
# Broadcast each sector's mean / std of log market cap onto its member stocks
# by looking up every stock's SECTOR in the per-sector table.
for stat_col in ('MarketCapMean', 'MarketCapStd'):
    df_500_1[stat_col] = df_500_1['SECTOR'].map(size_sec[stat_col])

df_500_1

Unnamed: 0_level_0,TICKER,SECTOR,Market Cap ($ Mil),2001-07-31 00:00:00,2001-08-31 00:00:00,2001-09-30 00:00:00,2001-10-31 00:00:00,2001-11-30 00:00:00,2001-12-31 00:00:00,2002-01-31 00:00:00,...,2018-05-31 00:00:00,2018-06-30 00:00:00,2018-07-31 00:00:00,2018-08-31 00:00:00,2018-09-30 00:00:00,2018-10-31 00:00:00,2018-11-30 00:00:00,2018-12-31 00:00:00,MarketCapMean,MarketCapStd
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3m Co,MMM,INDU,110949,-1.945100,-6.446533,-5.475902,6.077282,10.339559,3.168009,-6.098369,...,2.149656,-0.258595,7.930319,0.006810,-0.099670,-9.705245,10.030197,-8.358958,9.887828,0.874319
Abbott Labs,ABT,HLTH,127036,12.112976,-7.258951,4.326061,2.599966,3.812730,1.363484,3.886888,...,5.848923,-0.877646,7.939856,1.983455,9.754681,-5.645529,7.412211,-2.322711,10.382486,1.014239
Abbvie Inc,ABBV,HLTH,138674,,,,,,,,...,2.475368,-6.357350,0.546199,4.065904,-1.458604,-16.817462,21.091854,-2.206395,10.382486,1.014239
Abiomed Inc,ABMD,HLTH,14640,-19.423240,-1.315789,-6.826667,24.441900,-12.741490,-16.605166,-6.131479,...,26.645622,7.322769,-13.328444,14.681409,10.617837,-24.135631,-2.497069,-2.296501,10.382486,1.014239
Accenture Plc Ireland,ACN,INFT,89887,,-0.401070,-14.429530,37.803922,28.628344,19.115044,-4.234770,...,3.002638,5.040437,-2.604036,6.113071,0.668382,-6.540393,4.377624,-14.290059,10.225671,1.136336
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,XYL,INDU,11991,,,,,,,,...,-3.145842,-4.289844,13.624241,-0.578087,5.216744,-17.628674,11.283937,-8.577682,9.887828,0.874319
Yum Brands Inc,YUM,DSCR,28707,4.214608,-6.842003,-7.977550,28.990256,-6.206153,3.688246,13.414138,...,-6.221202,-3.823932,1.367929,10.055781,4.626532,-0.549981,2.423112,-0.325260,9.668220,1.016778
Zimmer Hldgs Inc,ZBH,HLTH,21156,,-4.895105,2.022059,11.387387,4.367519,-5.331680,6.516045,...,-3.177918,0.152802,12.634576,-1.505699,6.538274,-13.600042,3.019571,-11.156387,10.382486,1.014239
Zions Bancorp,ZION,FINA,7830,-0.932129,-1.698511,-6.287271,-10.696854,1.164570,8.906415,-4.260132,...,0.525614,-3.867899,-1.878993,3.662039,-5.892335,-6.181357,4.055791,-16.276212,10.069093,1.008906


In [16]:
# Size exposure: z-score of log(market cap) relative to the stock's own sector,
# i.e. (log cap - sector mean) / sector std.
df_500_1['zscore'] = (
    np.log(df_500_1['Market Cap ($ Mil)'])
    .sub(df_500_1['MarketCapMean'])
    .div(df_500_1['MarketCapStd'])
)
df_500_1

Unnamed: 0_level_0,TICKER,SECTOR,Market Cap ($ Mil),2001-07-31 00:00:00,2001-08-31 00:00:00,2001-09-30 00:00:00,2001-10-31 00:00:00,2001-11-30 00:00:00,2001-12-31 00:00:00,2002-01-31 00:00:00,...,2018-06-30 00:00:00,2018-07-31 00:00:00,2018-08-31 00:00:00,2018-09-30 00:00:00,2018-10-31 00:00:00,2018-11-30 00:00:00,2018-12-31 00:00:00,MarketCapMean,MarketCapStd,zscore
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3m Co,MMM,INDU,110949,-1.945100,-6.446533,-5.475902,6.077282,10.339559,3.168009,-6.098369,...,-0.258595,7.930319,0.006810,-0.099670,-9.705245,10.030197,-8.358958,9.887828,0.874319,1.977538
Abbott Labs,ABT,HLTH,127036,12.112976,-7.258951,4.326061,2.599966,3.812730,1.363484,3.886888,...,-0.877646,7.939856,1.983455,9.754681,-5.645529,7.412211,-2.322711,10.382486,1.014239,1.350510
Abbvie Inc,ABBV,HLTH,138674,,,,,,,,...,-6.357350,0.546199,4.065904,-1.458604,-16.817462,21.091854,-2.206395,10.382486,1.014239,1.436935
Abiomed Inc,ABMD,HLTH,14640,-19.423240,-1.315789,-6.826667,24.441900,-12.741490,-16.605166,-6.131479,...,7.322769,-13.328444,14.681409,10.617837,-24.135631,-2.497069,-2.296501,10.382486,1.014239,-0.779869
Accenture Plc Ireland,ACN,INFT,89887,,-0.401070,-14.429530,37.803922,28.628344,19.115044,-4.234770,...,5.040437,-2.604036,6.113071,0.668382,-6.540393,4.377624,-14.290059,10.225671,1.136336,1.038986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,XYL,INDU,11991,,,,,,,,...,-4.289844,13.624241,-0.578087,5.216744,-17.628674,11.283937,-8.577682,9.887828,0.874319,-0.567203
Yum Brands Inc,YUM,DSCR,28707,4.214608,-6.842003,-7.977550,28.990256,-6.206153,3.688246,13.414138,...,-3.823932,1.367929,10.055781,4.626532,-0.549981,2.423112,-0.325260,9.668220,1.016778,0.586831
Zimmer Hldgs Inc,ZBH,HLTH,21156,,-4.895105,2.022059,11.387387,4.367519,-5.331680,6.516045,...,0.152802,12.634576,-1.505699,6.538274,-13.600042,3.019571,-11.156387,10.382486,1.014239,-0.416871
Zions Bancorp,ZION,FINA,7830,-0.932129,-1.698511,-6.287271,-10.696854,1.164570,8.906415,-4.260132,...,-3.867899,-1.878993,3.662039,-5.892335,-6.181357,4.055791,-16.276212,10.069093,1.008906,-1.093636


In [17]:
# Append the size exposure to get the final Beta matrix (betas, sector dummies, zscore).
# Any existing 'zscore' column is dropped first so that re-running this cell replaces
# the column instead of appending a duplicate (errors='ignore' makes the first run a no-op).
Beta_mat2 = pd.concat(
    [Beta_mat2.drop(columns='zscore', errors='ignore'), df_500_1['zscore']],
    axis=1,
)
Beta_mat2

Unnamed: 0_level_0,beta_SP,beta_USD,SECTOR_DSCR,SECTOR_ENER,SECTOR_FINA,SECTOR_HLTH,SECTOR_INDU,SECTOR_INFT,SECTOR_MATS,SECTOR_REAL,SECTOR_STPL,SECTOR_TCOM,zscore
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3m Co,0.897279,0.137059,0,0,0,0,1,0,0,0,0,0,1.977538
Abbott Labs,0.501031,-0.096830,0,0,0,1,0,0,0,0,0,0,1.350510
Abbvie Inc,1.450096,-0.556901,0,0,0,1,0,0,0,0,0,0,1.436935
Abiomed Inc,1.263956,0.059849,0,0,0,1,0,0,0,0,0,0,-0.779869
Accenture Plc Ireland,1.152855,0.193686,0,0,0,0,0,1,0,0,0,0,1.038986
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,1.037699,-0.747278,0,0,0,0,1,0,0,0,0,0,-0.567203
Yum Brands Inc,0.657877,-0.258958,1,0,0,0,0,0,0,0,0,0,0.586831
Zimmer Hldgs Inc,0.721118,-0.503893,0,0,0,1,0,0,0,0,0,0,-0.416871
Zions Bancorp,1.136268,-0.084878,0,0,1,0,0,0,0,0,0,0,-1.093636


In [18]:
# Cross-sectional regression, one per month: regress that month's stock returns on
# the exposures in Beta_mat2 plus an intercept. The fitted slopes are the factor
# returns for the month and fill column i of factor_ret; the intercept is discarded.
for i in range(factor_ret.shape[1]):
    # Return columns start at position 3 of df_500 (after TICKER, SECTOR, market cap).
    # Stocks with a missing return or exposure for this month are left out.
    reg = pd.concat([df_500.iloc[:, i+3], Beta_mat2], axis=1).dropna()
    # Column 0 is the month's return; every remaining column is an exposure.
    X = sm.add_constant(reg.iloc[:, 1:])
    Y = reg.iloc[:, 0]

    result2 = sm.OLS(Y, X).fit()
    # params[0] is the intercept; the rest follow the column order of Beta_mat2.
    factor_ret[:, i] = result2.params.values[1:]

factor_ret

array([[ -6.86285728,  -6.53110722, -20.77960007, ...,  -7.03079982,
          0.49019469,  -3.17679038],
       [  1.14889942,  -3.81494125,  -4.42704275, ...,   3.36258877,
          1.69933723,   0.48735719],
       [ 12.19409328,  -0.84627951,   6.14513207, ...,  -4.72032772,
         -1.78038685,  -3.9030419 ],
       ...,
       [  6.4748542 ,   0.5861965 ,   6.04239303, ...,  -0.9762139 ,
         -2.04001118,  -6.06136948],
       [  4.82512414,  -0.83342392,   5.57295535, ...,  -0.7417938 ,
         -4.75218037,  -4.66125462],
       [ -0.94741375,  -0.93846446,  -0.14960713, ...,   0.48634149,
          0.58367353,   1.49874591]])

In [19]:
# Tabular view of the factor returns: one row per factor, one column per month.
pd.DataFrame(factor_ret)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,200,201,202,203,204,205,206,207,208,209
0,-6.862857,-6.531107,-20.7796,9.16459,11.163984,1.215029,-4.323136,-5.53338,6.677938,-8.524798,...,-1.541033,-2.24575,2.079768,-2.869671,1.789404,-1.263999,-0.475853,-7.0308,0.490195,-3.17679
1,1.148899,-3.814941,-4.427043,5.534274,5.878303,-0.502738,-1.612277,-3.514096,-0.359773,-4.333348,...,-0.842876,0.077256,3.955822,1.263941,0.014721,1.749081,-1.058467,3.362589,1.699337,0.487357
2,12.194093,-0.84628,6.145132,0.47364,4.318661,3.370005,10.697396,8.667919,-11.619652,9.601583,...,-5.087975,0.796624,0.358355,1.071403,-1.414894,0.579908,1.160565,-4.720328,-1.780387,-3.903042
3,5.315452,-3.381388,4.087406,7.848457,-8.52276,3.799435,-1.851951,9.980194,-2.622675,1.17721,...,-0.680842,9.672145,3.682521,0.755149,-0.401137,-4.342493,2.554841,-10.515252,-7.983625,-8.856122
4,7.337022,-0.366846,17.953474,-9.750611,-0.611098,-1.179031,4.286246,7.722149,-9.511225,4.989156,...,-5.830681,-0.698454,-2.097977,-2.526363,0.872308,-0.221929,-1.441822,-2.727631,-0.899673,-5.49616
5,4.397612,0.190909,8.834765,2.879946,0.536815,-3.196261,0.527301,2.695108,-8.177159,-0.901076,...,-5.581573,-1.40398,0.652143,0.176648,3.40109,2.786344,3.402252,-8.664647,1.826115,-4.60305
6,9.781195,0.346426,9.998076,-3.630006,2.105712,0.565578,7.164433,10.661489,-10.55208,3.759445,...,-4.432178,-4.30573,1.022698,-2.894414,4.583613,0.885673,2.033739,-8.910813,1.470327,-5.440288
7,7.325754,-0.635126,8.745651,8.241545,3.268029,-1.688333,7.519178,0.528758,-6.466756,-2.971178,...,-4.27592,-0.903985,2.423637,-0.614386,-1.094338,5.066053,1.08013,-6.393504,-2.400866,-2.210402
8,9.080813,2.162124,10.472454,-1.789567,4.865412,-2.591446,6.720628,9.87949,-7.862185,0.769461,...,-6.761645,-0.783629,2.462243,-0.256819,-0.305439,-2.07481,0.62509,-5.299211,1.504166,-2.081822
9,3.769479,1.999704,12.708085,-6.354562,0.613596,-2.717193,1.122977,3.366614,-6.494473,3.682304,...,-0.057924,-1.05751,2.456788,3.748026,-1.893627,1.655226,-2.214345,-0.16241,2.529691,-2.28594


### B. Check for each factor their historical returns are significant or not (based on T-stat).

In [20]:
# B: one-sample t-test per factor; H0 is that the mean monthly factor return is zero.
for fac in factor_ret[:13]:
    print(stats.ttest_1samp(fac, 0))
    

Ttest_1sampResult(statistic=0.6123848615508055, pvalue=0.5409488451974684)
Ttest_1sampResult(statistic=0.03022910595701002, pvalue=0.9759131826145961)
Ttest_1sampResult(statistic=0.909892617361786, pvalue=0.36392727225175314)
Ttest_1sampResult(statistic=0.4196653580513525, pvalue=0.6751610000537014)
Ttest_1sampResult(statistic=-0.46128546958229194, pvalue=0.6450735081491071)
Ttest_1sampResult(statistic=1.436579114091394, pvalue=0.15233283506033665)
Ttest_1sampResult(statistic=0.6937049715861746, pvalue=0.48863751349341056)
Ttest_1sampResult(statistic=1.0662505619695861, pvalue=0.28754055294786196)
Ttest_1sampResult(statistic=0.8438942444744896, pvalue=0.39969336630623864)
Ttest_1sampResult(statistic=1.1615769024280702, pvalue=0.2467322191509449)
Ttest_1sampResult(statistic=0.5297613672644921, pvalue=0.5968396962181252)
Ttest_1sampResult(statistic=0.3131453726670626, pvalue=0.7544824891124041)
Ttest_1sampResult(statistic=0.82920847483845, pvalue=0.4079324805025072)


#### All 13 p-values are well above 0.05, so the null hypothesis of a zero mean return cannot be rejected for any factor: none of the historical factor returns is statistically significant.

### C. Using the last month in the back-test, i.e., 12/31/2018:
### 1. all factor portfolios are long-short neutral portfolio, i.e., the total weights sum to 0.

In [21]:
# Prepend a column of ones ('const') to the exposures; it is the intercept (alpha) term.
Beta_mat3 = sm.add_constant(Beta_mat2, prepend=True)
Beta_mat3

Unnamed: 0_level_0,const,beta_SP,beta_USD,SECTOR_DSCR,SECTOR_ENER,SECTOR_FINA,SECTOR_HLTH,SECTOR_INDU,SECTOR_INFT,SECTOR_MATS,SECTOR_REAL,SECTOR_STPL,SECTOR_TCOM,zscore
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3m Co,1.0,0.897279,0.137059,0,0,0,0,1,0,0,0,0,0,1.977538
Abbott Labs,1.0,0.501031,-0.096830,0,0,0,1,0,0,0,0,0,0,1.350510
Abbvie Inc,1.0,1.450096,-0.556901,0,0,0,1,0,0,0,0,0,0,1.436935
Abiomed Inc,1.0,1.263956,0.059849,0,0,0,1,0,0,0,0,0,0,-0.779869
Accenture Plc Ireland,1.0,1.152855,0.193686,0,0,0,0,0,1,0,0,0,0,1.038986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Xylem Inc,1.0,1.037699,-0.747278,0,0,0,0,1,0,0,0,0,0,-0.567203
Yum Brands Inc,1.0,0.657877,-0.258958,1,0,0,0,0,0,0,0,0,0,0.586831
Zimmer Hldgs Inc,1.0,0.721118,-0.503893,0,0,0,1,0,0,0,0,0,0,-0.416871
Zions Bancorp,1.0,1.136268,-0.084878,0,0,1,0,0,0,0,0,0,0,-1.093636


In [22]:
# Factor portfolio weights: the rows of (B'B)^-1 B', where B is the exposure matrix
# including the intercept column. Row k holds the stock weights of portfolio k.
# np.asmatrix is the function the np.mat alias pointed to; the alias itself was
# removed in NumPy 2.0. dtype=float guards against integer/bool dummy columns
# producing a non-float matrix.
mat_Beta = np.asmatrix(Beta_mat3, dtype=float)
# With np.matrix operands, * is matrix multiplication and .I is the inverse.
factor_port = (mat_Beta.T * mat_Beta).I * mat_Beta.T
pd.DataFrame(factor_port)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,495,496,497,498,499,500,501,502,503,504
0,0.001139,0.002272,-0.005884,-0.002052,0.001313,0.001965,-0.002058,0.00395,-0.010133,0.027368,...,0.004889,-0.006295,0.035596,-0.001224,-6.5e-05,-0.000213,0.003299,0.000798,0.001286,0.000468
1,-0.002018,-0.004756,0.00983,0.004279,-0.002789,-0.003305,0.004236,-0.007546,0.021789,0.014099,...,-0.010321,0.009249,0.000794,0.001256,0.000192,-0.001722,-0.007472,-0.002914,-0.002407,0.000554
2,0.001093,-0.001271,-0.008462,0.001073,-0.000929,0.002723,0.000817,0.001203,0.008401,-0.011276,...,-0.003142,-0.015048,0.002646,-0.005493,0.000302,-0.010129,-0.004525,-0.006331,0.000622,0.006983
3,0.001229,0.003653,-0.005369,-0.003273,0.002178,0.001829,-0.003192,0.020548,-0.017244,-0.043585,...,0.008004,0.011727,-0.036803,0.000182,-0.000197,0.003225,0.021622,0.003331,0.0016,-0.00177
4,0.00146,0.002153,-0.008168,-0.00196,0.001201,0.002705,-0.002023,0.004495,-0.008999,-0.04714,...,0.004543,-0.010061,-0.035258,-0.002523,-3e-06,-0.002462,0.002535,-0.000542,0.001514,0.002041
5,0.001431,0.00343,-0.006922,-0.003085,0.002014,0.002329,-0.003051,0.005394,-0.015753,-0.045656,...,0.022155,-0.006407,-0.036323,-0.000818,-0.000142,0.001388,0.005427,0.002185,0.016423,-0.000502
6,0.000733,0.018465,0.013105,0.014536,0.001231,0.001117,-0.001817,0.002999,-0.009724,-0.040509,...,0.004531,-0.002459,-0.036278,-2.5e-05,-0.000106,0.001617,0.003482,0.018161,0.000935,0.01554
7,0.015679,0.003267,-0.005393,-0.002931,0.001938,0.001828,-0.002872,0.004789,-0.015286,-0.043555,...,0.007138,-0.004205,-0.036558,-0.000148,-0.000163,0.016862,0.005444,0.002684,0.001499,-0.001219
8,0.001445,0.00477,-0.005929,-0.004267,0.017568,0.002037,0.010568,0.006428,-0.008062,-0.044524,...,0.010488,-0.003067,-0.037373,0.015511,0.014424,0.005154,0.008391,0.004891,0.001966,-0.002978
9,0.001588,0.00279,-0.008516,-0.002528,0.001591,0.002833,-0.002564,0.005225,-0.012144,-0.047715,...,0.00596,-0.009769,-0.035577,-0.002182,-5.1e-05,-0.001386,0.003757,0.000334,0.001726,0.001369


In [23]:
# One weight row per regressor (intercept + 13 factors), one column per stock.
factor_port.shape

(14, 505)

In [24]:
# Net weight of each portfolio (sum across stocks).
sumweight = factor_port.sum(axis=1)
# Row 0 belongs to the intercept column, so only the 13 factor portfolios are shown.
sumweight[1:]

matrix([[ 2.68188249e-15],
        [-1.11022302e-16],
        [ 5.55111512e-16],
        [-8.32667268e-16],
        [-1.58206781e-15],
        [-1.45716772e-15],
        [-1.11022302e-15],
        [-1.85962357e-15],
        [-1.19348975e-15],
        [-8.32667268e-17],
        [-1.60982339e-15],
        [-1.12410081e-15],
        [ 7.28583860e-17]])

In [25]:
# The weights of every factor portfolio sum to zero up to floating-point round-off, so all factor portfolios are long-short neutral.

### 2. For any factor portfolio, it has unit exposure to its own factor, but zero exposure to all other factors in the model.

In [26]:
# Exposure of each factor portfolio to its own factor: (exposure column i)' * (weights of portfolio i).
# Index 0 is the intercept, so the factors are 1..13.
for i in range(1, 14):
    own_weights = factor_port[i, :].T
    expoitself = mat_Beta[:, i].T * own_weights
    print(expoitself)

[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]
[[1.]]


In [27]:
# For any factor portfolio, it has unit exposure to its own factor (equal to 1).

In [28]:
# Take the first factor portfolio (row 1) and compute its exposure to each of the
# other factors (exposure columns 2..13).
first_port_weights = factor_port[1, :].T
for i in range(2, 14):
    otherexpo = mat_Beta[:, i].T * first_port_weights
    print(otherexpo)

[[0.]]
[[-3.81639165e-17]]
[[8.63024929e-17]]
[[1.2490009e-16]]
[[2.08166817e-17]]
[[9.02056208e-17]]
[[1.3964524e-16]]
[[6.24500451e-17]]
[[1.11022302e-16]]
[[4.68375339e-17]]
[[1.56125113e-17]]
[[-4.16333634e-17]]


In [29]:
# Exposure matrix: entry (k, j) is the exposure of factor portfolio k+1 to factor j+1,
# i.e. (weights of portfolio k+1) * (exposure column j+1). Row/column 0 of the full
# product belongs to the intercept and is excluded, leaving a 13x13 block.
# A single matrix product computes all entries at once instead of storing 169
# separate 1x1 matrix results into scalar slots; off-diagonal entries are
# floating-point round-off either way.
exposure_matrix = pd.DataFrame(np.asarray(factor_port[1:, :] * mat_Beta[:, 1:]))
exposure_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1.0,0.0,-3.8163920000000003e-17,8.630249000000001e-17,1.249001e-16,2.0816680000000002e-17,9.020562e-17,1.396452e-16,6.245005e-17,1.110223e-16,4.683753e-17,1.561251e-17,-4.1633360000000003e-17
1,1.94289e-16,1.0,4.1633360000000003e-17,-5.637851e-17,-5.5511150000000004e-17,-8.673617e-18,6.938894e-18,-5.5511150000000004e-17,8.673617e-18,-1.561251e-17,-8.673616999999999e-19,8.673617e-18,2.775558e-17
2,-2.567391e-15,-2.428613e-16,1.0,-4.943962e-17,3.833739e-16,-5.5511150000000004e-17,2.584738e-16,3.6429190000000005e-17,1.873501e-16,-6.938894e-18,6.245005e-17,2.823262e-16,-3.677614e-16
3,-3.982925e-15,-2.844947e-16,2.359224e-16,1.0,2.0816680000000002e-17,-2.810252e-16,-2.983724e-16,1.092876e-16,1.613293e-16,-2.324529e-16,3.053113e-16,2.099015e-16,-3.608225e-16
4,-5.07927e-15,-3.330669e-16,4.917941e-16,-1.010476e-16,1.0,-3.538836e-16,-2.775558e-16,-1.31839e-16,-9.020562e-17,-1.457168e-16,1.179612e-16,2.385245e-16,-4.1633360000000003e-17
5,-5.64826e-15,-1.856154e-16,3.009745e-16,-1.335737e-16,2.419939e-16,1.0,-3.295975e-16,-6.314393e-16,2.4286130000000003e-17,-1.301043e-16,4.813858e-17,1.053845e-16,-1.665335e-16
6,-4.093947e-15,-2.255141e-16,4.89192e-16,1.2143060000000001e-17,3.747003e-16,-3.712308e-16,1.0,-2.246467e-16,9.714451000000001e-17,-3.469447e-16,-3.122502e-17,4.336809e-18,-3.469447e-16
7,-5.176415e-15,-1.162265e-16,-1.94289e-16,2.6020850000000003e-17,4.302114e-16,-3.330669e-16,6.678685000000001e-17,1.0,4.1633360000000003e-17,-7.979728000000001e-17,1.231654e-16,2.430781e-16,-3.053113e-16
8,-4.218847e-15,-7.632783000000001e-17,2.844947e-16,-1.062518e-16,-2.498002e-16,-2.2551410000000003e-17,2.914335e-16,-1.322727e-16,1.0,-2.844947e-16,1.595946e-16,3.191891e-16,-3.469447e-16
9,-3.521489e-15,-3.053113e-16,2.905662e-16,-2.020953e-16,7.563394e-16,-1.908196e-16,-1.665335e-16,1.292369e-16,1.734723e-18,1.0,9.540979000000001e-17,3.096481e-16,-2.289835e-16


In [30]:
# Every off-diagonal entry is zero up to floating-point round-off, so each factor portfolio has zero exposure to all other factors in the model.