## Portfolio Factor Analysis

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap

### Import Data

Read the Excel workbook and load the net-returns data from the sheet at index 1 (the second sheet).

In [2]:
# Open the workbook once, then load the sheet at position 1 (the second sheet),
# which the accompanying notes identify as the net-returns data.
xls = pd.ExcelFile('Simple_Strategies_Returns.xlsx')
netreturns = pd.read_excel(xls, sheet_name=1)

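Split data into 'old' and 'new' sections at `Month` = 197307:  
'Old' section (`Month` < 197307): 120 months × 25 columns, including `Month`, after dropping columns with missing values  
'New' section (`Month` >= 197307): 486 months × 33 columns, including `Month`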

In [3]:
# Keep rows whose Month value is below the 197307 cutoff, renumber the rows
# from zero, then discard every column that still contains a missing value.
oldnetreturns = (
    netreturns.loc[netreturns["Month"] < 197307]
    .reset_index(drop=True)
    .dropna(axis=1)
)
oldnetreturns.shape

(120, 25)

In [4]:
# Keep rows at or after the 197307 cutoff and renumber them from zero.
# No columns are dropped here, unlike the pre-cutoff split.
newnetreturns = (
    netreturns.loc[netreturns["Month"] >= 197307]
    .reset_index(drop=True)
)
newnetreturns.shape

(486, 33)

Fama-French Data

In [5]:
# Load the raw Fama-French 3-factor table; it is split by row position into
# monthly and yearly parts further down.
ff_3 = pd.read_csv("FF_3_Factors")

In [6]:
# Monthly part: the first 1110 rows by position (the end position is excluded),
# all columns.
ff_3_monthly = ff_3.iloc[:1110, :]

In [11]:
# Yearly part: rows labelled 1112 onward, minus the row labelled 1204, renumbered
# from zero, with the "Month" column renamed to "Year".
# NOTE(review): row 1204 is presumably a non-data trailer line — confirm against
# the file.
# Built as one non-mutating chain: calling drop/rename with inplace=True on a
# .loc slice of ff_3 can raise pandas' SettingWithCopyWarning.
ff_3_yearly = (
    ff_3.loc[1112:]
    .drop(index=1204)
    .reset_index(drop=True)
    .rename(columns={"Month": "Year"})
)

In [12]:
# Load the raw Fama-French 5-factor table; it is split by row label into
# monthly and yearly parts further down.
ff_5 = pd.read_csv("FF_5_Factors")

In [13]:
# Monthly part: all rows up to and including label 666 (label-based .loc
# slicing includes its end label), all columns.
# NOTE(review): the 3-factor monthly split is positional and end-exclusive, so
# the two splits treat their boundary row differently — confirm that row 666 is
# a genuine monthly observation and not a separator/header row.
ff_5_monthly = ff_5.loc[:666, :]

In [14]:
# Yearly part: rows labelled 668 onward, renumbered from zero, with the "Month"
# column renamed to "Year".
# Built as one non-mutating chain: calling rename with inplace=True on a .loc
# slice of ff_5 can raise pandas' SettingWithCopyWarning.
ff_5_yearly = (
    ff_5.loc[668:]
    .reset_index(drop=True)
    .rename(columns={"Month": "Year"})
)

TODO:  
- Import S&P 500 returns [__DONE__]  
- Import Fama-French data [__DONE__] (no data before 1970s)
- Build dimensionality reduction pipeline (to try different methods)
- Build supervised learning pipeline (i.e., timeseries regression of reduced data on S&P returns)

In [25]:
# Load the CRSP SPX index file and convert the YYYYMMDD `caldt` stamps into
# monthly periods.
spx = pd.read_csv('CRSP SPX Index File.csv')
spx['caldt'] = pd.to_datetime(spx['caldt'], format='%Y%m%d').dt.to_period('M')
# Keep months after 1963-06 up to and including 2013-12; the original row
# labels are retained at this point.
spx = spx.loc[(spx['caldt'] > '1963-06') & (spx['caldt'] <= '2013-12')]

Unnamed: 0,caldt,Value Weighted Return (incl. dividends),Value Weighted Return (ex dividends),S&P Composite Return
6,1963-07,-0.001821,-0.003131,-0.003460
7,1963-08,0.053466,0.048496,0.048749
8,1963-09,-0.010402,-0.011579,-0.011034
9,1963-10,0.034825,0.033290,0.032218
10,1963-11,-0.004487,-0.011085,-0.010539
11,1963-12,0.026065,0.024651,0.024444
12,1964-01,0.027391,0.026373,0.026926
13,1964-02,0.017015,0.012171,0.009865
14,1964-03,0.017232,0.015814,0.015167
15,1964-04,0.006192,0.005297,0.006077


In [32]:
# Pair the first column (position 0) with one further column each and renumber
# the rows from zero.
# NOTE(review): going by the table displayed for `spx`, position 0 is `caldt`,
# position 1 the value-weighted return incl. dividends and position 2 the
# value-weighted return ex dividends — confirm the column order in the file.
spx_tr = spx.iloc[:, 0:2].reset_index(drop=True)     # SPX Total Return
spx_pr = spx.iloc[:, [0, 2]].reset_index(drop=True)  # SPX Price Return

### Rep. Learning Options

Drop the `Month` column before fitting each method below, so that only the return series are used as features.

Linear PCA

In [107]:
# Linear PCA: project every column except Month onto k principal components.
k = 5
pca = PCA(n_components=k)
linpca_newnet = pca.fit_transform(newnetreturns.drop(columns="Month"))

In [108]:
# Display the shape of the linear-PCA output as a sanity check.
linpca_newnet.shape

(486, 5)

Kernel PCA

In [109]:
# Polynomial-kernel PCA on every column except Month, reduced to k components.
# random_state is pinned for reproducibility: per the scikit-learn docs, the
# default eigen_solver="auto" selects ARPACK when there are more than 200
# samples and fewer than 10 components, and ARPACK starts from a random vector.
k = 5
poly_kpca = KernelPCA(n_components=k, kernel='poly', random_state=0)
poly_pca_newnet = poly_kpca.fit_transform(newnetreturns.drop(columns="Month"))

In [110]:
# Display the shape of the polynomial-kernel PCA output as a sanity check.
poly_pca_newnet.shape

(486, 5)

In [12]:
# RBF-kernel PCA on every column except Month, reduced to k components.
# random_state is pinned for reproducibility: per the scikit-learn docs, the
# default eigen_solver="auto" selects ARPACK when there are more than 200
# samples and fewer than 10 components, and ARPACK starts from a random vector.
k = 5
rbf_kpca = KernelPCA(n_components=k, kernel='rbf', random_state=0)
rbf_pca_newnet = rbf_kpca.fit_transform(newnetreturns.drop(columns="Month"))

In [13]:
# Display the shape of the RBF-kernel PCA output as a sanity check.
rbf_pca_newnet.shape

(486, 5)

Isomap

In [113]:
# Isomap embedding of every column except Month into k dimensions, using a
# 5-nearest-neighbour graph.
k = 5
embedding = Isomap(n_neighbors=5, n_components=k)
isomap_newnet = embedding.fit_transform(newnetreturns.drop(columns="Month"))

In [114]:
# Display the shape of the Isomap embedding as a sanity check.
isomap_newnet.shape

(486, 5)