### Portfolio Factor Analysis

In [14]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap

Read the Excel workbook and load the net returns data from its second sheet (sheet index 1)

In [3]:
# Open the workbook once; the handle stays bound to `xls` after this cell.
xls = pd.ExcelFile('Simple_Strategies_Returns.xlsx')
# Sheet position 1 (the second sheet) is the one loaded as net returns.
netreturns = xls.parse(1)

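Split the data at `Month` = 197307 (July 1973) into 'old' and 'new' sections:  
'Old' section has 120 months and 26 columns (24 factors plus the `index` and `Month` columns)  
'New' section has 486 months and 34 columns (32 factors plus the `index` and `Month` columns)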

In [4]:
# 'Old' sample: every row whose Month code is below 197307.
# NOTE(review): reset_index() is called without drop=True, so the previous row
# labels are kept as an extra 'index' column in the result.
# dropna(axis=1) then removes every column that has a missing value in this slice.
oldnetreturns = (
    netreturns.loc[netreturns['Month'] < 197307]
    .reset_index()
    .dropna(axis=1)
)

In [5]:
# Show (n_rows, n_columns) of the 'old' slice; the column count covers every
# column left in the frame, not only return series.
oldnetreturns.shape

(120, 26)

In [6]:
# 'New' sample: every row whose Month code is 197307 or later.
# NOTE(review): reset_index() is called without drop=True, so the previous row
# labels are kept as an extra 'index' column in the result.
newnetreturns = netreturns.loc[netreturns['Month'] >= 197307].reset_index()

In [7]:
# Show (n_rows, n_columns) of the 'new' slice; the column count covers every
# column left in the frame, not only return series.
newnetreturns.shape

(486, 34)

TODO:  
- Import S&P 500 returns  
- Import Fama-French data
- Build dimensionality reduction pipeline (to try different methods)
- Build supervised learning pipeline (i.e., time-series regression of reduced data on S&P returns)

### Representation Learning Options

Linear PCA

In [8]:
# Number of principal components to keep.
k = 5

# Fit on the return series only. `newnetreturns` also carries the `Month` date
# code and the `index` column left behind by reset_index(); neither is a return,
# and their variance is orders of magnitude larger than that of the returns, so
# leaving them in would make the leading components track time rather than the
# factors. errors='ignore' keeps this cell working if either column is absent.
newnet_factors = newnetreturns.drop(['index', 'Month'], axis=1, errors='ignore')

pca = PCA(n_components=k)
# Result has one row per month and one column per retained component.
linpca_newnet = pca.fit_transform(newnet_factors)

In [9]:
# Sanity check: the second dimension should equal k, the requested number of components.
linpca_newnet.shape

(486, 5)

Kernel PCA

In [10]:
# Number of kernel principal components to keep.
k = 5

# Fit on the return series only. `newnetreturns` also carries the `Month` date
# code and the `index` column left behind by reset_index(); their magnitudes
# would dominate the polynomial kernel's inner products and hide the returns.
# errors='ignore' keeps this cell working if either column is absent.
newnet_factors = newnetreturns.drop(['index', 'Month'], axis=1, errors='ignore')

# With this many samples and so few components the default eigen-solver is
# ARPACK, which starts from a random vector; pin random_state so that
# Restart & Run All reproduces the same embedding.
poly_kpca = KernelPCA(n_components=k, kernel='poly', random_state=0)
poly_pca_newnet = poly_kpca.fit_transform(newnet_factors)

In [11]:
# Sanity check: the second dimension should equal k, the requested number of components.
poly_pca_newnet.shape

(486, 5)

In [12]:
# Number of kernel principal components to keep.
k = 5

# Fit on the return series only. `newnetreturns` also carries the `Month` date
# code and the `index` column left behind by reset_index(); the RBF kernel is
# distance-based, so those large-valued columns would decide the distances
# between rows almost entirely on their own.
# errors='ignore' keeps this cell working if either column is absent.
newnet_factors = newnetreturns.drop(['index', 'Month'], axis=1, errors='ignore')

# With this many samples and so few components the default eigen-solver is
# ARPACK, which starts from a random vector; pin random_state so that
# Restart & Run All reproduces the same embedding.
rbf_kpca = KernelPCA(n_components=k, kernel='rbf', random_state=0)
rbf_pca_newnet = rbf_kpca.fit_transform(newnet_factors)

In [13]:
# Sanity check: the second dimension should equal k, the requested number of components.
rbf_pca_newnet.shape

(486, 5)

Isomap

In [15]:
# Dimensionality of the Isomap embedding.
k = 5

# Fit on the return series only. `newnetreturns` also carries the `Month` date
# code and the `index` column left behind by reset_index(); Isomap builds its
# neighbour graph from distances between rows, so with those columns present
# each month's nearest neighbours would simply be the adjacent months.
# errors='ignore' keeps this cell working if either column is absent.
newnet_factors = newnetreturns.drop(['index', 'Month'], axis=1, errors='ignore')

# Each row is linked to its 5 nearest neighbours when building the graph.
embedding = Isomap(n_components=k, n_neighbors=5)
isomap_newnet = embedding.fit_transform(newnet_factors)

In [16]:
# Sanity check: the second dimension should equal k, the requested embedding dimension.
isomap_newnet.shape

(486, 5)