# PCA for Algorithmic Trading: Eigenportfolios

### Loading Libraries

In [1]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Warnings
import warnings

# Scikit-Learn
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

In [2]:
# Silence all warnings for the rest of the notebook
warnings.filterwarnings(action='ignore')

# Plain white seaborn theme for every figure
sns.set_style(style='white')

# Fix NumPy's global random seed so repeated runs give the same results
np.random.seed(seed=42)

### Eigenportfolios

#### Data Preparation

In [3]:
# Shorthand for label-based slicing on a multi-level index
idx = pd.IndexSlice

# Open the store read-only: this cell only reads from it, and the default
# append mode would create an empty file if the path were wrong.
with pd.HDFStore('../../data/assets.h5', mode='r') as store:
    # The 30 entries with the largest market capitalization; their index
    # labels are used below to select tickers
    stocks = store['us_equities/stocks'].marketcap.nlargest(30)

    # Adjusted close for those tickers over 2010-2018, pivoted to one column
    # per ticker and converted to row-over-row percentage changes
    returns = (store['quandl/wiki/prices']
               .loc[idx['2010': '2018', stocks.index], 'adj_close']
               .unstack('ticker')
               .pct_change())

In [4]:
# Winsorize each column at its own 2.5% / 97.5% quantiles, then standardize
# every column to zero mean and unit variance.
# `scale` already centers and rescales each column, so the former manual
# z-scoring step before it was redundant; it also produced NaN (0/0) for any
# constant column, which `scale` on its own handles.
# The result is a NumPy array, not a DataFrame.
# NOTE(review): `normed_returns` is not used by the cells visible here —
# confirm whether the covariance / PCA below were meant to use it.
normed_returns = scale(returns.clip(lower=returns.quantile(q=.025),
                                    upper=returns.quantile(q=.975),
                                    axis=1))

In [5]:
# Keep only the columns that have non-missing values in at least 95% of rows
min_rows_per_column = int(len(returns.index) * .95)
returns = returns.dropna(axis='columns', thresh=min_rows_per_column)

# Then keep only the rows that have non-missing values in at least 95% of the
# remaining columns
min_columns_per_row = int(len(returns.columns) * .95)
returns = returns.dropna(axis='index', thresh=min_columns_per_row)

# Print a summary of the filtered frame
returns.info()

In [6]:
# Covariance matrix between the columns of the filtered returns
cov = returns.cov()

In [7]:
# Hierarchically clustered heatmap of the covariance matrix; the trailing
# semicolon keeps the notebook from echoing the returned object.
sns.clustermap(cov);

### Running PCA

In [8]:
# Instantiate PCA with default settings and fit it in one step
# NOTE(review): the model is fitted on the covariance matrix itself rather
# than on the returns, so each row of `cov` is treated as an observation —
# confirm this is intended.
pca = PCA().fit(cov)

# Explained-variance ratio of the first five components, shown as percentages
explained_variance = pd.Series(pca.explained_variance_ratio_,
                               name='Explained Variance')
explained_variance.to_frame().head().style.format('{:,.2%}')

#### Creating Portfolio Weights from Principal Components

In [9]:
# First four principal components, labelled with the covariance matrix columns
leading_components = pca.components_[:4]
top4 = pd.DataFrame(leading_components, columns=cov.columns)

# Rescale each component so that its weights sum to one
# NOTE(review): a component whose loadings sum to nearly zero would produce
# very large weights here — verify on the actual data.
component_totals = top4.sum(axis=1)
eigen_portfolios = top4.div(component_totals, axis='index')
eigen_portfolios.index = [f'Portfolio {k + 1}' for k in range(4)]

### Eigenportfolio Weights

In [10]:
# One bar chart of weights per eigenportfolio, arranged on a 2x2 grid
weights_by_ticker = eigen_portfolios.T
axes = weights_by_ticker.plot.bar(subplots=True, layout=(2, 2),
                                  figsize=(14, 8), legend=False)

# Label the y-axis and blank out the x-axis label on every panel
for ax in axes.ravel():
    ax.set(ylabel='Portfolio Weight', xlabel='')

sns.despine()
plt.tight_layout()
plt.show()

### Eigenportfolio Performance

In [11]:
# 2x2 grid with a shared x-axis: one benchmark panel plus three eigenportfolios
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 6), sharex=True)
axes = axes.flatten()

# Benchmark: cumulative return of the equal-weighted average across columns
market = returns.mean(axis=1)
market.add(1).cumprod().sub(1).plot(title='The Market', ax=axes[0])

# Cumulative return of the first three eigenportfolios; the fourth is not
# drawn because only three panels remain after the benchmark
for number, panel in enumerate(axes[1:], start=1):
    weights = eigen_portfolios.iloc[number - 1]
    rc = returns.mul(weights).sum(axis=1).add(1).cumprod().sub(1)
    rc.plot(title=f'Portfolio {number}', ax=panel, lw=1, rot=0)

# Remove the x-axis label from every panel
for panel in axes:
    panel.set_xlabel('')

sns.despine()
fig.tight_layout()
plt.show()