# Equity PCA Factor Models

In [36]:
from scipy import stats
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
import statsmodels.api as sm

import pathlib
import sys
# Put the directory two levels above the current working directory on the
# import path so that the project-local `utils` package imported right below
# can be resolved.
utils_path = pathlib.Path.cwd().parent.parent
sys.path.append(str(utils_path))
import utils.layout as lay
from utils.functions import PCA

In [10]:
# Default template for every figure in this notebook: plotly's 'simple_white'
# combined with the custom 'blog_mra' template.
# NOTE(review): 'blog_mra' is not a stock plotly template - presumably it is
# registered as a side effect of `import utils.layout as lay`; confirm.
pio.templates.default = 'simple_white+blog_mra'

### Case study

In [17]:
# Price history from the "DJIA Prices" sheet, indexed by the "Date" column.
df = pd.read_excel(
    r"data/Case Study_II.2_PCA Equity Factor Model.xls",
    sheet_name="DJIA Prices",
).set_index("Date")

# Log returns between consecutive rows; the first row has no predecessor
# (all NaN after the shift), so it is dropped.
ret = np.log(df.div(df.shift())).iloc[1:]

# Sample covariance matrix of the returns - the input to the PCA below.
cov_mx = ret.cov()

In [15]:
# Rebase every price series to 1.0 at the first observation so that all
# stocks can be compared on one axis.
fig = px.line(df / df.values[0]).update_layout(
    legend_title="",
    title_text="30 DJIA 01/05 - 04/06",
    yaxis_title="Price",
)
fig

In [156]:
# Eigen-decomposition of the return covariance matrix via the project helper.
eigenvals, eigenvecs = PCA(cov_mx)

# Flip the sign of every eigenvector (the sign of an eigenvector is arbitrary).
eigenvecs = -1 * eigenvecs

# Principal components: the returns projected onto the eigenvectors.
princ_comp = ret @ eigenvecs

In [157]:
# Display the eigenvalue summary returned by PCA().
# NOTE(review): in the recorded output below, the "Eignevalue" row (label is
# misspelled by the helper) is not monotonically decreasing for λ21-λ30 and
# does not line up with the "Variation Explained" row, which is - check the
# eigenvalue ordering inside utils.functions.PCA.
eigenvals

Unnamed: 0,λ1,λ2,λ3,λ4,λ5,λ6,λ7,λ8,λ9,λ10,...,λ21,λ22,λ23,λ24,λ25,λ26,λ27,λ28,λ29,λ30
Eignevalue,0.001318,0.000664,0.000309,0.00028,0.000197,0.000176,0.00015,0.000147,0.000134,0.000128,...,8.3e-05,7.9e-05,3.9e-05,4.3e-05,4.6e-05,4.9e-05,5.4e-05,7.1e-05,6e-05,6.6e-05
Variation Explained,0.273412,0.137797,0.064201,0.058135,0.040959,0.036503,0.031185,0.030493,0.027761,0.026507,...,0.012434,0.011223,0.010164,0.009537,0.008997,0.008172,0.006707,0.005948,0.005376,0.003896
Cumulative Variation,0.273412,0.411209,0.47541,0.533546,0.574505,0.611008,0.642193,0.672686,0.700447,0.726954,...,0.92998,0.941202,0.951366,0.960904,0.969901,0.978073,0.98478,0.990728,0.996104,1.0


In [158]:
# Time series of the principal components up to and including column "λ5".
fig = px.line(princ_comp.loc[:, :"λ5"]).update_layout(
    legend_title="",
    title_text="5 Principal Components",
    yaxis_title="Value",
)
fig

In [168]:
# Regress each stock's returns on an intercept plus the first five principal
# components and collect, per stock:
#   df_coefs  - fitted coefficients (rows: stocks, columns: const, λ1..λ5)
#   df_tstats - t-statistics of those coefficients (same layout)
#   df_r_sq   - regression R-squared (index: stocks)
#
# The original pre-built the result frames with
# `index=["intercept"].extend(...)`; `list.extend` returns None, so that index
# was never applied and the row labels came from `reg.params` anyway. The
# results are now gathered in dicts and the frames are built once, which also
# gives `df_r_sq` a proper float dtype instead of an untyped empty Series.

# The design matrix is identical for every stock, so build it once.
X = sm.add_constant(princ_comp[princ_comp.columns[:5]])

params_by_stock = {}
tvalues_by_stock = {}
r_sq_by_stock = {}

for stock in ret.columns:
    reg = sm.OLS(ret[stock], X).fit()

    params_by_stock[stock] = reg.params
    tvalues_by_stock[stock] = reg.tvalues
    r_sq_by_stock[stock] = reg.rsquared

# Dict keys become columns; transpose so that each stock is a row.
df_coefs = pd.DataFrame(params_by_stock).T
df_tstats = pd.DataFrame(tvalues_by_stock).T
df_r_sq = pd.Series(r_sq_by_stock, dtype=float)

$$ Betas $$ 
$$\begin{array}{lrrrrrr}
\hline
 & const & λ1 & λ2 & λ3 & λ4 & λ5 \\
\hline
AA & -0.00 & -0.20 & -0.10 & -0.29 & -0.14 & -0.42 \\
AIG & -0.00 & -0.17 & -0.11 & 0.06 & 0.17 & 0.26 \\
ATT & 0.00 & -0.12 & -0.04 & 0.02 & 0.05 & 0.03 \\
AXP & 0.00 & -0.18 & -0.05 & -0.06 & 0.08 & 0.10 \\
BA & 0.00 & -0.16 & -0.12 & -0.06 & 0.03 & 0.12 \\
... & ... & ... & ... & ... & ... & \\
\hline
\end{array}$$

$$ \text{T-Stats} $$
$$\begin{array}{lrrrrrr}
\hline
 & const & λ1 & λ2 & λ3 & λ4 & λ5 \\
\hline
AA & -0.94 & -14.92 & -5.11 & -10.41 & -4.64 & -12.11 \\
AIG & -0.31 & -10.94 & -5.32 & 2.05 & 5.21 & 6.52 \\
ATT & 0.05 & -11.19 & -2.53 & 0.98 & 2.04 & 0.98 \\
AXP & 0.12 & -15.00 & -3.04 & -2.51 & 2.88 & 3.12 \\
BA & 2.23 & -9.24 & -5.02 & -1.74 & 0.75 & 2.61 \\
... & ... & ... & ... & ... & ... & \\
\hline
\end{array}
$$

In [169]:
# Horizontal bar chart of the per-stock regression R-squared, sorted ascending.
fig = px.bar(df_r_sq.sort_values(), orientation='h').update_layout(
    showlegend=False,
    height=800,
    title_text="Regression R-Squared",
    xaxis_title="R2",
    yaxis_title="",
)
fig.show()

In [329]:
# Sample variance of each principal component up to and including "λ5".
var_princ_comp = princ_comp.loc[:, :"λ5"].var()

# Volatility: square root of the variance scaled by 250
# (presumably the number of trading days per year, i.e. annualisation - confirm).
vol_princ_comp = var_princ_comp.mul(250).pow(0.5)

In [188]:
# Per-stock variance decomposition, each scaled by 250 like the factor
# variances above.

# Systematic variance: squared factor betas (columns from "λ1" onward, so the
# intercept is excluded) weighted by the principal-component variances.
sys_var = df_coefs.loc[:, "λ1":].pow(2).dot(var_princ_comp).mul(250)

# Total variance of each stock's returns.
tot_var = ret.var().mul(250)

# Residual (specific) variance: whatever the factors do not explain.
res_var = tot_var.sub(sys_var)

# NOTE(review): the *_risk names are plain aliases of the variances above, not
# their square roots - confirm whether volatilities were intended.
sys_risk, tot_risk, res_risk = sys_var, tot_var, res_var

In [221]:
# Wide table: one row per stock, columns "Systematic" and "Specific".
var_decomp = pd.DataFrame([sys_var, res_var], index=["Systematic", "Specific"]).T

# Long table with one row per (Type, Stock) pair, largest variance first -
# the layout the stacked bar chart expects.
var_decomp_stack = (
    var_decomp.unstack()
    .reset_index()
    .set_axis(["Type", "Stock", "Variance"], axis=1)
    .sort_values(by="Variance", ascending=False)
)

In [223]:
# Horizontal bar chart of variance per stock, coloured by Type
# (Systematic / Specific).
fig = px.bar(
    var_decomp_stack,
    x="Variance",
    y="Stock",
    color='Type',
    orientation='h',
    title='Variance Decomposition',
).update_layout(height=800, legend_title="")
fig.show()

### (i) Arbitrary Portfolio - Input Amounts

In [331]:
# Portfolio (i): read the block of sheet "Ex_II.2.9" that starts after 17
# skipped rows and keep its first two data rows (used below as "Amount ($)"
# and "Weight"). The first column holds the row labels; the remaining columns
# are relabelled with the stock tickers from `ret`, and the result is
# transposed so that each stock is a row.
weights = (
    pd.read_excel(
        r"data/Case Study_II.2_PCA Equity Factor Model.xls",
        sheet_name="Ex_II.2.9",
        skiprows=17,
    )
    .head(2)
    .pipe(lambda block: block.set_index(block.columns[0]))
    .set_axis(list(ret.columns), axis=1)
    .rename_axis("Stock")
    .T
)

In [332]:
# Bar chart of the "Weight" column, one bar per stock.
fig = px.bar(
    weights,
    x=weights.index,
    y="Weight",
    title="Arbitrary Long-Short Portfolio",
).update_layout(xaxis_title="")
fig.show()

$$
\begin{array}{lllrrrrrrrrrrrrrrrrrrrrrrrrrrrr}
\hline
 & AA & AIG & ATT & AXP & BA & CAT & CITI & DD & DIS & GE & GM & HD & HON & HP & IBM & INT & JNJ & JPM & KO & MCD & MMM & MO & MRK & MSFT & PFE & PG & UTX & VZ & WM & XON \\
Type &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\hline
Amount ($) & 100 & 0 & 0.00 & -50.00 & 0.00 & 0.00 & 75.00 & 0.00 & 40.00 & 0.00 & 80.00 & -40.00 & -20.00 & 90.00 & 0.00 & -15.00 & 0.00 & 35.00 & 0.00 & 25.00 & -10.00 & 10.00 & 0.00 & 25.00 & 0.00 & 45.00 & -75.00 & 0.00 & 0.00 & 30.00 \\
Weight & 0.29 & 0 & 0.00 & -0.14 & 0.00 & 0.00 & 0.22 & 0.00 & 0.12 & 0.00 & 0.23 & -0.12 & -0.06 & 0.26 & 0.00 & -0.04 & 0.00 & 0.10 & 0.00 & 0.07 & -0.03 & 0.03 & 0.00 & 0.07 & 0.00 & 0.13 & -0.22 & 0.00 & 0.00 & 0.09 \\
\hline
\end{array}$$

In [333]:
# Portfolio sensitivity to each regressor: the stock-level coefficients
# combined with the holdings, once using the "Weight" column (%) and once
# using the "Amount ($)" column ($).
ptf_sens_perc, ptf_sens_doll = (
    df_coefs.T.dot(weights[holding]) for holding in ("Weight", "Amount ($)")
)

# Side-by-side table with one row per regressor and columns "%" and "$".
ptf_sens_all = pd.DataFrame(
    [ptf_sens_perc, ptf_sens_doll],
    index=["%", "$"],
).T

$$ \text{Portfolio Sensitivities} $$
$$\begin{array}{lrr} \hline
 & \% & \$ \\ \hline
const & -0.00 & -0.02 \\
λ1 & 0.21 & 73.84 \\
λ2 & -0.16 & -55.78 \\
λ3 & -0.10 & -33.28 \\
λ4 & -0.08 & -27.76 \\
λ5 & 0.00 & 0.68 \\
\hline
\end{array}$$

### (ii) Equal Weight Portfolio

In [334]:
# Portfolio (ii): read the block of sheet "Ex_II.2.9" that starts after 36
# skipped rows and keep its first two data rows (used below as "Amount ($)"
# and "Weight"). The first column holds the row labels; the remaining columns
# are relabelled with the stock tickers from `ret`, and the result is
# transposed so that each stock is a row.
weights = (
    pd.read_excel(
        r"data/Case Study_II.2_PCA Equity Factor Model.xls",
        sheet_name="Ex_II.2.9",
        skiprows=36,
    )
    .head(2)
    .pipe(lambda block: block.set_index(block.columns[0]))
    .set_axis(list(ret.columns), axis=1)
    .rename_axis("Stock")
    .T
)

$$
\begin{array}{lllllllllllllllllllllllllllllll}
\hline
 & AA & AIG & ATT & AXP & BA & CAT & CITI & DD & DIS & GE & GM & HD & HON & HP & IBM & INT & JNJ & JPM & KO & MCD & MMM & MO & MRK & MSFT & PFE & PG & UTX & VZ & WM & XON \\
Stock &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\hline
Amount ($) & 1 & 1 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 \\
Weight & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 & 0.03 \\
\hline
\end{array}$$

In [335]:
# Portfolio sensitivity to each regressor for the equal-weight portfolio:
# stock-level coefficients combined with the "Weight" column (%) and with the
# "Amount ($)" column ($).
ptf_sens_perc, ptf_sens_doll = (
    df_coefs.T.dot(weights[holding]) for holding in ("Weight", "Amount ($)")
)

# Side-by-side table with one row per regressor and columns "%" and "$".
ptf_sens_all = pd.DataFrame(
    [ptf_sens_perc, ptf_sens_doll],
    index=["%", "$"],
).T

$$ \text{Portfolio Sensitivities} $$
$$
\begin{array}{lrr}
\hline
 & \% & \$ \\
\hline
const & -0.00 & -0.00 \\
λ1 & 0.17 & 5.19 \\
λ2 & 0.04 & 1.35 \\
λ3 & -0.01 & -0.26 \\
λ4 & -0.00 & -0.03 \\
λ5 & -0.01 & -0.43 \\
\hline
\end{array}$$

### (iii) DJIA Portfolio (1 Share in Each Stock)

In [336]:
# Portfolio (iii): read the block of sheet "Ex_II.2.9" that starts after 54
# skipped rows and keep its first two data rows (used below as "Amount ($)"
# and "Weight"). The first column holds the row labels; the remaining columns
# are relabelled with the stock tickers from `ret`, and the result is
# transposed so that each stock is a row.
weights = (
    pd.read_excel(
        r"data/Case Study_II.2_PCA Equity Factor Model.xls",
        sheet_name="Ex_II.2.9",
        skiprows=54,
    )
    .head(2)
    .pipe(lambda block: block.set_index(block.columns[0]))
    .set_axis(list(ret.columns), axis=1)
    .rename_axis("Stock")
    .T
)

In [337]:
# Bar chart of the "Weight" column, one bar per stock, for the portfolio
# holding one share of each stock.
# The chart title previously read "DIJA"; corrected to "DJIA" to match the
# section heading and the rest of the notebook.
fig = px.bar(weights, x=weights.index, y="Weight", title="DJIA Index Portfolio")
fig.update_layout(xaxis_title="")
fig.show()

$$\begin{array}{lllllllllllllllllllllllllllllll}
\hline
 & AA & AIG & ATT & AXP & BA & CAT & CITI & DD & DIS & GE & GM & HD & HON & HP & IBM & INT & JNJ & JPM & KO & MCD & MMM & MO & MRK & MSFT & PFE & PG & UTX & VZ & WM & XON \\
Stock &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\hline
Amount ($) & 35.39 & 63.81 & 25.35 & 52.25 & 86.46 & 77.87 & 48.01 & 44.61 & 27.02 & 33.97 & 21.79 & 40.28 & 43.55 & 32.96 & 81.66 & 19.06 & 58.37 & 42.50 & 41.85 & 34.60 & 85.06 & 70.54 & 34.74 & 27.15 & 24.87 & 56.34 & 64.22 & 32.52 & 45.82 & 65.00 \\
Weight & 0.02 & 0.05 & 0.02 & 0.04 & 0.06 & 0.05 & 0.03 & 0.03 & 0.02 & 0.02 & 0.02 & 0.03 & 0.03 & 0.02 & 0.06 & 0.01 & 0.04 & 0.03 & 0.03 & 0.02 & 0.06 & 0.05 & 0.02 & 0.02 & 0.02 & 0.04 & 0.05 & 0.02 & 0.03 & 0.05 \\
\hline
\end{array}$$

In [323]:
# Portfolio sensitivity to each regressor for the one-share-per-stock
# portfolio: stock-level coefficients combined with the "Weight" column (%)
# and with the "Amount ($)" column ($).
ptf_sens_perc, ptf_sens_doll = (
    df_coefs.T.dot(weights[holding]) for holding in ("Weight", "Amount ($)")
)

# Side-by-side table with one row per regressor and columns "%" and "$".
ptf_sens_all = pd.DataFrame(
    [ptf_sens_perc, ptf_sens_doll],
    index=["%", "$"],
).T

$$ \text{Portfolio Sensitivities} $$
$$
\begin{array}{lrr}
\hline
 & \% & \$ \\
\hline
const & 0.00 & 0.07 \\
λ1 & 0.17 & 238.82 \\
λ2 & 0.07 & 92.69 \\
λ3 & 0.02 & 28.69 \\
λ4 & -0.01 & -19.38 \\
λ5 & -0.01 & -14.77 \\
\hline
\end{array}$$