# Principal Component Analysis as an Example of Factor Model

In [5]:
import math
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.decomposition import PCA

In [6]:
lookback = 252  # trading period for factor exposure
numFactors = 5
topN = 50  # for trading strategy, long stocks with topN expected 1-day returns

In [7]:
df = pd.read_table('./example_code/IJR_20080114.txt')

In [8]:
df['Date'] = df['Date'].astype('int')

In [9]:
df.set_index('Date', inplace=True)

In [10]:
df.sort_index(inplace=True)

In [11]:
df.fillna(method='ffill', inplace=True)

### note the rows of dailyret are the observations at different time periods

In [12]:
dailyret = df.pct_change()

In [13]:
positionsTable = np.zeros(df.shape)

In [14]:
end_index = df.shape[0]

In [16]:
for t in np.arange(lookback + 1, end_index):
    R = dailyret.iloc[t - lookback + 1:t, ].T  # here the columns of R are the different observations.
    hasData = np.where(R.notna().all(axis=1))[0]
    R.dropna(inplace=True)  # avoid any stocks with missing returns
    pca = PCA()
    X = pca.fit_transform(R.T)[:, :numFactors]
    X = sm.add_constant(X)
    y1 = R.T
    clf = MultiOutputRegressor(LinearRegression(fit_intercept=False), n_jobs=4).fit(X, y1)
    Rexp = np.sum(clf.predict(X), axis=0) # Rexp is the expected return for next period assuming factor returns remain constant
    R = dailyret.iloc[t - lookback + 1:t + 1, ].T  # here the columns of R are the different observations.

    idxSort = Rexp.argsort()

    positionsTable[t, hasData[idxSort[np.arange(0, topN)]]] = -1 # short topN stocks with lowest expected returns
    # positionsTable[t, hasData[idxSort[np.arange(-topN,0)]]]=1
    positionsTable[t, hasData[idxSort[np.arange(-topN, -1)]]] = 1 # buy topN stocks with highest expected returns


In [17]:
capital = np.nansum(np.array(abs(pd.DataFrame(positionsTable)).shift()), axis=1)

In [18]:
positionsTable[capital == 0,] = 0

In [19]:
capital[capital == 0] = 1

In [21]:
ret = np.nansum(np.array(pd.DataFrame(positionsTable).shift()) * np.array(dailyret), axis=1) / capital # compute daily returns of trading strategy

In [22]:
avgret = np.nanmean(ret) * 252

In [23]:
avgstdev = np.nanstd(ret) * math.sqrt(252)

In [24]:
Sharpe = avgret / avgstdev

In [28]:
(avgret, avgstdev, Sharpe)

(0.04052422056844461, 0.07002908500498846, 0.5786769963588401)