# Fama-MacBeth - How to Build a Linear Factor Model

### Loading Libraries

In [7]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
import pandas_datareader.data as web


# StatsModels
from statsmodels.api import OLS, add_constant

# Linear Model
from linearmodels.asset_pricing import LinearFactorModel

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Warnings
import warnings

In [8]:
# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sns.set_style('whitegrid')

# NOTE(review): this installs a blanket 'ignore' filter, so every warning
# (including deprecation notices from the libraries used below) is hidden.
warnings.filterwarnings('ignore')

### Getting Data

#### Risk Factors

In [9]:
# Dataset identifier handed to the 'famafrench' reader.
ff_factor = 'F-F_Research_Data_5_Factors_2x3'

# The reader returns several tables keyed by position; keep table 0
# (presumably the monthly series -- TODO confirm against the dataset description).
ff_factor_tables = web.DataReader(ff_factor, 'famafrench', start='2010', end='2017-12')
ff_factor_data = ff_factor_tables[0]

ff_factor_data.info()

In [10]:
# Summary statistics of the downloaded factor table (still includes the RF column here).
ff_factor_data.describe()

#### Portfolios

In [11]:
# Dataset identifier handed to the 'famafrench' reader.
ff_portfolio = '17_Industry_Portfolios'
ff_portfolio_tables = web.DataReader(ff_portfolio, 'famafrench', start='2010', end='2017-12')

# Take table 0 and subtract the RF column from every portfolio column,
# matching rows on the index (axis=0).
ff_portfolio_data = ff_portfolio_tables[0].sub(ff_factor_data['RF'], axis=0)
ff_portfolio_data.info()

In [16]:
# Summary statistics of the industry portfolio returns after subtracting RF.
ff_portfolio_data.describe()

#### Equity Data

In [17]:
# Read both tables while the store is open; it is closed again on leaving the block.
with pd.HDFStore('../data/assets.h5') as store:
    wiki_prices = store['/quandl/wiki/prices']
    us_stocks = store['/us_equities/stocks']
    # Pivot adjusted closes to wide form via unstack, then keep rows labelled 2010 through 2017.
    prices = wiki_prices.adj_close.unstack().loc['2010':'2017']
    equities = us_stocks.drop_duplicates()

In [18]:
# Keep only the equities rows whose index label is also a price column,
# then map each of those labels to its sector.
matched_equities = equities.filter(prices.columns, axis=0)
sectors = matched_equities['sector'].to_dict()

# Restrict prices to the matched labels and discard columns with no data at all.
prices = prices.filter(sectors.keys()).dropna(how='all', axis=1)

In [19]:
# Last observed price in each calendar month.
month_end_prices = prices.resample('M').last()

# Month-over-month percentage change, scaled by 100 and indexed by monthly period.
returns = month_end_prices.pct_change().mul(100).to_period('M')

# First drop rows that are entirely missing, then drop every column
# that still contains a missing value.
returns = returns.dropna(how='all').dropna(axis=1)

returns.info()

#### Aligning Data

In [20]:
# Restrict both Fama-French tables to exactly the periods present in the equity returns.
sample_periods = returns.index
ff_factor_data = ff_factor_data.loc[sample_periods]
ff_portfolio_data = ff_portfolio_data.loc[sample_periods]

In [21]:
# Summary statistics of the factor table after restricting it to the returns index.
ff_factor_data.describe()

#### Compute Excess Returns

In [22]:
# Subtract the RF column from every equity's return, matching rows on the index.
risk_free = ff_factor_data['RF']
excess_returns = returns.sub(risk_free, axis=0)
excess_returns.info()

In [23]:
# Bounds are the 1st and 99th percentiles computed over all values of the
# frame pooled together (np.percentile is called without an axis).
lower_bound = np.percentile(excess_returns, 1)
upper_bound = np.percentile(excess_returns, 99)

# Cap every value outside [lower_bound, upper_bound] at the nearest bound.
excess_returns = excess_returns.clip(lower=lower_bound, upper=upper_bound)

### Fama-MacBeth Regression

In [24]:
# Inspect the aligned industry portfolio table before running the regressions.
ff_portfolio_data.info()

In [25]:
# Remove the RF column so that only the factor columns remain as regressors.
# errors='ignore' makes the cell safe to re-run: once RF is gone, a second
# execution is a no-op instead of raising KeyError.
ff_factor_data = ff_factor_data.drop(columns='RF', errors='ignore')
ff_factor_data.info()

#### Step 1: Factor Exposures

In [26]:
betas = []

# The regressors (factors plus an intercept column) are the same for every
# industry, so build them once outside the loop.
factor_exog = add_constant(ff_factor_data)
factor_periods = ff_factor_data.index

# One time-series regression per portfolio column.
for industry in ff_portfolio_data:
    industry_returns = ff_portfolio_data.loc[factor_periods, industry]
    step1 = OLS(endog=industry_returns, exog=factor_exog).fit()
    # Keep only the slope coefficients; the intercept is discarded.
    factor_loadings = step1.params.drop('const')
    betas.append(factor_loadings)

In [27]:
# Stack the per-industry coefficient vectors into one table:
# one row per portfolio column, one column per factor.
betas = pd.DataFrame(data=betas,
                     index=ff_portfolio_data.columns,
                     columns=ff_factor_data.columns)

betas.info()

#### Step 2: Risk Premia

In [28]:
lambdas = []

# One cross-sectional regression per period: that period's portfolio returns
# on the estimated betas. No constant is added to the regressors.
for period in ff_portfolio_data.index:
    cross_section = ff_portfolio_data.loc[period, betas.index]
    step2 = OLS(endog=cross_section, exog=betas).fit()
    lambdas.append(step2.params)

In [29]:
# Collect the per-period coefficient vectors into one table:
# one row per period, one column per factor.
lambdas = pd.DataFrame(data=lambdas,
                       columns=list(betas.columns),
                       index=ff_portfolio_data.index)
lambdas.info()

In [30]:
# Horizontal bar chart of the time-series mean of each factor's coefficient,
# sorted ascending.
lambdas.mean().sort_values().plot.barh(figsize=(12, 4))
sns.despine()
plt.tight_layout()
# plt.show must be *called*; the bare name is a no-op that only echoes the
# function's repr as the cell output.
plt.show()

In [31]:
# t-statistic of each mean coefficient: mean divided by its standard error.
# DataFrame.sem() is std / sqrt(number of observations); dividing by the plain
# standard deviation would understate the statistic by a factor of sqrt(T).
t = lambdas.mean().div(lambdas.sem())
t

#### Results

In [32]:
window = 24  # months

# Series to draw: full-sample means (sorted) and the rolling mean over
# `window` rows, with the incomplete leading windows dropped.
average_premia = lambdas.mean().sort_values()
rolling_premia = lambdas.rolling(window).mean().dropna()

# 1x3 grid: the bar chart takes the first cell, the line chart spans the other two.
ax1 = plt.subplot2grid((1, 3), (0, 0))
ax2 = plt.subplot2grid((1, 3), (0, 1), colspan=2)

average_premia.plot.barh(ax=ax1)
rolling_premia.plot(ax=ax2, lw=1, figsize=(14, 5), sharey=True)

sns.despine()
plt.tight_layout()
plt.show()

In [33]:
window = 24  # months

# Rolling mean of each coefficient over `window` rows, drawn as one
# subplot per column with a shared y-axis.
rolling_premia = lambdas.rolling(window).mean().dropna()
rolling_premia.plot(subplots=True, sharey=True, figsize=(14, 7), lw=2)

sns.despine()
plt.tight_layout()

#### Fama-MacBeth with the `linearmodels` Library

In [34]:
# Fit linearmodels' LinearFactorModel on the same industry portfolios and
# factors used in the manual two-step regressions above, then print the result.
# NOTE(review): presumably this reproduces the manual risk-premia estimates --
# compare the printed output with lambdas.mean() to confirm.
mod = LinearFactorModel(portfolios=ff_portfolio_data, 
                        factors=ff_factor_data)
res = mod.fit()
print(res)

In [35]:
# Print the extended summary exposed by the fitted result object.
print(res.full_summary)

In [36]:
# Time-series mean of each manually estimated coefficient, shown for
# side-by-side comparison with the library output printed above.
lambdas.mean()