In [57]:
# import standard libraries
import copy
# import third-party libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.tseries.offsets import MonthEnd
from sklearn.linear_model import LinearRegression
import wrds
from IPython.display import display
# import local libraries

In [58]:
class CustomRegression(LinearRegression):
    
    def __init__(self, labels, target, excess):
        super().__init__()
        self.labels = labels
        self.target = target
        self.excess = excess

    def runRegression(self):
        self.model = self.fit(self.excess[self.labels], self.excess[self.target])
        self.beta = list(self.model.coef_)
        self.alpha = self.model.intercept_
        
    def computeMetrics(self):
        target_est = self.model.predict(self.excess[self.labels])
        target = self.excess[self.target].values
        res = target - target_est
        ss_r = sum(res**2)
        ss_t = sum((target - np.mean(target))**2)
        
        # compute t-statistics
        X = self.excess[self.labels]
        X = pd.DataFrame({"Constant":np.ones(len(X))}).join(pd.DataFrame(X.reset_index(drop=True)))
        mse = ss_r/(len(X) - len(X.columns))    
        var_b = mse*(np.linalg.inv(np.dot(X.T, X)).diagonal())
        params = np.append(self.alpha, self.beta)
        self.t_stats = params/ np.sqrt(var_b)
        
        # r squared
        self.r2 = 1 - ss_r/ss_t
        
        # information ratio
        sigma_res = np.std(res)
        self.ir = self.alpha/sigma_res * np.sqrt(12)
        
    def __str__(self):
        str1 = '\u03B1 = {0:.3f}'.format(self.alpha)
        str2 = 't_stat = {0:.3f}'.format(self.t_stats[0])
        str3 = '{}'.format(', '.join(['\u03B2{0} = {1:.3f}'.format(j+1, b) for j, b in enumerate(self.beta)]))
        str4 = '{}'.format(', '.join(['t_stat{0} = {1:.3f}'.format(j+1, b) for j, b in enumerate(self.t_stats[1:])]))
        str5 = 'R2 = {0:.3f}'.format(self.r2)
        str6 = 'IR = {0:.3f}'.format(self.ir)
        return str1 + '\n' + str2 + '\n' + str3 + '\n' + str4 + '\n' + str5 + '\n' + str6
        

In [59]:
start = '1976-01-01'
end = '2019-12-31'

In [60]:
DOWNLOAD = False
if DOWNLOAD:
        
    # connect to databse and download csv files (run once)
    db = wrds.Connection(wrds_username = 'wmartin')
    db.create_pgpass_file() # run once

    # download brk data  
    brk = db.raw_sql("select date, ret"
                     " from crsp.msf "
                     "where permno in (17778) "
                     "and date>='{}' and date<='{}'".format(start, end))

    # index to datetime
    brk = brk.set_index('date')

    # write to csv
    brk.to_csv('brk.csv')
        
else:
        
    brk = pd.read_csv('brk.csv', index_col = 'date')

In [61]:
# read csv fama-french and mom
mom = pd.read_csv('F-F_Momentum_Factor.csv', skiprows = 12, index_col = 0)
mom = mom.loc[:'201912']
mom = mom.rename(columns = {'Mom   ': 'Mom'})
fama = pd.read_csv('F-F_Research_Data_5_Factors_2x3.csv', skiprows = 3, index_col = 0)
fama = fama.loc[:'201912']

# set index to datetime
brk.index = pd.to_datetime(brk.index)
# convert datetimes to end of month
brk.index = brk.index.to_period('M').to_timestamp('M')
# set index of fama website to datetime (end of month)
mom.index = (pd.to_datetime(mom.index, format = '%Y%m')+MonthEnd(1))
fama.index = (pd.to_datetime(fama.index, format = '%Y%m')+MonthEnd(1))

# convert all cells to float in one big dataframe
df = pd.concat([brk, fama, mom], axis = 1)
df = df.astype(float)

In [66]:
def runExercise(start, end):
    print('From {} to {}\n'.format(start, end))
    # clean and truncate data
    data = df.loc[start:end]
    # do a forward fill
    data = data.fillna(method = 'ffill')
    # drop rows before brk really starts
    data = data.dropna()
    # mom and fama data are already in percentages (divide by 100)
    data[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF', 'Mom']] /= 100

    # (b)
    # compute excess dataframe (we need excess returns)
    excess = copy.deepcopy(data[['ret', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Mom']])
    excess['ret'] = excess['ret'].sub(data['RF'])
    # risk free rate
    rf = data[['RF']]

    # compute annualized metrics of excess and risk-free
    ann_mean_excess = excess.mean()*12
    print('Annualized mean of excess returns:')
    print(ann_mean_excess)
    ann_std_excess = excess.std()*np.sqrt(12)
    print('\nAnnualized std of excess returns:')
    print(ann_std_excess)

    ann_mean_rf = rf.mean()*12
    print('\nAnnualized mean of riskless asset:')
    print(ann_mean_rf)
    ann_std_rf = rf.std()*np.sqrt(12)
    print('\nAnnualized std of riskless asset:')
    print(ann_std_rf)

    ann_sr_excess = (ann_mean_excess - ann_mean_rf.values[0])/ann_std_excess    
    print('\nAnnualized sharpe ratio of excess returns:')
    print(ann_sr_excess)
    ann_sr_rf = (ann_mean_rf - ann_mean_rf.values[0])/ann_std_rf
    print('\nAnnualized sharpe ratio of riskless asset:')
    print(ann_sr_rf)

    # (c)
    # create regression models
    regr1 = CustomRegression(['Mkt-RF'], 'ret', excess)
    regr2 = CustomRegression(['Mkt-RF', 'SMB', 'HML'], 'ret', excess)
    regr3 = CustomRegression(['Mkt-RF', 'SMB', 'HML', 'Mom'], 'ret', excess)
    regr4 = CustomRegression(['Mkt-RF', 'SMB', 'HML', 'Mom', 'RMW', 'CMA'], 'ret', excess)

    regr_all = [regr1, regr2, regr3, regr4]

    print()
    
    for i, regressor in enumerate(regr_all):
        regressor.runRegression()
        regressor.computeMetrics()
        print('Regressor {}'.format(i+1))
        print(regressor)
        print()

In [67]:
# (b-c)
runExercise(start, end)

# (h)
runExercise('1976-01-01', '1995-12-31')

From 1976-01-01 to 2019-12-31

Annualized mean of excess returns:
ret       0.185343
Mkt-RF    0.078565
SMB       0.024547
HML       0.027563
RMW       0.040712
CMA       0.030433
Mom       0.075058
dtype: float64

Annualized std of excess returns:
ret       0.231796
Mkt-RF    0.151624
SMB       0.099633
HML       0.099377
RMW       0.078987
CMA       0.067400
Mom       0.151413
dtype: float64

Annualized mean of riskless asset:
RF    0.043717
dtype: float64

Annualized std of riskless asset:
RF    0.010153
dtype: float64

Annualized sharpe ratio of excess returns:
ret       0.610997
Mkt-RF    0.229836
SMB      -0.192405
HML      -0.162549
RMW      -0.038040
CMA      -0.197082
Mom       0.206992
dtype: float64

Annualized sharpe ratio of riskless asset:
RF    0.0
dtype: float64

Regressor 1
α = 0.011
t_stat = 4.121
β1 = 0.684
t_stat1 = 11.363
R2 = 0.200
IR = 0.635

Regressor 2
α = 0.009
t_stat = 3.659
β1 = 0.810, β2 = -0.245, β3 = 0.502
t_stat1 = 13.115, t_stat2 = -2.683, t_stat3 = 5.4