In [49]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from datetime import datetime as dt
# Please complete the following three functions

# This function will run OLS regression
# Inputs:
# - regressor(pd.DataFrame): explanatory varaibles ("X") 
# - targets(pd.DataFrame): target variable ("y")
# - annualization(int): number of data points we have per year
# Output:
# - reg(pd.DataFrame): containing the intercept(alpha), regression coefficients (beta) and r-squared of the regression
def ols_regression(regressors, targets, annualization=12):

    # align the targets and regressors on the same dates
    df_aligned = targets.join(regressors, how='inner', lsuffix='y')
    Y = df_aligned[targets.columns]
    X = df_aligned[regressors.columns]
    
    reg = pd.DataFrame(index=targets.columns)
    
    for col in Y.columns:
        y = Y[col]
        model = LinearRegression().fit(X, y)
        reg.loc[col, 'alpha'] = model.intercept_ * annualization
        reg.loc[col, regressors.columns] = model.coef_
        reg.loc[col, 'r-squared'] = model.score(X,y)
    
    return reg

# This function runs time-series regression
# Regress each security's excess return on factors' excess returns
# Inputs:
# - equities(pd.DataFrame): contains monthly returns for 15 securities
# - ff(pd.DataFrame): contains monthly factor returns and risk-free rates
# Outputs:
# - ts_reg(pd.DataFrame): containing the intercept(alpha), 4 regression coefficients (beta) and r-squared of the regression
# - mae(float): Mean Absolute Error of estimated alphas
def time_series_reg(equities, ff):
    
    targets_excess = equities.subtract(ff['RF'],axis=0)
    ff_excess = ff[['MKT','SMB','HML','UMD']].subtract(ff['RF'],axis=0)

    ts_reg = ols_regression(ff_excess,targets_excess)

    mae = np.mean(np.abs(ts_reg['alpha']))
    
    return ts_reg, mae

# This function runs cross-sectional regression
# Regress sample average of each security's excess return on estimated factors' beta
# Inputs:
# - equities(pd.DataFrame): contains monthly returns for 15 securities
# - ff(pd.DataFrame): contains monthly factor returns and risk-free rates
# - ts_reg(pd.DataFrame): contains summary of time-series regression
# Output:
# - cs_reg(pd.DataFrame): containing the intercept(theta), 4 regression coefficients (phi) and r-squared of the regression
def cross_sectional_reg(equities, ff, ts_reg):
    
    aer = pd.DataFrame(index=equities.columns)
    aer['Avg Excess Returns'] = equities.subtract(ff['RF'],axis=0).mean()
    ts_betas = ts_reg[['MKT','SMB','HML','UMD']]
    
    cs_reg = ols_regression(ts_betas,aer[['Avg Excess Returns']]*12)
    
    return cs_reg
def test1(equities, ff):
    ts_reg, mae = time_series_reg(equities, ff)
    fptr.write(ts_reg.head(10).to_string())

def test2(equities, ff):
    ts_reg, mae = time_series_reg(equities, ff)
    fptr.write("Mean Absolute Error: ")
    fptr.write(str(round(mae, 4)))

def test3(equities, ff):
    ts_reg, mae = time_series_reg(equities, ff)
    cs_reg = cross_sectional_reg(equities, ff, ts_reg)
    fptr.write(cs_reg.to_string())
    
if __name__ == '__main__':
    fptr = open(os.environ['OUTPUT_PATH'], 'w')
    tmp = input()
    
    # load monthly industry equity data
    # from 2000-01 to 2021-05 
    equities = []
    colnames = list(map(str, input().rstrip().split('\t')))

    for i in range(257):
        line = list(map(str, input().split('\t')))
        for j in range(16):
            if j == 0:
                line[j] = pd.to_datetime(line[j])
            else:
                line[j] = float(line[j])
        equities.append(line)    

    equities = pd.DataFrame(equities, columns = colnames)
    equities.set_index('Date', inplace = True)
    
    # print(equities)
    
    # load monthly fama french factor data
    # from 2000-01 to 2021-05 
    
    ff = []
    colnames2 = list(map(str, input().rstrip().split('\t')))

    for i in range(258, 515):
        line = list(map(str, input().split('\t')))
        line = line[:6]
        for j in range(6):
            if j == 0:
                line[j] = pd.to_datetime(line[j])
            else:
                line[j] = float(line[j])
        ff.append(line)
    
    ff = pd.DataFrame(ff, columns = colnames2)
    ff.set_index('Date', inplace = True)
    
    # print(ff)
    
    if tmp == '1':
        test1(equities, ff)
    elif tmp == '2':
        test2(equities, ff)
    elif tmp == '3':
        test3(equities, ff)
    else:
        raise RuntimeError('invalid input')

In [50]:
regressors.columns

Index(['AAPL', 'AMZN', 'GS', 'JPM', 'MSFT', 'SPY', 'XLB', 'XLE', 'XLF', 'XLI',
       'XLK', 'XLP', 'XLU', 'XLV', 'XLY'],
      dtype='object')

In [51]:
regressors = pd.read_csv("regressors", sep = "	",engine = 'python',index_col="Date")
regressors.head()

Unnamed: 0_level_0,AAPL,AMZN,GS,JPM,MSFT,SPY,XLB,XLE,XLF,XLI,XLK,XLP,XLU,XLV,XLY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2000-01-31 00:00:00,0.009118,-0.151888,-0.025871,0.044493,-0.161671,-0.049787,-0.122281,0.008074,-0.029586,-0.081794,-0.061485,0.008819,-0.017768,-0.025291,-0.124246
2000-02-29 00:00:00,0.10482,0.066796,0.00955,-0.013168,-0.086845,-0.015227,-0.100469,-0.042334,-0.107046,-0.055173,0.105068,-0.117014,-0.121538,-0.065387,-0.055715
2000-03-31 00:00:00,0.184842,-0.027223,0.137838,0.094977,0.188811,0.096915,0.100893,0.124423,0.182624,0.139207,0.083893,0.037593,0.109237,0.08995,0.141516
2000-04-30 00:00:00,-0.086517,-0.176306,-0.112879,-0.168445,-0.343529,-0.03512,-0.03322,-0.014925,0.009659,0.014445,-0.091847,0.053019,0.065459,-0.011717,-0.020822
2000-05-31 00:00:00,-0.322922,-0.124575,-0.211126,0.035529,-0.103047,-0.015723,-0.031557,0.117424,0.022321,-0.00422,-0.103977,0.071329,-0.002194,-0.026804,-0.05398


In [52]:
targets = pd.read_csv("Targets", sep = "	",engine = 'python')
targets.head()

Unnamed: 0,Date,MKT,SMB,HML,UMD,RF,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15
0,2000-01-31 00:00:00,-0.0433,0.0611,-0.0134,0.0227,0.0041,,,,,,,,,,
1,2000-02-29 00:00:00,0.0288,0.2162,-0.0851,0.1879,0.0043,,,,,,,,,,
2,2000-03-31 00:00:00,0.0567,-0.1635,0.0813,-0.0592,0.0047,,,,,,,,,,
3,2000-04-30 00:00:00,-0.0594,-0.0647,0.0787,-0.0811,0.0046,,,,,,,,,,
4,2000-05-31 00:00:00,-0.0392,-0.0583,0.0551,-0.0858,0.005,,,,,,,,,,


In [53]:
ols_regression(targets,regressors)

ValueError: Found array with 0 sample(s) (shape=(0, 16)) while a minimum of 1 is required.

In [34]:
# This function runs time-series regression
# Regress each security's excess return on factors' excess returns
# Inputs:
# - equities(pd.DataFrame): contains monthly returns for 15 securities
# - ff(pd.DataFrame): contains monthly factor returns and risk-free rates
# Outputs:
# - ts_reg(pd.DataFrame): containing the intercept(alpha), 4 regression coefficients (beta) and r-squared of the regression
# - mae(float): Mean Absolute Error of estimated alphas
def time_series_reg(equities, ff):
    
    targets_excess = equities.subtract(ff['RF'],axis=0)
    ff_excess = ff[['MKT','SMB','HML','UMD']].subtract(ff['RF'],axis=0)

    ts_reg = ols_regression(ff_excess,targets_excess)

    mae = np.mean(np.abs(ts_reg['alpha']))
    
    return ts_reg, mae

In [35]:
time_series_reg(regressors, targets)

(                   AAPL          AMZN            GS           JPM  \
 alpha      4.391120e-32  2.311116e-32 -6.471125e-32  1.016891e-31   
 AAPL       1.000000e+00  8.022223e-16 -7.419992e-16 -1.855059e-17   
 AMZN       5.879780e-16  1.000000e+00 -4.996004e-16  5.551115e-17   
 GS        -4.559531e-16 -2.612966e-16  1.000000e+00 -8.266456e-16   
 JPM       -7.531197e-17 -6.283393e-17 -9.253968e-16  1.000000e+00   
 MSFT       2.978616e-16 -5.638029e-17  2.715912e-17 -4.295190e-16   
 SPY        4.174314e-16  1.937607e-15 -1.133392e-15  2.469867e-15   
 XLB       -1.500786e-16 -4.736607e-16  2.038941e-17  1.742909e-16   
 XLE       -6.823824e-16 -4.889135e-16  1.451752e-16 -2.526250e-16   
 XLF       -3.716560e-16 -1.390004e-16  1.119537e-16 -4.766254e-16   
 XLI       -7.372821e-17 -3.839275e-17  3.411035e-16  2.666192e-16   
 XLK       -2.996747e-16 -1.255090e-15  8.511530e-16 -3.120110e-16   
 XLP       -4.339797e-16 -4.056825e-16 -1.305853e-16  4.690469e-17   
 XLU       -6.375398

In [None]:
# This function runs cross-sectional regression
# Regress sample average of each security's excess return on estimated factors' beta
# Inputs:
# - equities(pd.DataFrame): contains monthly returns for 15 securities
# - ff(pd.DataFrame): contains monthly factor returns and risk-free rates
# - ts_reg(pd.DataFrame): contains summary of time-series regression
# Output:
# - cs_reg(pd.DataFrame): containing the intercept(theta), 4 regression coefficients (phi) and r-squared of the regression
def cross_sectional_reg(equities, ff, ts_reg):
    
    aer = pd.DataFrame(index=equities.columns)
    aer['Avg Excess Returns'] = equities - equities.apply(np.mean)
    ts_betas = ols_regression(aer['Avg Exess Returns'],)
    
    cs_reg = None# -- Your Code -- #
    
    return cs_reg

