In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook session.
pd.set_option('mode.chained_assignment', None)

## Read risk factors

In [2]:
# Location of the workbook holding the risk-factor table.
factors_path = (
    r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model'
    r'\Factors\all_factors.xlsx'
)

# Load the factors and strip the dashes from the 'month' key so that it has
# the same dash-free form the later cells give to the risk-free table's key.
factors_df = pd.read_excel(factors_path).assign(
    month=lambda frame: frame['month'].str.replace('-', '')
)

## Estimate regressions and write regression summaries

In [6]:
# Estimate the CAPM, Fama-French, Carhart and Pastor-Stambaugh regressions for
# every (measure, portfolio) pair and write each regression summary to
# 'reg_{measure}_{portfolio}_{model}.txt' in the working directory.
# REMEMBER: change measure in file path
path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Portfolio Analysis'\
    r'\new_new_spread_portsort.xlsx'
df = pd.read_excel(path, dtype={'month': str})

measures = df['factor'].unique().tolist()

rf_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model'\
    r'\Factors\first_factor.xlsx'
rf_df = pd.read_excel(rf_path, usecols=[0, 2])
rf_df['month'] = rf_df['month'].str.replace('-', '')

df = pd.merge(df, rf_df, on='month')

# Turn the five portfolio returns into excess returns, then build the
# long-short spread and discard the risk-free column.
portfolio_cols = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
df[portfolio_cols] = df[portfolio_cols].sub(df['Rf'], axis=0)
df['Q5-Q1'] = df['Q5'] - df['Q1']
df = df.drop(columns=['Rf'])

# Every column after the first two is regressed on the factors.
# NOTE(review): assumes the first two columns are 'month' and 'factor' -
# confirm against the workbook layout.
quantiles = df.columns[2:]

# File-name suffix -> regressors of each factor model.
model_factors = {
    'capm': ['MKT'],
    '3f': ['MKT', 'SMB', 'HML'],
    '4f': ['MKT', 'SMB', 'HML', 'UMD'],
    '5f': ['MKT', 'SMB', 'HML', 'UMD', 'LIQ'],
}
all_factor_cols = model_factors['5f']

for measure in measures:
    df2 = df.loc[df['factor'] == measure].reset_index(drop=True)
    # Pair each portfolio return with the factor realisation of the SAME
    # month. Relying on row position instead fails as soon as the two tables
    # differ in length and silently mismatches months if their order differs.
    # validate= guards against duplicated months in the factor table.
    sample = pd.merge(
        df2,
        factors_df[['month'] + all_factor_cols],
        on='month',
        how='inner',
        validate='many_to_one',
    )
    if sample.empty:
        raise ValueError(
            f'No months in common between factors_df and measure {measure!r}'
        )
    for quantile in quantiles:
        y = sample[quantile]
        for suffix, factor_cols in model_factors.items():
            # add_constant supplies the intercept column (the alpha).
            x2 = sm.add_constant(sample[factor_cols])
            result = sm.OLS(y, x2).fit()
            with open(f'reg_{measure}_{quantile}_{suffix}.txt', 'w') as fh:
                fh.write(result.summary2(float_format="%.6f").as_text())

## Estimating alphas

In [7]:
# Estimate the intercept (alpha) of every factor model for every
# (measure, portfolio) pair and write one workbook per measure,
# 'alpha_{measure}.xlsx'. Columns are the portfolios; the four rows are, in
# order: CAPM, Fama-French, Carhart, Pastor-Stambaugh.
# REMEMBER: change measure in file path
path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Portfolio Analysis'\
    r'\new_new_spread_portsort.xlsx'
df = pd.read_excel(path, dtype={'month': str})

measures = df['factor'].unique().tolist()

rf_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model'\
    r'\Factors\first_factor.xlsx'
rf_df = pd.read_excel(rf_path, usecols=[0, 2])
rf_df['month'] = rf_df['month'].str.replace('-', '')

df = pd.merge(df, rf_df, on='month')

# Turn the five portfolio returns into excess returns, then build the
# long-short spread and discard the risk-free column.
portfolio_cols = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
df[portfolio_cols] = df[portfolio_cols].sub(df['Rf'], axis=0)
df['Q5-Q1'] = df['Q5'] - df['Q1']
df = df.drop(columns=['Rf'])

# Every column after the first two is regressed on the factors.
# NOTE(review): assumes the first two columns are 'month' and 'factor' -
# confirm against the workbook layout.
quantiles = df.columns[2:]

# Regressors of each model, in the row order of the output workbook.
model_factors = [
    ['MKT'],                                # CAPM
    ['MKT', 'SMB', 'HML'],                  # Fama-French
    ['MKT', 'SMB', 'HML', 'UMD'],           # Carhart
    ['MKT', 'SMB', 'HML', 'UMD', 'LIQ'],    # Pastor-Stambaugh
]
all_factor_cols = model_factors[-1]

for measure in measures:
    df2 = df.loc[df['factor'] == measure].reset_index(drop=True)
    # Pair each portfolio return with the factor realisation of the SAME
    # month. Relying on row position instead fails as soon as the two tables
    # differ in length and silently mismatches months if their order differs.
    # validate= guards against duplicated months in the factor table.
    sample = pd.merge(
        df2,
        factors_df[['month'] + all_factor_cols],
        on='month',
        how='inner',
        validate='many_to_one',
    )
    if sample.empty:
        raise ValueError(
            f'No months in common between factors_df and measure {measure!r}'
        )
    model = LinearRegression()
    reg_dict = dict()
    for quantile in quantiles:
        y = sample[quantile]
        # fit() returns the estimator itself, so the intercept can be read
        # straight off each fit.
        reg_dict[quantile] = [
            model.fit(sample[factor_cols], y).intercept_
            for factor_cols in model_factors
        ]
    reg_df = pd.DataFrame.from_dict(reg_dict)
    reg_df.to_excel(f'alpha_{measure}.xlsx', index=False)

## Estimating t-statistics

In [8]:
# Compute the t-statistic of the intercept (alpha) of every factor model for
# every (measure, portfolio) pair and write one workbook per measure,
# 't_alpha_{measure}.xlsx'. Columns are the portfolios; the four rows are, in
# order: CAPM, Fama-French, Carhart, Pastor-Stambaugh.
# REMEMBER: change measure in file path
path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Portfolio Analysis'\
    r'\new_new_spread_portsort.xlsx'
df = pd.read_excel(path, dtype={'month': str})

measures = df['factor'].unique().tolist()

rf_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model'\
    r'\Factors\first_factor.xlsx'
rf_df = pd.read_excel(rf_path, usecols=[0, 2])
rf_df['month'] = rf_df['month'].str.replace('-', '')

df = pd.merge(df, rf_df, on='month')

# Turn the five portfolio returns into excess returns, then build the
# long-short spread and discard the risk-free column.
portfolio_cols = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']
df[portfolio_cols] = df[portfolio_cols].sub(df['Rf'], axis=0)
df['Q5-Q1'] = df['Q5'] - df['Q1']
df = df.drop(columns=['Rf'])

# Every column after the first two is regressed on the factors.
# NOTE(review): assumes the first two columns are 'month' and 'factor' -
# confirm against the workbook layout.
quantiles = df.columns[2:]

# Regressors of each model, in the row order of the output workbook.
model_factors = [
    ['MKT'],                                # CAPM
    ['MKT', 'SMB', 'HML'],                  # Fama-French
    ['MKT', 'SMB', 'HML', 'UMD'],           # Carhart
    ['MKT', 'SMB', 'HML', 'UMD', 'LIQ'],    # Pastor-Stambaugh
]
all_factor_cols = model_factors[-1]

for measure in measures:
    df2 = df.loc[df['factor'] == measure].reset_index(drop=True)
    # Pair each portfolio return with the factor realisation of the SAME
    # month. Relying on row position instead fails as soon as the two tables
    # differ in length and silently mismatches months if their order differs.
    # validate= guards against duplicated months in the factor table.
    sample = pd.merge(
        df2,
        factors_df[['month'] + all_factor_cols],
        on='month',
        how='inner',
        validate='many_to_one',
    )
    if sample.empty:
        raise ValueError(
            f'No months in common between factors_df and measure {measure!r}'
        )
    reg_dict = dict()
    for quantile in quantiles:
        y = sample[quantile]
        t_alphas = []
        for factor_cols in model_factors:
            # add_constant prepends the 'const' column whose coefficient is
            # the alpha.
            x2 = sm.add_constant(sample[factor_cols])
            result = sm.OLS(y, x2).fit()
            # Read the t-statistic by label from the fitted results rather
            # than by integer position from a rendered summary table.
            t_alphas.append(result.tvalues['const'])
        reg_dict[quantile] = t_alphas
    reg_df = pd.DataFrame.from_dict(reg_dict)
    reg_df.to_excel(f't_alpha_{measure}.xlsx', index=False)