In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [2]:
# Load the factor table from CSV. Later cells read its "Date", "Index" and
# "Return" columns plus one column per name listed in `factors`.
Factors = pd.read_csv('../Factors/Factors.csv')

In [59]:
# Distinct values of the "Date" column; get_factor_returns runs one
# regression per entry of this array.
dates = Factors['Date'].unique()
def get_factor_returns(factor_name):
    """Run one cross-sectional OLS per date of ``Return`` on a single factor.

    For every entry of the module-level ``dates`` array, the rows of the
    module-level ``Factors`` frame with that ``Date`` are selected and
    ``Return`` is regressed on the ``factor_name`` column. No constant is
    added to the regressor, so each fit is a regression through the origin.
    NOTE(review): confirm that omitting the intercept is intended.

    Parameters
    ----------
    factor_name : str
        Name of the ``Factors`` column used as the only regressor.

    Returns
    -------
    factor_returns : list of float
        Slope coefficient for each date, in the order of ``dates``.
    p_value : list of float
        P-value of the slope for each date.
    t_value : list of float
        t-statistic of the slope for each date.
    """
    factor_returns = []
    p_value = []
    t_value = []
    for date in dates:
        data = Factors.loc[Factors["Date"] == date, ["Return", factor_name]]
        y = data["Return"]
        x = data[factor_name].values.reshape(-1, 1)
        model = sm.OLS(y, x).fit()
        # Read the statistics straight from the fitted results instead of
        # rebuilding the summary2() table three times per date. np.asarray
        # gives true positional access: the results are labelled 'x1', and
        # `series[0]` on a label index is a deprecated positional fallback.
        factor_returns.append(float(np.asarray(model.params)[0]))
        p_value.append(float(np.asarray(model.pvalues)[0]))
        t_value.append(float(np.asarray(model.tvalues)[0]))
    return factor_returns, p_value, t_value


In [60]:
# Names of the factor columns to regress on, grouped by name prefix.
factors = [
    # momentum
    "Momentum_1m", "Momentum_2m", "Momentum_3m",
    # turnover
    "Turnover_1m", "Turnover_2m", "Turnover_3m",
    # volatility
    "Volatility_1m", "Volatility_2m", "Volatility_3m",
    # std
    "std_1m", "std_2m", "std_3m",
    # remaining single-name factors
    "ROA", "ROE", "EP", "BP",
]

In [61]:
def get_factor_returns_all():
    """Collect per-date regression statistics for every name in ``factors``.

    Calls ``get_factor_returns`` exactly once per factor and spreads its
    three result lists over three frames that share the same layout: a
    ``Date`` column holding the distinct dates of the module-level
    ``Factors`` frame, followed by one column per factor.

    Returns
    -------
    factors_returns : pandas.DataFrame
        Slope coefficient per date and factor.
    p_values : pandas.DataFrame
        P-value of the slope per date and factor.
    t_values : pandas.DataFrame
        t-statistic of the slope per date and factor.
    """
    unique_dates = Factors["Date"].unique()

    factors_returns = pd.DataFrame()
    p_values = pd.DataFrame()
    t_values = pd.DataFrame()
    factors_returns["Date"] = unique_dates
    p_values["Date"] = unique_dates
    t_values["Date"] = unique_dates

    for factor in factors:
        # One call yields all three lists; calling once per wanted element
        # would repeat every per-date regression three times.
        factor_returns, pvalue, t = get_factor_returns(factor)
        factors_returns[factor] = pd.Series(factor_returns)
        p_values[factor] = pd.Series(pvalue)
        t_values[factor] = pd.Series(t)
    return factors_returns, p_values, t_values

In [62]:
# Run the regressions for every factor. Each result is a frame with a "Date"
# column followed by one column per name in `factors`.
factors_returns, p_values, t_values = get_factor_returns_all()

In [64]:
# Write each result frame to its own CSV file, without the row index.
for _frame, _path in (
    (factors_returns, "../Analysis/factors_returns.csv"),
    (p_values, "../Analysis/p_values.csv"),
    (t_values, "../Analysis/t_values.csv"),
):
    _frame.to_csv(_path, index=False)