In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf

In [38]:
# Read every raw factor table (and the return series) from the shared Factors
# directory in one pass; each CSV is bound to the name used by later cells.
(
    Momentum,
    Turnover,
    Volatility,
    Value_Industry,
    FinancialQuality,
    Return,
) = map(
    pd.read_csv,
    (
        "../Factors/Momentum_Factor.csv",
        "../Factors/Turnover_Factor.csv",
        "../Factors/Volatility_Factor.csv",
        "../Factors/Value-Industry_Factor.csv",
        "../Factors/Financial-Quality_Factor.csv",
        "../Factors/return_ratio.csv",
    ),
)

In [39]:
# Trim each table to the (Date, Index) key plus the columns used downstream.
# Relative-strength columns are renamed to Momentum_*, and high-low range
# columns to Volatility_*, so the factor names follow one pattern.
returns = Return[["Date", "Index", "Return"]]

Value_Industry = Value_Industry[["Date", "Index", "EP", "BP", "industry"]]

Turnover = Turnover[["Date", "Index", "Turnover_1m", "Turnover_2m", "Turnover_3m"]]

Momentum = Momentum[
    [
        "Date",
        "Index",
        "Adj_Close",
        "relative_strength_1m",
        "relative_strength_2m",
        "relative_strength_3m",
    ]
].rename(
    columns={
        "relative_strength_1m": "Momentum_1m",
        "relative_strength_2m": "Momentum_2m",
        "relative_strength_3m": "Momentum_3m",
    }
)

Volatility = Volatility[
    [
        "Date",
        "Index",
        "high_low_1m",
        "high_low_2m",
        "high_low_3m",
        "std_1m",
        "std_2m",
        "std_3m",
    ]
].rename(
    columns={
        "high_low_1m": "Volatility_1m",
        "high_low_2m": "Volatility_2m",
        "high_low_3m": "Volatility_3m",
    }
)

In [40]:
# Left-join every other table onto the momentum table by (Date, Index), in the
# same order as before, so the momentum rows define the row set.
Factors = Momentum
for _table in (returns, Turnover, Volatility, FinancialQuality, Value_Industry):
    Factors = Factors.merge(_table, on=["Date", "Index"], how="left")

# Remove the 'Unnamed: 0' column carried in by the merge.
Factors = Factors.drop(columns=['Unnamed: 0'])

# Expand the industry label into one indicator column per industry.
Factors = pd.get_dummies(Factors, columns=["industry"])

In [41]:
# Positions of rows holding at least one missing value (a row appears once per
# missing cell), the tickers those rows belong to, and finally the table with
# every such ticker removed entirely.
null_rows = np.nonzero(Factors.isna().to_numpy())[0]
missings = Factors["Index"].iloc[null_rows].unique()
Factors = Factors[~Factors["Index"].isin(missings)]

In [42]:
# normalization
# normal_columns = ["Momentum_1m","Momentum_2m","Momentum_3m","Turnover_1m","Turnover_2m","Turnover_3m","Volatility_1m","Volatility_2m","Volatility_3m","std_1m","std_2m","std_3m","ROA","ROE","EP","BP"]
# normal_df = Factors[normal_columns]
def normal_table(df, n=3, columns=None):
    """Winsorize and z-score the factor columns of one table.

    For each factor column, values lying more than ``n`` median absolute
    deviations (MAD) away from the column median are clipped to that bound.
    The clipped column is then standardized by subtracting its mean and
    dividing by its sample standard deviation.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the factor columns. It is left unmodified, which
        matters when this function is used inside ``groupby(...).apply``:
        mutating the group passed in by pandas is not supported.
    n : float, default 3
        Half-width of the clipping band, expressed in MADs.
    columns : iterable of str, optional
        Columns to normalize. Defaults to the momentum, turnover, volatility,
        std, ROA, ROE, EP and BP columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose factor columns are clipped and standardized.
        A column with at most one distinct non-missing value after clipping
        has no spread to scale by; its non-missing entries become 0.0 instead
        of the NaN that a division by a zero/undefined deviation would give.
    """
    if columns is None:
        columns = [
            "Momentum_1m", "Momentum_2m", "Momentum_3m",
            "Turnover_1m", "Turnover_2m", "Turnover_3m",
            "Volatility_1m", "Volatility_2m", "Volatility_3m",
            "std_1m", "std_2m", "std_3m",
            "ROA", "ROE", "EP", "BP",
        ]

    out = df.copy()
    for column in columns:
        values = out[column]
        median = values.median()
        mad = (values - median).abs().median()

        # clip() returns a new Series and upcasts as needed, so integer
        # columns do not hit dtype problems when given float bounds.
        clipped = values.clip(lower=median - mad * n, upper=median + mad * n)

        if clipped.nunique() <= 1:
            # Constant (or single-row / all-missing) column: every value equals
            # the mean, so the standardized value is 0; missing stays missing.
            out[column] = clipped.where(clipped.isna(), 0.0)
        else:
            out[column] = (clipped - clipped.mean()) / clipped.std()
    return out
# for column in normal_columns:
#     n = 2
#     median = Factors[column].median()
#     median_series = (Factors[column] - median).abs()
#     MAD = median_series.median()
#     upper = median+MAD*n
#     lower = median-MAD*n
#     Factors.loc[Factors[column]>upper, column] = upper
#     Factors.loc[Factors[column]<lower, column] = lower
#     Factors[column] = (Factors[column] - Factors[column].mean()) / Factors[column].std()
    

In [43]:
# Normalize each trading day's cross-section on its own. group_keys=False keeps
# the original row index rather than prepending "Date" as an extra index level
# (which newer pandas releases do by default), so "Date" stays an ordinary,
# unambiguous column in the result.
Factors = Factors.groupby("Date", group_keys=False).apply(normal_table)

In [46]:
# Persist the normalized factor table next to the raw factor files; the row
# index is not written out.
Factors.to_csv(
    path_or_buf="../Factors/Factors.csv",
    index=False,
)

In [47]:
# Show the final factor table; the notebook renders a truncated preview.
Factors

Unnamed: 0,Date,Index,Adj_Close,Momentum_1m,Momentum_2m,Momentum_3m,Return,Turnover_1m,Turnover_2m,Turnover_3m,...,industry_Communication Services,industry_Consumer Cyclical,industry_Consumer Defensive,industry_Energy,industry_Financial Services,industry_Healthcare,industry_Industrials,industry_Real Estate,industry_Technology,industry_Utilities
0,2021-01-04,FTNT,29.112000,1.509253,0.423402,-0.062156,-0.019996,-0.285383,-0.086579,-0.048863,...,0,0,0,0,0,0,0,0,1,0
1,2021-01-05,FTNT,28.348000,0.732189,0.206508,-0.259777,-0.026243,-0.272697,-0.079926,-0.050330,...,0,0,0,0,0,0,0,0,1,0
2,2021-01-06,FTNT,27.622000,0.226557,-0.277413,-0.454297,-0.025610,-0.281239,-0.098481,-0.064920,...,0,0,0,0,0,0,0,0,1,0
3,2021-01-07,FTNT,28.650000,0.369790,-0.206709,-0.379767,0.037217,-0.265716,-0.107953,-0.069496,...,0,0,0,0,0,0,0,0,1,0
4,2021-01-08,FTNT,29.628000,0.600674,0.073511,-0.228582,0.034136,-0.271396,-0.115117,-0.079530,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262939,2022-07-25,SKYW,21.750000,-0.171436,-1.222563,-1.431199,0.008345,-0.052823,0.060735,0.230381,...,0,0,0,0,0,0,1,0,0,0
262940,2022-07-26,SKYW,21.170000,-0.780535,-1.052905,-1.362528,-0.026667,-0.134906,0.048236,0.207156,...,0,0,0,0,0,0,1,0,0,0
262941,2022-07-27,SKYW,22.139999,-0.258852,-0.881825,-1.278662,0.045820,-0.172973,0.011561,0.198016,...,0,0,0,0,0,0,1,0,0,0
262942,2022-07-28,SKYW,22.520000,-0.208851,-0.969848,-1.040081,0.017164,-0.181162,-0.012323,0.174423,...,0,0,0,0,0,0,1,0,0,0
