In [44]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf

In [45]:
# Load the per-factor CSV exports, one DataFrame per file, in the order listed.
# NOTE(review): `Return` is loaded here but not referenced in the cells visible
# in this notebook section — confirm whether it is still needed.
Momentum, Turnover, Volatility, Value_Industry, FinancialQuality, Return = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../Factors/Momentum_Factor.csv",
        "../Factors/Turnover_Factor.csv",
        "../Factors/Volatility_Factor.csv",
        "../Factors/Value-Industry_Factor.csv",
        "../Factors/Financial-Quality_Factor.csv",
        "../Factors/Return_Factor.csv",
    )
]

In [47]:
# Keep only the join keys ("Date", "Index") plus the columns used downstream,
# and give the generic source columns factor-specific names.
# NOTE: this cell overwrites its own inputs. Running it a second time without
# re-running the load cell raises KeyError, because the "relative_strength_*"
# and "high_low_*" columns have already been renamed away.
Momentum = Momentum[
    [
        "Date",
        "Index",
        "Adj_Close",
        "relative_strength_1m",
        "relative_strength_2m",
        "relative_strength_3m",
    ]
].rename(
    columns={
        "relative_strength_1m": "Momentum_1m",
        "relative_strength_2m": "Momentum_2m",
        "relative_strength_3m": "Momentum_3m",
    }
)

Turnover = Turnover[["Date", "Index", "Turnover_1m", "Turnover_2m", "Turnover_3m"]]

# Only the high/low range columns are renamed; the std_* columns keep their names.
Volatility = Volatility[
    [
        "Date",
        "Index",
        "high_low_1m",
        "high_low_2m",
        "high_low_3m",
        "std_1m",
        "std_2m",
        "std_3m",
    ]
].rename(
    columns={
        "high_low_1m": "Volatility_1m",
        "high_low_2m": "Volatility_2m",
        "high_low_3m": "Volatility_3m",
    }
)

Value_Industry = Value_Industry[["Date", "Index", "EP", "BP", "industry"]]

In [48]:
# Build the combined factor table. Momentum is the left side of every join, so
# its (Date, Index) rows define the panel; keys missing from another frame
# yield NaN in that frame's columns.
Factors = (
    Momentum
    .merge(Turnover, how="left", on=["Date", "Index"])
    .merge(Volatility, how="left", on=["Date", "Index"])
    .merge(FinancialQuality, how="left", on=["Date", "Index"])
    .merge(Value_Industry, how="left", on=["Date", "Index"])
    # Remove the 'Unnamed: 0' column carried through the merges
    # (raises KeyError if that column is absent).
    .drop(columns=['Unnamed: 0'])
    # One-hot encode the industry label into industry_<value> indicator columns.
    .pipe(pd.get_dummies, columns=["industry"])
)

In [50]:
# Drop every "Index" value (ticker) that has at least one missing value anywhere:
# 1) locate the rows that contain any null,
# 2) collect the distinct "Index" values appearing on those rows,
# 3) keep only rows whose "Index" is not in that set.
has_null = Factors.isnull().any(axis=1)
null_rows = np.flatnonzero(has_null.to_numpy())
missings = Factors.iloc[null_rows]["Index"].unique()
Factors = Factors.loc[~Factors["Index"].isin(missings)]

In [52]:
# Normalization: for every factor column, winsorize at median ± n * MAD
# (MAD = median absolute deviation from the median), then z-score the result.
# All statistics are computed over the whole column, i.e. all rows pooled.
# NOTE: this overwrites the columns of `Factors`; re-running the cell normalizes
# already-normalized data, so re-run the cells above it first.
normal_columns = ["Momentum_1m","Momentum_2m","Momentum_3m","Turnover_1m","Turnover_2m","Turnover_3m","Volatility_1m","Volatility_2m","Volatility_3m","std_1m","std_2m","std_3m","ROA","ROE","EP","BP"]
n = 3  # half-width of the winsorization band, in MADs (same for every column)
for column in normal_columns:
    values = Factors[column]
    median = values.median()
    MAD = (values - median).abs().median()
    # When MAD is 0 (at least half of the values equal the median) both bounds
    # collapse onto the median; clipping would flatten the whole column to a
    # constant and the z-score below would turn it into NaN. Skip the clip then.
    if MAD > 0:
        values = values.clip(lower=median - MAD * n, upper=median + MAD * n)
    centered = values - values.mean()
    std = values.std()
    # A constant column has zero std; keep the centered values (all zeros)
    # instead of dividing 0 by 0 and filling the column with NaN.
    Factors[column] = (centered / std) if std > 0 else centered
    

In [55]:
# Persist the processed factor table; index=False keeps the DataFrame's row
# index out of the written CSV.
Factors.to_csv("../Factors/Factors.csv", index=False)