In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import floor



In [20]:
factor_returns = pd.read_csv("../Analysis/factors_returns_multi.csv")
style_factors = list(factor_returns.columns)[1:11]
style_factor_returns = factor_returns[["Date"]+style_factors]
factors = pd.read_csv("../Factors/merge_Factors.csv")

# 1. pick the factor with the highest factor return on a specific date 
# 2. pick the 10 stocks with the highest factor value on that date
def strategy1(date):
    predicted_factor_return = style_factor_returns.loc[style_factor_returns["Date"]==date]
    predicted_factor_return = predicted_factor_return.set_index("Date")
    max_factor = predicted_factor_return.idxmax(axis=1)[0]
    max_factor_stock = factors.loc[factors["Date"]==date, ["Index", max_factor]] 
    max_value = max_factor_stock[max_factor].max()
    max_factor_stock = max_factor_stock.loc[max_factor_stock[max_factor]!=max_value]
    max_factor_stock = max_factor_stock.sort_values(by=max_factor, ascending=False) 
    selected_stocks = max_factor_stock["Index"].head(10).tolist()
    return selected_stocks
    

In [21]:
strategy1("2021-10-01")

['RRR', 'CHEF', 'SKYW', 'CENX', 'KRNT', 'DAIO', 'INBX', 'VBTX', 'HRMY', 'WIRE']

In [83]:
factor_returns = pd.read_csv("../Analysis/factors_returns_multi.csv")
factors = pd.read_csv("../Factors/merge_Factors.csv")
earning_factors = ["Momentum_1", "Momentum_2"]
risk_factors = ["Financial Quality", "Turnover"]
all_factors = earning_factors + risk_factors




# 1. give earning and risk factors with 100 and -100 scores respectively
# 2. give scores to stocks based on their factor values(higher factor value on earning factor means higher score, higher factor value on risk factor means lower score)
# 3. aggregate scores for each stock
# 4. pick the 10 stocks with the highest scores
def strategy2(date):
    seg_num = 5
    
    # get factors score from factors
    factors_score = factors[["Date", "Index"]+earning_factors+risk_factors]
    factors_score = factors_score.loc[factors_score["Date"]==date]
    for factor in all_factors:
        factors_score = factors_score.drop_duplicates(subset=[factor])
    for factor in all_factors:
        factors_score[factor+"_score"] = None
    num_row = factors_score.shape[0]
    cut_length = floor(num_row/seg_num)
    cutting_point = [i*cut_length for i in range(seg_num)]
    cutting_point.append(num_row)
    stock_index = []
    score_list = list(np.arange(0, 1, 1/seg_num)+1/seg_num)
    score_list.reverse()
    
    # get stock indexes in each region
    for i in range(len(cutting_point)-1):
        stock_index.append(list(range(cutting_point[i], cutting_point[i+1])))

    for factor in earning_factors:
        factors_score = factors_score.sort_values(by=factor, ascending=False)
        col_index = list(factors_score.columns).index(factor+"_score")
        for i in range(len(stock_index)):
            factors_score.iloc[stock_index[i], col_index] = 100*score_list[i]
    for factor in risk_factors:
        factors_score = factors_score.sort_values(by=factor, ascending=True)
        col_index = list(factors_score.columns).index(factor+"_score")
        for i in range(len(stock_index)):
            factors_score.iloc[stock_index[i], col_index] = 100*score_list[i]
    factors_score["total_score"] = factors_score[earning_factors[0]+"_score"] + factors_score[earning_factors[1]+"_score"] + factors_score[risk_factors[0]+"_score"] + factors_score[risk_factors[1]+"_score"]

    factors_score = factors_score.sort_values(by="total_score", ascending=False)
    selected_stocks = factors_score["Index"].head(10).tolist()
    return selected_stocks
    


In [85]:
strategy2("2021-10-01")

['PDFS', 'MYFW', 'EQBK', 'BWB', 'SBT', 'RDCM', 'MGYR', 'HBCP', 'TH', 'OBNK']

In [82]:
strategy2("2021-10-01")

Unnamed: 0,Date,Index,Momentum_1,Momentum_2,Financial Quality,Turnover,Momentum_1_score,Momentum_2_score,Financial Quality_score,Turnover_score,total_score
161538,2021-10-01,PDFS,1.000010,1.745454,-0.782740,-0.980271,100.0,100.0,100.0,80.0,380.0
141254,2021-10-01,MYFW,0.859288,0.522206,0.043229,-1.225525,100.0,100.0,80.0,100.0,380.0
61501,2021-10-01,EQBK,0.926017,0.690386,0.065118,-1.026325,100.0,100.0,60.0,100.0,360.0
27848,2021-10-01,BWB,1.386039,0.558478,0.110006,-1.134856,100.0,100.0,60.0,100.0,360.0
183666,2021-10-01,SBT,0.401059,0.621650,-0.078562,-1.302032,80.0,100.0,80.0,100.0,360.0
...,...,...,...,...,...,...,...,...,...,...,...
226539,2021-10-01,ZEUS,-0.614809,-1.392975,1.134871,0.478571,40.0,20.0,20.0,20.0,100.0
202106,2021-10-01,TAIT,-1.420378,-1.314383,0.649083,0.475640,20.0,20.0,40.0,20.0,100.0
187815,2021-10-01,SIMO,-0.885192,-1.127633,1.397291,1.317001,20.0,20.0,20.0,20.0,80.0
117282,2021-10-01,LGIH,-1.517683,-1.321664,1.414150,0.420521,20.0,20.0,20.0,20.0,80.0


In [64]:
factors_score = factors[["Date"]+earning_factors+risk_factors]
factors_score = factors_score[factors_score["Date"]=="2021-10-01"]
factors_score = factors_score.drop_duplicates(subset=["Momentum_1"])
factors_score = factors_score.drop_duplicates(subset=["Momentum_2"])
factors_score = factors_score.drop_duplicates(subset=["Financial Quality"])
factors_score = factors_score.drop_duplicates(subset=["Turnover"])
factors_score

Unnamed: 0,Date,Momentum_1,Momentum_2,Financial Quality,Turnover
188,2021-10-01,1.263499,0.461452,0.130446,-0.219228
649,2021-10-01,-0.047572,0.668883,-1.643774,-0.812893
1110,2021-10-01,-0.006830,1.491863,0.820026,0.083233
1571,2021-10-01,1.955283,1.158072,0.204951,1.769106
2032,2021-10-01,-1.387660,-0.805282,1.341995,-0.581680
...,...,...,...,...,...
224234,2021-10-01,0.284787,-0.071789,0.269692,-0.931553
224695,2021-10-01,1.183966,0.674053,0.060096,1.100777
226078,2021-10-01,-1.274509,-0.765385,1.114419,-0.572656
226539,2021-10-01,-0.614809,-1.392975,1.134871,0.478571


In [55]:
# date = "2021-10-01"
# factors_score = factors[["Date"]+earning_factors+risk_factors]
# factors_score = factors_score.loc[factors_score["Date"]==date]
# for factor in all_factors:
#     factors_score[factor+"_score"] = None
# num_row = factors_score.shape[0]
list(factors_score.columns).index("Momentum_1")

1

In [32]:
strategy2("2021-10-01")

[[0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98],
 [99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  156,
  157,
  1

In [12]:
factors.iloc[5:10]

Unnamed: 0,Date,Index,Return,Momentum_1,Momentum_2,Momentum_3,Turnover,Volitility_1,Volitility_2,Volitility_3,...,industry_Communication Services,industry_Consumer Cyclical,industry_Consumer Defensive,industry_Energy,industry_Financial Services,industry_Healthcare,industry_Industrials,industry_Real Estate,industry_Technology,industry_Utilities
5,2021-01-11,ABCB,0.019649,0.740633,0.44315,1.598341,-0.345025,0.048887,0.372523,-0.217563,...,0,0,0,0,1,0,0,0,0,0
6,2021-01-12,ABCB,0.013306,0.779089,0.280818,1.473493,-0.337071,0.03317,0.50856,-0.229812,...,0,0,0,0,1,0,0,0,0,0
7,2021-01-13,ABCB,-0.003396,0.680747,0.624076,1.646755,-0.333426,0.026976,0.48938,-0.27425,...,0,0,0,0,1,0,0,0,0,0
8,2021-01-14,ABCB,0.019536,0.637144,0.75445,1.619512,-0.327106,0.02814,0.490225,-0.295499,...,0,0,0,0,1,0,0,0,0,0
9,2021-01-15,ABCB,-0.01738,0.395481,0.60444,1.427914,-0.323582,0.035389,0.282917,-0.311819,...,0,0,0,0,1,0,0,0,0,0


In [61]:
pd.qcut(m1, 5)

0                        (0.883, 1.765]
396                    (-0.768, -0.284]
792                      (0.883, 1.765]
1188      (-1.9469999999999998, -0.768]
1584      (-1.9469999999999998, -0.768]
                      ...              
205524                   (0.145, 0.883]
205920                  (-0.284, 0.145]
206316                  (-0.284, 0.145]
206712                  (-0.284, 0.145]
207108    (-1.9469999999999998, -0.768]
Name: Momentum_1, Length: 524, dtype: category
Categories (5, interval[float64, right]): [(-1.9469999999999998, -0.768] < (-0.768, -0.284] < (-0.284, 0.145] < (0.145, 0.883] < (0.883, 1.765]]