In [13]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import floor



In [19]:
# Factor-return table used by every strategy below. An earlier input,
# "../Analysis/factors_returns_multi.csv", is no longer read.
factor_returns = pd.read_csv("../Predict/factors_returns_pred.csv")
# Columns 1..10 (everything right after the leading column) are treated as the
# style factors; the columns from position 11 onwards are used elsewhere as
# industry factors.
style_factors = factor_returns.columns[1:11].tolist()
style_factor_returns = factor_returns[["Date", *style_factors]]
# Per-stock factor exposures, one row per (Date, Index) -- TODO confirm uniqueness.
factors = pd.read_csv("../Factors/merge_Factors.csv")
# Factors scored "higher is better" vs. "lower is better" by strategy2/strategy3.
earning_factors = ["Momentum_1", "Momentum_2"]
risk_factors = ["Financial Quality", "Turnover"]
all_factors = [*earning_factors, *risk_factors]

def strategy1(date, top_n=10):
    """Pick stocks ranked on the style factor with the highest return on ``date``.

    Steps:
      1. Look up the row of ``style_factor_returns`` for ``date`` and take the
         style factor whose return is the largest.
      2. From ``factors`` on the same date, drop every stock whose value for
         that factor equals the cross-sectional maximum, then return the
         ``top_n`` stocks with the largest remaining values.

    Parameters
    ----------
    date : str
        Value matched against the ``"Date"`` column of both tables.
    top_n : int, default 10
        Number of stocks to return.

    Returns
    -------
    list
        Values of the ``"Index"`` column for the selected stocks, best first.
        May be shorter than ``top_n`` (or empty) if too few stocks remain.

    Raises
    ------
    ValueError
        If ``style_factor_returns`` has no row for ``date``.
    """
    predicted_factor_return = style_factor_returns.loc[style_factor_returns["Date"] == date]
    if predicted_factor_return.empty:
        raise ValueError(f"no factor returns available for date {date!r}")

    # The Series returned by idxmax is indexed by the date strings, so the first
    # entry has to be fetched positionally with .iloc; a bare integer key relies
    # on a deprecated positional fallback.
    max_factor = predicted_factor_return.set_index("Date").idxmax(axis=1).iloc[0]

    max_factor_stock = factors.loc[factors["Date"] == date, ["Index", max_factor]]
    # NOTE(review): stocks sitting exactly at the maximum are deliberately
    # excluded -- presumably these are clipped/winsorised outliers; confirm.
    max_value = max_factor_stock[max_factor].max()
    max_factor_stock = max_factor_stock.loc[max_factor_stock[max_factor] != max_value]

    ranked = max_factor_stock.sort_values(by=max_factor, ascending=False)
    return ranked["Index"].head(top_n).tolist()
    

In [20]:
# Example run of strategy1 for a single date.
strategy1(date="2022-10-04")

['RMBS', 'ZI', 'DXCM', 'HZNP', 'BECN', 'AEHR', 'LPLA', 'CROX', 'IBKR', 'NTLA']

In [21]:
def strategy2(date, seg_num=5, top_n=10):
    """Rank stocks by summed bucket scores over the earning and risk factors.

    For the rows of ``factors`` on ``date``:

    1. Rows whose value in a scored factor repeats an earlier row's value are
       dropped (one ``drop_duplicates`` pass per factor, first row kept).
    2. For every factor the remaining rows are sorted -- earning factors
       descending, risk factors ascending -- and cut by position into
       ``seg_num`` buckets of ``floor(n / seg_num)`` rows; the last bucket
       absorbs the remainder.  Bucket ``k`` (0-based, best first) scores
       ``100 * (seg_num - k) / seg_num``, i.e. roughly 100, 80, 60, 40, 20 for
       the default five buckets.
    3. The per-factor scores are added into ``total_score`` and the ``top_n``
       stocks with the largest totals are returned.

    Parameters
    ----------
    date : str
        Value matched against the ``"Date"`` column of ``factors``.
    seg_num : int, default 5
        Number of score buckets per factor.
    top_n : int, default 10
        Number of stocks to return.

    Returns
    -------
    list
        Values of the ``"Index"`` column, highest total score first.  Empty
        when ``factors`` has no row for ``date``.

    Raises
    ------
    ValueError
        If ``seg_num`` is smaller than 1.
    """
    if seg_num < 1:
        raise ValueError(f"seg_num must be a positive integer, got {seg_num!r}")

    scored_factors = earning_factors + risk_factors
    score_columns = [factor + "_score" for factor in scored_factors]

    factors_score = factors[["Date", "Index"] + scored_factors]
    factors_score = factors_score.loc[factors_score["Date"] == date]
    for factor in scored_factors:
        factors_score = factors_score.drop_duplicates(subset=[factor])
    # Detach from the parent frame so that adding columns below cannot raise
    # SettingWithCopyWarning.
    factors_score = factors_score.copy()
    for column in score_columns:
        # Placeholder only: every row is overwritten by the bucket loop below.
        factors_score[column] = None

    # Positional bucket boundaries; the last bucket runs to the end of the frame.
    num_row = factors_score.shape[0]
    cut_length = num_row // seg_num
    boundaries = [i * cut_length for i in range(seg_num)] + [num_row]
    buckets = list(zip(boundaries[:-1], boundaries[1:]))

    # Score fractions, best bucket first.  The slice guards against float
    # round-off making np.arange emit one element too many.
    step = 1 / seg_num
    score_list = list(np.arange(0, 1, step)[:seg_num] + step)
    score_list.reverse()

    # Earning factors: larger is better.  Risk factors: smaller is better.
    ranking_plan = [(factor, False) for factor in earning_factors]
    ranking_plan += [(factor, True) for factor in risk_factors]
    for factor, ascending in ranking_plan:
        factors_score = factors_score.sort_values(by=factor, ascending=ascending)
        col_index = factors_score.columns.get_loc(factor + "_score")
        for (start, stop), score in zip(buckets, score_list):
            if start < stop:
                factors_score.iloc[start:stop, col_index] = 100 * score

    # Sum over however many factors are configured (not a fixed 2 + 2).
    total_score = factors_score[score_columns[0]]
    for column in score_columns[1:]:
        total_score = total_score + factors_score[column]
    factors_score["total_score"] = total_score

    # NOTE(review): many stocks share the same total; which of the tied stocks
    # make the cut depends on the sort's default (non-stable) algorithm.
    factors_score = factors_score.sort_values(by="total_score", ascending=False)
    return factors_score["Index"].head(top_n).tolist()
    


In [22]:
# Example run of strategy2 for a single date.
strategy2(date="2022-10-04")

['RBKB', 'AXON', 'IROQ', 'FBNC', 'HFBL', 'CFB', 'MGYR', 'LSBK', 'NFBK', 'HBCP']

In [79]:
def strategy3(date, seg_num=5, top_n=10):
    """Bucket-score stocks on the earning/risk factors and add an industry tilt.

    The industry columns of ``factor_returns`` (position 11 onwards) on
    ``date`` are min-max scaled and normalised to sum to 100.  Each stock's
    industry columns in ``factors`` are multiplied by these weights, so a
    stock receives the weight of the industry columns it is flagged in.

    The earning/risk factors are then scored exactly as in ``strategy2``:
    duplicates per factor are dropped, rows are sorted (earning descending,
    risk ascending), cut by position into ``seg_num`` buckets, and bucket ``k``
    (0-based) scores ``100 * (seg_num - k) / seg_num``.  ``total_score`` is the
    sum of the weighted industry columns and the per-factor scores.

    NOTE(review): the bucket-scoring part duplicates ``strategy2``; consider
    extracting a shared helper.

    Parameters
    ----------
    date : str
        Value matched against the ``"Date"`` column of both tables.
    seg_num : int, default 5
        Number of score buckets per factor.
    top_n : int, default 10
        Number of stocks to return.

    Returns
    -------
    list
        Values of the ``"Index"`` column, highest total score first.

    Raises
    ------
    ValueError
        If ``seg_num`` is smaller than 1 or ``factor_returns`` has no row for
        ``date``.
    """
    if seg_num < 1:
        raise ValueError(f"seg_num must be a positive integer, got {seg_num!r}")

    # --- industry weights -------------------------------------------------
    industry_factor_list = list(factor_returns.columns)[11:]
    industry_rows = factor_returns.loc[factor_returns["Date"] == date, industry_factor_list]
    if industry_rows.empty:
        raise ValueError(f"no factor returns available for date {date!r}")
    return_list = industry_rows.iloc[0].to_numpy(dtype=float)

    lowest, highest = return_list.min(), return_list.max()
    if highest == lowest:
        # All industries returned the same: min-max scaling is undefined, so
        # no industry gets a tilt.
        return_score_list = np.zeros(len(return_list))
    else:
        standarized_return_list = (return_list - lowest) / (highest - lowest)
        return_score_list = standarized_return_list / sum(standarized_return_list) * 100

    # --- per-stock table ----------------------------------------------------
    scored_factors = earning_factors + risk_factors
    score_columns = [factor + "_score" for factor in scored_factors]

    factors_score = factors[["Date", "Index"] + scored_factors + industry_factor_list]
    # .copy() detaches the slice; whole-column assignment lets the integer
    # industry flags become float weights without chained-assignment or dtype
    # warnings.
    factors_score = factors_score.loc[factors_score["Date"] == date].copy()
    factors_score[industry_factor_list] = factors_score[industry_factor_list].multiply(
        return_score_list, axis=1
    )

    for factor in scored_factors:
        factors_score = factors_score.drop_duplicates(subset=[factor])
    factors_score = factors_score.copy()
    for column in score_columns:
        # Placeholder only: every row is overwritten by the bucket loop below.
        factors_score[column] = None

    # Positional bucket boundaries; the last bucket runs to the end of the frame.
    num_row = factors_score.shape[0]
    cut_length = num_row // seg_num
    boundaries = [i * cut_length for i in range(seg_num)] + [num_row]
    buckets = list(zip(boundaries[:-1], boundaries[1:]))

    # Score fractions, best bucket first.  The slice guards against float
    # round-off making np.arange emit one element too many.
    step = 1 / seg_num
    score_list = list(np.arange(0, 1, step)[:seg_num] + step)
    score_list.reverse()

    # Earning factors: larger is better.  Risk factors: smaller is better.
    ranking_plan = [(factor, False) for factor in earning_factors]
    ranking_plan += [(factor, True) for factor in risk_factors]
    for factor, ascending in ranking_plan:
        factors_score = factors_score.sort_values(by=factor, ascending=ascending)
        col_index = factors_score.columns.get_loc(factor + "_score")
        for (start, stop), score in zip(buckets, score_list):
            if start < stop:
                factors_score.iloc[start:stop, col_index] = 100 * score

    # Industry tilt + factor scores, selected by name instead of by position.
    factors_score["total_score"] = factors_score[industry_factor_list + score_columns].sum(axis=1)

    # NOTE(review): tied totals are ordered by the sort's default (non-stable)
    # algorithm.
    factors_score = factors_score.sort_values(by="total_score", ascending=False)
    return factors_score["Index"].head(top_n).tolist()

In [80]:
# Example run of strategy3 for a single date.
strategy3(date="2022-10-04")

['RBKB', 'AXON', 'HBCP', 'LMST', 'NFBK', 'IROQ', 'HFBL', 'MGYR', 'LSBK', 'CFB']

In [60]:
# Scratch check of the industry-weighting step (the same computation that
# strategy3 performs): add the industry factor on top of strategy2's inputs.
date = "2022-10-04"
industry_factor_list = list(factor_returns.columns)[11:]
industry_factor_return = factor_returns.loc[factor_returns["Date"]==date,["Date"]+industry_factor_list]
# Flatten the selected row(s) and drop the leading Date entry.
return_list = industry_factor_return.values.reshape(-1)[1:]
# Min-max scale the industry returns, then normalise them to sum to 100.
standarized_return_list = (return_list-min(return_list))/(max(return_list)-min(return_list))
return_score_list = standarized_return_list/sum(standarized_return_list)*100


factors_score = factors[["Date", "Index"]+earning_factors+risk_factors+industry_factor_list]
# .copy() detaches the filtered slice so the assignment below does not act on
# a possible view of `factors` (avoids SettingWithCopyWarning).
factors_score = factors_score.loc[factors_score["Date"]==date].copy()
# Replace the industry flag columns wholesale with their float weights rather
# than writing floats into the existing integer columns through .loc.
factors_score[industry_factor_list] = (
    factors_score[industry_factor_list].multiply(return_score_list, axis=1).astype(float)
)
factors_score
    


Unnamed: 0,Date,Index,Momentum_1,Momentum_2,Financial Quality,Turnover,industry_Basic Materials,industry_Communication Services,industry_Consumer Cyclical,industry_Consumer Defensive,industry_Energy,industry_Financial Services,industry_Healthcare,industry_Industrials,industry_Real Estate,industry_Technology,industry_Utilities
441,2022-10-04,ABCB,1.094967,0.993963,0.123282,-0.603296,0.0,0.0,0.0,0.0,0.0,5.28103,0.0,0.0,0.0,0.0,0.0
902,2022-10-04,ACET,1.260618,-0.533565,-1.637459,1.808051,0.0,0.0,0.0,0.0,0.0,0.0,14.378936,0.0,0.0,0.0,0.0
1363,2022-10-04,ACLS,1.136752,-0.406650,0.823894,1.060484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.762644,0.0
1824,2022-10-04,ACMR,-1.685414,-1.238450,0.197519,1.484494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.762644,0.0
2285,2022-10-04,ADBE,-1.721130,-1.692498,1.357406,0.005262,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.762644,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226331,2022-10-04,ZBRA,-0.290090,-0.407566,1.126258,-0.276715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.762644,0.0
226792,2022-10-04,ZEUS,-0.295517,-0.674491,1.147205,-0.336098,3.277949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
227253,2022-10-04,ZI,1.830862,0.602951,0.070170,1.056296,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.762644,0.0
227714,2022-10-04,ZNTL,-1.721130,-1.399337,-1.637459,1.780123,0.0,0.0,0.0,0.0,0.0,0.0,14.378936,0.0,0.0,0.0,0.0


In [59]:
# Inspect the industry factor column names (columns 11+ of factor_returns).
industry_factor_list

['industry_Basic Materials',
 'industry_Communication Services',
 'industry_Consumer Cyclical',
 'industry_Consumer Defensive',
 'industry_Energy',
 'industry_Financial Services',
 'industry_Healthcare',
 'industry_Industrials',
 'industry_Real Estate',
 'industry_Technology',
 'industry_Utilities']

In [58]:
# Inspect factors_score. The output recorded for this cell shows the industry
# columns as plain 0/1 flags, i.e. the state before the weights were applied.
factors_score

Unnamed: 0,Date,Index,Momentum_1,Momentum_2,Financial Quality,Turnover,industry_Basic Materials,industry_Communication Services,industry_Consumer Cyclical,industry_Consumer Defensive,industry_Energy,industry_Financial Services,industry_Healthcare,industry_Industrials,industry_Real Estate,industry_Technology,industry_Utilities
441,2022-10-04,ABCB,1.094967,0.993963,0.123282,-0.603296,0,0,0,0,0,1,0,0,0,0,0
902,2022-10-04,ACET,1.260618,-0.533565,-1.637459,1.808051,0,0,0,0,0,0,1,0,0,0,0
1363,2022-10-04,ACLS,1.136752,-0.406650,0.823894,1.060484,0,0,0,0,0,0,0,0,0,1,0
1824,2022-10-04,ACMR,-1.685414,-1.238450,0.197519,1.484494,0,0,0,0,0,0,0,0,0,1,0
2285,2022-10-04,ADBE,-1.721130,-1.692498,1.357406,0.005262,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226331,2022-10-04,ZBRA,-0.290090,-0.407566,1.126258,-0.276715,0,0,0,0,0,0,0,0,0,1,0
226792,2022-10-04,ZEUS,-0.295517,-0.674491,1.147205,-0.336098,1,0,0,0,0,0,0,0,0,0,0
227253,2022-10-04,ZI,1.830862,0.602951,0.070170,1.056296,0,0,0,0,0,0,0,0,0,1,0
227714,2022-10-04,ZNTL,-1.721130,-1.399337,-1.637459,1.780123,0,0,0,0,0,0,1,0,0,0,0


In [46]:
# NOTE(review): the output recorded for this cell is a 4x3 float DataFrame, not
# a list of column names -- the cell source was presumably edited after it last
# ran. Re-run (or delete) this cell before relying on it.
industry_factor_list

Unnamed: 0,0,1,2
0,0.300758,0.082995,0.735918
1,-0.169537,1.122108,-0.150239
2,0.047216,-0.854261,0.248088
3,0.991615,-0.679461,-0.488998


In [50]:
# Shape of the mask vector y. As far as this notebook shows, y is only assigned
# in the In [48] cell below, so that cell has to be run first.
y.shape

(3,)

In [48]:
# Sanity check: multiplying along the columns axis applies one mask entry per
# column of x, zeroing the columns whose mask entry is 0.
y = np.array([0, 1, 0])
x.mul(y, axis="columns")

Unnamed: 0,0,1,2
0,0.0,0.082995,0.0
1,-0.0,1.122108,-0.0
2,0.0,-0.854261,0.0
3,0.0,-0.679461,-0.0


In [68]:
# Small 3x3 integer frame for experimenting with column slicing and row sums.
x = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
x
# e.g. x.iloc[:, 2:] would keep only the last column

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [66]:
# Confirm that a row-wise sum of a DataFrame comes back as a Series.
type(x.sum(axis="columns"))

pandas.core.series.Series