In [1]:
import pandas as pd
import numpy as np
from portsort import portsort

import matplotlib.pyplot as plt

from pathlib import Path
from fndata import FnStockData
from pandas.tseries.offsets import MonthEnd
from pandas.tseries.offsets import YearEnd
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')


#### 선견편향 제거를 위해서 사이즈 레깅

In [2]:
### 매월 6월 말 기준으로 리벨런싱
factor_df=pd.read_csv('factor.csv')
# factor_df['size_lag1']=factor_df.groupby('Symbol')['size'].shift(1)
rebalancing_period=sorted(list(set(pd.to_datetime(factor_df['date'])+YearEnd(0)+MonthEnd(-6))))
factor_df['date']=pd.to_datetime(factor_df['date'])

factor_df_size_ffill=factor_df.copy()
factor_df_size_ffill=factor_df_size_ffill.drop_duplicates(subset=['date','Symbol'])
factor_df_size_ffill=factor_df_size_ffill.loc[~(factor_df_size_ffill['수익률 (1개월)(%)'].isna())]
factor_df_size_ffill=factor_df_size_ffill.loc[~(factor_df_size_ffill['기말발행주식수 (보통)(주)'].isna())]


In [3]:
factor_df.columns

Index(['date', 'Symbol', 'FnGuide Sector', '거래정지여부', '관리종목여부',
       '기말발행주식수 (보통)(주)', '매출액(천원)', '매출원가(천원)', '보통주자본금(천원)', '수익률 (1개월)(%)',
       '수정계수', '수정주가(원)', '영업이익(천원)', '이연법인세부채(천원)', '이익잉여금(천원)', '이자비용(천원)',
       '자기주식(천원)', '자본잉여금(천원)', '종가(원)', '총자산(천원)', 'size', 'bm', 'op',
       'invit', 'mom', 'devil_hml'],
      dtype='object')

# 백테스트 짜기

#### 1. 모멘텀 팩터를 제외한 다른 팩터들은 매년 6월 말 리벨런싱/ 모멘텀은 매달 말 리벨런싱.
#### 2. 다른 팩터들은 이미 래깅이 되어있지만 사이즈는 안되어 있어서 레깅함.
#### 3. 포트폴리오 구성할 떄도 6월 말의 시총액 가중평균으로 7월부터 이듬 해 6월까지 리턴을 가중평균 해야함.
#### 4. Independent Sorting

In [4]:
class backtest:

    def __init__(self,factor_df,quantile_1,quantile_2,factor_1,factor_2):
        self.factor_df=factor_df
        self.quantile_1=quantile_1
        self.quantile_2=quantile_2
        self.factor_1=factor_1
        self.factor_2=factor_2
        




    def winsorizing(factor_list, q):
        #factor_list=[i+'w' for i in factor_list]
        self.factor_df[factor_list]=self.factor_df.groupby('date')[factor_list].apply(lambda x: x.clip(x.quantile(q, interpolation='lower'), 
                     x.quantile(1-q, interpolation='higher'), axis=0))


    


    def assign_scores(self,x,quantile_list):
        # 각 그룹에 대해 퀀타일을 계산
        result = x.quantile(q=quantile_list)
        score = pd.Series(np.NaN, index=x.index)
        
        for i in range(len(quantile_list)):
            if i == 0:
                score = np.where(x <= result[quantile_list[i]], i + 1, score)
            else:
                score = np.where((x <= result[quantile_list[i]]) & 
                                (x >= result[quantile_list[i-1]]), 
                                i + 1, score)
        
        # 마지막 퀀타일보다 큰 값에 대해 score 할당
        score = np.where(x > result[quantile_list[-1]], len(quantile_list) + 1, score)
        
        return pd.Series(score, index=x.index)


        
    def sorting(self,dependent_sort=True,lagging1=0,lagging2=0):
        self.test=self.factor_df.copy()
        self.test=self.test.loc[self.test['거래정지여부']=='정상']#### 거래되지 않는 종목들 테스트에서 제외
        self.test=self.test.loc[self.test['관리종목여부']=='정상']
        self.test['rtn']=self.test['수익률 (1개월)(%)']/100## 이름 헷갈려서 바꿈
        
        if lagging1!=0:
       
            self.test[self.factor_1]=self.test.groupby('Symbol')[self.factor_1].shift(lagging1)
        if lagging2!=0:
            self.test[self.factor_2]=self.test.groupby('Symbol')[self.factor_2].shift(lagging2)
        
        self.test['size_1']=self.test.groupby('Symbol')['size'].shift(1)
        
        self.test['score']=self.test.groupby('date')[self.factor_1].transform(func=lambda x: self.assign_scores(x,quantile_list=self.quantile_1))
        ###dependent sort
        if dependent_sort:
            self.test['score2']=self.test.groupby('date')[self.factor_2].transform(func=lambda x: self.assign_scores(x,quantile_list=self.quantile_2))
           

        else: ### independent_sort
            self.test['score2']=self.test.groupby(['date','score'])[self.factor_2].transform(func=lambda x: self.assign_scores(x,quantile_list=self.quantile_2))
            




    def run(self,score1,score2,value_weighted=True,not_monthly_rebalancing=True):
        
        self.test['indicator']=np.where((self.test['score']==score1) & (self.test['score2']==score2),1,np.nan)
        #self.result=self.test.loc[self.test['indicator']==1]
        self.test['indicator_1']=self.test.groupby('Symbol')['indicator'].shift(1)
        if not_monthly_rebalancing:
            self.test['indicator_1']=self.test.groupby('Symbol')['indicator_1'].ffill()
        #test_period=sorted((list(set(self.test['date']))))
        #self.rebalancing_period=rebalancing_period

        if value_weighted:
            self.v_weight=self.test.loc[self.test['indicator_1'].notna()]
            self.v_weight['weight']=self.v_weight.groupby(['date','indicator_1'])['size_1'].transform(lambda x: x/x.sum())
            self.port=pd.merge( self.test, self.v_weight[['date','Symbol','weight']],on=['date','Symbol'],how='left')[['date',"Symbol",'weight','rtn','indicator_1']]
        else:
            self.port=self.test.copy()
            self.port['weight']=self.port.groupby(['date'])['indicator_1'].transform(lambda x: x/x.count())

        self.port['port_rtn']=self.port['rtn']*self.port['weight']
        self.result=self.port[['date','Symbol','indicator_1','port_rtn']]
        self.port_rtn=self.result.dropna().groupby('date')['port_rtn'].sum()
        return self.port_rtn
        



        
        

In [6]:
from tqdm import tqdm
quantile_list=[0.2,0.4,0.6,0.8]
test=backtest(factor_df=factor_df_size_ffill,quantile_1=quantile_list,quantile_2=quantile_list,factor_1='size',factor_2='bm')
test.sorting(lagging2=6)
result=pd.DataFrame()
for i in tqdm(range(1,6)):
    for j in range(1,6):
        tmp=test.run(score1=i,score2=j,value_weighted=True)
        result[f'size_{i}_bm_{6-j}']=tmp

100%|██████████| 5/5 [00:10<00:00,  2.16s/it]


In [35]:
import plotly.express as px
px.line((1+result).cumprod())

In [8]:
result=result*100

In [36]:
result.to_csv('5x5_table.csv')

## 팩터 수익률

In [10]:
factor_df_size_ffill['devil_hml_m']=factor_df_size_ffill['devil_hml'].copy()

In [11]:
factor_df_size_ffill['devil_hml_m']=factor_df_size_ffill['devil_hml'].copy() ### 매달 리벨런싱하는 것도 만듬
factors=['bm','op','invit','devil_hml','mom','devil_hml_m']
quantile_list1=[0.5]
quantile_list2=[0.3, 0.7]
factor_result=pd.DataFrame()
quantile_list3=[0.3,0.7]
for factor in tqdm(factors):

        
    for i in range(1,3):
        for j in range(1,4):
            factor_test=backtest(factor_df=factor_df_size_ffill,quantile_1=quantile_list1,quantile_2=quantile_list2,factor_1='size',factor_2=factor)
            factor_test.sorting(lagging2=6)
            if factor=='mom':
                factor_test=backtest(factor_df=factor_df_size_ffill,quantile_1=quantile_list1,quantile_2=quantile_list3,factor_1='size',factor_2=factor)
                factor_test.sorting()
                tmp=factor_test.run(score1=i,score2=j, value_weighted=True,not_monthly_rebalancing=False)

            elif factor=='devil_hml_m':
                factor_test=backtest(factor_df=factor_df_size_ffill,quantile_1=quantile_list1,quantile_2=quantile_list2,factor_1='size',factor_2=factor)
                factor_test.sorting()
                tmp=factor_test.run(score1=i,score2=j, value_weighted=True,not_monthly_rebalancing=False)

            else:
                tmp=factor_test.run(score1=i,score2=j, value_weighted=True)
            if j!=2:
                factor_result[f'size_{i}_{factor}_{4-j}']=tmp


  0%|          | 0/6 [00:00<?, ?it/s]

100%|██████████| 6/6 [00:45<00:00,  7.64s/it]


In [12]:
real_factor_result=pd.DataFrame(index=factor_result.index)

####bm
real_factor_result['HML']=(factor_result['size_1_bm_1']+factor_result['size_2_bm_1']-factor_result['size_1_bm_3']-factor_result['size_2_bm_3'])/2

####op
real_factor_result['RMW']=(factor_result['size_1_op_1']+factor_result['size_2_op_1']-factor_result['size_1_op_3']-factor_result['size_2_op_3'])/2

#####invit
real_factor_result['CMA']= -(factor_result['size_1_invit_1']+factor_result['size_2_invit_1']-factor_result['size_1_invit_3']-factor_result['size_2_invit_3'])/2

#####mom
real_factor_result['UMD']=(factor_result['size_1_mom_1']+factor_result['size_2_mom_1']-factor_result['size_1_mom_3']-factor_result['size_2_mom_3'])/2

#####devil_hml
real_factor_result['devil_HML']=(factor_result['size_1_devil_hml_1']+factor_result['size_2_devil_hml_1']-factor_result['size_1_devil_hml_3']-factor_result['size_2_devil_hml_3'])/2

real_factor_result['devil_HML_m']=(factor_result['size_1_devil_hml_m_1']+factor_result['size_2_devil_hml_m_1']-factor_result['size_1_devil_hml_m_3']-factor_result['size_2_devil_hml_m_3'])/2



In [70]:
factor_result2

Unnamed: 0_level_0,size_1_mom_3,size_1_mom_1,size_2_mom_3,size_2_mom_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-02-28,0.029529,0.043132,0.033806,0.048317
2014-03-31,0.043865,0.051872,-0.002531,0.001550
2014-04-30,0.037254,0.063085,0.003483,0.006173
2014-05-31,-0.020929,-0.001857,0.022609,0.012061
2014-06-30,-0.026152,-0.009687,-0.034759,0.017962
...,...,...,...,...
2024-05-31,-0.025141,-0.006597,-0.024148,-0.024530
2024-06-30,-0.014000,-0.013089,0.024280,0.024029
2024-07-31,-0.054648,-0.066481,-0.019697,-0.020569
2024-08-31,-0.054686,-0.065334,-0.028374,-0.029027


In [13]:
real_factor_result

Unnamed: 0_level_0,HML,RMW,CMA,UMD,devil_HML,devil_HML_m
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-07-31,0.040194,0.004833,,0.007943,0.061010,0.048244
2014-08-31,-0.028596,0.009381,,0.051907,-0.024262,-0.007776
2014-09-30,-0.009343,0.009736,,0.039668,-0.021561,-0.017486
2014-10-31,0.016922,0.012072,,0.012959,-0.010278,-0.026632
2014-11-30,0.045583,0.014723,,-0.100616,-0.003971,0.041589
...,...,...,...,...,...,...
2024-05-31,0.001575,0.005293,0.018671,0.023474,0.029722,
2024-06-30,0.014054,0.007554,0.001762,0.071286,0.026527,0.197500
2024-07-31,0.007073,-0.007041,-0.004715,0.009996,-0.016449,0.023096
2024-08-31,-0.008503,0.009473,-0.005270,-0.033799,-0.020986,


In [14]:
(1+real_factor_result).cumprod()

Unnamed: 0_level_0,HML,RMW,CMA,UMD,devil_HML,devil_HML_m
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-07-31,1.040194,1.004833,,1.007943,1.061010,1.048244
2014-08-31,1.010449,1.014259,,1.060262,1.035268,1.040093
2014-09-30,1.001008,1.024134,,1.102321,1.012946,1.021905
2014-10-31,1.017947,1.036497,,1.116606,1.002535,0.994689
2014-11-30,1.064348,1.051758,,1.004258,0.998553,1.036058
...,...,...,...,...,...,...
2024-05-31,1.520490,0.821310,1.047912,1.081712,0.899949,
2024-06-30,1.541859,0.827514,1.049759,1.158822,0.923822,1.973979
2024-07-31,1.552764,0.821687,1.044809,1.170406,0.908626,2.019571
2024-08-31,1.539561,0.829471,1.039303,1.130847,0.889557,


In [31]:
(1+real_factor_result).cumprod()


Unnamed: 0_level_0,HML,RMW,CMA,UMD,devil_HML,devil_HML_m
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-07-31,1.040194,1.004833,,0.991622,1.061010,1.047345
2014-08-31,1.010449,1.014259,,1.029439,1.035268,1.024159
2014-09-30,1.001008,1.024134,,1.037865,1.012946,0.986166
2014-10-31,1.017947,1.036497,,1.020236,1.002535,0.972967
2014-11-30,1.064348,1.051758,,0.969019,0.998553,0.979497
...,...,...,...,...,...,...
2024-05-31,1.520490,0.821310,1.047912,0.928425,0.899949,1.068558
2024-06-30,1.541859,0.827514,1.049759,0.928732,0.923822,1.099361
2024-07-31,1.552764,0.821687,1.044809,0.922832,0.908626,1.086989
2024-08-31,1.539561,0.829471,1.039303,0.917618,0.889557,1.059769


In [15]:
factors=['bm','op','invit']
quantile_list1=[0.5]
quantile_list2=[1/3, 1-(1/3)]
quantile_list3=[0.3,0.7]
factor_result=pd.DataFrame()

for factor in tqdm(factors):

        
    for i in range(1,3):
        for j in range(1,4):
            factor_test=backtest(factor_df=factor_df_size_ffill,quantile_1=quantile_list3,quantile_2=quantile_list1,factor_1=factor,factor_2='size')
            factor_test.sorting(lagging1=6)
            
            if factor=='mom':
                tmp=factor_test.run(score1=j,score2=i, value_weighted=True)

            else:
                tmp=factor_test.run(score1=j,score2=i, value_weighted=True)
                #print(f'{i}_{j}')
       
            factor_result[f'size_{i}_{factor}_{4-j}']=tmp


100%|██████████| 3/3 [00:21<00:00,  7.09s/it]


In [None]:
factor_result

Unnamed: 0_level_0,size_1_bm_3,size_1_bm_2,size_1_bm_1,size_2_bm_3,size_2_bm_2,size_2_bm_1,size_1_op_3,size_1_op_2,size_1_op_1,size_2_op_3,size_2_op_2,size_2_op_1,size_1_invit_3,size_1_invit_2,size_1_invit_1,size_2_invit_3,size_2_invit_2,size_2_invit_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2014-07-31,0.001317,0.014440,0.035156,-0.001978,0.024434,0.044571,0.019992,0.021312,0.005491,-0.000329,0.028579,0.023838,,,,,,
2014-08-31,0.023281,0.018885,0.025082,0.026777,0.013293,-0.032215,0.023229,0.019749,0.026770,-0.015017,0.014523,0.000203,,,,,,
2014-09-30,0.020042,0.012335,0.003475,0.004058,-0.048195,0.001938,0.023050,0.004777,0.000930,-0.067938,0.016446,-0.026345,,,,,,
2014-10-31,-0.019138,-0.027640,-0.024609,-0.035582,-0.061930,0.003733,-0.017545,-0.027656,-0.032209,-0.071640,-0.029466,-0.032834,,,,,,
2014-11-30,-0.009750,-0.016544,-0.010342,-0.000659,0.003899,0.091100,-0.006718,-0.018831,-0.009759,0.009816,0.002231,0.042304,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31,-0.006760,-0.008560,-0.008394,-0.026018,-0.015589,-0.021234,-0.004576,-0.003380,-0.001398,-0.030588,0.005661,-0.023179,0.004993,-0.000044,-0.003362,0.000328,-0.019986,-0.028658
2024-06-30,-0.013632,-0.016080,-0.012326,0.012029,0.039638,0.038831,-0.017052,-0.009331,-0.016771,0.008206,0.032786,0.023032,-0.010714,-0.014265,-0.020957,0.019575,0.031580,0.026293
2024-07-31,-0.066017,-0.076097,-0.064614,-0.028783,-0.031674,-0.016041,-0.057192,-0.078562,-0.065540,-0.020488,-0.039901,-0.026222,-0.056382,-0.076526,-0.060802,-0.037835,-0.018515,-0.023985
2024-08-31,-0.077217,-0.068273,-0.057340,-0.009587,-0.043242,-0.046471,-0.068556,-0.065202,-0.047498,-0.026892,-0.028178,-0.029005,-0.059215,-0.071773,-0.046157,-0.026057,-0.035114,-0.028574


In [16]:
real_factor_result['SMB']=np.sum((factor_result.iloc[:,:3].values-factor_result.iloc[:,3:6].values)/3 + (factor_result.iloc[:,6:9].values-factor_result.iloc[:,9:12].values)/3+(factor_result.iloc[:,12:15].values-factor_result.iloc[:,15:18].values)/3,axis=1)/3
#real_factor_result['HML'] = -(factor_result.iloc[:,0] + factor_result.iloc[:,3] - factor_result.iloc[:,2] - factor_result.iloc[:, 5])/2

In [112]:
factor_result

Unnamed: 0_level_0,size_1_bm_3,size_1_bm_2,size_1_bm_1,size_2_bm_3,size_2_bm_2,size_2_bm_1,size_1_op_3,size_1_op_2,size_1_op_1,size_2_op_3,size_2_op_2,size_2_op_1,size_1_invit_3,size_1_invit_2,size_1_invit_1,size_2_invit_3,size_2_invit_2,size_2_invit_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2014-07-31,0.001317,0.014440,0.035156,-0.001978,0.024434,0.044571,0.019992,0.021312,0.005491,-0.000329,0.028579,0.023838,,,,,,
2014-08-31,0.023281,0.018885,0.025082,0.026777,0.013293,-0.032215,0.023229,0.019749,0.026770,-0.015017,0.014523,0.000203,,,,,,
2014-09-30,0.020042,0.012335,0.003475,0.004058,-0.048195,0.001938,0.023050,0.004777,0.000930,-0.067938,0.016446,-0.026345,,,,,,
2014-10-31,-0.019138,-0.027640,-0.024609,-0.035582,-0.061930,0.003733,-0.017545,-0.027656,-0.032209,-0.071640,-0.029466,-0.032834,,,,,,
2014-11-30,-0.009750,-0.016544,-0.010342,-0.000659,0.003899,0.091100,-0.006718,-0.018831,-0.009759,0.009816,0.002231,0.042304,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31,-0.006760,-0.008560,-0.008394,-0.026018,-0.015589,-0.021234,-0.004576,-0.003380,-0.001398,-0.030588,0.005661,-0.023179,0.004993,-0.000044,-0.003362,0.000328,-0.019986,-0.028658
2024-06-30,-0.013632,-0.016080,-0.012326,0.012029,0.039638,0.038831,-0.017052,-0.009331,-0.016771,0.008206,0.032786,0.023032,-0.010714,-0.014265,-0.020957,0.019575,0.031580,0.026293
2024-07-31,-0.066017,-0.076097,-0.064614,-0.028783,-0.031674,-0.016041,-0.057192,-0.078562,-0.065540,-0.020488,-0.039901,-0.026222,-0.056382,-0.076526,-0.060802,-0.037835,-0.018515,-0.023985
2024-08-31,-0.077217,-0.068273,-0.057340,-0.009587,-0.043242,-0.046471,-0.068556,-0.065202,-0.047498,-0.026892,-0.028178,-0.029005,-0.059215,-0.071773,-0.046157,-0.026057,-0.035114,-0.028574


In [113]:
#real_factor_result.drop(columns='devil_HML',inplace=True)

In [17]:

from pathlib import Path
from fndata import FnStockData,FnMarketData
from pandas.tseries.offsets import MonthEnd
from pandas.tseries.offsets import YearEnd
CWD = Path('.').resolve()
DATA_DIR = CWD / 'data'
fndata_path = DATA_DIR / '고금계과제_시장수익률_201301-202408.csv'
fn = FnMarketData(fndata_path)
df = fn.get_data(format='wide')

In [18]:
rf_path = DATA_DIR / '통안채1년물_월평균_201301-202408.csv'
rf = pd.read_csv(rf_path)


In [19]:
df=df.reset_index()
rf['date']=df['date']


In [117]:
(rf['원자료']/12)/100

0      0.002267
1      0.002242
2      0.002175
3      0.002150
4      0.002150
         ...   
135    0.002824
136    0.002822
137    0.002793
138    0.002675
139    0.002540
Name: 원자료, Length: 140, dtype: float64

In [20]:
real_factor_result=real_factor_result.reset_index()
real_factor_result.rename(columns={'index':'date'},inplace=True)
real_factor_result

Unnamed: 0,date,HML,RMW,CMA,UMD,devil_HML,devil_HML_m,SMB
0,2014-07-31,0.040194,0.004833,,0.007943,0.061010,0.048244,
1,2014-08-31,-0.028596,0.009381,,0.051907,-0.024262,-0.007776,
2,2014-09-30,-0.009343,0.009736,,0.039668,-0.021561,-0.017486,
3,2014-10-31,0.016922,0.012072,,0.012959,-0.010278,-0.026632,
4,2014-11-30,0.045583,0.014723,,-0.100616,-0.003971,0.041589,
...,...,...,...,...,...,...,...,...
118,2024-05-31,0.001575,0.005293,0.018671,0.023474,0.029722,,0.014198
119,2024-06-30,0.014054,0.007554,0.001762,0.071286,0.026527,0.197500,-0.040344
120,2024-07-31,0.007073,-0.007041,-0.004715,0.009996,-0.016449,0.023096,-0.039810
121,2024-08-31,-0.008503,0.009473,-0.005270,-0.033799,-0.020986,,-0.032012


In [21]:
df['RF']=(rf['원자료']/12)/100
df['Mkt-RF']=df['MKF2000']-df['RF']
real_factor_result=pd.merge(real_factor_result,df[['date','RF','Mkt-RF']],how='left',on=['date'])

In [22]:
real_factor_result

Unnamed: 0,date,HML,RMW,CMA,UMD,devil_HML,devil_HML_m,SMB,RF,Mkt-RF
0,2014-07-31,0.040194,0.004833,,0.007943,0.061010,0.048244,,0.002077,0.029123
1,2014-08-31,-0.028596,0.009381,,0.051907,-0.024262,-0.007776,,0.001983,-0.001883
2,2014-09-30,-0.009343,0.009736,,0.039668,-0.021561,-0.017486,,0.001900,-0.028700
3,2014-10-31,0.016922,0.012072,,0.012959,-0.010278,-0.026632,,0.001779,-0.026579
4,2014-11-30,0.045583,0.014723,,-0.100616,-0.003971,0.041589,,0.001706,0.028294
...,...,...,...,...,...,...,...,...,...,...
118,2024-05-31,0.001575,0.005293,0.018671,0.023474,0.029722,,0.014198,0.002822,-0.022822
119,2024-06-30,0.014054,0.007554,0.001762,0.071286,0.026527,0.197500,-0.040344,0.002793,0.024307
120,2024-07-31,0.007073,-0.007041,-0.004715,0.009996,-0.016449,0.023096,-0.039810,0.002675,-0.018175
121,2024-08-31,-0.008503,0.009473,-0.005270,-0.033799,-0.020986,,-0.032012,0.002540,-0.038340


In [23]:
real_factor_result[['HML','RMW',"CMA","UMD",'SMB',"RF","Mkt-RF",'devil_HML','devil_HML_m']]=real_factor_result[['HML','RMW',"CMA","UMD",'SMB',"RF","Mkt-RF",'devil_HML','devil_HML_m']]*100

In [32]:
submit=real_factor_result[['date','HML','RMW',"CMA","UMD",'SMB',"RF","Mkt-RF"]]
submit.to_csv('factor_port.csv')
print(submit)

          date       HML       RMW       CMA        UMD       SMB        RF  \
0   2014-07-31  4.019412  0.483300       NaN   0.794311       NaN  0.207750   
1   2014-08-31 -2.859560  0.938057       NaN   5.190679       NaN  0.198333   
2   2014-09-30 -0.934340  0.973645       NaN   3.966798       NaN  0.190000   
3   2014-10-31  1.692192  1.207153       NaN   1.295948       NaN  0.177917   
4   2014-11-30  4.558324  1.472322       NaN -10.061575       NaN  0.170583   
..         ...       ...       ...       ...        ...       ...       ...   
118 2024-05-31  0.157519  0.529339  1.867083   2.347442  1.419795  0.282250   
119 2024-06-30  1.405402  0.755387  0.176246   7.128556 -4.034396  0.279333   
120 2024-07-31  0.707284 -0.704125 -0.471475   0.999637 -3.980962  0.267500   
121 2024-08-31 -0.850318  0.947296 -0.527021  -3.379921 -3.201218  0.254000   
122 2024-09-19 -2.790031 -0.621126  1.533090  -7.982823 -1.197860       NaN   

       Mkt-RF  
0    2.912250  
1   -0.188333  
2  

In [122]:
# real_factor_result.to_csv('factor_port.csv')

In [26]:
draw=real_factor_result.set_index(['date'])
px.line((1+draw.dropna()/100).cumprod())

In [27]:
fn=pd.read_csv('fn_factor2.csv')
fn=fn.iloc[7:].T
fn=fn.iloc[6:]
fn.columns=['date','MOM_fn',"SMB_fn","HML_fn"]
fn['date']=pd.to_datetime(fn['date'])

In [28]:

import plotly.express as px
temp=pd.merge(draw.reset_index(),fn,on=['date'],how='left')
#temp[['HML_fn','SMB_fn']]=temp[['HML_fn','SMB_fn']]
#temp=temp.astype(float)

temp=temp.set_index(['date'])
temp=temp.astype(float)
px.line((1+temp.loc[temp.index>'2016']/100).cumprod())

In [29]:
temp.corr()

Unnamed: 0,HML,RMW,CMA,UMD,devil_HML,devil_HML_m,SMB,RF,Mkt-RF,MOM_fn,SMB_fn,HML_fn
HML,1.0,0.550699,0.145147,-0.05793,0.839721,0.683431,-0.134036,-0.022586,-0.025669,-0.252269,-0.019273,0.734706
RMW,0.550699,1.0,-0.32335,0.024293,0.490096,0.3293,-0.444956,0.075926,-0.175687,-0.064145,-0.402101,0.345926
CMA,0.145147,-0.32335,1.0,-0.088543,0.177864,0.380347,0.392029,-0.010526,-0.003503,-0.077304,0.396397,0.418059
UMD,-0.05793,0.024293,-0.088543,1.0,-0.074787,-0.145355,-0.190624,0.03421,-0.073364,0.84232,-0.059309,-0.053675
devil_HML,0.839721,0.490096,0.177864,-0.074787,1.0,0.742158,-0.116503,-0.025877,-0.070234,-0.290065,0.023962,0.800193
devil_HML_m,0.683431,0.3293,0.380347,-0.145355,0.742158,1.0,-0.112892,0.026753,-0.125781,-0.289861,-0.049042,0.866657
SMB,-0.134036,-0.444956,0.392029,-0.190624,-0.116503,-0.112892,1.0,-0.118694,0.235588,-0.149076,0.9117,-0.063821
RF,-0.022586,0.075926,-0.010526,0.03421,-0.025877,0.026753,-0.118694,1.0,-0.087873,0.090541,-0.129225,0.063224
Mkt-RF,-0.025669,-0.175687,-0.003503,-0.073364,-0.070234,-0.125781,0.235588,-0.087873,1.0,0.045609,0.109802,-0.139599
MOM_fn,-0.252269,-0.064145,-0.077304,0.84232,-0.290065,-0.289861,-0.149076,0.090541,0.045609,1.0,-0.082693,-0.250194
