In [1]:
import statistics
import numpy as np
import pandas as pd
from marcap import marcap_data
from datetime import datetime, timedelta

In [2]:
# Fetch the price data covering the backtest period
# (requested range: '2000-01-01' to '2019-12-31').
df_price = marcap_data('2000-01-01', '2019-12-31')

In [96]:
# Build 60 consecutive calendar quarters beginning with 2002 Q1. Every entry
# records the first and the last calendar day of its quarter.
quarter_start_month = [1, 4, 7, 10]
first_year = 2002
days = []
for quarter_no in range(60):
    year_offset, slot = divmod(quarter_no, 4)
    period_start = datetime(first_year + year_offset, quarter_start_month[slot], 1)

    # The quarter ends one day before the following quarter begins; divmod
    # rolls the year over automatically after the October quarter.
    next_year_offset, next_slot = divmod(quarter_no + 1, 4)
    next_period_start = datetime(first_year + next_year_offset, quarter_start_month[next_slot], 1)

    days.append({'start_date': period_start,
                 'end_date': next_period_start - timedelta(days=1)})

In [97]:
# Start from an empty table with one column for each end of a holding period.
df_account = pd.DataFrame(columns=['start_date', 'end_date'])

In [98]:
# Build the account table straight from the period records.
# `DataFrame.append` was removed in pandas 2.0, and appending to the existing
# frame also duplicated every row whenever this cell was re-run; constructing
# the frame from `days` is idempotent and yields the same 0..n-1 index.
df_account = pd.DataFrame(days, columns=['start_date', 'end_date'])

In [99]:
# Display the generated holding periods.
df_account

Unnamed: 0,start_date,end_date
0,2002-01-01,2002-03-31
1,2002-04-01,2002-06-30
2,2002-07-01,2002-09-30
3,2002-10-01,2002-12-31
4,2003-01-01,2003-03-31
5,2003-04-01,2003-06-30
6,2003-07-01,2003-09-30
7,2003-10-01,2003-12-31
8,2004-01-01,2004-03-31
9,2004-04-01,2004-06-30


In [114]:
def get_stocks(start_date, prices=None, lookback_days=365, skip_days=30,
               max_stale_days=7, small_cap_fraction=0.5, top_n=50):
    """Select candidate stock codes for the holding period starting on ``start_date``.

    The past yield of every stock is measured over the window
    ``(start_date - lookback_days, start_date - skip_days]`` as the last
    ``Close`` divided by the first ``Open`` in that window. Stocks are then
    filtered and the codes are returned ordered by ascending yield.

    Parameters
    ----------
    start_date : datetime-like
        First day of the holding period.
    prices : pandas.DataFrame, optional
        Price table indexed by a datetime index named ``Date`` with at least
        the columns ``Code``, ``Open``, ``Close`` and ``Marcap``. Defaults to
        the notebook-level ``df_price``. Rows are assumed to be in
        chronological order per code, because ``first``/``last`` aggregation
        depends on row order.
    lookback_days : int
        Length of the lookback window in days, counted back from ``start_date``.
    skip_days : int
        Most recent days before ``start_date`` that are left out of the window.
    max_stale_days : int
        A stock is kept only if its last quote in the window is fewer than
        this many days before the window end.
    small_cap_fraction : float
        Fraction of the stocks, smallest market cap first, that is kept.
    top_n : int
        Maximum number of codes returned.

    Returns
    -------
    list
        Up to ``top_n`` stock codes, lowest past yield first. Empty when the
        window contains no price rows.
    """
    print(start_date)
    if prices is None:
        prices = df_price  # notebook-level price table loaded in an earlier cell

    year_start_date = start_date - timedelta(days=lookback_days)
    year_end_date = start_date - timedelta(days=skip_days)
    in_window = (prices.index > year_start_date) & (prices.index <= year_end_date)
    df_year_price = prices[in_window].reset_index()

    # Nothing to rank; bail out before the aggregation below runs on an empty frame.
    if df_year_price.empty:
        return []

    # Per-day direction flags; they are summed per stock in the aggregation.
    df_year_price['plus'] = df_year_price['Close'] > df_year_price['Open']
    df_year_price['minus'] = df_year_price['Close'] < df_year_price['Open']
    df_year_price['equal'] = df_year_price['Close'] == df_year_price['Open']

    # Past return per stock (the most recent month is excluded by the window).
    df_stock_yield = df_year_price.groupby("Code").agg({'Open': 'first',
                                                        'Close': 'last',
                                                        'plus': 'sum',
                                                        'minus': 'sum',
                                                        'equal': 'sum',
                                                        'Date': 'last',
                                                        'Marcap': 'last'}).reset_index()

    # Days between the window end and the stock's last quote (vectorized
    # instead of a row-wise apply); drop stocks whose quotes stop too early.
    df_stock_yield['diff'] = (year_end_date - df_stock_yield['Date']).dt.days
    df_stock_yield = df_stock_yield[df_stock_yield['diff'] < max_stale_days]

    # Exclude stocks whose opening price is 0.
    df_stock_yield = df_stock_yield[df_stock_yield['Open'] > 0]

    # Keep only the smallest stocks by market cap (bottom 50% by default).
    # `.copy()` makes the slice an independent frame so the column assignment
    # below is not a chained assignment on a view.
    df_stock_yield = df_stock_yield.sort_values(by=['Marcap'])
    df_stock_yield = df_stock_yield[:int(len(df_stock_yield) * small_cap_fraction)].copy()

    df_stock_yield['yield'] = df_stock_yield['Close'] / df_stock_yield['Open']

#     # Compute ID
#     df_stock_yield['all'] = df_stock_yield['plus'] + df_stock_yield['minus'] + df_stock_yield['equal']
#     df_stock_yield['plus_rate'] = df_stock_yield['plus'] / df_stock_yield['all']
#     df_stock_yield['minus_rate'] = df_stock_yield['minus'] / df_stock_yield['all']
#     df_stock_yield['id'] = (df_stock_yield['yield'] > 1.0) * (df_stock_yield['minus_rate'] - df_stock_yield['plus_rate'])

#     df_stock_yield['1/id'] = 1 / df_stock_yield['id']
#     df_stock_yield['RANK_1'] = df_stock_yield['yield'].rank(ascending=False) # higher is better
#     df_stock_yield['RANK_2'] = df_stock_yield['1/id'].rank(ascending=False) # higher is better
#     df_stock_yield['TOTAL_RANK'] = df_stock_yield['RANK_1'] + df_stock_yield['RANK_2']

#     df_stock_yield = df_stock_yield.sort_values(by=['TOTAL_RANK'], ascending=False)

    # NOTE(review): this returns the LOWEST past yields first, whereas the
    # disabled ranking above treats a higher yield as better - confirm that
    # selecting past losers is the intended strategy.
    df_stock_yield = df_stock_yield.sort_values(by=['yield'], ascending=True)
    #print(df_stock_yield[:30])

    return df_stock_yield['Code'].tolist()[:top_n]

In [115]:
# Select the stocks for each holding period: get_stocks is called once per row
# with that row's start_date, and the returned list of codes is stored in 'stocks'.
df_account['stocks'] = df_account.apply(lambda x: get_stocks(x.start_date), axis=1)

2002-01-01 00:00:00
        Code      Open    Close   plus  minus  equal       Date        Marcap  \
1349  037750   20100.0   1620.0  107.0  108.0   10.0 2001-11-30  2.025000e+10   
88    001000  101500.0   8210.0   89.0  122.0   14.0 2001-11-30  2.463000e+10   
583   008080   21000.0   1750.0   94.0  118.0   13.0 2001-11-30  1.773991e+10   
1508  044960   33500.0   2840.0   90.0  124.0   11.0 2001-11-30  1.221200e+10   
650   009337   22000.0   1900.0   43.0   56.0   20.0 2001-11-30  3.325000e+09   
1193  033250   10500.0    980.0   90.0  115.0   20.0 2001-11-30  1.375920e+10   
758   012280    9100.0    880.0   97.0  115.0   13.0 2001-11-30  2.559070e+10   
820   014190   17750.0   1780.0  107.0  101.0   17.0 2001-11-30  1.979370e+10   
659   009470   14500.0   1750.0   86.0  123.0   16.0 2001-11-30  1.157418e+10   
649   009335   15600.0   2045.0   60.0   78.0   26.0 2001-11-30  4.090000e+09   
1506  044780   13000.0   1740.0  108.0  102.0   15.0 2001-11-30  1.461600e+10   
371   00

In [125]:
# Backtest every holding period: each selected stock is bought at its first
# Open inside the period and sold at its last Close; the positions are averaged
# with equal weight and a flat fee is applied to the period result.
MAX_POSITIONS = 30      # portfolio size; the candidate list may hold more codes
MAX_GAP_DAYS = 7        # last quote further than this from period end => counted as delisted
FEE_MULTIPLIER = 0.99   # 1% fee

df_account['yield'] = 0.0
df_account['delisted'] = 0.0
for i, row in df_account.iterrows():
    # `i` is an index label, so the period is read from the row itself rather
    # than via positional `.iloc[i]`; `.at[i, ...]` below is label-based.
    start_date = row['start_date']
    end_date = row['end_date']
    df_year_price = df_price[(df_price.index >= start_date) & (df_price.index <= end_date)]

    stocks = row['stocks']
    yields = []

    delisted_stocks = 0
    for stock in stocks:
        # Enforce the cap before adding anything. Checking only after a normal
        # position let the count jump past the limit whenever a delisted stock
        # filled the last slot, after which the loop never stopped.
        if len(yields) >= MAX_POSITIONS:
            break

        df_stock_price = df_year_price[df_year_price['Code'] == stock].reset_index()

        # No quotes at all during the period: the stock cannot be bought.
        if len(df_stock_price) == 0:
            continue

        # Quotes stop well before the period end: book a total loss.
        last_day = (end_date - df_stock_price.iloc[-1]['Date']).days
        if last_day > MAX_GAP_DAYS:
            yields.append(0.0)
            delisted_stocks += 1
            continue

        start_price = df_stock_price.iloc[0]['Open']
        end_price = df_stock_price.iloc[-1]['Close']

        # An opening price of 0 cannot be traded and would divide by zero.
        if start_price <= 0:
            continue

        yields.append(end_price / start_price)

    # Despite its name this is the yield of one holding period, not of a year.
    yearly_yield = statistics.mean(yields) * FEE_MULTIPLIER
    df_account.at[i, 'yield'] = yearly_yield
    df_account.at[i, 'delisted'] = delisted_stocks
    print(start_date, end_date, yearly_yield)

2002-01-01 00:00:00 2002-03-31 00:00:00 0.9230844698455744
2002-04-01 00:00:00 2002-06-30 00:00:00 0.6269153023597723
2002-07-01 00:00:00 2002-09-30 00:00:00 0.7402977599852383
2002-10-01 00:00:00 2002-12-31 00:00:00 0.9476952570362954
2003-01-01 00:00:00 2003-03-31 00:00:00 1.0658926069076124
2003-04-01 00:00:00 2003-06-30 00:00:00 1.26091996140049
2003-07-01 00:00:00 2003-09-30 00:00:00 0.8814371451486286
2003-10-01 00:00:00 2003-12-31 00:00:00 0.8936433974737692
2004-01-01 00:00:00 2004-03-31 00:00:00 0.9022669577775828
2004-04-01 00:00:00 2004-06-30 00:00:00 1.8586040213034198
2004-07-01 00:00:00 2004-09-30 00:00:00 1.712651781984761
2004-10-01 00:00:00 2004-12-31 00:00:00 2.403502959940492
2005-01-01 00:00:00 2005-03-31 00:00:00 2.4029473497122926
2005-04-01 00:00:00 2005-06-30 00:00:00 1.3315528575501965
2005-07-01 00:00:00 2005-09-30 00:00:00 1.2814623520902917
2005-10-01 00:00:00 2005-12-31 00:00:00 1.4393396582260287
2006-01-01 00:00:00 2006-03-31 00:00:00 6.278322676879508
20

In [126]:
# Compound the per-period yields into a running cumulative yield.
df_account['cum_yield'] = df_account['yield'].cumprod()

In [127]:
# Display the per-period results, including the cumulative yield.
df_account

Unnamed: 0,start_date,end_date,stocks,yield,cum_yield,delisted
0,2002-01-01,2002-03-31,"[037750, 001000, 008080, 044960, 009337, 03325...",0.923084,0.923084,4.0
1,2002-04-01,2002-06-30,"[012280, 033190, 037750, 001000, 044960, 03663...",0.626915,0.578696,3.0
2,2002-07-01,2002-09-30,"[019430, 053110, 046320, 037110, 015390, 04457...",0.740298,0.428407,0.0
3,2002-10-01,2002-12-31,"[045050, 019430, 053110, 037760, 052670, 05117...",0.947695,0.405999,0.0
4,2003-01-01,2003-03-31,"[014047, 014045, 014040, 049790, 045050, 02474...",1.065893,0.432752,0.0
5,2003-04-01,2003-06-30,"[045050, 038520, 015540, 003930, 014940, 06055...",1.26092,0.545665,2.0
6,2003-07-01,2003-09-30,"[013000, 035400, 002930, 013005, 044490, 02367...",0.881437,0.48097,2.0
7,2003-10-01,2003-12-31,"[035400, 028305, 013000, 023670, 025460, 01491...",0.893643,0.429815,3.0
8,2004-01-01,2004-03-31,"[025460, 023670, 025950, 054530, 012620, 03680...",0.902267,0.387808,5.0
9,2004-04-01,2004-06-30,"[025950, 039000, 025460, 011320, 023670, 06713...",1.858604,0.720782,5.0


In [128]:
# Annualised growth factor: every row spans 3 months, so the whole run covers
# 3 * len(df_account) / 12 years; take that root of the final cumulative yield.
final_cum_yield = df_account.iloc[-1]['cum_yield']
years_covered = (3 * len(df_account)) / 12
pow(final_cum_yield, 1 / years_covered)

2.326496730803804