In [1]:
import statistics
import numpy as np
import pandas as pd
from marcap import marcap_data
from datetime import datetime, timedelta

In [2]:
# Load the price data covering the whole backtest window.
price_window = ('2000-01-01', '2019-12-31')
df_price = marcap_data(*price_window)

In [96]:
# Build the backtest calendar: 60 consecutive calendar quarters, the first
# one starting on 2002-01-01. Each entry holds the first and last day of
# its quarter.
quarter_start_month = [1, 4, 7, 10]
year = 2002
days = []
for i in range(60):
    slot = i % 4
    start_date = datetime(year, quarter_start_month[slot], 1)

    # The October quarter ends the day before 1 January of the *next* year,
    # so advance the year before computing the end date.
    if slot == 3:
        year += 1

    next_quarter_start = datetime(year, quarter_start_month[(slot + 1) % 4], 1)
    end_date = next_quarter_start - timedelta(days=1)

    days.append({'start_date': start_date, 'end_date': end_date})

In [97]:
# Start from an empty table that only declares the two period-boundary columns.
account_columns = ['start_date', 'end_date']
df_account = pd.DataFrame(columns=account_columns)

In [98]:
# Build the period table straight from the list of {'start_date', 'end_date'}
# dicts. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending also duplicated every row whenever this cell was re-run; direct
# construction works on every pandas version and is idempotent.
df_account = pd.DataFrame(days, columns=['start_date', 'end_date'])

In [99]:
# Show the period table (one row per quarter with its start and end date).
df_account

Unnamed: 0,start_date,end_date
0,2002-01-01,2002-03-31
1,2002-04-01,2002-06-30
2,2002-07-01,2002-09-30
3,2002-10-01,2002-12-31
4,2003-01-01,2003-03-31
5,2003-04-01,2003-06-30
6,2003-07-01,2003-09-30
7,2003-10-01,2003-12-31
8,2004-01-01,2004-03-31
9,2004-04-01,2004-06-30


In [108]:
def get_stocks(start_date, prices=None, lookback_days=365, skip_days=30,
               max_stale_days=7, marcap_fraction=0.5, n_stocks=30, verbose=True):
    """Pick the stock codes to hold for the period that begins at ``start_date``.

    The selection looks at a trailing window that starts ``lookback_days``
    before ``start_date`` (exclusive) and ends ``skip_days`` before it
    (inclusive), i.e. the most recent month is left out by default. For every
    code in that window the trailing yield is ``last Close / first Open``.

    Filters, applied in this order:
      1. the code's last row in the window must be fewer than
         ``max_stale_days`` days before the window end;
      2. the first ``Open`` in the window must be positive;
      3. only the ``marcap_fraction`` of remaining codes with the smallest
         last ``Marcap`` are kept.

    The survivors are sorted by trailing yield in ASCENDING order, so the
    codes returned are the ones with the lowest trailing yield.

    Parameters
    ----------
    start_date : datetime-like
        First day of the holding period.
    prices : pandas.DataFrame, optional
        Price table indexed by date (the index must be named ``Date``) with
        ``Code``, ``Open``, ``Close`` and ``Marcap`` columns. Defaults to the
        notebook-level ``df_price``.
        NOTE(review): 'first'/'last' aggregation assumes rows are in
        chronological order within each code - confirm for the data source.
    lookback_days, skip_days, max_stale_days, marcap_fraction, n_stocks
        Tunable versions of the constants that used to be hard-coded; the
        defaults reproduce the original behaviour.
    verbose : bool
        When true (default), print ``start_date`` as a progress marker.

    Returns
    -------
    list
        Up to ``n_stocks`` codes; empty when the window contains no rows.
    """
    if verbose:
        print(start_date)
    if prices is None:
        prices = df_price

    year_start_date = start_date - timedelta(days=lookback_days)
    year_end_date = start_date - timedelta(days=skip_days)
    in_window = (prices.index > year_start_date) & (prices.index <= year_end_date)
    df_year_price = prices[in_window].reset_index()

    # Nothing to rank (e.g. start_date precedes the available data).
    if df_year_price.empty:
        return []

    # Per-code trailing figures over the window (most recent month excluded
    # through skip_days).
    df_stock_yield = df_year_price.groupby("Code").agg({'Open': 'first',
                                                        'Close': 'last',
                                                        'Date': 'last',
                                                        'Marcap': 'last'}).reset_index()

    # Drop codes whose last row is too far from the window end.
    # Vectorised date arithmetic replaces the former row-wise apply.
    days_stale = (year_end_date - df_stock_yield['Date']).dt.days
    df_stock_yield = df_stock_yield[days_stale < max_stale_days]

    # Exclude codes whose opening price is 0 (the yield would be undefined).
    df_stock_yield = df_stock_yield[df_stock_yield['Open'] > 0]

    # Keep only the smallest-market-cap fraction (bottom 50% by default).
    # .copy() so the column added below does not write into a slice.
    df_stock_yield = df_stock_yield.sort_values(by=['Marcap'])
    df_stock_yield = df_stock_yield[:int(len(df_stock_yield) * marcap_fraction)].copy()

    df_stock_yield['yield'] = df_stock_yield['Close'] / df_stock_yield['Open']

    # An earlier experiment (previously left here as commented-out code)
    # ranked candidates by yield combined with an "ID" score derived from the
    # share of up/down days. It was not part of the selection and has been
    # removed together with the plus/minus/equal day counters it needed.
    df_stock_yield = df_stock_yield.sort_values(by=['yield'], ascending=True)

    return df_stock_yield['Code'].tolist()[:n_stocks]

In [109]:
# Select the stocks to hold in each period, keyed on the period's start date.
period_starts = df_account['start_date']
df_account['stocks'] = period_starts.map(get_stocks)

2002-01-01 00:00:00
2002-04-01 00:00:00
2002-07-01 00:00:00
2002-10-01 00:00:00
2003-01-01 00:00:00
2003-04-01 00:00:00
2003-07-01 00:00:00
2003-10-01 00:00:00
2004-01-01 00:00:00
2004-04-01 00:00:00
2004-07-01 00:00:00
2004-10-01 00:00:00
2005-01-01 00:00:00
2005-04-01 00:00:00
2005-07-01 00:00:00
2005-10-01 00:00:00
2006-01-01 00:00:00
2006-04-01 00:00:00
2006-07-01 00:00:00
2006-10-01 00:00:00
2007-01-01 00:00:00
2007-04-01 00:00:00
2007-07-01 00:00:00
2007-10-01 00:00:00
2008-01-01 00:00:00
2008-04-01 00:00:00
2008-07-01 00:00:00
2008-10-01 00:00:00
2009-01-01 00:00:00
2009-04-01 00:00:00
2009-07-01 00:00:00
2009-10-01 00:00:00
2010-01-01 00:00:00
2010-04-01 00:00:00
2010-07-01 00:00:00
2010-10-01 00:00:00
2011-01-01 00:00:00
2011-04-01 00:00:00
2011-07-01 00:00:00
2011-10-01 00:00:00
2012-01-01 00:00:00
2012-04-01 00:00:00
2012-07-01 00:00:00
2012-10-01 00:00:00
2013-01-01 00:00:00
2013-04-01 00:00:00
2013-07-01 00:00:00
2013-10-01 00:00:00
2014-01-01 00:00:00
2014-04-01 00:00:00


In [110]:
# Holding-period yield of every quarter: the equal-weighted mean of
# (last Close / first Open) over the stocks selected for that quarter.
df_account['yield'] = 0.0
for i, row in df_account.iterrows():
    # Read the dates from the row itself; `i` is an index label and is only
    # valid for label-based access (.at), not for positional .iloc.
    start_date = row['start_date']
    end_date = row['end_date']
    df_period_price = df_price[(df_price.index >= start_date) & (df_price.index <= end_date)]

    stocks = row['stocks']
    yields = []

    for stock in stocks:
        df_stock_price = df_period_price[df_period_price['Code'] == stock]

        # The stock has no rows in this period.
        if len(df_stock_price) == 0:
            continue

        start_price = df_stock_price.iloc[0]['Open']
        end_price = df_stock_price.iloc[-1]['Close']

        # Skip a zero (or missing) opening price: the ratio would be
        # inf/NaN or raise, and would poison the compounded result.
        if not start_price > 0:
            continue

        yields.append(end_price / start_price)

    # With nothing investable the capital is left unchanged (yield 1.0);
    # statistics.mean would otherwise raise on the empty list.
    quarterly_yield = statistics.mean(yields) if yields else 1.0
    df_account.at[i, 'yield'] = quarterly_yield

    print(start_date, end_date, quarterly_yield)

2002-01-01 00:00:00 2002-03-31 00:00:00 1.0273719935512087
2002-04-01 00:00:00 2002-06-30 00:00:00 0.7252767656686325
2002-07-01 00:00:00 2002-09-30 00:00:00 0.7477755151366043
2002-10-01 00:00:00 2002-12-31 00:00:00 0.9572679364002984
2003-01-01 00:00:00 2003-03-31 00:00:00 1.0766591988965784
2003-04-01 00:00:00 2003-06-30 00:00:00 1.3129668714947478
2003-07-01 00:00:00 2003-09-30 00:00:00 0.9275325796406876
2003-10-01 00:00:00 2003-12-31 00:00:00 0.9886188719071263
2004-01-01 00:00:00 2004-03-31 00:00:00 1.010016781549762
2004-04-01 00:00:00 2004-06-30 00:00:00 1.9796766498710963
2004-07-01 00:00:00 2004-09-30 00:00:00 1.9353285062895744
2004-10-01 00:00:00 2004-12-31 00:00:00 2.436401457271831
2005-01-01 00:00:00 2005-03-31 00:00:00 2.8682819302856957
2005-04-01 00:00:00 2005-06-30 00:00:00 1.5705443753047832
2005-07-01 00:00:00 2005-09-30 00:00:00 1.2948097233711906
2005-10-01 00:00:00 2005-12-31 00:00:00 1.4538784426525542
2006-01-01 00:00:00 2006-03-31 00:00:00 6.412596973302063


In [111]:
# Compound the per-period yields into a running growth multiple.
period_yields = df_account['yield']
df_account['cum_yield'] = period_yields.cumprod()

In [112]:
# Show the period table with the selected stocks, per-period yield and cumulative yield.
df_account

Unnamed: 0,start_date,end_date,stocks,yield,cum_yield
0,2002-01-01,2002-03-31,"[037750, 001000, 008080, 044960, 009337, 03325...",1.027372,1.027372
1,2002-04-01,2002-06-30,"[012280, 033190, 037750, 001000, 044960, 03663...",0.725277,0.745129
2,2002-07-01,2002-09-30,"[019430, 053110, 046320, 037110, 015390, 04457...",0.747776,0.5571892
3,2002-10-01,2002-12-31,"[045050, 019430, 053110, 037760, 052670, 05117...",0.957268,0.5333794
4,2003-01-01,2003-03-31,"[014047, 014045, 014040, 049790, 045050, 02474...",1.076659,0.5742678
5,2003-04-01,2003-06-30,"[045050, 038520, 015540, 003930, 014940, 06055...",1.312967,0.7539946
6,2003-07-01,2003-09-30,"[013000, 035400, 002930, 013005, 044490, 02367...",0.927533,0.6993546
7,2003-10-01,2003-12-31,"[035400, 028305, 013000, 023670, 025460, 01491...",0.988619,0.6913952
8,2004-01-01,2004-03-31,"[025460, 023670, 025950, 054530, 012620, 03680...",1.010017,0.6983207
9,2004-04-01,2004-06-30,"[025950, 039000, 025460, 011320, 023670, 06713...",1.979677,1.382449


In [113]:
# Annualised growth rate: every row of df_account spans 3 months, so the
# table covers (3 * rows) / 12 years; take that root of the final
# cumulative yield.
final_cum_yield = df_account.iloc[-1]['cum_yield']
years_covered = (3 * len(df_account)) / 12
pow(final_cum_yield, 1 / years_covered)

3.5978473668448836