In [1]:
import statistics
import numpy as np
import pandas as pd
from marcap import marcap_data
from datetime import datetime, timedelta

In [2]:
# Load the price data covering the backtest period.
PRICE_RANGE = ('2000-01-01', '2019-12-31')
df_price = marcap_data(*PRICE_RANGE)

In [28]:
# Build 60 consecutive calendar quarters, starting with Q1 2002, as
# {'start_date', 'end_date'} records.
quarter_start_month = [1, 4, 7, 10]
days = []
year = 2002
for i in range(60):
    start_date = datetime(year, quarter_start_month[i % 4], 1)

    # After the October quarter, the following quarter starts in the next year.
    if i % 4 == 3:
        year += 1

    # The quarter ends one day before the next quarter begins.
    next_quarter_start = datetime(year, quarter_start_month[(i + 1) % 4], 1)
    days.append({
        'start_date': start_date,
        'end_date': next_quarter_start - timedelta(days=1),
    })

In [29]:
# Empty account table with one column for each boundary of a holding period.
account_columns = ['start_date', 'end_date']
df_account = pd.DataFrame(columns=account_columns)

In [30]:
# Build the account table directly from the quarter records.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending also duplicated every row whenever this cell was re-run.
# Constructing the frame from `days` is idempotent.
df_account = pd.DataFrame(days, columns=['start_date', 'end_date'])

In [31]:
# Display the account table (one row per quarter with its start and end date).
df_account

Unnamed: 0,start_date,end_date
0,2002-01-01,2002-03-31
1,2002-04-01,2002-06-30
2,2002-07-01,2002-09-30
3,2002-10-01,2002-12-31
4,2003-01-01,2003-03-31
5,2003-04-01,2003-06-30
6,2003-07-01,2003-09-30
7,2003-10-01,2003-12-31
8,2004-01-01,2004-03-31
9,2004-04-01,2004-06-30


In [37]:
def get_stocks(start_date, prices=None, lookback_days=365, skip_days=30,
               max_stale_days=7, marcap_fraction=0.3, top_n=30, verbose=True):
    """Select the best trailing performers among the smallest-cap stocks.

    The look-back window is ``(start_date - lookback_days,
    start_date - skip_days]``, i.e. roughly the past year with the most
    recent month left out. For every code the return factor is the last
    ``Close`` divided by the first ``Open`` inside that window.

    Args:
        start_date: Start of the holding period (datetime-like).
        prices: Price frame indexed by date (index named ``Date``) with
            ``Code``, ``Open``, ``Close`` and ``Marcap`` columns. Defaults to
            the notebook-level ``df_price``.
        lookback_days: Distance from ``start_date`` to the window start.
        skip_days: Distance from ``start_date`` to the window end.
        max_stale_days: A code is kept only if its last row in the window is
            fewer than this many days before the window end.
        marcap_fraction: Fraction of codes, smallest ``Marcap`` first, that
            stays in the universe.
        top_n: Maximum number of codes returned.
        verbose: Print ``start_date`` as a progress marker.

    Returns:
        List of at most ``top_n`` codes ordered by return factor, best first.
        Empty when the window contains no rows.
    """
    if verbose:
        print(start_date)
    if prices is None:
        prices = df_price

    year_start_date = start_date - timedelta(days=lookback_days)
    year_end_date = start_date - timedelta(days=skip_days)
    in_window = (prices.index > year_start_date) & (prices.index <= year_end_date)
    df_year_price = prices[in_window].reset_index()

    # No rows in the window: nothing to rank (the aggregation below would fail).
    if df_year_price.empty:
        return []

    # Per-code summary over the window (the most recent month is excluded).
    # 'first'/'last' follow row order -- assumes rows are chronological; TODO confirm.
    df_stock_yield = (
        df_year_price.groupby("Code")
        .agg({'Open': 'first', 'Close': 'last', 'Date': 'last', 'Marcap': 'last'})
        .reset_index()
    )

    # Drop codes whose last row is too far before the window end, and codes
    # whose first open is not positive (the ratio below would be inf/undefined
    # and would otherwise sort to the top of the ranking).
    df_stock_yield['diff'] = (year_end_date - df_stock_yield['Date']).dt.days
    usable = (df_stock_yield['diff'] < max_stale_days) & (df_stock_yield['Open'] > 0)
    df_stock_yield = df_stock_yield[usable]

    # Keep only the smallest `marcap_fraction` of codes by market cap (30% by default).
    df_stock_yield = df_stock_yield.sort_values(by=['Marcap'])
    universe_size = int(len(df_stock_yield) * marcap_fraction)
    # Copy so the new column is not written onto a slice of another frame.
    df_stock_yield = df_stock_yield[:universe_size].copy()

    df_stock_yield['yield'] = df_stock_yield['Close'] / df_stock_yield['Open']
    df_stock_yield = df_stock_yield.sort_values(by=['yield'], ascending=False)

    return df_stock_yield['Code'].tolist()[:top_n]

In [38]:
# Select the stocks to hold for each period, keyed on the period's start date.
df_account['stocks'] = [
    get_stocks(period_start) for period_start in df_account['start_date']
]

2002-01-01 00:00:00
2002-04-01 00:00:00
2002-07-01 00:00:00
2002-10-01 00:00:00
2003-01-01 00:00:00
2003-04-01 00:00:00
2003-07-01 00:00:00
2003-10-01 00:00:00
2004-01-01 00:00:00
2004-04-01 00:00:00
2004-07-01 00:00:00
2004-10-01 00:00:00
2005-01-01 00:00:00
2005-04-01 00:00:00
2005-07-01 00:00:00
2005-10-01 00:00:00
2006-01-01 00:00:00
2006-04-01 00:00:00
2006-07-01 00:00:00
2006-10-01 00:00:00
2007-01-01 00:00:00
2007-04-01 00:00:00
2007-07-01 00:00:00
2007-10-01 00:00:00
2008-01-01 00:00:00
2008-04-01 00:00:00
2008-07-01 00:00:00
2008-10-01 00:00:00
2009-01-01 00:00:00
2009-04-01 00:00:00
2009-07-01 00:00:00
2009-10-01 00:00:00
2010-01-01 00:00:00
2010-04-01 00:00:00
2010-07-01 00:00:00
2010-10-01 00:00:00
2011-01-01 00:00:00
2011-04-01 00:00:00
2011-07-01 00:00:00
2011-10-01 00:00:00
2012-01-01 00:00:00
2012-04-01 00:00:00
2012-07-01 00:00:00
2012-10-01 00:00:00
2013-01-01 00:00:00
2013-04-01 00:00:00
2013-07-01 00:00:00
2013-10-01 00:00:00
2014-01-01 00:00:00
2014-04-01 00:00:00


In [39]:
# Per-period portfolio return factor: the equal-weighted mean of
# (last Close / first Open) over the selected stocks that have price rows
# inside the holding period.
df_account['yield'] = 0.0
for i, row in df_account.iterrows():
    # Read the dates from the row itself; `i` is an index label, so using it
    # with .iloc only works while the index happens to be 0..n-1.
    start_date = row['start_date']
    end_date = row['end_date']
    df_year_price = df_price[(df_price.index >= start_date) & (df_price.index <= end_date)]

    stocks = row['stocks']
    yields = []

    for stock in stocks:
        df_stock_price = df_year_price[df_year_price['Code'] == stock]

        # No rows for this stock in the period: leave it out of the average.
        if df_stock_price.empty:
            continue

        start_price = df_stock_price['Open'].iloc[0]
        end_price = df_stock_price['Close'].iloc[-1]

        # A non-positive opening price would give an inf/undefined ratio and
        # poison the mean, so the stock is skipped.
        if start_price <= 0:
            continue

        yields.append(end_price / start_price)

    # statistics.mean raises StatisticsError on an empty list; record NaN when
    # no selected stock could be priced in this period.
    yearly_yield = statistics.mean(yields) if yields else float('nan')
    df_account.at[i, 'yield'] = yearly_yield

    print(start_date, end_date, yearly_yield)

2002-01-01 00:00:00 2002-03-31 00:00:00 1.0738154465739655
2002-04-01 00:00:00 2002-06-30 00:00:00 0.8335991869420161
2002-07-01 00:00:00 2002-09-30 00:00:00 0.9450540516937133
2002-10-01 00:00:00 2002-12-31 00:00:00 0.814479588084073
2003-01-01 00:00:00 2003-03-31 00:00:00 0.8022455849822105
2003-04-01 00:00:00 2003-06-30 00:00:00 1.3384097513196356
2003-07-01 00:00:00 2003-09-30 00:00:00 0.9581467945207667
2003-10-01 00:00:00 2003-12-31 00:00:00 1.018510276089439
2004-01-01 00:00:00 2004-03-31 00:00:00 1.0192145592142603
2004-04-01 00:00:00 2004-06-30 00:00:00 1.061135179111703
2004-07-01 00:00:00 2004-09-30 00:00:00 0.9403372920555642
2004-10-01 00:00:00 2004-12-31 00:00:00 1.066100403848856
2005-01-01 00:00:00 2005-03-31 00:00:00 1.1434444830854202
2005-04-01 00:00:00 2005-06-30 00:00:00 1.2071821031237722
2005-07-01 00:00:00 2005-09-30 00:00:00 1.119953795286427
2005-10-01 00:00:00 2005-12-31 00:00:00 1.1761424307238468
2006-01-01 00:00:00 2006-03-31 00:00:00 1.0688474855173227
20

In [40]:
# Compound the per-period return factors into a running cumulative factor.
period_factors = df_account['yield']
df_account['cum_yield'] = period_factors.cumprod()

In [41]:
# Display the account table with the selected stocks, per-period yield and cumulative yield.
df_account

Unnamed: 0,start_date,end_date,stocks,yield,cum_yield
0,2002-01-01,2002-03-31,"[015545, 015540, 020490, 014280, 009160, 02174...",1.073815,1.073815
1,2002-04-01,2002-06-30,"[012270, 002055, 015545, 025620, 002050, 01554...",0.833599,0.895132
2,2002-07-01,2002-09-30,"[013000, 004965, 008705, 013005, 012270, 00205...",0.945054,0.845948
3,2002-10-01,2002-12-31,"[004965, 008705, 001600, 004960, 001675, 01755...",0.81448,0.689007
4,2003-01-01,2003-03-31,"[008705, 004965, 003905, 003900, 017555, 00763...",0.802246,0.552753
5,2003-04-01,2003-06-30,"[017555, 020120, 000300, 017550, 026260, 03197...",1.33841,0.73981
6,2003-07-01,2003-09-30,"[009275, 020120, 068875, 031970, 026260, 01067...",0.958147,0.708847
7,2003-10-01,2003-12-31,"[009275, 008320, 015545, 010670, 027040, 00149...",1.01851,0.721968
8,2004-01-01,2004-03-31,"[027040, 032610, 009275, 015545, 008400, 00840...",1.019215,0.73584
9,2004-04-01,2004-06-30,"[009275, 015545, 008400, 007195, 000725, 01567...",1.061135,0.780826
