In [125]:
import statistics
import numpy as np
import pandas as pd
from marcap import marcap_data
from datetime import datetime, timedelta

In [4]:
# 백테스트 기간의 가격 정보를 가져온다.
df_price = marcap_data('2000-01-01', '2019-12-31')

In [191]:
quarter_start_month = [1, 4, 7, 10]
days = []
year = 2002
for i in range(30):
    month = quarter_start_month[i % 4]
    start_date = datetime(year, month, 1)
    
    if month == 10:
        year += 1
    
    month = quarter_start_month[(i + 1) % 4]
    end_date = datetime(year, month, 1) - timedelta(days=1)
    
    day = {'start_date':start_date, 'end_date':end_date}
    days.append(day)

In [192]:
df_account = pd.DataFrame(columns=['start_date', 'end_date'])

In [193]:
df_account = df_account.append(days)

In [194]:
df_account

Unnamed: 0,start_date,end_date
0,2002-01-01,2002-03-31
1,2002-04-01,2002-06-30
2,2002-07-01,2002-09-30
3,2002-10-01,2002-12-31
4,2003-01-01,2003-03-31
5,2003-04-01,2003-06-30
6,2003-07-01,2003-09-30
7,2003-10-01,2003-12-31
8,2004-01-01,2004-03-31
9,2004-04-01,2004-06-30


In [176]:
df_year_price = df_year_price.reset_index()

In [177]:
df_stock_yield = df_year_price.groupby("Code").agg({'Open': 'first', 'Close': 'last', 'Date': 'last'})

In [180]:
df_stock_yield['diff'] = df_stock_yield.apply(lambda x: (start_date - x['Date']).days, axis=1)

In [181]:
df_stock_yield = df_stock_yield[df_stock_yield['diff'] < 7]

Unnamed: 0_level_0,Open,Close,Date,diff
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000010,2660.0,2160.0,2004-06-17,-77
000020,7000.0,6920.0,2004-06-30,-90
000040,445.0,380.0,2004-06-30,-90
000050,51000.0,49000.0,2004-06-30,-90
000060,16950.0,15800.0,2004-06-30,-90
...,...,...,...,...
076170,2540.0,1850.0,2004-06-30,-90
076850,5280.0,5470.0,2004-06-30,-90
077280,17200.0,12450.0,2004-06-30,-90
077970,5000.0,3075.0,2004-06-30,-90


In [196]:
def get_stocks(start_date):
    print(start_date)
    year_start_date = start_date - timedelta(days=365)
    year_end_date = start_date
    df_year_price = df_price[(df_price.index > year_start_date) & (df_price.index <= year_end_date)].reset_index()
    
    # 종목별로 과거 1년의 수익률 구하기
    df_stock_yield = df_year_price.groupby("Code").agg({'Open': 'first', 'Close': 'last', 'Date': 'last'}).reset_index()
    df_stock_yield['diff'] = df_stock_yield.apply(lambda x: (start_date - x['Date']).days, axis=1)
    df_stock_yield = df_stock_yield[df_stock_yield['diff'] < 7]
    
    df_stock_yield['yield'] = df_stock_yield['Close'] / df_stock_yield['Open']
    df_stock_yield = df_stock_yield.sort_values(by=['yield'], ascending=False)
    
    return df_stock_yield['Code'].tolist()[:30]

In [197]:
# 연도별 종목을 구한다.
df_account['stocks'] = df_account.apply(lambda x: get_stocks(x.start_date), axis=1)

2002-01-01 00:00:00
2002-04-01 00:00:00
2002-07-01 00:00:00
2002-10-01 00:00:00
2003-01-01 00:00:00
2003-04-01 00:00:00
2003-07-01 00:00:00
2003-10-01 00:00:00
2004-01-01 00:00:00
2004-04-01 00:00:00
2004-07-01 00:00:00
2004-10-01 00:00:00
2005-01-01 00:00:00
2005-04-01 00:00:00
2005-07-01 00:00:00
2005-10-01 00:00:00
2006-01-01 00:00:00
2006-04-01 00:00:00
2006-07-01 00:00:00
2006-10-01 00:00:00
2007-01-01 00:00:00
2007-04-01 00:00:00
2007-07-01 00:00:00
2007-10-01 00:00:00
2008-01-01 00:00:00
2008-04-01 00:00:00
2008-07-01 00:00:00
2008-10-01 00:00:00
2009-01-01 00:00:00
2009-04-01 00:00:00


In [198]:
df_account['yield'] = 0.0
for i, row in df_account.iterrows():
    start_date = df_account.iloc[i].start_date
    end_date = df_account.iloc[i].end_date
    df_year_price = df_price[(df_price.index >= start_date) & (df_price.index <= end_date)]
    
    stocks = row['stocks']
    yields = []
    
    for stock in stocks:
        df_stock_price = df_year_price[df_year_price['Code'] == stock]
        
        if len(df_stock_price) == 0:
            continue
            
        start_price = df_stock_price.iloc[0]['Open']
        end_price = df_stock_price.iloc[len(df_stock_price) - 1]['Close']

        yields.append(end_price / start_price)
    
    yearly_yield = statistics.mean(yields)
    df_account.at[i, 'yield'] = yearly_yield
    
    print(start_date, end_date, yearly_yield)

2002-01-01 00:00:00 2002-03-31 00:00:00 1.0954794608475742
2002-04-01 00:00:00 2002-06-30 00:00:00 0.593922185248745
2002-07-01 00:00:00 2002-09-30 00:00:00 0.7111834444794408
2002-10-01 00:00:00 2002-12-31 00:00:00 0.9276806549978956
2003-01-01 00:00:00 2003-03-31 00:00:00 0.79775509971497
2003-04-01 00:00:00 2003-06-30 00:00:00 1.2504638223678182
2003-07-01 00:00:00 2003-09-30 00:00:00 0.7778299835867479
2003-10-01 00:00:00 2003-12-31 00:00:00 1.048001480371237
2004-01-01 00:00:00 2004-03-31 00:00:00 1.1520445322665067
2004-04-01 00:00:00 2004-06-30 00:00:00 0.9711306110373626
2004-07-01 00:00:00 2004-09-30 00:00:00 0.8129400350842573
2004-10-01 00:00:00 2004-12-31 00:00:00 1.0735271470759928
2005-01-01 00:00:00 2005-03-31 00:00:00 1.316736338350419
2005-04-01 00:00:00 2005-06-30 00:00:00 1.0314717396956379
2005-07-01 00:00:00 2005-09-30 00:00:00 1.1719344777620213
2005-10-01 00:00:00 2005-12-31 00:00:00 0.9686384925607867
2006-01-01 00:00:00 2006-03-31 00:00:00 0.9343144105049322
20

In [199]:
df_account['cum_yield'] = df_account['yield'].cumprod()

In [200]:
df_account

Unnamed: 0,start_date,end_date,stocks,yield,cum_yield
0,2002-01-01,2002-03-31,"[002055, 015545, 012270, 002050, 003930, 00356...",1.095479,1.095479
1,2002-04-01,2002-06-30,"[036900, 008705, 002055, 003930, 001675, 01841...",0.593922,0.65063
2,2002-07-01,2002-09-30,"[013000, 004965, 013005, 001250, 008705, 01227...",0.711183,0.462717
3,2002-10-01,2002-12-31,"[004965, 008705, 000400, 013000, 011160, 01401...",0.927681,0.429254
4,2003-01-01,2003-03-31,"[008705, 004965, 017555, 004870, 000470, 00401...",0.797755,0.342439
5,2003-04-01,2003-06-30,"[004870, 017555, 020120, 004010, 012650, 01401...",1.250464,0.428208
6,2003-07-01,2003-09-30,"[020120, 000660, 009275, 009270, 010670, 06513...",0.77783,0.333073
7,2003-10-01,2003-12-31,"[000660, 053450, 009275, 025830, 013360, 00927...",1.048001,0.349061
8,2004-01-01,2004-03-31,"[000660, 003560, 009270, 017800, 027040, 00143...",1.152045,0.402134
9,2004-04-01,2004-06-30,"[004660, 025830, 004410, 009270, 009275, 01554...",0.971131,0.390524
