### 상대모멘텀 
- 10개의 주식 데이터를 이용하여 투자 전략 테스팅
1. 월초부터 월말까지의 수정주가를 이용하여 월별 수익율
2. 월별 수익율이 높은 순서중 n개의 주식을 선택 
3. 해당하는 주식들을 매수 후 매도하여 수익율 계산

In [None]:
import pandas as pd 
from datetime import datetime
import numpy as np
import os 
import glob
import warnings

In [None]:
# warning message 제거 
warnings.filterwarnings('ignore')

In [None]:
# 월별 수익율을 계산하는 함수 생성 
def create_1M_rtn(_df, _ticker, _start = "2010-01-01", _col = 'Adj Close'):
    # 복사본 생성 
    result = _df.copy()
    # 컬럼에 Date가 포함되어있는지 확인 
    if 'Date' in result.columns:
        result = result.loc[result['Date'] >= _start, ['Date', _col]]
        # Date 컬럼의 데이터를 시계열로 변경
        result['Date'] = pd.to_datetime(result['Date'], format='%Y-%m-%d')
        result.set_index('Date', inplace=True)
    else:
        result.index = pd.to_datetime(result.index, inplace=True)
        result = result.loc[_start:, [_col]]
    # 기준 년월 컬럼을 생성
    result['STD-YM'] = result.index.strftime('%Y-%m')
    result['1m_rtn'] = 0
    result['CODE'] = _ticker
    # 기준 년월의 중복데이터를 제거하고 고유한 값들을 리스트로 생성
    ym_list = result['STD-YM'].unique()
    return result, ym_list

In [None]:
aapl = pd.read_csv('../../csv/AAPL.csv')

In [None]:
sample_aapl, ym_list = create_1M_rtn(aapl, 'AAPL')

In [None]:
sample_aapl.head()

In [None]:
ym_list

In [None]:
# 특정 경로에 있는 파일들의 목록을 로드 
os.listdir("./data")

files = glob.glob("./data/*.csv")

# 새로운 데이터프레임을 생성 
# 종목별 데이터프레임 
stock_df = pd.DataFrame()
# 월말 데이터프레임 
month_last_df = pd.DataFrame()

for file in files:
    folder, name = os.path.split(file)
    # print(folder, name)
    head, tail = os.path.splitext(name)
    # print(head, tail)
    # head는 create_1M_rtn 함수에 ticker 인자값으로 사용

    # 데이터 파일을 로드 
    read_df = pd.read_csv(file)

    # create_1M_rtn 함수를 호출 
    price_df, ym_list = create_1M_rtn(read_df, head)

    # 유니언 결합 (단순한 행 결합 함수)
    stock_df = pd.concat([stock_df, price_df], axis=0)

    # 월별 상태 모멘텀을 계산하기 위해 1개월간의 수익율 계산
    for ym in ym_list:
        flag = price_df['STD-YM'] == ym
        m_rtn = price_df.loc[flag,].iloc[-1, 0] / price_df.loc[flag,].iloc[0, 0]
        price_df.loc[flag, '1m_rtn'] = m_rtn
        data = price_df.loc[flag, ['CODE', '1m_rtn']].tail(1)
        month_last_df = pd.concat([month_last_df, data], axis=0)

In [None]:
month_last_df.head()

In [84]:
month_rtn_df = month_last_df.copy()

In [85]:
month_rtn_df.reset_index(inplace=True)

In [86]:
month_rtn_df = month_rtn_df.pivot_table(
    index = 'Date', 
    columns= 'CODE', 
    values= '1m_rtn'
)

In [87]:
month_rtn_df = month_rtn_df.rank(
    axis=1, 
    ascending=False, 
    method='max', 
    pct=True
)

In [88]:
# 상위 15% 종목만 선택 
# where(조건식, 거짓일때 변경될 데이터)
month_rtn_df =  month_rtn_df.where(month_rtn_df < 0.35, 0)

In [89]:
# 데이터 중에서 0이 아닌 데이터는 1로 변경 
month_rtn_df[month_rtn_df != 0] = 1

In [90]:
month_rtn_df

CODE,AAPL,AMZN,BND,GDX,GLD,GM,MSFT,SLV,SPY,USM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-29,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2010-02-26,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2010-03-31,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2010-04-30,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
2010-05-28,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2019-02-28,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2019-03-29,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2019-04-30,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2019-05-31,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0


In [91]:
# CODE 값들을 유니크 데이터만 따로 출력 
stock_codes = stock_df['CODE'].unique()
stock_codes

array(['AAPL', 'AMZN', 'BND', 'GDX', 'GLD', 'GM', 'MSFT', 'SLV', 'SPY',
       'USM'], dtype=object)

In [92]:
month_rtn_df.loc["2010-01-29 00:00:00", month_rtn_df.iloc[0] >= 1].index

Index(['BND', 'GLD', 'SPY'], dtype='object', name='CODE')

In [93]:
sig_dict = dict()

for date in month_rtn_df.index:
    # print(date)
    ticker_list = list(
        month_rtn_df.loc[date, month_rtn_df.loc[date] >= 1].index)
    # print(ticker_list)
    sig_dict[date] = ticker_list

In [94]:
sig_dict

{Timestamp('2010-01-29 00:00:00'): ['BND', 'GLD', 'SPY'],
 Timestamp('2010-02-26 00:00:00'): ['AAPL', 'GDX', 'SPY'],
 Timestamp('2010-03-31 00:00:00'): ['AAPL', 'AMZN', 'USM'],
 Timestamp('2010-04-30 00:00:00'): ['AAPL', 'GDX', 'MSFT'],
 Timestamp('2010-05-28 00:00:00'): ['BND', 'GDX', 'GLD'],
 Timestamp('2010-06-30 00:00:00'): ['BND', 'GDX', 'GLD'],
 Timestamp('2010-07-30 00:00:00'): ['MSFT', 'SPY', 'USM'],
 Timestamp('2010-08-31 00:00:00'): ['GDX', 'GLD', 'SLV'],
 Timestamp('2010-09-30 00:00:00'): ['AAPL', 'AMZN', 'SLV'],
 Timestamp('2010-10-29 00:00:00'): ['AMZN', 'MSFT', 'SLV'],
 Timestamp('2010-11-30 00:00:00'): ['AMZN', 'GDX', 'SLV'],
 Timestamp('2010-12-31 00:00:00'): ['MSFT', 'SLV', 'USM'],
 Timestamp('2011-01-31 00:00:00'): ['AAPL', 'BND', 'SPY'],
 Timestamp('2011-02-28 00:00:00'): ['GDX', 'GLD', 'SLV'],
 Timestamp('2011-03-31 00:00:00'): ['AMZN', 'SLV', 'USM'],
 Timestamp('2011-04-29 00:00:00'): ['AMZN', 'GLD', 'SLV'],
 Timestamp('2011-05-31 00:00:00'): ['AAPL', 'BND', 'USM']

In [95]:
# 거래 내역컬럼을 생성하는 데이터프레임 생성하는 함수 
def create_trade_book(_df, _code):
    book = _df[_code].copy()
    book['STD-YM'] = book.index.strftime('%Y-%m')
    for c in _code:
        book['p'+c] = ""
        book['r'+c] = ""
    return book

In [96]:
stock_df.head()

Unnamed: 0_level_0,Adj Close,STD-YM,1m_rtn,CODE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,26.782711,2010-01,0,AAPL
2010-01-05,26.82901,2010-01,0,AAPL
2010-01-06,26.40226,2010-01,0,AAPL
2010-01-07,26.35346,2010-01,0,AAPL
2010-01-08,26.528664,2010-01,0,AAPL


In [97]:
stock_c_matrix = stock_df.reset_index().pivot_table(
    index='Date', 
    columns = 'CODE', 
    values= stock_df.columns[0]
)

In [98]:
stock_c_matrix

CODE,AAPL,AMZN,BND,GDX,GLD,GM,MSFT,SLV,SPY,USM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-04,26.782711,133.899994,60.611969,44.908779,109.800003,,24.525019,17.230000,93.675278,36.015179
2010-01-05,26.829010,134.690002,60.789135,45.341774,109.699997,,24.532942,17.510000,93.923241,35.998024
2010-01-06,26.402260,132.250000,60.766037,46.443077,111.510002,,24.382378,17.860001,93.989357,35.680672
2010-01-07,26.353460,130.000000,60.719822,46.217175,110.820000,,24.128809,17.889999,94.386139,35.208931
2010-01-08,26.528664,133.520004,60.781410,46.913723,111.370003,,24.295214,18.150000,94.700218,34.651424
...,...,...,...,...,...,...,...,...,...,...
2019-06-18,198.449997,1901.369995,82.397118,23.670000,127.120003,36.700001,135.160004,14.050000,290.984741,50.540001
2019-06-19,197.869995,1908.790039,82.676468,24.000000,127.889999,36.779999,135.690002,14.170000,291.641541,50.040001
2019-06-20,199.460007,1918.189941,82.806168,25.049999,131.110001,36.959999,136.949997,14.450000,294.427979,49.320000
2019-06-21,198.779999,1911.300049,82.576698,25.209999,131.979996,36.919998,136.970001,14.360000,294.000000,48.330002


In [99]:
book = create_trade_book(stock_c_matrix, stock_codes)

In [100]:
book.columns

Index(['AAPL', 'AMZN', 'BND', 'GDX', 'GLD', 'GM', 'MSFT', 'SLV', 'SPY', 'USM',
       'STD-YM', 'pAAPL', 'rAAPL', 'pAMZN', 'rAMZN', 'pBND', 'rBND', 'pGDX',
       'rGDX', 'pGLD', 'rGLD', 'pGM', 'rGM', 'pMSFT', 'rMSFT', 'pSLV', 'rSLV',
       'pSPY', 'rSPY', 'pUSM', 'rUSM'],
      dtype='object', name='CODE')

In [101]:
# 포지션을 생성 
for date, values in sig_dict.items():
    # print(date, values)
    for stock in values:
        book.loc[date, 'p'+stock] = 'ready'+stock

In [102]:
book[['pBND']].iloc[15:25]

CODE,pBND
Date,Unnamed: 1_level_1
2010-01-26,
2010-01-27,
2010-01-28,
2010-01-29,readyBND
2010-02-01,
2010-02-02,
2010-02-03,
2010-02-04,
2010-02-05,
2010-02-08,


In [103]:
# 거래 내역 추가 
def trading(_book, s_code):
    std_ym = ""
    buy_phase = False

    # 종목별로 순회
    for code in s_code:
        for i in _book.index:
            # 해당 종목코드의 포지션을 잡는다.
            if (_book.loc[i, 'p'+code] == "") & \
                (_book.shift().loc[i, 'p'+code] == "ready"+code):
                std_ym = book.loc[i, 'STD-YM']
                buy_phase = True
            # 해당 종목코드에서 신호가 잡혀있다면 매수 상태 유지 
            if (_book.loc[i, 'p'+code] == "") & \
                (_book.loc[i, 'STD-YM'] == std_ym) & \
                (buy_phase):
                _book.loc[i, 'p'+code] = 'buy'+code
            
            # std_ym, buy_phase 초기화
            if book.loc[i, 'p'+code] == "":
                std_ym = ""
                buy_phase = False
    return book
                

In [104]:
book = trading(book, stock_codes)

In [105]:
book['pAAPL'].value_counts()

pAAPL
             1368
buyAAPL       968
readyAAPL      48
Name: count, dtype: int64

In [106]:
# 수익율 계산 함수 
def multi_returns(_book, s_code):
    rtn = 1
    buy_dict = dict()
    sell_dict = dict()

    for i in _book.index:
        for code in s_code:
            # 매수 (p+code 2일전에 "" 1일전에 ready 오늘이 buy)
            if (_book.shift(2).loc[i, "p"+code] == "") & \
                (_book.shift(1).loc[i, 'p'+code] == "ready"+code) & \
                (_book.loc[i, 'p'+code] == "buy"+code):
                buy_dict[code] = book.loc[i, code]
                print(f"매수일 : {i}, 종목코드 : {code}, 매수가 : {buy_dict[code]}")
            # 매도 (1일 전의 pcode가 buy 오늘의 pcode가 "")
            elif (_book.shift(1).loc[i, "p"+code] == "buy"+code) & \
                (_book.loc[i, 'p'+code] == ""):
                sell_dict[code] = _book.loc[i, code]
                # 수익율 계산 
                rtn = sell_dict[code] / buy_dict[code]
                _book.loc[i, 'r'+code] = rtn
                print(f"매도일 : {i}, 종목코드 : {code}, 매도가 : {sell_dict[code]}, 수익율 : {rtn}")
            # buy_dict, sell_dict 데이터를 초기화
            if _book.loc[i, 'p'+code] == "":
                buy_dict[code] = 0
                sell_dict[code] = 0
    return _book


In [107]:
rtn_book = multi_returns(book, stock_codes)

매수일 : 2010-02-01 00:00:00, 종목코드 : BND, 매수가 : 61.280487
매수일 : 2010-02-01 00:00:00, 종목코드 : GLD, 매수가 : 108.349998
매수일 : 2010-02-01 00:00:00, 종목코드 : SPY, 매수가 : 90.145805
매수일 : 2010-03-01 00:00:00, 종목코드 : AAPL, 매수가 : 26.154476
매도일 : 2010-03-01 00:00:00, 종목코드 : BND, 매도가 : 61.585163, 수익율 : 1.0049718273289832
매수일 : 2010-03-01 00:00:00, 종목코드 : GDX, 매수가 : 42.019035
매도일 : 2010-03-01 00:00:00, 종목코드 : GLD, 매도가 : 109.43, 수익율 : 1.0099677159200318
매수일 : 2010-04-01 00:00:00, 종목코드 : AMZN, 매수가 : 131.809998
매도일 : 2010-04-01 00:00:00, 종목코드 : GDX, 매도가 : 43.675705, 수익율 : 1.0394266550862008
매도일 : 2010-04-01 00:00:00, 종목코드 : SPY, 매도가 : 97.770996, 수익율 : 1.0845873083056943
매수일 : 2010-04-01 00:00:00, 종목코드 : USM, 매수가 : 36.212452
매도일 : 2010-05-03 00:00:00, 종목코드 : AMZN, 매도가 : 137.490005, 수익율 : 1.043092383629351
매수일 : 2010-05-03 00:00:00, 종목코드 : GDX, 매수가 : 46.951374
매수일 : 2010-05-03 00:00:00, 종목코드 : MSFT, 매수가 : 24.56805
매도일 : 2010-05-03 00:00:00, 종목코드 : USM, 매도가 : 36.744232, 수익율 : 1.0146850039317967
매도일 : 2010-06-01 

In [108]:
# 누적 수익율 계산 함수 
def multi_acc_returns(_book, s_code):
    # 누적 수익율 변수 생성
    acc_rtn = 1
    for i in _book.index:
        count = 0
        rtn = 0
        for code in s_code:
            # 수익율 데이터가 존재하는 경우
            if _book.loc[i, 'r'+code]:
                count += 1
                rtn += _book.loc[i, "r"+code]
        if (rtn != 0) & (count != 0):
            acc_rtn *= (rtn / count)
            print(f"누적 매도일 : {i}, 매도 종목수 : {count}, 수익율 : {round(rtn/count, 2)}")
        _book.loc[i, 'acc_rtn'] = acc_rtn
    
    return _book, acc_rtn

In [109]:
multi_acc_returns(rtn_book, stock_codes)

누적 매도일 : 2010-03-01 00:00:00, 매도 종목수 : 2, 수익율 : 1.01
누적 매도일 : 2010-04-01 00:00:00, 매도 종목수 : 2, 수익율 : 1.06
누적 매도일 : 2010-05-03 00:00:00, 매도 종목수 : 2, 수익율 : 1.03
누적 매도일 : 2010-06-01 00:00:00, 매도 종목수 : 2, 수익율 : 1.05
누적 매도일 : 2010-08-02 00:00:00, 매도 종목수 : 3, 수익율 : 0.98
누적 매도일 : 2010-09-01 00:00:00, 매도 종목수 : 3, 수익율 : 0.93
누적 매도일 : 2010-10-01 00:00:00, 매도 종목수 : 2, 수익율 : 1.07
누적 매도일 : 2010-11-01 00:00:00, 매도 종목수 : 1, 수익율 : 1.08
누적 매도일 : 2010-12-01 00:00:00, 매도 종목수 : 1, 수익율 : 0.97
누적 매도일 : 2011-01-03 00:00:00, 매도 종목수 : 2, 수익율 : 1.11
누적 매도일 : 2011-02-01 00:00:00, 매도 종목수 : 3, 수익율 : 1.15
누적 매도일 : 2011-03-01 00:00:00, 매도 종목수 : 3, 수익율 : 1.01
누적 매도일 : 2011-04-01 00:00:00, 매도 종목수 : 2, 수익율 : 0.99
누적 매도일 : 2011-05-02 00:00:00, 매도 종목수 : 1, 수익율 : 0.94
누적 매도일 : 2011-06-01 00:00:00, 매도 종목수 : 3, 수익율 : 1.04
누적 매도일 : 2011-07-01 00:00:00, 매도 종목수 : 3, 수익율 : 1.0
누적 매도일 : 2011-08-01 00:00:00, 매도 종목수 : 3, 수익율 : 1.02
누적 매도일 : 2011-09-01 00:00:00, 매도 종목수 : 1, 수익율 : 0.96
누적 매도일 : 2011-10-03 00:00:00, 매도 종목수 : 3, 수익율 :

(CODE              AAPL         AMZN        BND        GDX         GLD  \
 Date                                                                    
 2010-01-04   26.782711   133.899994  60.611969  44.908779  109.800003   
 2010-01-05   26.829010   134.690002  60.789135  45.341774  109.699997   
 2010-01-06   26.402260   132.250000  60.766037  46.443077  111.510002   
 2010-01-07   26.353460   130.000000  60.719822  46.217175  110.820000   
 2010-01-08   26.528664   133.520004  60.781410  46.913723  111.370003   
 ...                ...          ...        ...        ...         ...   
 2019-06-18  198.449997  1901.369995  82.397118  23.670000  127.120003   
 2019-06-19  197.869995  1908.790039  82.676468  24.000000  127.889999   
 2019-06-20  199.460007  1918.189941  82.806168  25.049999  131.110001   
 2019-06-21  198.779999  1911.300049  82.576698  25.209999  131.979996   
 2019-06-24  199.169998  1907.953857  82.726349  25.703501  133.501907   
 
 CODE               GM        MSFT  