In [409]:
import os
import pandas as pd
import numpy as np
import datetime
import glob

In [410]:
# Load the list of CSV files. glob returns paths in filesystem-dependent order,
# so sort them to keep the ticker order reproducible between runs and machines.
files = sorted(glob.glob('./csv/*.csv'))
files

['./csv\\AAPL.csv', './csv\\AMZN.csv', './csv\\BND.csv']

In [411]:
# Create an empty DataFrame that declares only the column names
month_last_df = pd.DataFrame(columns=['Date', 'CODE', '1M_RET'])
month_last_df

Unnamed: 0,Date,CODE,1M_RET


In [412]:
# Create the (initially empty) per-stock price DataFrame
stock_df = pd.DataFrame(columns=['Date', 'CODE', 'Adj Close'])

In [413]:
# Print every globbed path. Note that the "<DIR>" suffix is printed for each
# entry without checking whether the path is actually a directory.
for file in files:
    print('%s <DIR>'%file)

./csv\AAPL.csv <DIR>
./csv\AMZN.csv <DIR>
./csv\BND.csv <DIR>


In [414]:
# Preview each CSV: print the pieces of its path and the first data row.
for file in files:
    # Directories are only reported, never read.
    if os.path.isdir(file):
        print('%s <DIR>'%file)
        continue

    # "./csv/AAPL.csv" -> folder "./csv", name "AAPL.csv", head "AAPL", tail ".csv"
    folder, name = os.path.split(file)
    head, tail = os.path.splitext(name)
    print(folder, name, head, tail)

    read_df = pd.read_csv(file)
    print(read_df.head(1))


        

./csv AAPL.csv AAPL .csv
         Date      Open      High       Low     Close  Adj Close       Volume
0  1980-12-12  0.513393  0.515625  0.513393  0.513393   0.410525  117258400.0
./csv AMZN.csv AMZN .csv
         Date    Open  High       Low     Close  Adj Close    Volume
0  1997-05-15  2.4375   2.5  1.927083  1.958333   1.958333  72156000
./csv BND.csv BND .csv
         Date   Open       High   Low      Close  Adj Close  Volume
0  2007-04-10  75.07  75.260002  75.0  75.239998  51.523693   35000


In [415]:
def data_preprocessing(sample, ticker, base_date):
    """Trim one ticker's price history and prepare it for monthly-return work.

    Parameters
    ----------
    sample : pandas.DataFrame
        Raw price table (e.g. the frame read from AAPL.csv). Must contain the
        'Date' and 'Adj Close' columns. 'Date' is compared with ``base_date``
        as-is, so both are expected to be 'YYYY-MM-DD' strings.
    ticker : str
        Stock code written into the 'CODE' column (e.g. 'AAPL').
    base_date : str
        Inclusive lower bound for 'Date' (e.g. '2010-01-01').

    Returns
    -------
    tuple
        ``(frame, ym_keys)`` where ``frame`` has the columns
        Date, CODE, Adj Close, STD-YM, 1M_RET on a fresh 0..n-1 index, and
        ``ym_keys`` is the list of distinct 'STD-YM' values in order of first
        appearance.

    Notes
    -----
    The input frame is left untouched; all new columns are added to a new
    frame, so the caller's data is not polluted with a 'CODE' column.
    """
    # Keep only the rows on/after base_date. reset_index returns a new frame,
    # so the column assignments below never write into a slice of `sample`.
    recent = sample.loc[sample['Date'] >= base_date, ['Date', 'Adj Close']]
    recent = recent.reset_index(drop=True)

    # Stock code, placed right after 'Date' to keep the original column order.
    recent.insert(1, 'CODE', ticker)

    # Reference year-month ("YYYY-MM") of every row, computed for the whole column at once.
    recent["STD-YM"] = pd.to_datetime(recent['Date'], format='%Y-%m-%d').dt.strftime("%Y-%m")

    # Placeholder for the monthly return; filled in by the caller.
    recent["1M_RET"] = 0.0

    # Distinct reference year-months, in order of first appearance.
    ym_keys = list(recent["STD-YM"].unique())

    return recent, ym_keys

In [416]:
# Run the preprocessing on a single ticker (AAPL, from 2010-01-01) and
# inspect both return values.
df = pd.read_csv("./csv/AAPL.csv")
price_df, ym_keys = data_preprocessing(df, 'AAPL', '2010-01-01')
print(price_df)
print(ym_keys)

            Date  CODE   Adj Close   STD-YM  1M_RET
0     2010-01-04  AAPL   26.782711  2010-01     0.0
1     2010-01-05  AAPL   26.829010  2010-01     0.0
2     2010-01-06  AAPL   26.402260  2010-01     0.0
3     2010-01-07  AAPL   26.353460  2010-01     0.0
4     2010-01-08  AAPL   26.528664  2010-01     0.0
...          ...   ...         ...      ...     ...
2379  2019-06-18  AAPL  198.449997  2019-06     0.0
2380  2019-06-19  AAPL  197.869995  2019-06     0.0
2381  2019-06-20  AAPL  199.460007  2019-06     0.0
2382  2019-06-21  AAPL  198.779999  2019-06     0.0
2383  2019-06-24  AAPL  199.169998  2019-06     0.0

[2384 rows x 5 columns]
['2010-01', '2010-02', '2010-03', '2010-04', '2010-05', '2010-06', '2010-07', '2010-08', '2010-09', '2010-10', '2010-11', '2010-12', '2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06', '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12', '2012-01', '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07', '2012-08'

In [417]:
def create_trade_book(sample, sample_codes):
    """Build the trade book: prices plus empty position/return columns.

    Parameters
    ----------
    sample : pandas.DataFrame
        Price matrix indexed by 'YYYY-MM-DD' date strings, one column per code.
    sample_codes : list of str
        Codes (column names of ``sample``) to include in the book.

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected price columns, a 'STD-YM' ("YYYY-MM")
        column derived from the index, and for every code an empty-string
        'p <code>' (position label) and 'r <code>' (return) column.
        ``sample`` itself is not modified.
    """
    # Explicit copy: the columns added below must go into an independent frame,
    # not into a selection of `sample`.
    book = sample[sample_codes].copy()
    book["STD-YM"] = book.index.map(
        lambda day: datetime.datetime.strptime(day, "%Y-%m-%d").strftime("%Y-%m"))
    for code in sample_codes:
        book['p ' + code] = ""
        book["r " + code] = ""
    return book

In [418]:
## Trading function: turn "ready" signals into "buy" labels
def tradings(book, s_codes, verbose=True):
    """Mark the holding period that follows every "ready" signal.

    For each code, the row right after a 'ready <code>' row starts a buy
    phase for that row's 'STD-YM' month: every still-empty 'p <code>' cell in
    that month is set to 'buy <code>'. The phase ends at the first empty cell
    that belongs to another month.

    Parameters
    ----------
    book : pandas.DataFrame
        Trade book with a 'STD-YM' column and a 'p <code>' column per code.
        It is modified in place.
    s_codes : list of str
        Codes to process.
    verbose : bool, default True
        Print each code and every date that gets a buy label (the original
        behaviour). Pass False to keep the notebook output short.

    Returns
    -------
    pandas.DataFrame
        The same ``book`` object, with the buy labels filled in.
    """
    for code in s_codes:
        if verbose:
            print(code)
        pos_col = "p " + code
        ready_label = "ready " + code
        buy_label = "buy " + code

        # Phase state is reset per code so that an open phase at the end of
        # one code can never leak into the first rows of the next one.
        std_ym = None
        buy_phase = False
        # Label of the previous row, tracked here instead of recomputing
        # book.shift(1) (a full-frame copy) for every single row.
        prev_label = None

        for day in book.index:
            label = book.at[day, pos_col]

            # First empty row after a "ready" row: the buy phase starts, bound to this month.
            if label == "" and prev_label == ready_label:
                std_ym = book.at[day, "STD-YM"]
                buy_phase = True

            # Empty row inside the active month: hold the position.
            if label == "" and buy_phase and book.at[day, "STD-YM"] == std_ym:
                book.loc[day, pos_col] = buy_label
                label = buy_label
                if verbose:
                    print(day)

            # Still empty => not holding; close the phase.
            if label == "":
                std_ym = None
                buy_phase = False

            prev_label = label
    return book


In [419]:
def multi_returns(book, s_codes, verbose=True):
    """Compute per-trade returns and the cumulative return of the book.

    A long position in a code is opened on the first 'buy <code>' row that
    follows a 'ready <code>' row which itself followed an empty row, at that
    row's price. It is closed on the first empty row after a 'buy <code>' row,
    at that row's price. When several codes are closed on the same day their
    returns are averaged before being compounded.

    Parameters
    ----------
    book : pandas.DataFrame
        Trade book holding, for every code, a price column '<code>', a
        position column 'p <code>' and a return column 'r <code>'. Modified in
        place: realised returns are written to 'r <code>' and the running
        cumulative return to 'acc_rtn' on the closing dates.
    s_codes : list of str
        Codes to evaluate.
    verbose : bool, default True
        Print every entry, exit and cumulative step (the original behaviour).

    Returns
    -------
    float
        The final cumulative return factor (1.0 means no gain or loss). The
        original version only printed this value.
    """
    # Position label seen one and two rows earlier, per code. Rows before the
    # start of the book count as flat (""), so a signal on the very first row
    # still opens a position. Tracking these here replaces book.shift(1) /
    # book.shift(2), which copied the whole frame for every cell visited.
    prev1 = dict.fromkeys(s_codes, "")
    prev2 = dict.fromkeys(s_codes, "")
    entry_price = {}    # code -> price at which the open position was entered
    exits_by_day = {}   # closing date -> returns realised on that date

    for day in book.index:
        for s in s_codes:
            pos = book.at[day, "p " + s]

            if pos == "buy " + s and prev1[s] == "ready " + s and prev2[s] == "":
                entry_price[s] = book.at[day, s]
                if verbose:
                    print("진입일 : ", day, '종목코드 : ', s, 'long 진입 가격 : ', entry_price[s])
            elif pos == "" and prev1[s] == "buy " + s:
                bought_at = entry_price.get(s)
                # Without a recorded entry price no return can be computed
                # (the old code hit a KeyError or divided by 0.0 here).
                if bought_at:
                    sold_at = book.at[day, s]
                    # Profit/loss of this round trip
                    rtn = (sold_at / bought_at) - 1
                    book.loc[day, "r " + s] = rtn
                    exits_by_day.setdefault(day, []).append(rtn)
                    if verbose:
                        print("개별 청산일 : ", day, "종목코드 : ", s, "long 진입 가격 : ", \
                           bought_at, "long 청산 가격 : ", sold_at, "return : ", round(rtn*100, 2))

            # Flat on this row: forget any entry price.
            if pos == "":
                entry_price.pop(s, None)

            prev2[s], prev1[s] = prev1[s], pos

    # Compound the average return of each closing date, in date order
    # (dicts keep insertion order, which is the order of book.index).
    acc_rtn = 1.0
    for day, day_returns in exits_by_day.items():
        rtn = sum(day_returns)
        count = len(day_returns)
        if rtn != 0.0:
            acc_rtn *= (rtn/count) + 1
            if verbose:
                print("누적 청산일 : ", day, "청산 종목 수 : ", count, \
                    "청산 수익율 : ", round((rtn/count), 4), "누적 수익율 : ", round(acc_rtn, 4))
            book.loc[day, "acc_rtn"] = acc_rtn
    if verbose:
        print("누적 수익율 : ", round(acc_rtn, 4))
    return acc_rtn

In [420]:
# Build the combined price table (stock_df) and the month-end return table
# (month_last_df) from every CSV file.
BASE_DATE = "2010-01-01"   # first date (inclusive) kept for every ticker

stock_frames = []      # per-ticker price frames, concatenated once after the loop
month_last_rows = []   # one record per (ticker, month): month-end date and return

for file in files:
    if os.path.isdir(file):
        print('%s <DIR>'%file)
        continue

    ## file = "./csv/AAPL.csv"
    ## split()    -> folder = "./csv", name = "AAPL.csv"
    ## splitext() -> head = "AAPL", tail = ".csv"
    folder, name = os.path.split(file)
    head, tail = os.path.splitext(name)
    read_df = pd.read_csv(file)

    price_df, ym_keys = data_preprocessing(read_df, head, BASE_DATE)
    stock_frames.append(price_df[["Date", "CODE", "Adj Close"]])

    for ym in ym_keys:
        # Index labels of the rows that belong to this year-month, in row order.
        month_idx = price_df.index[price_df["STD-YM"] == ym]
        first_idx, last_idx = month_idx[0], month_idx[-1]

        # Monthly return = Adj Close on the month's last row / Adj Close on its first row
        m_ret = price_df.loc[last_idx, 'Adj Close'] / price_df.loc[first_idx, 'Adj Close']
        price_df.loc[month_idx, '1M_RET'] = m_ret

        # Keep only the month's last row for the ranking table.
        month_last_rows.append(
            {'Date': price_df.loc[last_idx, 'Date'], 'CODE': head, '1M_RET': m_ret})

# Both tables are built from scratch in one step. DataFrame.append no longer
# exists in pandas 2.x, growing a frame row by row is quadratic, and assigning
# (instead of extending the previous value) keeps this cell safe to re-run.
if stock_frames:
    stock_df = pd.concat(stock_frames, ignore_index=True)
else:
    stock_df = pd.DataFrame(columns=['Date', 'CODE', 'Adj Close'])
month_last_df = pd.DataFrame(month_last_rows, columns=['Date', 'CODE', '1M_RET'])

In [421]:
# Reshape to a month-end Date x CODE matrix of monthly returns.
# pivot() arguments are passed by keyword: pandas 2.0+ rejects positional ones.
month_ret_df = month_last_df.pivot(index='Date', columns='CODE', values='1M_RET')
month_ret_df

CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-29,0.897435,0.936595,1.011692
2010-02-26,1.050789,0.996046,1.003909
2010-03-31,1.124456,1.090172,0.996728
2010-04-30,1.106454,1.040134,1.011023
2010-05-28,0.964445,0.912503,1.011821
...,...,...,...
2019-02-28,1.044274,1.008363,1.001883
2019-03-29,1.085615,1.065214,1.022547
2019-04-30,1.049310,1.061917,1.003719
2019-05-31,0.834810,0.928617,1.018200


In [422]:
# Rank the tickers within each row (month-end date) to choose what to invest in.
# With ascending=False and pct=True the highest value in a row gets the
# smallest percentile rank.
# NOTE(review): this cell replaces month_ret_df with its own ranks, so running
# it a second time ranks the ranks instead of the returns.
month_ret_df = month_ret_df.rank(axis=1, ascending=False, method="max", pct=True)
month_ret_df


CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-29,1.000000,0.666667,0.333333
2010-02-26,0.333333,1.000000,0.666667
2010-03-31,0.333333,0.666667,1.000000
2010-04-30,0.333333,0.666667,1.000000
2010-05-28,0.666667,1.000000,0.333333
...,...,...,...
2019-02-28,0.333333,0.666667,1.000000
2019-03-29,0.333333,0.666667,1.000000
2019-04-30,0.666667,0.333333,1.000000
2019-05-31,1.000000,0.666667,0.333333


In [423]:
# Flag the tickers whose percentile rank falls in the top 40%:
# 1.0 where the rank is below 0.4 (and non-zero), 0.0 everywhere else (missing ranks included).
month_ret_df = ((month_ret_df < 0.4) & (month_ret_df != 0)).astype(float)
month_ret_df

CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-29,0.0,0.0,1.0
2010-02-26,1.0,0.0,0.0
2010-03-31,1.0,0.0,0.0
2010-04-30,1.0,0.0,0.0
2010-05-28,0.0,0.0,1.0
...,...,...,...
2019-02-28,1.0,0.0,0.0
2019-03-29,1.0,0.0,0.0
2019-04-30,0.0,1.0,0.0
2019-05-31,0.0,0.0,1.0


In [424]:
# Distinct stock codes, in order of first appearance in stock_df
stock_list = stock_df["CODE"].drop_duplicates().tolist()
stock_list

['AAPL', 'AMZN', 'BND']

In [425]:
# Date x CODE matrix of adjusted closes. It is built here so that this cell
# does not rely on a later cell having been executed first (on a fresh kernel
# the name would otherwise be undefined at this point).
stock_c_matrix = stock_df.pivot(index='Date', columns='CODE', values='Adj Close')
stock_c_matrix.head(1)

CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-04,26.782711,133.899994,60.611969


In [426]:
# Signal dictionary: month-end date -> list of the stock codes flagged (1.0) on that date
sig_dict = {
    date: list(flags[flags >= 1.0].index)
    for date, flags in month_ret_df.iterrows()
}

# Date x CODE matrix of adjusted closes (keyword arguments: pandas 2.0+
# rejects positional pivot() arguments), then the empty trade book on top of it.
stock_c_matrix = stock_df.pivot(index='Date', columns='CODE', values='Adj Close')
book = create_trade_book(stock_c_matrix, stock_list)
book.head()

CODE,AAPL,AMZN,BND,STD-YM,p AAPL,r AAPL,p AMZN,r AMZN,p BND,r BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-04,26.782711,133.899994,60.611969,2010-01,,,,,,
2010-01-05,26.82901,134.690002,60.789135,2010-01,,,,,,
2010-01-06,26.40226,132.25,60.766037,2010-01,,,,,,
2010-01-07,26.35346,130.0,60.719822,2010-01,,,,,,
2010-01-08,26.528664,133.520004,60.78141,2010-01,,,,,,


In [427]:
# Write a "ready <code>" label on each signal date for every selected code.
for date, values in sig_dict.items():
    for stock in values:
        book.loc[date, "p "+stock] = "ready " + stock

## On the first iteration of the loop:
## date = 2010-01-29
## values = ['BND']
## stock = 'BND'
## book.loc["2010-01-29", "p BND"] = "ready BND"
book.value_counts("p AAPL")

p AAPL
              2345
ready AAPL      39
dtype: int64

In [428]:
# Display the codes that are passed to tradings() in the next cell
stock_list

['AAPL', 'AMZN', 'BND']

In [429]:
# Fill in the "buy <code>" labels for every code. tradings() writes into
# `book` itself and also returns it.
book = tradings(book, stock_list)

AAPL
2010-03-01
2010-03-02
2010-03-03
2010-03-04
2010-03-05
2010-03-08
2010-03-09
2010-03-10
2010-03-11
2010-03-12
2010-03-15
2010-03-16
2010-03-17
2010-03-18
2010-03-19
2010-03-22
2010-03-23
2010-03-24
2010-03-25
2010-03-26
2010-03-29
2010-03-30
2010-04-01
2010-04-05
2010-04-06
2010-04-07
2010-04-08
2010-04-09
2010-04-12
2010-04-13
2010-04-14
2010-04-15
2010-04-16
2010-04-19
2010-04-20
2010-04-21
2010-04-22
2010-04-23
2010-04-26
2010-04-27
2010-04-28
2010-04-29
2010-05-03
2010-05-04
2010-05-05
2010-05-06
2010-05-07
2010-05-10
2010-05-11
2010-05-12
2010-05-13
2010-05-14
2010-05-17
2010-05-18
2010-05-19
2010-05-20
2010-05-21
2010-05-24
2010-05-25
2010-05-26
2010-05-27
2010-05-28
2011-02-01
2011-02-02
2011-02-03
2011-02-04
2011-02-07
2011-02-08
2011-02-09
2011-02-10
2011-02-11
2011-02-14
2011-02-15
2011-02-16
2011-02-17
2011-02-18
2011-02-22
2011-02-23
2011-02-24
2011-02-25
2011-03-01
2011-03-02
2011-03-03
2011-03-04
2011-03-07
2011-03-08
2011-03-09
2011-03-10
2011-03-11
2011-03-14
2011-

In [437]:
# Show only the rows whose "p AAPL" label is not the empty string
book[book["p AAPL"] != ""]

CODE,AAPL,AMZN,BND,STD-YM,p AAPL,r AAPL,p AMZN,r AMZN,p BND,r BND,acc_rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-02-26,25.607582,118.400002,61.520061,2010-02,ready AAPL,,,,buy BND,,
2010-03-01,26.154476,124.540001,61.585163,2010-03,buy AAPL,,,,,0.00497183,1.004972
2010-03-02,26.136955,125.529999,61.631660,2010-03,buy AAPL,,,,,,
2010-03-03,26.197020,125.889999,61.639446,2010-03,buy AAPL,,,,,,
2010-03-04,26.369726,128.529999,61.685913,2010-03,buy AAPL,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
2019-04-25,204.492508,1902.250000,80.258209,2019-04,buy AAPL,,,,,,
2019-04-26,203.516266,1950.630005,80.436928,2019-04,buy AAPL,,,,,,
2019-04-29,203.825073,1938.430054,80.268127,2019-04,buy AAPL,,,,,,
2019-04-30,199.900192,1926.520020,80.387283,2019-04,buy AAPL,,ready AMZN,,,,


In [None]:
# Compute the per-trade and cumulative returns from the filled-in book.
# NOTE(review): the recorded output below starts with bare numbers, which does
# not match the labelled print statements of multi_returns as defined in this
# notebook; presumably it came from an earlier version. Re-run to refresh.
multi_returns(book, stock_list)

61.280487
26.154476000000003
62.79006999999999
120.07
43.179466
180.130005
66.480408
209.490005
49.652069
68.38459
211.979996
49.622044
68.562042
51.464207
198.050003
71.17840600000001
229.32000699999998
75.94046
252.00999500000003
71.91355899999999
250.330002
71.360786
54.46909
266.880005
58.37959300000001
320.950012
71.591667
70.937912
68.716904
332.390015
88.105499
342.380005
72.65773
100.766525
326.0
73.617195
364.47000099999997
119.88951899999999
74.701561
422.869995
73.615601
520.719971
74.202827
104.04441800000001
683.849976
93.70474200000001
77.698959
100.928177
836.73999
106.680237
832.349976
135.028976
891.51001
76.933464
145.53784199999998
77.825485
1103.680054
76.91954
1582.26001
187.387604
1713.780029
225.72489900000002
76.678947
1772.359985
78.23925799999999
1626.22998
174.29878200000002
1911.52002
82.087845
누적 청산일 :  2010-03-01 청산 종목 수 :  1 청산 수익율 :  0.005 누적 수익율 :  1.005
누적 청산일 :  2010-06-01 청산 종목 수 :  1 청산 수익율 :  0.248 누적 수익율 :  1.2543
누적 청산일 :  2010-08-02 청산 종목 수 :  1