In [31]:
import os
import pandas as pd
import numpy as np
import datetime
import glob

In [32]:
# Collect the per-ticker CSV files. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to keep the row order of every
# frame built from this list reproducible across machines and runs.
files = sorted(glob.glob('./csv/*.csv'))
files

['./csv\\AAPL.csv', './csv\\AMZN.csv', './csv\\BND.csv']

In [33]:
# Empty accumulator for the month-end rows: no data yet, only the column
# layout (date, ticker code, one-month return) is fixed here.
month_last_df = pd.DataFrame(
    data=None,
    columns=['Date', 'CODE', '1M_RET'],
)
month_last_df

Unnamed: 0,Date,CODE,1M_RET


In [34]:
# Empty accumulator for the per-ticker price rows (date, ticker code,
# adjusted close); rows are added by a later cell.
stock_df = pd.DataFrame(
    data=None,
    columns=['Date', 'CODE', 'Adj Close'],
)

In [35]:
# Preview pass: for every path show how it splits into folder / file name /
# stem / extension, then print the first row of the CSV.
for file in files:
    if os.path.isdir(file):
        # Directories are only reported, never read.
        print('%s <DIR>' % file)
        continue

    folder, name = os.path.split(file)
    head, tail = os.path.splitext(name)
    print(folder, name, head, tail)

    read_df = pd.read_csv(file)
    print(read_df.iloc[:1])


        

./csv AAPL.csv AAPL .csv
         Date      Open      High       Low     Close  Adj Close       Volume
0  1980-12-12  0.513393  0.515625  0.513393  0.513393   0.410525  117258400.0
./csv AMZN.csv AMZN .csv
         Date    Open  High       Low     Close  Adj Close    Volume
0  1997-05-15  2.4375   2.5  1.927083  1.958333   1.958333  72156000
./csv BND.csv BND .csv
         Date   Open       High   Low      Close  Adj Close  Volume
0  2007-04-10  75.07  75.260002  75.0  75.239998  51.523693   35000


In [36]:
def data_preprocessing(sample, ticker, base_date):
    """Prepare one ticker's price history for the monthly-return calculation.

    Parameters
    ----------
    sample : pandas.DataFrame
        Raw price data with at least a 'Date' column holding 'YYYY-MM-DD'
        strings and an 'Adj Close' column. The frame is not modified.
    ticker : str
        Value written to the 'CODE' column of every returned row.
    base_date : str
        Rows whose 'Date' is earlier than this value are dropped. The
        comparison is a plain string comparison, so it has to use the same
        'YYYY-MM-DD' layout as the 'Date' column.

    Returns
    -------
    tuple
        ``(frame, ym_keys)``. ``frame`` has the columns 'Date', 'CODE',
        'Adj Close', 'STD-YM' (the 'YYYY-MM' part of 'Date') and '1M_RET'
        (initialised to 0.0) and a fresh 0..n-1 index. ``ym_keys`` is the list
        of distinct 'STD-YM' values in order of first appearance.

    Raises
    ------
    ValueError
        If a 'Date' value does not match '%Y-%m-%d' (raised by strptime).
    """
    in_range = sample['Date'] >= base_date

    # Work on an independent copy: the caller's frame must not gain a 'CODE'
    # column as a side effect, and adding columns to a filtered slice would
    # trigger pandas' SettingWithCopyWarning.
    prepared = sample.loc[in_range, ['Date', 'Adj Close']].copy()
    prepared.insert(1, 'CODE', ticker)  # ticker code between Date and Adj Close
    prepared = prepared.reset_index(drop=True)  # discard the original row labels

    # Reference year-month of each row.
    prepared["STD-YM"] = prepared['Date'].map(
        lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').strftime("%Y-%m"))
    # Placeholder for the monthly return; filled in by the caller.
    prepared["1M_RET"] = 0.0

    # Distinct reference year-months, in order of first appearance.
    ym_keys = list(prepared["STD-YM"].unique())

    return prepared, ym_keys

In [37]:
# Sanity check of the preprocessing on a single ticker: print the prepared
# frame followed by its list of reference year-months.
df = pd.read_csv("./csv/AAPL.csv")
price_df, ym_keys = data_preprocessing(df, 'AAPL', '2010-01-01')
print(price_df, ym_keys, sep="\n")

            Date  CODE   Adj Close   STD-YM  1M_RET
0     2010-01-04  AAPL   26.782711  2010-01     0.0
1     2010-01-05  AAPL   26.829010  2010-01     0.0
2     2010-01-06  AAPL   26.402260  2010-01     0.0
3     2010-01-07  AAPL   26.353460  2010-01     0.0
4     2010-01-08  AAPL   26.528664  2010-01     0.0
...          ...   ...         ...      ...     ...
2379  2019-06-18  AAPL  198.449997  2019-06     0.0
2380  2019-06-19  AAPL  197.869995  2019-06     0.0
2381  2019-06-20  AAPL  199.460007  2019-06     0.0
2382  2019-06-21  AAPL  198.779999  2019-06     0.0
2383  2019-06-24  AAPL  199.169998  2019-06     0.0

[2384 rows x 5 columns]
['2010-01', '2010-02', '2010-03', '2010-04', '2010-05', '2010-06', '2010-07', '2010-08', '2010-09', '2010-10', '2010-11', '2010-12', '2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06', '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12', '2012-01', '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07', '2012-08'

In [38]:
def create_trade_book(sample, sample_codes):
    """Create an empty trade book for the given ticker codes.

    Parameters
    ----------
    sample : pandas.DataFrame
        Price matrix with one column per ticker code, indexed by
        'YYYY-MM-DD' date strings. The frame is not modified.
    sample_codes : list of str
        Ticker codes (column labels of ``sample``) to include in the book.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected price columns plus 'STD-YM' (the 'YYYY-MM'
        part of the index) and, for every code, an empty-string position
        column 'p <code>' and an empty-string return column 'r <code>'.

    Raises
    ------
    ValueError
        If an index value does not match '%Y-%m-%d' (raised by strptime).
    """
    # Copy so the new columns are added to an independent frame and not to a
    # slice of `sample` (avoids SettingWithCopyWarning).
    book = sample[sample_codes].copy()
    book["STD-YM"] = book.index.map(
        lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").strftime("%Y-%m"))
    for code in sample_codes:
        book['p ' + code] = ""
        # The return column starts as '' and is later filled with floats by
        # multi_returns, so keep it as a generic object column.
        book["r " + code] = pd.Series("", index=book.index, dtype=object)
    return book

In [39]:
## Trading function: turn 'ready' signals into held ('buy') positions.
def tradings(book, s_codes):
    """Fill the position columns of ``book`` with buy markers.

    For every code, a row whose position 'p <code>' is still '' and whose
    previous row holds 'ready <code>' starts a buy phase for that row's
    'STD-YM'. Every following row that is still '' and lies in the same
    'STD-YM' is marked 'buy<code>'. The first '' row outside that month ends
    the phase.

    Parameters
    ----------
    book : pandas.DataFrame
        Trade book with a 'STD-YM' column and a 'p <code>' column per code.
        It is modified in place.
    s_codes : iterable of str
        Ticker codes to process; each code is printed as it is handled.

    Returns
    -------
    pandas.DataFrame
        The same ``book`` object, after modification.
    """
    for code in s_codes:
        print(code)
        pos_col = "p " + code
        ready_mark = "ready " + code

        # Phase state is per ticker, so it is reset for every code instead of
        # leaking over from the previous ticker's last row.
        std_ym = None
        buy_phase = False
        previous = None  # position of the preceding row; none before the first

        # Walk the rows of this ticker in index order.
        for j in book.index:
            position = book.loc[j, pos_col]
            if position == "":
                # The ready marker is looked up in the position column, the
                # same column multi_returns reads it from.
                if previous == ready_mark:
                    # Row label j (not the ticker code) selects the month.
                    std_ym = book.loc[j, "STD-YM"]
                    buy_phase = True
                if buy_phase and book.loc[j, "STD-YM"] == std_ym:
                    book.loc[j, pos_col] = "buy" + code
                else:
                    # Still flat: no phase is running (any more).
                    std_ym = None
                    buy_phase = False
            # Remembering the value read above replaces a full book.shift(1)
            # per row; a cell rewritten to a buy marker was '' before, and
            # neither value equals the ready marker.
            previous = position
    return book


In [40]:
def multi_returns(book, s_codes):
    """Compute per-trade and cumulative returns from a filled trade book.

    Position markers follow the strings used by ``tradings``:
    'ready <code>' for the signal row and 'buy<code>' for held rows.

    First pass, per row and code:
      * entry: position is 'buy<code>', the previous row is 'ready <code>'
        and the row before that is '' -> remember the entry price;
      * exit: position is '' and the previous row is 'buy<code>' -> store
        ``exit price / entry price - 1`` in 'r <code>';
      * any '' row resets the remembered prices to 0.0.
    Second pass, per row: the returns of all codes exiting on that row are
    averaged and compounded into 'acc_rtn', which is written only on rows
    with a non-zero summed return.

    Parameters
    ----------
    book : pandas.DataFrame
        Trade book with, per code, a price column '<code>', a position column
        'p <code>' and a return column 'r <code>'. It is modified in place.
    s_codes : list of str
        Ticker codes to evaluate.

    Returns
    -------
    None
        Results are written into ``book`` and progress is printed.

    Notes
    -----
    The exit division is not guarded: an exit without a recorded entry uses
    an entry price of 0.0, or raises KeyError if none was ever stored.
    """
    buy_dict = {}
    sell_dict = {}

    buy_mark = {s: "buy" + s for s in s_codes}
    ready_mark = {s: "ready " + s for s in s_codes}
    # Lagged position columns, computed once. Only 'r ...' and 'acc_rtn' are
    # written below, so the position columns cannot go stale.
    prev_pos = {s: book["p " + s].shift(1) for s in s_codes}
    prev2_pos = {s: book["p " + s].shift(2) for s in s_codes}

    for i in book.index:
        for s in s_codes:
            position = book.loc[i, "p " + s]
            if position == buy_mark[s] and \
                    prev_pos[s].loc[i] == ready_mark[s] and \
                    prev2_pos[s].loc[i] == "":
                # Long entry.
                buy_dict[s] = book.loc[i, s]
                print("진입일 : ", i, '종목코드 : ', s, 'long 진입 가격 : ', buy_dict[s])
            elif position == "" and prev_pos[s].loc[i] == buy_mark[s]:
                # Long exit: compute the profit/loss of this trade.
                sell_dict[s] = book.loc[i, s]
                rtn = (sell_dict[s] / buy_dict[s]) - 1
                book.loc[i, "r " + s] = rtn
                print("개별 청산일 : ", i, "종목코드 : ", s, "long 진입 가격 : ", \
                    buy_dict[s], "long 청산 가격 : ", sell_dict[s], "return : ", round(rtn*100, 2))

            if position == "":
                # Flat: forget any remembered prices.
                buy_dict[s] = 0.0
                sell_dict[s] = 0.0

    acc_rtn = 1.0
    for i in book.index:
        rtn = 0.0
        count = 0
        for s in s_codes:
            # Same exit condition as in the first pass, so 'r <code>' holds a
            # number on exactly these rows.
            if book.loc[i, "p " + s] == "" and prev_pos[s].loc[i] == buy_mark[s]:
                count += 1
                rtn += book.loc[i, "r " + s]
        if (rtn != 0.0) and (count != 0):
            # Equal-weight average of the trades closed on this row.
            acc_rtn *= (rtn/count) + 1
            print("누적 청산일 : ", i, "청산 종목 수 : ", count, \
                "청산 수익율 : ", round((rtn/count), 4), "누적 수익율 : ", round(acc_rtn, 4))
            book.loc[i, "acc_rtn"] = acc_rtn
    print("누적 수익율 : ", round(acc_rtn, 4))

In [41]:
# Build stock_df (all prepared price rows) and month_last_df (one row per
# ticker and month with that month's return) from every CSV in `files`.
# The per-file pieces are collected in lists and concatenated once:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame inside
# a loop copies it on every iteration.
stock_frames = []
month_last_frames = []

for file in files:
    if os.path.isdir(file):
        print('%s <DIR>'%file)
        continue

    folder, name = os.path.split(file)
    head, tail = os.path.splitext(name)  # head is the file stem, used as ticker code
    read_df = pd.read_csv(file)

    price_df, ym_keys = data_preprocessing(read_df, head, "2010-01-01")
    if price_df.empty:
        # Nothing on or after the base date for this file.
        continue

    # Stored before the monthly return is filled in, so stock_df carries the
    # 0.0 placeholder in '1M_RET'.
    # NOTE(review): confirm whether stock_df should hold only
    # 'Date' / 'CODE' / 'Adj Close'.
    stock_frames.append(price_df)

    # Last 'Adj Close' of the reference month / first 'Adj Close' of that
    # month. This is a price ratio (1.05 means +5%), not a ratio minus one.
    # iloc is used so missing prices are not skipped.
    by_month = price_df.groupby("STD-YM", sort=False)["Adj Close"]
    price_df = price_df.assign(
        **{"1M_RET": by_month.transform(lambda s: s.iloc[-1] / s.iloc[0])})

    # Keep the last row of every month; the original row labels are preserved.
    month_last_frames.append(
        price_df.drop_duplicates("STD-YM", keep="last")[['Date', 'CODE', '1M_RET']])

# Rebuilt from scratch so re-running this cell does not append the same rows
# twice. With no usable files the existing frames are left as they are.
if stock_frames:
    stock_df = pd.concat(stock_frames)
    month_last_df = pd.concat(month_last_frames)

Date      2010-01-29
CODE            AAPL
1M_RET      0.897435
Name: 18, dtype: object
Date      2010-02-26
CODE            AAPL
1M_RET       1.05079
Name: 37, dtype: object
Date      2010-03-31
CODE            AAPL
1M_RET       1.12446
Name: 60, dtype: object
Date      2010-04-30
CODE            AAPL
1M_RET       1.10645
Name: 81, dtype: object
Date      2010-05-28
CODE            AAPL
1M_RET      0.964445
Name: 101, dtype: object
Date      2010-06-30
CODE            AAPL
1M_RET      0.964345
Name: 123, dtype: object
Date      2010-07-30
CODE            AAPL
1M_RET       1.03529
Name: 144, dtype: object
Date      2010-08-31
CODE            AAPL
1M_RET      0.928395
Name: 166, dtype: object
Date      2010-09-30
CODE            AAPL
1M_RET        1.1335
Name: 187, dtype: object
Date      2010-10-29
CODE            AAPL
1M_RET       1.06534
Name: 208, dtype: object
Date      2010-11-30
CODE            AAPL
1M_RET       1.02291
Name: 229, dtype: object
Date      2010-12-31
CODE           

In [42]:
# Show the first ten month-end rows as a quick check of the result.
month_last_df.iloc[:10]

Unnamed: 0,Date,CODE,1M_RET
18,2010-01-29,AAPL,0.897435
37,2010-02-26,AAPL,1.050789
60,2010-03-31,AAPL,1.124456
81,2010-04-30,AAPL,1.106454
101,2010-05-28,AAPL,0.964445
123,2010-06-30,AAPL,0.964345
144,2010-07-30,AAPL,1.035294
166,2010-08-31,AAPL,0.928395
187,2010-09-30,AAPL,1.133504
208,2010-10-29,AAPL,1.06534
