In [59]:
# !pip install -q mpl_finance
# !pip install -q finance-datareader
# !pip install -q pandas_datareader

In [58]:
import pandas_datareader as data_reader
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os
import shutil
import multiprocessing
from mpl_finance import candlestick_ohlc

## Candle Chart image file 생성

In [60]:
df = pd.read_csv("data/stockcode.csv", index_col=0, encoding="cp949")
print(df.shape)
df.head()

(5646, 4)


Unnamed: 0,code,code_name,smarket,kospi200
0,20,동화약품,0,0
1,30,우리은행,0,1
2,40,KR모터스,0,0
3,50,경방,0,0
4,60,메리츠화재,0,0


In [61]:
df.smarket.value_counts()

3     2480
0     1489
10    1284
8      393
Name: smarket, dtype: int64

In [62]:
df_smarket = df[(df.smarket == 0) | (df.smarket == 10)]
print(df_smarket.shape)
df_smarket.head()

(2773, 4)


Unnamed: 0,code,code_name,smarket,kospi200
0,20,동화약품,0,0
1,30,우리은행,0,1
2,40,KR모터스,0,0
3,50,경방,0,0
4,60,메리츠화재,0,0


In [63]:
# tickers = df[df['kospi200'] == 1]['code'].values
tickers = df_smarket['code'].values

In [64]:
n1 = int(np.floor(len(tickers)*0.7))
n2 = int(np.floor(len(tickers)*0.2))
n3 = int(np.floor(len(tickers)*0.1))
n1, n2, n3

(1941, 554, 277)

In [65]:
train, val, test = tickers[:n1], tickers[n1:n1+n2], tickers[n1+n2:]
train

array(['000020', '000030', '000040', ..., '052710', '052770', '052790'],
      dtype=object)

In [66]:
# tmp = "/Users/ohyoungjea/Downloads/"

tmp = "C:\\Users\\trimu\\Downloads\\"

os.mkdir(tmp+"\\candles")
for path in [tmp+"candles\\train", tmp+"candles\\val", tmp+"candles\\test"]:
    try:
        shutil.rmtree(path)
    except:
        pass
    os.mkdir(path)
    os.mkdir(path+"\\ups")
    os.mkdir(path+"\\downs")
    os.mkdir(path+"\\flats")

In [67]:
def image_creation(ticker, up_ratio, down_ratio, window=180, future=7, print_y=False):
    plt.rcParams.update({'figure.max_open_warning': 0})
    
    df = fdr.DataReader(ticker)
    df['MA6'] = df['Close'].rolling(6).mean()
    df['MA20'] = df['Close'].rolling(20).mean()

    start_idx = 0
    seq = 0

    while start_idx+window+future < df.shape[0]:
        sec = df.loc[df.index[start_idx]:df.index[start_idx+window+future]]
        sec.reset_index(inplace=True)

        seq += 1

        last_price = \
            sec.loc[sec.index[0]:sec.index[window]].tail(1)['Close'].values[0]

        future_price = sec.loc[sec.index[window+future]]['Close'] 

        fig, ax = plt.subplots(1, 1, figsize=(6, 4))

        x = np.arange(len(sec.index))
        ohlc = sec[['Open', 'High', 'Low', 'Close']].astype(int).values
        dohlc = np.hstack((np.reshape(x, (-1, 1)), ohlc))
        # 봉차트
        candlestick_ohlc(ax, dohlc, width=0.5, colorup='r', colordown='b')
        # 이동 평균
        sec.MA6.plot(ax=ax)
        sec.MA20.plot(ax=ax)

        plt.xticks([])
        plt.yticks([])

        if future_price > last_price * up_ratio:
            plt.savefig(f'{path}\\ups\\{ticker}{seq}.png')
            if print_y:
                print(f'{path}\\ups\\{ticker}{seq}.png....last',
                  f'last price={last_price}, future price={future_price}')
        elif future_price < last_price * down_ratio:
            plt.savefig(f'{path}\\downs\\{ticker}{seq}.png')
            if print_y:
                print(f'{path}\\downs\\{ticker}{seq}.png....',
                  f'last price={last_price}, future price={future_price}')
        else:
            plt.savefig(f'{path}\\flats\\{ticker}{seq}.png')
            if print_y:
                print(f'{path}\\flats\\{ticker}{seq}.png....',
                  f'last price={last_price}, future price={future_price}')

        plt.close()   

        start_idx += window

    print(f"processing {ticker} completed...seq#: {seq}", f"elapse time: {(time.time() - s)/60:.2f} mins"))

In [68]:
LAST_PROCESSED_CODE = '009290'  # Restart from here

In [None]:
up_ratio = 1.03    # 3
down_ratio = 0.97  # 3% 하락

s = time.time()

for tickers, folder in zip([train, val, test], ["train", "val", "test"]):
    path = tmp+"candles\\" + folder
    
    for ticker in tickers:
        if ticker > LAST_PROCESSED_CODE:
            image_creation(ticker, up_ratio, down_ratio, window=180, future=7, print_y=False)
        
print(f"Total elapse time: {(time.time() - s)/60:.2f} mins")

processing 000020 completed...seq#: 33
processing 000030 completed...seq#: 5
processing 000040 completed...seq#: 33
processing 000050 completed...seq#: 33
processing 000060 completed...seq#: 33
processing 000070 completed...seq#: 33
processing 000075 completed...seq#: 33
processing 000080 completed...seq#: 16
processing 000087 completed...seq#: 13
processing 000100 completed...seq#: 33
processing 000105 completed...seq#: 33
processing 000120 completed...seq#: 33
processing 000140 completed...seq#: 33
processing 000145 completed...seq#: 33
processing 000150 completed...seq#: 33
processing 000155 completed...seq#: 33
processing 000157 completed...seq#: 30
processing 000180 completed...seq#: 33
processing 000210 completed...seq#: 33
processing 000215 completed...seq#: 33
processing 000220 completed...seq#: 33
processing 000225 completed...seq#: 33
processing 000227 completed...seq#: 29
processing 000230 completed...seq#: 33
processing 000240 completed...seq#: 33
processing 000270 complete

processing 003610 completed...seq#: 33
processing 003620 completed...seq#: 33
processing 003650 completed...seq#: 33
processing 003680 completed...seq#: 33
processing 003690 completed...seq#: 33
processing 003720 completed...seq#: 33
processing 003780 completed...seq#: 33
processing 003830 completed...seq#: 33
processing 003850 completed...seq#: 33
processing 003920 completed...seq#: 33
processing 003925 completed...seq#: 33
processing 003960 completed...seq#: 33
processing 004000 completed...seq#: 33
processing 004020 completed...seq#: 33
processing 004060 completed...seq#: 33
processing 004080 completed...seq#: 33
processing 004090 completed...seq#: 33
processing 004100 completed...seq#: 33
processing 004105 completed...seq#: 33
processing 004130 completed...seq#: 33
processing 004135 completed...seq#: 33
processing 004140 completed...seq#: 33
processing 004150 completed...seq#: 33
processing 004170 completed...seq#: 33
processing 004200 completed...seq#: 33
processing 004250 complet

In [None]:
for path in [tmp+"candles\\train\\", tmp+"candles\\val\\", tmp+"candles\\test\\"]:
    for subpath in ['ups', 'downs', 'flats']:
        print(path+subpath, len(os.listdir(path + subpath)))