In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [105]:
import pickle
# Load the pickled `raw_data` object; later cells index it by stock name
# (e.g. raw_data['삼성전자']) to obtain one OHLCV DataFrame per stock.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only open pickle files that come from a trusted source.
with open('raw_data_20190115.pickle', 'rb') as f:
    raw_data = pickle.load(f)

In [106]:
df = raw_data['삼성전자']

In [107]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-04,6000.0,6110.0,5660.0,6110.0,0.091443,74195000
2000-01-05,5800.0,6060.0,5520.0,5580.0,0.083511,74680000
2000-01-06,5750.0,5780.0,5580.0,5620.0,0.08411,54390000
2000-01-07,5560.0,5670.0,5360.0,5540.0,0.082912,40305000
2000-01-10,5600.0,5770.0,5580.0,5770.0,0.086355,46880000


In [108]:
class StockData:
    """Build feature (``X_df``) and target (``y_df``) frames from one price frame.

    Attributes:
        df: Source frame. The methods read the columns 'Open', 'High', 'Low',
            'Close' and 'Volume' unless other names are passed explicitly.
        X_df: Feature frame sharing ``df``'s index; columns are added by the
            ``get_*`` methods.
        y_df: Target frame sharing ``df``'s index; filled by ``make_target``.
        y_df_dict, X_train, y_train, X_test, y_test, split_ratio, split_date:
            Placeholders that are only initialised in this class.
    """

    def __init__(self, df=None):
        """Store ``df`` and start with empty feature/target frames.

        Args:
            df: Price DataFrame, or None to attach data later via ``set_df``
                followed by ``reset``.
        """
        self.df = df
        self.reset()

    def _blank_frame(self):
        """Return a new column-less frame on ``self.df``'s index (empty if no data)."""
        index = None if self.df is None else self.df.index
        return pd.DataFrame(index=index)

    def set_df(self, df=None):
        """Replace the source frame. Call ``reset`` afterwards to realign X_df/y_df."""
        self.df = df

    def reset(self):
        """Discard all derived data and rebuild empty frames on ``self.df``'s index."""
        # X_df and y_df must be separate objects; otherwise every feature
        # column would also show up in the target frame.
        self.X_df = self._blank_frame()
        self.y_df = self._blank_frame()
        self.y_df_dict = {}
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.split_ratio = None
        self.split_date = None

    def get_price_ratio(self, OHLC=None):
        """Add five price/volume ratio features to ``X_df``.

        Args:
            OHLC: Optional 5-item sequence with the column names for open,
                high, low, close and volume (in that order). Defaults to
                'Open', 'High', 'Low', 'Close', 'Volume'.
        """
        if OHLC is None:
            OPEN, HIGH, LOW, CLOSE, VOLUME = 'Open', 'High', 'Low', 'Close', 'Volume'
        else:
            OPEN, HIGH, LOW, CLOSE, VOLUME = OHLC

        # shift(1) moves the PREVIOUS row's value onto the current row.
        # shift(-1) would pull in the next row, i.e. leak future data.
        last_close = self.df[CLOSE].shift(1)
        last_volume = self.df[VOLUME].shift(1)

        self.X_df['open_lastclose_ratio'] = self.df[OPEN] / last_close - 1
        self.X_df['high_close_ratio'] = self.df[HIGH] / self.df[CLOSE] - 1
        self.X_df['low_close_ratio'] = self.df[LOW] / self.df[CLOSE] - 1
        self.X_df['close_lastclose_ratio'] = self.df[CLOSE] / last_close - 1
        self.X_df['volume_lastvolume_ratio'] = self.df[VOLUME] / last_volume - 1

    def get_ma_ratio(self, window, x, exp=False):
        """Add ``value / moving average`` features for several window sizes.

        Args:
            window: A tuple that is expanded with ``range(*window)``, or a
                list of explicit window sizes.
            x: Column to use. Must spell 'Close' or 'Volume' (the check is
                case-insensitive, but the value is used verbatim as the
                column key and in the feature name).
            exp: If true, use an exponentially weighted mean (``span=i``)
                instead of a simple rolling mean.

        Returns:
            None. Invalid arguments print a message and add nothing.
        """
        if x.upper() not in ['CLOSE', 'VOLUME']:
            print('Must use CLOSE or VOLUME')
            return None
        if isinstance(window, tuple):
            windows = range(*window)
        elif isinstance(window, list):
            windows = window
        else:
            print("window's type is must to be LIST or TUPLE")
            # Stop here; `windows` is undefined for unsupported types.
            return None

        prefix = 'e' if exp else ''
        series = self.df[x]
        for i in windows:
            if exp:
                average = series.ewm(span=i).mean()
            else:
                average = series.rolling(i).mean()
            self.X_df['{}_{}ma{}_ratio'.format(x, prefix, i)] = series / average

    def make_target(self, use_fn='f', period=1, window=1,
                    method='regress', thresh=.02):
        """Fill ``y_df['target']`` with a forward log return or a class label.

        Args:
            use_fn: 'f' compares the close ``period`` rows ahead with the
                current close; any other value compares it with the rolling
                mean of the close over ``window`` rows.
            period: Number of rows to look ahead.
            window: Rolling window, only used when ``use_fn`` is not 'f'.
            method: 'regress' keeps the log return; any other value converts
                it to 1 (> thresh), -1 (< -thresh) or 0.
            thresh: Threshold for the class labels.
        """
        close = self.df['Close']
        future_close = close.shift(-1 * period)
        if use_fn == 'f':
            reference = close
        else:
            reference = close.rolling(window).mean()
        self.y_df['target'] = np.log(future_close / reference)

        if method != 'regress':
            # NOTE: rows whose log return is NaN (e.g. the last `period` rows)
            # fail both comparisons and are therefore labelled 0.
            self.y_df['target'] = \
                np.where(self.y_df['target'] > thresh, 1,
                         np.where(self.y_df['target'] < -1 * thresh, -1, 0))

    def set_index(self, start, end):
        """Restrict ``df``, ``X_df`` and ``y_df`` to the label range start..end."""
        # Copy the slices so later column assignments write to independent
        # frames instead of views of the originals.
        self.df = self.df.loc[start:end].copy()
        self.X_df = self.X_df.loc[start:end].copy()
        self.y_df = self.y_df.loc[start:end].copy()

    def set_target(self):
        """Placeholder; the original body was an unfinished stub."""
        raise NotImplementedError('set_target is not implemented yet')


In [85]:
import seaborn as sns
# Candidate background colors. Each assignment overwrites the previous one,
# so only the last value ('#384152') is actually passed to seaborn below.
color='#899ab8'
color='#bbc2e0'
color='#2e3642' # color of each cell
color='#384152' # color of the execution (run) window
# Use the chosen color as both the axes and the figure background.
sns.set(rc={'axes.facecolor':color, 'figure.facecolor':color})
# sns.set_style('darkgrid')
# plt.style.use(['tableau-colorblind10'])

In [109]:
# Build features and a 3-class target for the '삼성전자' (Samsung Electronics) frame.
test = StockData(raw_data['삼성전자'])
test.get_price_ratio()
# A tuple window is expanded with range(), i.e. window sizes 1, 4, 7, ..., 88.
test.get_ma_ratio(window=(1, 90, 3), x='Close')
test.get_ma_ratio(window=(1, 90, 3), x='Volume')
# Any method other than 'regress' yields -1/0/1 labels using the default thresh.
test.make_target(use_fn='f', period=1, window=1, method='classification')

In [111]:
test.set_index(start='2007-01-01', end='2019-01-01')

In [112]:
test.y_df.head(), test.y_df.tail()

(            target
 Date              
 2007-01-02       0
 2007-01-03       0
 2007-01-04       0
 2007-01-05       0
 2007-01-08       0,             target
 Date              
 2018-09-20       0
 2018-09-21       0
 2018-09-27      -1
 2018-09-28       0
 2018-10-01       0)

In [118]:
test.y_df['target'].value_counts()

 0    2171
 1     376
-1     367
Name: target, dtype: int64

In [119]:
'X_df의 instance는 {}개이고 feature는 {}입니다.'.format(*test.X_df.shape)

'X_df의 instance는 2914개이고 feature는 65입니다.'

In [None]:
class OneStockClient:
    """Holder for a single stock-data object; no behavior beyond storage yet."""

    def __init__(self, StockData):
        """Keep a reference to the given object on ``self.StockData``.

        Note: inside this method the parameter name shadows the ``StockData``
        class defined elsewhere in the notebook.
        """
        self.StockData = StockData
        

## Logging Technique

In [121]:
import logging
import os
# import settings

In [125]:
# settings.py

import time
import datetime
import locale
import logging
import os
import platform

import pandas as pd

# Raise pandas' display limits so larger frames are shown before truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Strategy
# NOTE(review): none of the constants below are referenced in the visible part
# of this notebook. Units (currency, percent) are not stated here, and the
# _AGG/_DEF suffixes presumably denote two trading profiles - confirm against
# the consuming code before changing any value.
# DEBUG = True
DEBUG = False
DEMO = True
# DEMO = False
TRANSACTION_TERM = 2  # 2 seconds
PROCESSING_TERM = 2  # 2 seconds
MARKET_WAIT_TERM = 10  # 10 seconds
MAX_TARGET_STOCK_PRICE = 500000
MAX_BUY_PRICE_AGG = 1000000
MAX_BUY_PRICE_DEF = 500000
BUY_UNIT_AGG = 500000
BUY_UNIT_DEF = 100000
TGT_TOP_DIFF = 10
TGT_BOTTOM_DIFF = -3
MIN_PRICE_VOLUME = 10000 * 10000  # evaluates to 100,000,000
# Number of Holdings
MAX_NUM_HOLDINGS_AGG = 12
MAX_NUM_HOLDINGS_DEF = 5
# MAX_NUM_HOLDINGS_DEF = 0
# Monitoring Stocks
MAX_STOCKS_MONITOR_ITR = 5 # Each of KOSDAQ and KOSPI
FIVEMIN_INCDEC_RATE = 0.025


# # Settings for Server/
# SERVER_ADDR = "localhost"
# SERVER_PORT = 8000
# SERVER_URL = "http://%s:%s" % (SERVER_ADDR, SERVER_PORT)
# SERVER_API_URL = "http://%s:%s/api" % (SERVER_ADDR, SERVER_PORT)
# SERVER_WS_URL = "ws://%s:%s/ws" % (SERVER_ADDR, SERVER_PORT)


# # Settings for Project
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# # Settings for Templates
# TEMPLATES_DIR = os.path.join(BASE_DIR, "templates")


# # Settings for Static
# STATIC_DIR = os.path.join(BASE_DIR, "static")
# STATIC_URL = "/static/"


# # Settings for Data
# DATA_DIR = os.path.join(BASE_DIR, "database")


# Date Time Format
# `timestr` holds the most recent string produced by get_time_str();
# it stays None until that function has been called.
timestr = None
FORMAT_DATE = "%Y%m%d"  # e.g. 20190527
FORMAT_DATETIME = "%Y%m%d%H%M%S"  # e.g. 20190527165329


# # Locale settings
# if 'Darwin' in platform.system():
#     locale.setlocale(locale.LC_ALL, 'ko_KR.UTF-8')
# elif 'Windows' in platform.system():
#     locale.setlocale(locale.LC_ALL, '')


# Settings on Logging
def get_today_str():
    """Return today's local date as an 8-digit ``YYYYMMDD`` string."""
    # Only the date part is formatted, so a plain date object is sufficient.
    return datetime.date.today().strftime('%Y%m%d')


def get_time_str():
    """Return the current local time formatted with ``FORMAT_DATETIME``.

    Side effect: the result is also stored in the module-level ``timestr``
    so other cells can reuse the same timestamp.
    """
    global timestr
    # Truncate to whole seconds before converting to a local datetime.
    whole_seconds = int(time.time())
    moment = datetime.datetime.fromtimestamp(whole_seconds)
    timestr = moment.strftime(FORMAT_DATETIME)
    return timestr


In [130]:
get_time_str()

'20190527165329'

In [None]:
import pandas as pd
import numpy as np


def load_chart_data(fpath):
    """Read a header-less chart CSV and name its six columns.

    Args:
        fpath: Path or file-like object accepted by ``pandas.read_csv``.
            Numbers may use ',' as thousands separator.

    Returns:
        DataFrame with the columns
        'date', 'open', 'high', 'low', 'close', 'volume'.
    """
    column_names = ['date', 'open', 'high', 'low', 'close', 'volume']
    frame = pd.read_csv(fpath, header=None, thousands=',')
    frame.columns = column_names
    return frame


def preprocess(chart_data, windows=(5, 10, 20, 60, 120)):
    """Add rolling-mean columns for close and volume.

    For every window size ``w`` the columns ``close_ma<w>`` and
    ``volume_ma<w>`` are added. The first ``w - 1`` rows of each are NaN.

    Args:
        chart_data: DataFrame with at least 'close' and 'volume' columns.
        windows: Iterable of rolling window sizes. The default matches the
            previously hard-coded list.

    Returns:
        The same DataFrame object that was passed in; the columns are added
        in place, so the caller's frame is modified as well.
    """
    prep_data = chart_data  # alias, not a copy
    for window in windows:
        prep_data['close_ma{}'.format(window)] = (
            prep_data['close'].rolling(window).mean())
        prep_data['volume_ma{}'.format(window)] = (
            prep_data['volume'].rolling(window).mean())
    return prep_data


def build_training_data(prep_data, windows=(5, 10, 20, 60, 120)):
    """Add ratio features derived from prices, volumes and moving averages.

    Added columns (in this order):
        open_lastclose_ratio, high_close_ratio, low_close_ratio,
        close_lastclose_ratio, volume_lastvolume_ratio, then for each window
        ``close_ma<w>_ratio`` and ``volume_ma<w>_ratio``.

    The three "last" ratios compare a row with the previous row; the first
    row has no predecessor and is set to 0.

    Args:
        prep_data: DataFrame with 'open', 'high', 'low', 'close', 'volume'
            and the ``close_ma<w>`` / ``volume_ma<w>`` columns for every
            requested window.
        windows: Iterable of window sizes whose moving-average columns are
            already present. The default matches the previously hard-coded
            list.

    Returns:
        The same DataFrame object that was passed in (modified in place).
    """
    training_data = prep_data  # alias, not a copy
    n_rows = len(training_data)

    open_ = training_data['open'].values
    high = training_data['high'].values
    low = training_data['low'].values
    close = training_data['close'].values
    volume = training_data['volume'].values

    def _with_zero_first(changes):
        # Work positionally so the result does not depend on the frame
        # having a default 0..n-1 index.
        column = np.zeros(n_rows)
        column[1:] = changes
        return column

    prev_close = close[:-1]
    # Previous-row volume used as denominator. Zeros are replaced by the
    # nearest earlier non-zero value, falling back to the nearest later one,
    # to avoid dividing by zero. If no non-zero value exists the ratio is NaN.
    prev_volume = training_data['volume'].iloc[:-1]
    prev_volume = prev_volume.mask(prev_volume == 0).ffill().bfill().values

    training_data['open_lastclose_ratio'] = _with_zero_first(
        (open_[1:] - prev_close) / prev_close)
    training_data['high_close_ratio'] = (high - close) / close
    training_data['low_close_ratio'] = (low - close) / close
    training_data['close_lastclose_ratio'] = _with_zero_first(
        (close[1:] - prev_close) / prev_close)
    training_data['volume_lastvolume_ratio'] = _with_zero_first(
        (volume[1:] - volume[:-1]) / prev_volume)

    for window in windows:
        close_ma = training_data['close_ma%d' % window]
        volume_ma = training_data['volume_ma%d' % window]
        training_data['close_ma%d_ratio' % window] = \
            (training_data['close'] - close_ma) / close_ma
        training_data['volume_ma%d_ratio' % window] = \
            (training_data['volume'] - volume_ma) / volume_ma

    return training_data


In [None]:
# main
# NOTE(review): this cell references `data_manager`, `settings`, `stock_code`
# and `PolicyLearner`, none of which are defined in the visible cells
# (`import settings` and `BASE_DIR` are commented out above), so it cannot
# run as-is in this notebook.

# Prepare the stock data
chart_data = data_manager.load_chart_data(
    os.path.join(settings.BASE_DIR,
                 'data/chart_data/{}.csv'.format(stock_code)))
prep_data = data_manager.preprocess(chart_data)
training_data = data_manager.build_training_data(prep_data)

# Filter by period
# NOTE(review): the comparison is against 'YYYY-MM-DD' strings; presumably the
# 'date' column holds strings in the same format - confirm against the CSV.
training_data = training_data[(training_data['date'] >= '2017-01-01') &
                              (training_data['date'] <= '2017-12-31')]
# Drop rows with NaN, e.g. the warm-up rows of the moving-average columns.
training_data = training_data.dropna()

# Split off the chart data
features_chart_data = ['date', 'open', 'high', 'low', 'close', 'volume']
chart_data = training_data[features_chart_data]

# Split off the training data
features_training_data = [
    'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
    'close_lastclose_ratio', 'volume_lastvolume_ratio',
    'close_ma5_ratio', 'volume_ma5_ratio',
    'close_ma10_ratio', 'volume_ma10_ratio',
    'close_ma20_ratio', 'volume_ma20_ratio',
    'close_ma60_ratio', 'volume_ma60_ratio',
    'close_ma120_ratio', 'volume_ma120_ratio'
]
training_data = training_data[features_training_data]

# Start reinforcement learning
policy_learner = PolicyLearner(
    stock_code=stock_code, chart_data=chart_data, training_data=training_data,
    min_trading_unit=1, max_trading_unit=2, delayed_reward_threshold=.2, lr=.001)
policy_learner.fit(balance=10000000, num_epoches=1000,
                   discount_factor=0, start_epsilon=.5)

# Save the policy network to a file
model_dir = os.path.join(settings.BASE_DIR, 'models/%s' % stock_code)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
# NOTE(review): `timestr` is None until get_time_str() has been called, which
# would produce the file name 'model_None.h5'.
model_path = os.path.join(model_dir, 'model_%s.h5' % timestr)
policy_learner.policy_network.save_model(model_path)


In [175]:
class StockData:
    """Build feature frames, target frames and a date split from one price frame.

    Attributes:
        df: Source frame. The methods read the columns 'Open', 'High', 'Low',
            'Close' and 'Volume' unless other names are passed explicitly.
        X_df: Feature frame sharing ``df``'s index; columns are added by the
            ``get_*`` methods.
        y_df: Target frame sharing ``df``'s index.
        y_df_dict: Maps a horizon name such as 'sh01_Y' to a frame of targets,
            filled by ``make_target_dict``.
        X_train, y_train, X_test, y_test, split_date: Set by
            ``train_test_split_with_date``.
        split_ratio: Placeholder that is only initialised in this class.
    """

    def __init__(self, df=None):
        """Store ``df`` and start with empty feature/target frames.

        Args:
            df: Price DataFrame, or None to attach data later via ``set_df``
                followed by ``reset``.
        """
        self.df = df
        self.reset()

    def _blank_frame(self):
        """Return a new column-less frame on ``self.df``'s index (empty if no data)."""
        index = None if self.df is None else self.df.index
        return pd.DataFrame(index=index)

    def set_df(self, df=None):
        """Replace the source frame. Call ``reset`` afterwards to realign X_df/y_df."""
        self.df = df

    def reset(self):
        """Discard all derived data and rebuild empty frames on ``self.df``'s index."""
        # X_df and y_df must be separate objects; otherwise every feature
        # column would also show up in the target frame.
        self.X_df = self._blank_frame()
        self.y_df = self._blank_frame()
        self.y_df_dict = {}
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.split_ratio = None
        self.split_date = None

    def get_price_ratio(self, OHLC=None):
        """Add five price/volume ratio features to ``X_df``.

        Args:
            OHLC: Optional 5-item sequence with the column names for open,
                high, low, close and volume (in that order). Defaults to
                'Open', 'High', 'Low', 'Close', 'Volume'.
        """
        if OHLC is None:
            OPEN, HIGH, LOW, CLOSE, VOLUME = 'Open', 'High', 'Low', 'Close', 'Volume'
        else:
            OPEN, HIGH, LOW, CLOSE, VOLUME = OHLC

        # shift(1) moves the PREVIOUS row's value onto the current row.
        # shift(-1) would pull in the next row, i.e. leak future data.
        last_close = self.df[CLOSE].shift(1)
        last_volume = self.df[VOLUME].shift(1)

        self.X_df['open_lastclose_ratio'] = self.df[OPEN] / last_close - 1
        self.X_df['high_close_ratio'] = self.df[HIGH] / self.df[CLOSE] - 1
        self.X_df['low_close_ratio'] = self.df[LOW] / self.df[CLOSE] - 1
        self.X_df['close_lastclose_ratio'] = self.df[CLOSE] / last_close - 1
        self.X_df['volume_lastvolume_ratio'] = self.df[VOLUME] / last_volume - 1

    def get_ma_ratio(self, window, x, exp=False):
        """Add ``value / moving average`` features for several window sizes.

        Args:
            window: A tuple that is expanded with ``range(*window)``, or a
                list of explicit window sizes.
            x: Column to use. Must spell 'Close' or 'Volume' (the check is
                case-insensitive, but the value is used verbatim as the
                column key and in the feature name).
            exp: If true, use an exponentially weighted mean (``span=i``)
                instead of a simple rolling mean.

        Returns:
            None. Invalid arguments print a message and add nothing.
        """
        if x.upper() not in ['CLOSE', 'VOLUME']:
            print('Must use CLOSE or VOLUME')
            return None
        if isinstance(window, tuple):
            windows = range(*window)
        elif isinstance(window, list):
            windows = window
        else:
            print("window's type is must to be LIST or TUPLE")
            # Stop here; `windows` is undefined for unsupported types.
            return None

        prefix = 'e' if exp else ''
        series = self.df[x]
        for i in windows:
            if exp:
                average = series.ewm(span=i).mean()
            else:
                average = series.rolling(i).mean()
            self.X_df['{}_{}ma{}_ratio'.format(x, prefix, i)] = series / average

    def get_cv(self, window):
        """Add rolling standard deviations of the close-to-close log return.

        Each feature ``his_vol_<i>`` is the rolling std over ``i`` rows,
        scaled by ``252 ** 0.5``.

        Args:
            window: Tuple expanded with ``range(*window)`` into window sizes.
        """
        # Use this instance's frame, not a notebook-level `df` variable.
        log_return = np.log(self.df['Close'] / self.df['Close'].shift(1))
        for i in range(*window):
            self.X_df['his_vol_{:02}'.format(i)] = \
                log_return.rolling(i).std() * (252 ** 0.5)

    def get_rsi(self, window=(14, 15)):
        """Add ``RSI_<n>`` features: 100 * mean gain / (mean gain + mean loss).

        Args:
            window: Tuple expanded with ``range(*window)`` into window sizes.
        """
        # The gain/loss series do not depend on n, so compute them once.
        change = self.df['Close'].diff(1)
        gains = pd.Series(np.where(change > 0, change, 0))
        losses = pd.Series(np.where(change < 0, change * (-1), 0))
        for n in range(*window):
            AU = gains.rolling(window=n, min_periods=n).mean()
            AD = losses.rolling(window=n, min_periods=n).mean()
            RSI = AU.div(AD + AU) * 100
            # gains/losses carry a 0..n-1 index, so assign by position.
            self.X_df['RSI_{:02}'.format(n)] = RSI.values

    # TODO: stochastic oscillator and Williams %R features are not implemented.

    def make_target_dict(self, period, window=None):
        """Fill ``y_df_dict`` with forward log-return targets per horizon.

        For every horizon ``i`` the frame ``y_df_dict['sh<i>_Y']`` gets the
        column ``sh<i>_Y`` (future close vs. current close) and, for every
        window size ``j``, ``sh<i>_Y_<j>`` (future close vs. the rolling
        mean of the close over ``j`` rows).

        Args:
            period: Tuple expanded with ``range(*period)`` into horizons.
            window: Tuple expanded with ``range(*window)`` into window sizes,
                or None to create only the plain ``sh<i>_Y`` column.
        """
        close = self.df['Close']
        window_sizes = [] if window is None else list(range(*window))
        for i in range(*period):  # prediction horizon
            y = 'sh{:02}_Y'.format(i)
            future_close = close.shift(-1 * i)
            targets = pd.DataFrame(index=self.df.index)
            targets[y] = np.log(future_close / close)
            for j in window_sizes:  # rolling reference window
                targets[y + '_{:02}'.format(j)] = \
                    np.log(future_close / close.rolling(j).mean())
            self.y_df_dict[y] = targets

    # TODO: a ratio-based train/test split is not implemented.

    def train_test_split_with_date(self, split_date=None):
        """Split ``X_df``/``y_df`` into train (up to and including
        ``split_date``) and test (everything after).

        Args:
            split_date: Index label marking the end of the training range.
                With None, all rows go to the training set and the test set
                is empty.
        """
        self.split_date = split_date
        self.X_train = self.X_df.loc[:split_date]
        self.y_train = self.y_df.loc[:split_date]
        # Label slices include both end points, so slicing `.loc[split_date:]`
        # would put the split row into both sets. Take the positional
        # remainder instead.
        self.X_test = self.X_df.iloc[len(self.X_train):]
        self.y_test = self.y_df.iloc[len(self.y_train):]

#     def correl_analysis(self):
        

In [176]:
# Build features and multi-horizon targets for the 'SK하이닉스' (SK hynix) frame.
data = StockData(raw_data['SK하이닉스'])
data.get_price_ratio()
# A list window is used as-is: moving averages over 5, 10, 20, 60 and 120 rows.
data.get_ma_ratio(window=[5,10,20,60,120], x='Close')
data.get_ma_ratio(window=[5,10,20,60,120], x='Volume')
# data.get_cv(window=(4, 90, 3))
# data.get_rsi(window=(4, 90, 3))

# Tuples are expanded with range(): horizons and windows 1, 4, 7, ..., 88.
data.make_target_dict(period=(1, 90, 3), window=(1, 90, 3))

In [178]:
data.y_df_dict['sh01_Y']

Unnamed: 0_level_0,sh01_Y,sh01_Y_01,sh01_Y_04,sh01_Y_07,sh01_Y_10,sh01_Y_13,sh01_Y_16,sh01_Y_19,sh01_Y_22,sh01_Y_25,...,sh01_Y_61,sh01_Y_64,sh01_Y_67,sh01_Y_70,sh01_Y_73,sh01_Y_76,sh01_Y_79,sh01_Y_82,sh01_Y_85,sh01_Y_88
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-04,-0.108600,-0.108600,,,,,,,,,...,,,,,,,,,,
2000-01-05,-0.053346,-0.053346,,,,,,,,,...,,,,,,,,,,
2000-01-06,-0.009174,-0.009174,,,,,,,,,...,,,,,,,,,,
2000-01-07,-0.009259,-0.009259,-0.072320,,,,,,,,...,,,,,,,,,,
2000-01-10,0.036534,0.036534,0.011274,,,,,,,,...,,,,,,,,,,
2000-01-11,0.013363,0.013363,0.033749,,,,,,,,...,,,,,,,,,,
2000-01-12,0.039051,0.039051,0.064822,0.034951,,,,,,,...,,,,,,,,,,
2000-01-13,-0.030240,-0.030240,0.014357,0.018976,,,,,,,...,,,,,,,,,,
2000-01-14,0.088105,0.088105,0.088105,0.108999,,,,,,,...,,,,,,,,,,
2000-01-17,0.004010,0.004010,0.064006,0.093997,0.083165,,,,,,...,,,,,,,,,,
