In [13]:
import pandas as pd
import numpy as np

In [14]:
def load_chart_data(fpath):
    """Read a candle CSV and return it with normalised column names.

    Parameters
    ----------
    fpath : str or path-like
        Location of the CSV file. After the optional saved-index column is
        removed, the file must contain exactly six columns, in the order
        date, open, high, low, close, volume.

    Returns
    -------
    pandas.DataFrame
        Frame whose columns are renamed to
        ``['date', 'open', 'high', 'low', 'close', 'volume']``.

    Raises
    ------
    ValueError
        If the remaining column count is not six (raised by pandas when the
        new column names are assigned).
    """
    chart_data = pd.read_csv(fpath)
    # A CSV written with its index carries it as an unnamed first column, which
    # pandas labels "Unnamed: 0". Files saved without the index do not have it,
    # so drop it only when present instead of failing with KeyError.
    chart_data = chart_data.drop(columns="Unnamed: 0", errors="ignore")
    chart_data.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    return chart_data

def preprocess(chart_data, windows=(5, 10, 20, 60, 120)):
    """Add rolling-mean columns for close price and volume.

    Parameters
    ----------
    chart_data : pandas.DataFrame
        Must contain ``'close'`` and ``'volume'`` columns.
    windows : iterable of int, optional
        Rolling window lengths, counted in rows. For every window ``w`` the
        columns ``close_ma{w}`` and ``volume_ma{w}`` are added.

    Returns
    -------
    pandas.DataFrame
        A copy of ``chart_data`` with the extra columns. The first ``w - 1``
        rows of each ``*_ma{w}`` column are NaN because the window is not yet
        full.
    """
    # Work on a copy so the caller's frame (possibly a slice of another frame)
    # is not modified behind its back.
    prep_data = chart_data.copy()
    for window in windows:
        prep_data['close_ma{}'.format(window)] = (
            prep_data['close'].rolling(window).mean())
        prep_data['volume_ma{}'.format(window)] = (
            prep_data['volume'].rolling(window).mean())
    return prep_data

def _ratio_to_previous_row(current, previous, divisor=None):
    """Return ``(current - previous) / divisor`` with the first row set to 0.0.

    ``previous`` is a column already shifted down by one row, so its first
    element has no predecessor; that row is reported as 0.0. ``divisor``
    defaults to ``previous``.
    """
    if divisor is None:
        divisor = previous
    ratio = (current - previous) / divisor
    if len(ratio) > 0:
        ratio.iloc[0] = 0.0
    return ratio


def build_training_data(prep_data, windows=(5, 10, 20, 60, 120)):
    """Derive ratio features from a preprocessed candle frame.

    Parameters
    ----------
    prep_data : pandas.DataFrame
        Must contain ``open``, ``high``, ``low``, ``close``, ``volume`` and,
        for every ``w`` in ``windows``, ``close_ma{w}`` and ``volume_ma{w}``.
    windows : iterable of int, optional
        Moving-average windows whose ``*_ma{w}`` columns are turned into
        ``*_ma{w}_ratio`` features.

    Returns
    -------
    pandas.DataFrame
        A copy of ``prep_data`` with the ratio columns appended. The input
        frame is left untouched.

    Raises
    ------
    KeyError
        If a required column is missing.

    Notes
    -----
    Previous-row values are obtained with ``shift(1)``, i.e. by position, so
    the frame does not need a 0-based integer index.
    """
    training_data = prep_data.copy()

    close = training_data['close']
    volume = training_data['volume']
    prev_close = close.shift(1)
    prev_volume = volume.shift(1)

    # Today's open relative to the previous close.
    training_data['open_lastclose_ratio'] = _ratio_to_previous_row(
        training_data['open'], prev_close)
    # Today's high relative to today's close.
    training_data['high_close_ratio'] = (training_data['high'] - close) / close
    # Today's low relative to today's close.
    training_data['low_close_ratio'] = (training_data['low'] - close) / close
    # Today's close relative to the previous close.
    training_data['close_lastclose_ratio'] = _ratio_to_previous_row(
        close, prev_close)
    # Today's volume relative to the previous volume. A zero previous volume
    # would divide by zero, so the divisor takes the nearest non-zero previous
    # volume instead: the earlier one first, then the later one for leading zeros.
    # NOTE: missing (NaN) volumes in the divisor are filled the same way.
    safe_prev_volume = prev_volume.mask(prev_volume == 0).ffill().bfill()
    training_data['volume_lastvolume_ratio'] = _ratio_to_previous_row(
        volume, prev_volume, divisor=safe_prev_volume)

    # Close relative to its moving average, and volume relative to its moving
    # average, for every window.
    for window in windows:
        close_ma = training_data['close_ma%d' % window]
        volume_ma = training_data['volume_ma%d' % window]
        training_data['close_ma%d_ratio' % window] = (close - close_ma) / close_ma
        training_data['volume_ma%d_ratio' % window] = (volume - volume_ma) / volume_ma

    return training_data

In [26]:
# Load the Bitcoin candle CSV through load_chart_data, which normalises the
# column names to date/open/high/low/close/volume.
coin_chart = load_chart_data('data/chart_data/Bitcoin_hour_candle2.csv')
# Display the last rows as a quick check of the load.
coin_chart.tail()

Unnamed: 0,date,open,high,low,close,volume
41275,2018101403,7199500.0,7200000.0,7180500.0,7180500.0,4.571257
41276,2018101404,7180000.0,7200000.0,7172500.0,7192500.0,3.81675
41277,2018101405,7195000.0,7195000.0,7190000.0,7190000.0,1.212798
41278,2018101406,7186000.0,7186000.0,7186000.0,7186000.0,0.049723
41279,2018101407,7187000.0,7187000.0,7187000.0,7187000.0,0.1709


In [27]:
import time
import datetime

In [28]:
# Convert the YYYYMMDDHH stamps (e.g. 2018101403) to timestamps with one
# vectorised call instead of a Python-level strptime loop. The dtype guard makes
# the cell safe to re-run: an already converted column is left alone.
if not pd.api.types.is_datetime64_any_dtype(coin_chart['date']):
    coin_chart['date'] = pd.to_datetime(
        coin_chart['date'].astype(str), format="%Y%m%d%H")

In [29]:
# Complete hourly grid between the two hard-coded end points.
# Lowercase 'h' is the hourly frequency alias; the uppercase 'H' spelling is
# deprecated in recent pandas releases and emits a FutureWarning.
k = pd.date_range('2013-09-03 15', '2018-10-14 07', freq='h')

In [30]:
# One-column frame holding every timestamp of the hourly grid `k`.
data = pd.DataFrame({"date": k})

In [33]:
# Display the last rows of the hourly grid to check where it ends.
data.tail()

Unnamed: 0,date
44796,2018-10-14 03:00:00
44797,2018-10-14 04:00:00
44798,2018-10-14 05:00:00
44799,2018-10-14 06:00:00
44800,2018-10-14 07:00:00


In [34]:
# NOTE: `df` is a second name for the same DataFrame object as `coin_chart`;
# no copy is made, so changes through either name are visible through both.
df = coin_chart
# Display the first rows of the candle data.
df.head()

Unnamed: 0,date,open,high,low,close,volume
0,2013-09-03 15:00:00,160000.0,160000.0,160000.0,160000.0,0.1
1,2013-09-04 01:00:00,150000.0,150000.0,150000.0,150000.0,0.2
2,2013-09-04 09:00:00,180000.0,180000.0,180000.0,180000.0,2.94
3,2013-09-04 14:00:00,180000.0,180000.0,180000.0,180000.0,0.5
4,2013-09-05 10:00:00,150000.0,150000.0,150000.0,150000.0,0.1


In [49]:
# Outer-join the hourly grid with the candles on their only shared column,
# 'date'; timestamps absent from the candles get NaN in the other columns.
final = pd.merge(data, df, how='outer', on='date')

In [51]:
# Rows with no candle: volume becomes 0, then every remaining NaN takes the
# value of the closest earlier row. The order matters: volume must be
# zero-filled first so it is not forward-filled as well.
final['volume'] = final['volume'].fillna(0)
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
final = final.ffill()

In [54]:
# Discard the first nine rows by position.
# NOTE(review): presumably this makes the series start at 2013-09-04 00:00 — confirm.
# Re-running this cell drops nine more rows each time.
final = final.iloc[9:]

In [55]:
# Renumber the rows 0..n-1 after the positional slice. reset_index(drop=True)
# returns a new frame, so the index is not assigned in place on a slice and no
# throw-away list of integers is built.
final = final.reset_index(drop=True)

In [59]:
# Persist the gap-filled candles. The row index is written as an unnamed first
# column, which pandas reads back as "Unnamed: 0".
final.to_csv("final.csv", index=True)

In [102]:
def preprocess(chart_data, windows=(24 * 5, 24 * 10, 24 * 20, 24 * 60, 24 * 120)):
    """Add rolling-mean columns for close price and volume.

    NOTE(review): this redefinition shadows the ``preprocess`` defined earlier
    in this notebook. Its default windows are 24 times the earlier ones, so the
    columns are named ``close_ma120`` ... ``close_ma2880`` (and the matching
    ``volume_ma*``). ``build_training_data`` looks up ``close_ma5`` ...
    ``close_ma120``, so the default output here cannot be fed to it as is.

    Parameters
    ----------
    chart_data : pandas.DataFrame
        Must contain ``'close'`` and ``'volume'`` columns.
    windows : iterable of int, optional
        Rolling window lengths, counted in rows. For every window ``w`` the
        columns ``close_ma{w}`` and ``volume_ma{w}`` are added.

    Returns
    -------
    pandas.DataFrame
        A copy of ``chart_data`` with the extra columns. The first ``w - 1``
        rows of each ``*_ma{w}`` column are NaN.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    prep_data = chart_data.copy()
    for window in windows:
        prep_data['close_ma{}'.format(window)] = (
            prep_data['close'].rolling(window).mean())
        prep_data['volume_ma{}'.format(window)] = (
            prep_data['volume'].rolling(window).mean())
    return prep_data

In [45]:
# The cut-off is written in the raw CSV layout (YYYYMMDDHH). Parse it with that
# exact format and compare timestamps: a bare '2017123100' string is not an
# unambiguous timestamp literal, and comparing it with date strings is only a
# character-by-character comparison.
# NOTE(review): assumes `date` holds datetime64 values or ISO-formatted
# strings — confirm against the cell that builds `training_data`.
train_end = pd.to_datetime('2017123100', format='%Y%m%d%H')
training_data = training_data[pd.to_datetime(training_data['date']) <= train_end]
# Drop rows that still contain NaN (e.g. where a rolling window is not yet full).
training_data = training_data.dropna()

In [48]:
# Raw candle columns, kept separately from the derived ratio features.
features_chart_data = [
    'date',
    'open', 'high', 'low', 'close',
    'volume',
]
# NOTE: this rebinds `coin_chart` to the filtered training rows.
coin_chart = training_data.loc[:, features_chart_data]

In [49]:
# Ratio features: five row-to-row / intraday ratios, followed by the close and
# volume moving-average ratios for each window, in window order.
features_training_data = [
    'open_lastclose_ratio',
    'high_close_ratio',
    'low_close_ratio',
    'close_lastclose_ratio',
    'volume_lastvolume_ratio',
] + [
    '{}_ma{}_ratio'.format(series, window)
    for window in (5, 10, 20, 60, 120)
    for series in ('close', 'volume')
]
# Keep only the ratio columns; `training_data` is rebound to the narrower frame.
training_data = training_data[features_training_data]

In [50]:
# Display the first rows of the final feature frame.
training_data.head()

Unnamed: 0,open_lastclose_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,volume_lastvolume_ratio,close_ma5_ratio,volume_ma5_ratio,close_ma10_ratio,volume_ma10_ratio,close_ma20_ratio,volume_ma20_ratio,close_ma60_ratio,volume_ma60_ratio,close_ma120_ratio,volume_ma120_ratio
119,0.0,0.0,0.0,0.0,-0.5,-0.031818,-0.995777,-0.031818,-0.996179,-0.035457,-0.996947,0.112554,-0.99636,0.266446,-0.994297
120,0.056338,0.0,0.0,0.056338,910.0,0.020871,1.511025,0.013514,1.619272,0.022356,1.74229,0.1665,2.151626,0.333501,3.98228
121,0.0,0.0,0.0,0.0,-0.336992,0.021798,0.496531,0.006711,0.576174,0.027022,0.805658,0.157896,1.024872,0.32858,2.217649
122,0.0,0.0,0.0,0.0,-0.621742,0.021798,-0.345915,0.004464,-0.434369,0.023541,-0.23019,0.149417,-0.242765,0.325644,0.220652
123,0.031111,0.0,-0.004292,0.035556,-0.757077,0.039251,-0.845831,0.034177,-0.855605,0.064048,-0.78535,0.180835,-0.809212,0.369215,-0.703548
