## Setup

In [1]:
import pandas as pd
import numpy as np
import datetime

## Load Data

In [2]:
data = pd.read_csv('./data/USDT_BTC.csv')

In [3]:
data.head()

Unnamed: 0,snap_time,ticker,data_source,high,low,open,close,weighted_avg,base_volume,quote_volume
0,2017-01-01 05:00:00,USDT_BTC,poloniex,962.579267,961.59241,962.579267,961.59241,961.794408,2467.667218,2.565691
1,2017-01-01 05:05:00,USDT_BTC,poloniex,963.0,961.592412,963.0,961.592412,961.730303,163.944328,0.170468
2,2017-01-01 05:10:00,USDT_BTC,poloniex,961.592412,961.592412,961.592412,961.592412,961.592412,,
3,2017-01-01 05:15:00,USDT_BTC,poloniex,964.30361,961.089964,964.30361,964.303602,964.147252,53.214035,0.055193
4,2017-01-01 05:20:00,USDT_BTC,poloniex,964.219127,961.089969,961.089969,964.219127,963.865251,118.64813,0.123096


In [4]:
data.info(null_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71762 entries, 0 to 71761
Data columns (total 10 columns):
snap_time       71762 non-null object
ticker          71762 non-null object
data_source     71762 non-null object
high            71762 non-null float64
low             71762 non-null float64
open            71762 non-null float64
close           71762 non-null float64
weighted_avg    71762 non-null float64
base_volume     71521 non-null float64
quote_volume    71521 non-null float64
dtypes: float64(7), object(3)
memory usage: 5.5+ MB


In [5]:
['Column %s null pct %s' % (str(col), (data.shape[0]-data[col].count())/df.shape[0]) for col in data.columns]

NameError: name 'df' is not defined

## Code Strategy

In [6]:
data['snap_dt'] = data['snap_time'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))

In [73]:
data['MA_1_hr'] = data['close'].shift(1).rolling(window=12).mean()

In [74]:
data[['snap_time', 'close', 'MA_1_hr' ,'buy', 'sell']]

Unnamed: 0,snap_time,close,MA_1_hr,buy,sell
0,2017-01-01 05:00:00,961.592410,,False,False
1,2017-01-01 05:05:00,961.592412,,False,False
2,2017-01-01 05:10:00,961.592412,,False,False
3,2017-01-01 05:15:00,964.303602,,False,False
4,2017-01-01 05:20:00,964.219127,,False,False
5,2017-01-01 05:25:00,963.000000,,False,False
6,2017-01-01 05:30:00,963.000000,,False,False
7,2017-01-01 05:35:00,964.689990,,False,False
8,2017-01-01 05:40:00,963.000000,,False,False
9,2017-01-01 05:45:00,961.637765,,False,False


In [90]:
def make_decision(df):
    buy = data['close'] < data['MA_1_hr']
    sell = data['close'] > data['MA_1_hr']
    
    ## 0 is hold, 1 is buy, 2 is sell
    decision = pd.Series([0] * df.shape[0], index=df.index)
    decision.loc[buy] = 1
    decision.loc[sell] = 2
    
    ## indicator if the previous row has the same decision
    ## i.e. decision = [1, 1, 1, 0, 2, 2, 2], match will be [False, True, True, False, False, True, True]
    match = np.ediff1d(decision, to_begin=np.NaN) == 0
    
    ## if the decision is the same as the above, override to 0 (i.e hold)
    decision.loc[match] = 0
    return decision

In [87]:
data['decision'] = make_decision(data)

In [109]:
def transaction_price(df):
    buy_price = df['high'].shift(-1)
    sell_price = df['low'].shift(-1)

    buy = df['decision'] == 1
    sell = df['decision'] == 2

    transation_price = pd.Series([np.nan] * df.shape[0], index=df.index)
    transation_price.loc[buy] = buy_price
    transation_price.loc[sell] = sell_price
    return transation_price

In [110]:
buy_price = data['high'].shift(-1)
sell_price = data['low'].shift(-1)

buy = data['decision'] == 1
sell = data['decision'] == 2
transation_price = pd.Series([0] * data.shape[0], index=data.index)

In [111]:
data['transaction_price'] = transaction_price(data)

In [118]:
data[['snap_time', 'close', 'high','low', 'MA_1_hr' ,'buy', 'sell', 'decision', 'transaction_price']]
data.shape
# data.groupby('decision')['decision'].count()

(71762, 20)

## Plotting

In [77]:
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from math import pi

output_notebook()

In [78]:
inc = data.close > data.open
dec = data.open > data.close

In [55]:
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

w = 5*60*1000/2 # 2.5 min in ms

p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, title = "USDT BTC Candlestick")
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3

p.segment(data.snap_dt, data.high, data.snap_dt, data.low, color="black")
p.vbar(data.snap_dt[inc], w, data.open[inc], data.close[inc], fill_color="#D5E1DD", line_color="black")
p.vbar(data.snap_dt[dec], w, data.open[dec], data.close[dec], fill_color="#F2583E", line_color="black")

# output_file("candlestick.html", title="candlestick.py example")

show(p, notebook_handle=True)  