In [1]:
from config import CONFIG

## Scikit learn for mapping metrics
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import cross_validate, train_test_split, cross_val_predict
from sklearn.linear_model import LinearRegression
from itertools import cycle, islice
import pickle
import time
import scipy
import peakutils
import math
import pandas as pd
import sys
import plotly.offline as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF
import numpy as np
import matplotlib as plt
import datetime as dt
%matplotlib inline
%matplotlib notebook
py.init_notebook_mode(connected=True)

# Import & Merge Data

In [4]:
path = CONFIG['PATH']
pair = CONFIG['BASE'] + "/" + CONFIG['QUOTE']
one_day = 86400
sequence_length = 1
indicators = pd.read_csv(path + "/data/indicators.csv")
indicators = indicators[['date','momentum','movingAverage', 'RSI']]
candlesticks = pd.read_csv(path + "/data/candlesticks.csv")
candlesticks = candlesticks[['date','open','high','low','close','volume','weightedAverage',]]
merged = indicators.merge(candlesticks, on='date', left_index=False, right_index=False, how="inner")

graph_data = merged[['date','movingAverage', 'weightedAverage', 'volume', 'close', 'RSI']]
# Condense and add features to the data frame
merged['HL_PCT'] = (merged['high'] - merged['close']) / merged['close'] * 100
merged['PCT_change'] = (merged['close'] - merged['open']) / merged['open'] * 100

merged_csv = merged[['date','close','HL_PCT', 'PCT_change', 'volume', 'movingAverage','weightedAverage', 'momentum', 'RSI']]
merged = merged[['close','HL_PCT', 'PCT_change', 'volume', 'momentum', 'RSI']]

forecast_col = 'close'
merged.fillna(-9999, inplace=True)

# How many days we are forecasting out
forecast_out = int(math.ceil(0.04*len(merged)))

merged['label'] = merged[forecast_col].shift(-forecast_out)

merged_csv = merged_csv.to_csv(path + '/data/merged_data.csv', index=False)

In [5]:
print(merged.head())

      close        HL_PCT  PCT_change       volume  momentum   RSI     label
0  0.003125  10460.033792  -99.053033  1205.803321       0.0  50.0  0.004095
1  0.002581     58.872240  -13.976713   898.123434       0.0  50.0  0.003933
2  0.002645      9.724837   -0.187927   718.365266       0.0  50.0  0.003788
3  0.003950     11.389867   49.083066  3007.274111       0.0  50.0  0.003668
4  0.004500      8.493333   13.790383  4690.075032       0.0  50.0  0.003790


In [6]:
# How many days we are forecasting out
print(forecast_out, "day(s)")

38 day(s)


In [7]:
merged_data = pd.read_csv(path + '/data/merged_data.csv', index_col='date', parse_dates=True)
merged_data_values = merged_data.values

# Graph Indicators

In [8]:
layout = go.Layout(
    title=pair + ' Time Series',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

price_trace = go.Scatter(x=graph_data['date'], y=graph_data['weightedAverage'], name= 'Price')
movAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['movingAverage'], name= 'Moving Average')

plot_data = [price_trace, movAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=indicators)

# Build Model

In [9]:
# Features
X = np.array(merged.drop(['label'],1))
X = preprocessing.scale(X)
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
merged.dropna(inplace=True)
# Labels
y = np.array(merged['label'])

print(len(X), len(y))

892 892


# Train & Test

In [10]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.4)

clf = LinearRegression(n_jobs=-1, normalize=True)
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=True)

# Pickle & Scale

In [11]:
# with open (path + '/pickles/linear_regression/15m_linearregression.pickle', 'wb') as f:
#     pickle.dump(clf, f)

# pickle_in = open(path + '/pickles/linear_regression/15m_linearregression.pickle', 'rb')
# clf = pickle.load(pickle_in)

In [13]:
accuracy = clf.score(X_test, y_test)
predicted = clf.predict(X_lately)
size = len(predicted)

# The mean score and the 95% confidence interval of the accuracy estimate
print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
print(forecast_out, 'day sample size')

Accuracy: 0.57 (+/- 0.00)
38 day sample size


# Detect Peaks
Create a new dataframe from peaks

In [14]:
indices = peakutils.indexes(predicted)
predict_data = graph_data.tail(size)
predict_data = predict_data[['date', 'close']]
predict_data['prediction'] = predicted

min_max = pd.DataFrame()
for i in predicted[indices]:
    min_max_rows = predict_data[predict_data.prediction == i]
    min_max = min_max.append(min_max_rows)
print(min_max)

           date     close  prediction
898  1516579200  0.092362    0.088332
908  1517443200  0.113210    0.108907
913  1517875200  0.102175    0.092753
916  1518134400  0.101173    0.091269
918  1518307200  0.100275    0.091037
920  1518480000  0.098621    0.089198


# Buy, Sell, & Hold indicators

In [15]:
# Calculate Percent change between peaks and find the best peak to buy and sell
min_max_cycle = cycle(min_max['prediction'])
min_max_size = len(min_max)
next_peak = next(min_max_cycle)
profit = 0
pct_gain = 0
hold = False
buy_sell = pd.DataFrame()
count = 1
for peak in min_max['prediction']:
    
    next_peak = next(min_max_cycle)
    dif = next_peak - peak
    pct_dif = (next_peak - peak) / peak * 100
    
    if(count != min_max_size):
        if(pct_dif > 4):
            print("BUY")
            print('peak', peak)
            print('next peak',next_peak)
            print('dif', pct_dif)
            profit += dif
            pct_gain += pct_dif
            hold = False
            print()
        elif (pct_dif > 0 and pct_dif < 4):
            hold = True
            print("HOLD")
            print('peak', peak)
            print('next peak',next_peak)
            print('dif', pct_dif)
            print()
        elif(pct_dif < -3 and not hold):
            print("SELL")
            print('peak', peak)
            print('next peak',next_peak)
            print('dif', pct_dif)
            print()
        elif(pct_dif < 0 and pct_dif > -3):
            hold = True
            print("HOLD")
            print('peak', peak)
            print('next peak',next_peak)
            print('dif', pct_dif)
            print()

    count += 1
        
print("profit", profit)
print("pct gain %" , pct_gain)

BUY
peak 0.0883316089648
next peak 0.108906595686
dif 23.292892502

SELL
peak 0.108906595686
next peak 0.0927532052089
dif -14.8323344197

HOLD
peak 0.0927532052089
next peak 0.0912687358843
dif -1.6004507028

HOLD
peak 0.0912687358843
next peak 0.0910365942358
dif -0.254349582265

HOLD
peak 0.0910365942358
next peak 0.089197574117
dif -2.02008888212

profit 0.0205749867214
pct gain % 23.292892502


# Graph Prediction - RSI & Linear Regression

In [16]:
# RSI
layout = go.Layout(
    title=pair+' RSI',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Scale',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)
x1=graph_data['date']
RSI_trace = go.Scatter(x=x1, y=graph_data['RSI'], name= 'RSI')

plot_data = [RSI_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=pair+'_rsi')


# Prediction

layout = go.Layout(
    title=pair+' Prediction',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)
pred_date = predict_data['date']

mvAVG_trace = go.Scatter(x=x1, y=graph_data['movingAverage'], name= 'Moving Average')
wAVG_trace = go.Scatter(x=x1, y=graph_data['close'], name= 'Price')
prediction_trace = go.Scatter(x=pred_date, y=predicted, name= 'Prediction')
peaks_trace = go.Scatter(
    x=min_max['date'],
    y=min_max['prediction'],
    mode='markers',
    marker=dict(
        size=8,
        color='rgb(255,0,0)',
        symbol='cross'
    ),
    name='Detected Peaks'
)
plot_data = [prediction_trace, peaks_trace, wAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=pair+'_prediction')