In [1]:
import config

## Scikit learn for mapping metrics
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import cross_validate, train_test_split, cross_val_predict
from sklearn.linear_model import LinearRegression
import pickle

import math
import pandas as pd
import sys
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np
import matplotlib as plt
import datetime as dt
%matplotlib inline
%matplotlib notebook
py.init_notebook_mode(connected=True)

# Import & Merge Data

In [2]:
path = config.CONSTS['PATH']
pair = config.CONSTS['BASE'] + "/" + config.CONSTS['QUOTE']
sequence_length = 1
indicators = pd.read_csv(path + "/data/indicators.csv")
indicators = indicators[['date','momentum','movingAverage']]
candlesticks = pd.read_csv(path + "/data/candlesticks.csv")
candlesticks = candlesticks[['date','open','high','low','close','volume','weightedAverage',]]
merged = indicators.merge(candlesticks, on='date', left_index=False, right_index=False, how="inner")

graph_data = merged[['date','movingAverage', 'weightedAverage', 'volume', 'close']]

# Condense and add features to the data frame
merged['HL_PCT'] = (merged['high'] - merged['close']) / merged['close'] * 100
merged['PCT_change'] = (merged['close'] - merged['open']) / merged['open'] * 100

merged_csv = merged[['date','close','HL_PCT', 'PCT_change', 'volume', 'movingAverage','weightedAverage', 'momentum']]
merged = merged[['movingAverage','HL_PCT', 'PCT_change', 'volume', 'momentum']]

forecast_col = 'movingAverage'
merged.fillna(-9999, inplace=True)

# How many days we are forecasting out
forecast_out = int(math.ceil(0.03*len(merged)))

merged['label'] = merged[forecast_col].shift(-forecast_out)

merged_csv = merged_csv.to_csv(path + '/data/merged_data.csv', index=False)

In [3]:
print(merged.head())

   movingAverage        HL_PCT  PCT_change       volume  momentum     label
0       0.000000  1.599905e+06  -99.993750  1205.803321   -9999.0  0.005526
1       0.000000  5.887224e+01  -13.976713   898.123434   -9999.0  0.005580
2       0.003695  9.724837e+00   -0.187927   718.365266   -9999.0  0.005642
3       0.003305  1.138987e+01   49.083066  3007.274111   -9999.0  0.005644
4       0.003300  8.493333e+00   13.790383  4690.075032   -9999.0  0.005623


In [4]:
# How many days we are forecasting out
print(forecast_out, "day(s)")

24 day(s)


In [5]:
merged_data = pd.read_csv(path + '/data/merged_data.csv', index_col='date', parse_dates=True)
merged_data_values = merged_data.values

# Graph Indicators

In [6]:
layout = go.Layout(
    title=pair + ' Indicators',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

price_trace = go.Scatter(x=graph_data['date'], y=graph_data['weightedAverage'], name= 'Price')
movAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['movingAverage'], name= 'Moving Average')

plot_data = [price_trace, movAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=indicators)

# Build Model

In [7]:
# Features
X = np.array(merged.drop(['label'],1))
X = preprocessing.scale(X)
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
merged.dropna(inplace=True)
# Labels
y = np.array(merged['label'])

print(len(X), len(y))

761 761


# Train & Test

In [14]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)

clf = LinearRegression(n_jobs=-1, normalize=True)
clf.fit(X_train, y_train)

# The mean score and the 95% confidence interval of the accuracy estimate
# print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
# print(accuracy)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=True)

# Pickle & Scale

In [15]:
# with open ('linearregression.pickle', 'wb') as f:
#     pickle.dump(clf, f)

# pickle_in = open('linearregression.pickle', 'rb')
# clf = pickle.load(pickle_in)

In [16]:
accuracy = clf.score(X_test, y_test)
forecast_set = clf.predict(X_lately)
print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
print(forecast_out, 'day sample size')

Accuracy: 0.83 (+/- 0.00)
24 day sample size


In [17]:
predicted = forecast_set
size = len(predicted)

# Graph Prediction - Linear Regression

In [18]:
predict_data = graph_data.tail(size)
layout = go.Layout(
    title=pair+' Predict Moving Average',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

mvAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['movingAverage'], name= 'Moving Average')
wAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['close'], name= 'Price')
prediction_trace = go.Scatter(x=predict_data['date'], y=predicted, name= 'Prediction')


plot_data = [prediction_trace, wAVG_trace, mvAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=pair+'_prediction')