In [19]:
from config import CONFIG

## Scikit learn for mapping metrics
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import cross_validate, train_test_split, cross_val_predict
from sklearn.linear_model import LinearRegression
import pickle

import math
import pandas as pd
import sys
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np
import matplotlib as plt
import datetime as dt
%matplotlib inline
%matplotlib notebook
py.init_notebook_mode(connected=True)

# Import & Merge Data

In [25]:
path = CONFIG['PATH']
pair = CONFIG['BASE'] + "/" + CONFIG['QUOTE']
sequence_length = 1
indicators = pd.read_csv(path + "/data/periods/15m_indicators.csv")
indicators = indicators[['date','momentum','movingAverage']]
candlesticks = pd.read_csv(path + "/data/periods/15m_candlesticks.csv")
candlesticks = candlesticks[['date','open','high','low','close','volume','weightedAverage',]]
merged = indicators.merge(candlesticks, on='date', left_index=False, right_index=False, how="inner")

graph_data = merged[['date','movingAverage', 'weightedAverage', 'volume', 'close']]

# Condense and add features to the data frame
merged['HL_PCT'] = (merged['high'] - merged['close']) / merged['close'] * 100
merged['PCT_change'] = (merged['close'] - merged['open']) / merged['open'] * 100

merged_csv = merged[['date','close','HL_PCT', 'PCT_change', 'volume', 'movingAverage','weightedAverage', 'momentum']]
merged = merged[['close','HL_PCT', 'PCT_change', 'volume', 'momentum']]

forecast_col = 'close'
merged.fillna(-9999, inplace=True)

# How many days we are forecasting out
forecast_out = int(math.ceil(0.001*len(merged)))

merged['label'] = merged[forecast_col].shift(-forecast_out)

merged_csv = merged_csv.to_csv(path + '/data/merged_data.csv', index=False)

In [26]:
print(merged.head())

      close         HL_PCT  PCT_change     volume  momentum     label
0  0.009000  555452.469153  -99.982000  10.654961   -9999.0  0.005291
1  0.005501      63.788766  -38.877914  23.277061   -9999.0  0.004970
2  0.006900      85.506667   25.431740  10.068957   -9999.0  0.004575
3  0.006001      41.654862   -4.753968  11.680869   -9999.0  0.004680
4  0.007000       2.714286    1.449275  23.597482   -9999.0  0.004280


In [27]:
# How many days we are forecasting out
print(forecast_out, "day(s)")

20 day(s)


In [28]:
merged_data = pd.read_csv(path + '/data/merged_data.csv', index_col='date', parse_dates=True)
merged_data_values = merged_data.values

# Graph Indicators

In [29]:
layout = go.Layout(
    title=pair + ' Indicators',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

price_trace = go.Scatter(x=graph_data['date'], y=graph_data['weightedAverage'], name= 'Price')
movAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['movingAverage'], name= 'Moving Average')

plot_data = [price_trace, movAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=indicators)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


# Build Model

In [30]:
# Features
X = np.array(merged.drop(['label'],1))
X = preprocessing.scale(X)
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
merged.dropna(inplace=True)
# Labels
y = np.array(merged['label'])

print(len(X), len(y))

19415 19415


# Train & Test

In [31]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)

clf = LinearRegression(n_jobs=-1, normalize=True)
clf.fit(X_train, y_train)

# The mean score and the 95% confidence interval of the accuracy estimate
# print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
# print(accuracy)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=True)

# Pickle & Scale

In [43]:
# with open (path + '/pickles/linear_regression/15m_linearregression.pickle', 'wb') as f:
#     pickle.dump(clf, f)

pickle_in = open(path + '/pickles/linear_regression/15m_linearregression.pickle', 'rb')
clf = pickle.load(pickle_in)

In [44]:
accuracy = clf.score(X_test, y_test)
forecast_set = clf.predict(X_lately)
print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
print(forecast_out, 'day sample size')

Accuracy: 0.99 (+/- 0.00)
20 day sample size


In [38]:
predicted = forecast_set
size = len(predicted)

# Graph Prediction - Linear Regression

In [39]:
predict_data = graph_data.tail(size)
layout = go.Layout(
    title=pair+' Predict Moving Average',
    xaxis=dict(
        title='Time (epoch)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

mvAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['movingAverage'], name= 'Moving Average')
wAVG_trace = go.Scatter(x=graph_data['date'], y=graph_data['close'], name= 'Price')
prediction_trace = go.Scatter(x=predict_data['date'], y=predicted, name= 'Prediction')


plot_data = [prediction_trace, mvAVG_trace, wAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
py.iplot(fig, filename=pair+'_prediction')

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
