In [22]:
import config

## Keras for deep learning
# from keras.layers.core import Dense, Activation, Dropout
# from keras.layers.recurrent import LSTM
# from keras.layers import Bidirectional
# from keras.models import Sequential

## Scikit learn for mapping metrics
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import cross_validate, train_test_split, cross_val_predict
from sklearn.linear_model import LinearRegression

import math
import pandas as pd
import sys
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np
# NOTE(review): two matplotlib backend magics are issued back to back; presumably
# only one of them is intended -- confirm which backend this notebook needs.
%matplotlib inline
%matplotlib notebook
# Initialise plotly's offline notebook mode (py is plotly.offline) before any
# figure is rendered with py.iplot further down.
py.init_notebook_mode(connected=True)

# Import & Merge Data

In [23]:
# Load the indicator and candlestick exports, join them on their shared 'date'
# column, derive the regression features, attach the look-ahead label, and
# persist the resulting table to <path>/data/merged_data.csv.
path = config.CONSTS['PATH']
pair = config.CONSTS['BASE'] + "/" + config.CONSTS['QUOTE']
sequence_length = 1

indicators = pd.read_csv(path + "/data/indicators.csv")
indicators = indicators[['date', 'momentum', 'movingAverage']]
candlesticks = pd.read_csv(path + "/data/candlesticks.csv")
candlesticks = candlesticks[['date', 'open', 'high', 'low', 'close', 'volume', 'weightedAverage']]

# Inner join: only dates present in both files are kept.
merged = indicators.merge(candlesticks, on='date', how="inner")

# Columns needed by the plotting cell, captured before `merged` is narrowed.
graph_data = merged[['date', 'movingAverage', 'weightedAverage', 'volume']]

# Condense and add features to the data frame
# NOTE(review): despite the "HL" name this compares high against close, not
# high against low -- confirm that this is the intended feature.
merged['HL_PCT'] = (merged['high'] - merged['close']) / merged['close'] * 100
merged['PCT_change'] = (merged['close'] - merged['open']) / merged['open'] * 100

# Take an explicit copy of the column subset: the frame is modified below
# (fillna, new 'label' column), and doing that on a bare slice triggers
# pandas' SettingWithCopyWarning.
merged = merged[['close', 'HL_PCT', 'PCT_change', 'volume']].copy()

forecast_col = 'close'
# Missing feature values are replaced by a -9999 sentinel rather than dropped.
merged = merged.fillna(-9999)

# Look-ahead horizon: 1% of the available rows, rounded up.
forecast_out = int(math.ceil(0.01 * len(merged)))

# The label for each row is the forecast column's value `forecast_out` rows
# later; the trailing rows have no such value and get NaN.
merged['label'] = merged[forecast_col].shift(-forecast_out)

# Drop the trailing rows whose label is NaN.
merged = merged.dropna()

# DataFrame.to_csv returns None when given a path, so the result is not bound
# to a name (the previous `merged_csv` variable was always None).
merged.to_csv(path + '/data/merged_data.csv', index=False)

In [24]:
# Show the first five rows of the feature table as plain text.
print(merged.head(n=5))

      close        HL_PCT  PCT_change       volume     label
0  0.003125  1.599905e+06  -99.993750  1205.803321  0.006190
1  0.002581  5.887224e+01  -13.976713   898.123434  0.004747
2  0.002645  9.724837e+00   -0.187927   718.365266  0.005139
3  0.003950  1.138987e+01   49.083066  3007.274111  0.005500
4  0.004500  8.493333e+00   13.790383  4690.075032  0.006220


In [25]:
# Read the persisted feature table back from disk, parsing every column as float.
merged_data = pd.read_csv(
    filepath_or_buffer=path + '/data/merged_data.csv',
    dtype=float,
)
# The same table as a bare NumPy array (no column labels).
merged_data_values = merged_data.values

# Graph currency data

In [26]:
def _axis_spec(caption):
    """Return a fresh axis configuration titled `caption`.

    Both axes of the figure use the same grey, 18pt monospace title font, so
    the font settings live here instead of being spelled out twice.
    """
    return {
        'title': caption,
        'titlefont': {
            'family': 'Courier New, monospace',
            'size': 18,
            'color': '#7f7f7f',
        },
    }


# Figure chrome: the currency pair as the title, time on x, price on y.
layout = go.Layout(
    title=pair,
    xaxis=_axis_spec('Time'),
    yaxis=_axis_spec('Price'),
)

# One line per series, both plotted against the date column.
price_trace = go.Scatter(
    x=graph_data['date'],
    y=graph_data['weightedAverage'],
    name='Price',
)
movAVG_trace = go.Scatter(
    x=graph_data['date'],
    y=graph_data['movingAverage'],
    name='Moving Average',
)

plot_data = [price_trace, movAVG_trace]
fig = go.Figure(data=plot_data, layout=layout)
# Render the figure inline in the notebook.
py.iplot(fig, filename=pair)

# Build Model

In [33]:
# Build the feature matrix X and the target vector y for the regressor.
#
# Incomplete rows are filtered once, *before* either array is extracted, so X
# and y are always taken from the same set of rows. (Previously dropna ran
# after X had already been built, which would have misaligned X and y if it
# had ever removed a row.)
labelled = merged.dropna()

# Every column except the target is a feature; `axis` is passed by keyword
# because newer pandas releases no longer accept it positionally.
X = np.array(labelled.drop(['label'], axis=1))
y = np.array(labelled['label'])

# Standardise the feature columns with scikit-learn's preprocessing.scale.
X = preprocessing.scale(X)

# Guard against any future change that breaks the row alignment.
assert len(X) == len(y), "feature matrix and label vector must have equal length"

print(len(X), len(y))

777 777


# Train & Test

In [49]:
# Hold out 20% of the rows for evaluation, fit an ordinary least-squares
# model on the rest, and report its score on the held-out rows.

# Fixed seed so the shuffled split -- and therefore the reported score -- is
# the same on every run of this cell.
RANDOM_STATE = 42

# NOTE(review): train_test_split shuffles rows before splitting. If the rows
# are time-ordered (they appear to come from dated candlesticks), held-out rows
# can sit right next to training rows in time and the score may be optimistic;
# consider a chronological split -- confirm what is wanted here.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

clf = LinearRegression(n_jobs=-1)
clf.fit(X_train, y_train)

# For a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy. The variable name is kept for compatibility.
accuracy = clf.score(X_test, y_test)

print(accuracy)

0.940721135623
