In [1]:
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Activation
from keras.layers import Dropout, Flatten
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import SGD
from keras.utils import to_categorical
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [10]:
# This function retrieves the previous 4 days given a single day for input from the .csv file.
def getEvalData(nparray,date,predict_delta):
    res = [[]]
    idx = np.argwhere(nparray==date)[0][0] - 4
    # if the given date does not have 5 previous values...
    if idx < 4 or idx + 7 > nparray.shape[0]:
        return
    else:
        for i in range(5):
            res[0].append(nparray[i+idx][1])
        # the day we are predicting is t
        predict_day = nparray[4+idx+predict_delta][1]
        predict_day_percent = (predict_day - res[0][4])/predict_day
        res.append(predict_day_percent)
        res.append(predict_day)
    return res
        

In [12]:
# Trying to convert the input data, which starts off as dataframeX, must take 5 days and then transpose it
# and then convert it into a numpy array

rawdata = pd.read_csv('data/bitcoin.csv',usecols=[8])
days = 5                                                      # Number of days we are looking at
predict_delta = 7                                             # Number of days we are predicting into the future 
number_of_items = rawdata.shape[0] - (predict_delta + days)   # number of tests
# size of our test 
test_size = number_of_items - 733

# five day blocks in a list
Xtrain = []    
# the values of the day we are trying to predict
Ytrain = []                                                   



# Splits the array of closing prices up into multiple arrays, each holding five days worth of prices
# then transposes each array and finally appends them onto one large matrix
countup = 0
counteven = 0
countdown = 0
for i in range(number_of_items - 200): 
    # finding the predict day
    predict = rawdata.head(days + i + predict_delta).tail(1).as_matrix().transpose().tolist()[0][0]
    last_day = rawdata.head(days + i).tail(1).as_matrix().transpose().tolist()[0][0]
    # Converting and adding data to arrays
    res = rawdata.head(days + i).tail(days).as_matrix().transpose().tolist()[0]
    Xtrain.append(res)
    y_val = (predict - last_day)/predict
    
    if y_val > .03:
        countup = countup + 1
        # a value of [ 1 0 0 ] represents that it will increase by 3% in a week
        Ytrain.append(np.array([1, 0, 0]))
        
    elif y_val > -.03 and y_val < .03:
        counteven = counteven + 1
        # a 
        Ytrain.append(np.array([0, 1, 0]))
        
    else:
        countdown = countdown + 1
        Ytrain.append(np.array([0, 0, 1]))
        
    
print(countup)
print(counteven)
print(countdown)
# converts python arrays into numpy arrays
Xtrain = np.array(Xtrain)
Ytrain = np.array(Ytrain)
#print(Xtrain)
#print(Ytrain)

576
518
439
[[ 134.21  144.54  139.    116.99  105.21]
 [ 144.54  139.    116.99  105.21   97.75]
 [ 139.    116.99  105.21   97.75  112.5 ]
 ...
 [2608.56 2518.66 2571.34 2518.44 2372.56]
 [2518.66 2571.34 2518.44 2372.56 2337.79]
 [2571.34 2518.44 2372.56 2337.79 2398.84]]
[[1 0 0]
 [1 0 0]
 [0 1 0]
 ...
 [0 0 1]
 [0 1 0]
 [0 0 1]]


In [104]:
# number of evalutaion data points
eval_size = number_of_items - 200

# evaluation input data
XEvaluation = []
# evaluation output data
YEvaluation = []

# appending evaluation data to appropriate arrays
for i in range(eval_size):
    # finding the day we want to predict a week out
    predict = rawdata.head(days + i + test_size + predict_delta).tail(1).as_matrix().transpose().tolist()[0][0]
    
    # finding the last day out of the 5 we are using as input to later calculate the percent difference 
    # of the change.  The percentage change is not the absolute value as we want to calculate negative percentages as well
    last_day = rawdata.head(days + i + test_size).tail(1).as_matrix().transpose().tolist()[0][0]
    
    # result after running the AvgPrevious function on the 5 days used as input
    res = rawdata.head(days + i + test_size).tail(days).as_matrix().transpose().tolist()[0]
    XEvaluation.append(res)
    
    
    # putting y-values into the correct bucket
    y_val = (predict - last_day)/predict

    if y_val > .03:
        countup = countup + 1
        YEvaluation.append(np.array([1, 0, 0]))
        
    elif y_val > -.03 and y_val < .03:
        counteven = counteven + 1
        YEvaluation.append(np.array([0, 1, 0]))
        
    else:
        countdown = countdown + 1
        YEvaluation.append(np.array([0, 0, 1]))
    
XEvaluation = np.array(XEvaluation)
YEvaluation = np.array(YEvaluation)

print(XEvaluation)
print(YEvaluation)

[[ 387.49  402.97  391.73  392.15  394.97]
 [ 402.97  391.73  392.15  394.97  380.29]
 [ 391.73  392.15  394.97  380.29  379.47]
 ...
 [9170.54 8830.75 9174.91 8277.01 6955.27]
 [9170.54 8830.75 9174.91 8277.01 6955.27]
 [9170.54 8830.75 9174.91 8277.01 6955.27]]
[[0 0 1]
 [0 1 0]
 [0 1 0]
 ...
 [0 1 0]
 [0 1 0]
 [0 1 0]]


In [6]:
model = Sequential()

model.add(Dense(48,input_dim=5))
model.add(Activation('sigmoid'))
model.add(Dropout(1.0))
model.add(Dense(48, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(3, activation='softmax'))

model.compile(loss='categorical_crossentropy',optimizer='rmsprop', metrics=['accuracy'])

H = model.fit(Xtrain, Ytrain, epochs=10,shuffle=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
Test = [8070.8, 8891.21, 8516.24, 9477.84, 10016.49]
XVal = np.array([Test])
r = model.predict(XVal)

print(r)

[[0.47745916 0.26772314 0.2548177 ]]


In [105]:
score = model.evaluate(XEvaluation,YEvaluation,batch_size=100, verbose=1)
test_loss = score[0]
print("The evaluation loss is: " + str(test_loss))

The evaluation loss is: 1.1030109018884449


In [106]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_92 (Dense)             (None, 48)                288       
_________________________________________________________________
activation_32 (Activation)   (None, 48)                0         
_________________________________________________________________
dropout_29 (Dropout)         (None, 48)                0         
_________________________________________________________________
dense_93 (Dense)             (None, 48)                2352      
_________________________________________________________________
dense_94 (Dense)             (None, 3)                 147       
Total params: 2,787
Trainable params: 2,787
Non-trainable params: 0
_________________________________________________________________


In [11]:
rawdata = pd.read_csv('data/bitcoin.csv',usecols=[3,8])
data = rawdata.as_matrix()
#print(np.argwhere(data=='2016-10-08')[0][0])

res = getEvalData(data,'2017-10-18',predict_delta)

evaluation = np.array([res[0]])
print(model.predict(evaluation))
print(res)

[[0.3150419  0.4252535  0.25970456]]
[[5831.79, 5678.19, 5725.59, 5605.51, 5590.69], 0.027841343813034807, 5750.8]


In [92]:
model.save('classifier_working.h5')