In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from matplotlib.finance import candlestick_ohlc
#import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.constraints import maxnorm
from keras import models, layers, optimizers, regularizers
from sklearn.model_selection import train_test_split

%matplotlib inline
plt.rcParams["figure.figsize"] = (14, 10)
plt.rcParams["font.size"] = 14

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
df_train = pd.read_csv("train.csv", index_col=0)
df_test = pd.read_csv("test.csv", index_col=0)

In [3]:
df_train.head()

Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
02-Jan-2009,902.99,931.8,934.73,899.35,4048270080
05-Jan-2009,929.17,927.45,936.63,919.53,5413910016
06-Jan-2009,931.17,934.7,943.85,927.28,5392620032
07-Jan-2009,927.45,906.65,927.45,902.37,4704940032
08-Jan-2009,905.73,909.73,910.0,896.81,4991549952


## Check if any null data in the dataset

In [4]:
df_train.isnull().sum()

Open Price     0
Close Price    0
High Price     0
Low Price      0
Volume         0
dtype: int64

## preprocessing
calculate the difference between the prices, and calculate the price is higher or lower than the day before that day

In [5]:
df_train["return"] = df_train["High Price"] - df_train["Low Price"]
df_train["close to open"] = np.abs(df_train["Close Price"] - df_train["Open Price"])
df_train["close to high"] = np.abs(df_train["Close Price"] - df_train["High Price"])
df_train["close to low"] = np.abs(df_train["Close Price"] - df_train["Low Price"])

df_test["return"] = df_test["High Price"] - df_test["Low Price"]
df_test["close to open"] = np.abs(df_test["Close Price"] - df_test["Open Price"])
df_test["close to high"] = np.abs(df_test["Close Price"] - df_test["High Price"])
df_test["close to low"] = np.abs(df_test["Close Price"] - df_test["Low Price"])

def result_calc(x):
    if x > 0:
        return 1
    elif x < 0:
        return 0
    elif x == 0:
        return 2

df_train["result"] = df_train["Close Price"] - df_train["Close Price"].shift(1)
df_train["result"] = df_train["result"].apply(lambda x: result_calc(x))

df_test["result"] = df_test["Close Price"] - df_test["Close Price"].shift(1)
df_test["result"] = df_test["result"].apply(lambda x: result_calc(x))

df_train.head()

Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume,return,close to open,close to high,close to low,result
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
02-Jan-2009,902.99,931.8,934.73,899.35,4048270080,35.38,28.81,2.93,32.45,
05-Jan-2009,929.17,927.45,936.63,919.53,5413910016,17.1,1.72,9.18,7.92,0.0
06-Jan-2009,931.17,934.7,943.85,927.28,5392620032,16.57,3.53,9.15,7.42,1.0
07-Jan-2009,927.45,906.65,927.45,902.37,4704940032,25.08,20.8,20.8,4.28,0.0
08-Jan-2009,905.73,909.73,910.0,896.81,4991549952,13.19,4.0,0.27,12.92,1.0


### Because the result of the first row is null, drop it

In [6]:
df_train["result"].isnull().sum()

1

In [7]:
df_train = df_train.dropna()
df_test = df_test.dropna()
df_train.head()

Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume,return,close to open,close to high,close to low,result
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
05-Jan-2009,929.17,927.45,936.63,919.53,5413910016,17.1,1.72,9.18,7.92,0.0
06-Jan-2009,931.17,934.7,943.85,927.28,5392620032,16.57,3.53,9.15,7.42,1.0
07-Jan-2009,927.45,906.65,927.45,902.37,4704940032,25.08,20.8,20.8,4.28,0.0
08-Jan-2009,905.73,909.73,910.0,896.81,4991549952,13.19,4.0,0.27,12.92,1.0
09-Jan-2009,909.91,890.35,911.93,888.31,4716499968,23.62,19.56,21.58,2.04,0.0


## Normalize the elements

In [8]:
scaler = StandardScaler()
scaler.fit(df_train[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]])
df_train[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]] = scaler.transform(df_train[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]])
df_train.head()


Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume,return,close to open,close to high,close to low,result
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
05-Jan-2009,-1.499495,-1.50445,-1.502622,-1.500502,1.824404,0.010751,-0.836174,0.168867,-0.171102,0.0
06-Jan-2009,-1.495371,-1.489501,-1.487722,-1.484533,1.808651,-0.042903,-0.644738,0.165531,-0.231612,1.0
07-Jan-2009,-1.503041,-1.547337,-1.521567,-1.53586,1.299815,0.818598,1.181833,1.461159,-0.611611,0.0
08-Jan-2009,-1.547821,-1.540987,-1.557579,-1.547317,1.511887,-0.385074,-0.595028,-0.822038,0.433992,1.0
09-Jan-2009,-1.539203,-1.580946,-1.553596,-1.564831,1.308369,0.670796,1.050684,1.547905,-0.882693,0.0


In [9]:
scaler.fit(df_test[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]])
df_test[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]] = scaler.transform(df_test[["Open Price", "Close Price", "High Price", "Low Price", "Volume", "return", "close to open", "close to high", "close to low"]])

df_test.head()

Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume,return,close to open,close to high,close to low,result
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
03-Jan-2018,-0.508581,-0.331044,-0.523646,-0.305917,-0.142641,-0.689624,-0.135579,-0.75311,-0.035561,1.0
04-Jan-2018,-0.291669,-0.221964,-0.363034,-0.10582,-0.120207,-0.963246,-0.686423,-0.558909,-0.643437,1.0
05-Jan-2018,-0.170175,-0.030748,-0.210605,-0.022681,-0.521355,-0.735513,-0.312916,-0.802268,-0.039078,1.0
08-Jan-2018,-0.055553,0.014761,-0.156135,0.068256,-0.574383,-0.933653,-0.66759,-0.777933,-0.339206,1.0
09-Jan-2018,0.03016,0.050489,-0.041704,0.164641,-0.436682,-0.917785,-0.923918,-0.434795,-0.730779,1.0


## Model fitting and evaluation

In [10]:
y = df_train["result"]
y_test = df_test["result"]
df_train = df_train.drop("result", 1)
df_test = df_test.drop("result", 1)

df_train.head()

Unnamed: 0_level_0,Open Price,Close Price,High Price,Low Price,Volume,return,close to open,close to high,close to low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
05-Jan-2009,-1.499495,-1.50445,-1.502622,-1.500502,1.824404,0.010751,-0.836174,0.168867,-0.171102
06-Jan-2009,-1.495371,-1.489501,-1.487722,-1.484533,1.808651,-0.042903,-0.644738,0.165531,-0.231612
07-Jan-2009,-1.503041,-1.547337,-1.521567,-1.53586,1.299815,0.818598,1.181833,1.461159,-0.611611
08-Jan-2009,-1.547821,-1.540987,-1.557579,-1.547317,1.511887,-0.385074,-0.595028,-0.822038,0.433992
09-Jan-2009,-1.539203,-1.580946,-1.553596,-1.564831,1.308369,0.670796,1.050684,1.547905,-0.882693


## keras neural network model creation

In [11]:
def create_model():
    # default values
    activation='relu' 
    dropout_rate=0.0 
    init_mode='uniform'
    weight_constraint=0 
    optimizer='adam'
    lr = 0.01
    momemntum=0
    input_dim = df_train.shape[1]
    # create model
    model = Sequential()
    model.add(Dense(8, 
                    input_dim=input_dim, kernel_initializer=init_mode, 
                    activation=activation,
                    kernel_constraint=maxnorm(weight_constraint)))
    model.add(Dropout(dropout_rate)) 
    model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=optimizer, 
                  metrics=['accuracy'])
    return model

In [12]:
model = KerasClassifier(build_fn=create_model, batch_size=1000, epochs=10)

## some data for finding the best hyperparameters

In [13]:
activation =  ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'] # softmax, softplus, softsign 
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
weight_constraint=[1, 2, 3, 4, 5]
neurons = [1, 5, 10, 15, 20, 25, 30]
init = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
optimizer = [ 'SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

In [14]:
epochs = [1, 10] # add 50, 100, 150 etc
batch_size = [1000, 5000] # add 5, 10, 20, 40, 60, 80, 100 etc
param_grid = dict(epochs=epochs, batch_size=batch_size)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(df_train, y) 



Instructions for updating:
Instructions for updating:
Colocations handled automatically by placer.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 1/10
Epoch 1/1
Epoch 1/1
Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
Epoch 1/1
Epoch 1/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 2/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 3/10
Epoch 8/10
Epoch 9/10
Epoch 4/10
Epoch 10/10
Epoch 5/10


Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/1


In [15]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.546178 using {'batch_size': 1000, 'epochs': 1}


## model with best hyperparameters

Best: 0.546178 using {'batch_size': 1000, 'epochs': 1}

In [16]:
real_nn = KerasClassifier(build_fn=create_model, batch_size=1000, epochs=1)
real_nn.fit(df_train, y)
y_predict = real_nn.predict(df_test)

Epoch 1/1


In [17]:
accuracy_score(y_predict, y_test)

0.5219123505976095

## accuracy test with train_test_split

In [18]:
x1, x2, y1, y2 = train_test_split(df_train, y, train_size=0.7)
a = KerasClassifier(build_fn=create_model, batch_size=1000, epochs=1)
a.fit(x1, y1)
yp = a.predict(x2)
accuracy_score(yp, y2)



Epoch 1/1


0.5449189985272459