In [14]:
import json
import csv
import pandas as pd
import numpy as np
import scipy as sci
import keras
from keras.models import Sequential
from keras import regularizers
from keras.layers.core import Dense, Activation
from keras.layers import Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint
import io
import requests
import tensorflow as tf
from scipy import sparse
from sklearn.model_selection import train_test_split
import sklearn.feature_extraction.text as sk_text
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, f1_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix, classification_report
import collections
from sklearn import preprocessing
import matplotlib.pyplot as plt
import shutil
import os

# Useful Functions

In [15]:
#Function to normalize columns
def normalize_numeric_minmax(df, name):
        df[name] = ((df[name] - df[name].min()) / (df[name].max() - df[name].min())).astype(np.float32)
    

In [16]:
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
import collections
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. 
    target_type = df[target].dtypes
    target_type = target_type[0] if isinstance(target_type, collections.Sequence) else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df[result].values.astype(np.float32), dummies.values.astype(np.float32)
    else:
        # Regression
        return df[result].values.astype(np.float32), df[target].values.astype(np.float32)

In [37]:
# Regression chart.
def chart_regression(pred,y,sort=True):
    t = pd.DataFrame({'pred' : pred, 'y' : y.flatten()})
    if sort:
        t.sort_values(by=['y'],inplace=True)
    a = plt.plot(t['y'].tolist(),label='expected')
    b = plt.plot(t['pred'].tolist(),label='prediction')
    plt.ylabel('output')
    plt.legend()
    plt.show()


# Data Pre-Processing

In [17]:
#Read Stock_Price_MAX.csv file and load data into stock_df dataframe 
stock_df= pd.read_csv('Stock_Price_MAX.csv')
stock_df

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
0,2000/3/27,3.812500,4.156250,3.812500,4.125000,4.125000,3675600
1,2000/3/28,4.125000,4.125000,4.000000,4.015625,4.015625,1077600
2,2000/3/29,4.000000,4.031250,3.953125,4.000000,4.000000,437200
3,2000/3/30,4.000000,4.000000,3.843750,3.843750,3.843750,1883600
4,2000/3/31,3.734375,3.734375,3.390625,3.390625,3.390625,7931600
5,2000/4/3,3.500000,3.703125,3.437500,3.437500,3.437500,11486800
6,2000/4/4,3.531250,3.578125,3.093750,3.500000,3.500000,13136800
7,2000/4/5,3.468750,3.562500,3.453125,3.484375,3.484375,6349600
8,2000/4/6,3.500000,3.593750,3.468750,3.578125,3.578125,7181200
9,2000/4/7,3.593750,3.812500,3.593750,3.609375,3.609375,13904800


In [18]:
stock_df = stock_df.drop(['Date', 'Adj_Close'], axis = 1)
stock_df

Unnamed: 0,Open,High,Low,Close,Volume
0,3.812500,4.156250,3.812500,4.125000,3675600
1,4.125000,4.125000,4.000000,4.015625,1077600
2,4.000000,4.031250,3.953125,4.000000,437200
3,4.000000,4.000000,3.843750,3.843750,1883600
4,3.734375,3.734375,3.390625,3.390625,7931600
5,3.500000,3.703125,3.437500,3.437500,11486800
6,3.531250,3.578125,3.093750,3.500000,13136800
7,3.468750,3.562500,3.453125,3.484375,6349600
8,3.500000,3.593750,3.468750,3.578125,7181200
9,3.593750,3.812500,3.593750,3.609375,13904800


In [19]:
# Normalize the columns
    
normalize_numeric_minmax(stock_df,"Open")
normalize_numeric_minmax(stock_df,"High") 
normalize_numeric_minmax(stock_df,"Low") 
normalize_numeric_minmax(stock_df,"Volume") 
normalize_numeric_minmax(stock_df,"Close") 
stock_df

Unnamed: 0,Open,High,Low,Close,Volume
0,0.004378,0.006469,0.006934,0.007408,0.075401
1,0.007031,0.006205,0.008535,0.006482,0.019194
2,0.005970,0.005413,0.008135,0.006350,0.005339
3,0.005970,0.005149,0.007201,0.005027,0.036632
4,0.003714,0.002904,0.003334,0.001191,0.167478
5,0.001725,0.002640,0.003734,0.001588,0.244393
6,0.001990,0.001584,0.000800,0.002117,0.280091
7,0.001459,0.001452,0.003867,0.001984,0.133252
8,0.001725,0.001716,0.004001,0.002778,0.151243
9,0.002521,0.003564,0.005067,0.003043,0.296706


In [20]:
# to xy to convert pandas to tensor flow
x,y=to_xy(stock_df,"Close")

# Fully Connected Neural Network

In [21]:
#Split for train and test
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=42)

In [22]:
x_train.shape

(3074, 4)

In [23]:
x_test.shape

(1318, 4)

In [24]:
y_train.shape

(3074,)

In [25]:
y_test.shape

(1318,)

## ReLU

### ReLU, adam, 2 layers, early stopping and Model checkpoint  - score_relu_2l

In [26]:
# set up checkpointer
checkpointer_relu = ModelCheckpoint(filepath="./best_weights_relu_2l.hdf5", verbose=1, save_best_only=True)

In [28]:
# relu adam 2 layers
for i in range(10):
    print(i)
    
    # Build network
    model_relu = Sequential()
    model_relu.add(Dense(60, input_dim=x_train.shape[1], activation='relu')) 
    model_relu.add(Dense(30, activation='relu')) # Hidden 2
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='adam')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 
    
print('Training finished...Loading the best model') 
print()
model_reg_relu.load_weights('./best_weights_relu_2l.hdf5')


0
Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 0s - loss: 0.0104 - val_loss: 9.5655e-05

Epoch 00001: val_loss improved from inf to 0.00010, saving model to ./best_weights_relu_2l_nn.hdf5
Epoch 2/100
 - 0s - loss: 7.5660e-05 - val_loss: 3.7997e-05

Epoch 00002: val_loss improved from 0.00010 to 0.00004, saving model to ./best_weights_relu_2l_nn.hdf5
Epoch 3/100
 - 0s - loss: 3.1386e-05 - val_loss: 2.1262e-05

Epoch 00003: val_loss improved from 0.00004 to 0.00002, saving model to ./best_weights_relu_2l_nn.hdf5
Epoch 4/100
 - 0s - loss: 2.0685e-05 - val_loss: 1.5795e-05

Epoch 00004: val_loss improved from 0.00002 to 0.00002, saving model to ./best_weights_relu_2l_nn.hdf5
Epoch 5/100
 - 0s - loss: 1.7797e-05 - val_loss: 1.4677e-05

Epoch 00005: val_loss improved from 0.00002 to 0.00001, saving model to ./best_weights_relu_2l_nn.hdf5
Epoch 6/100
 - 0s - loss: 1.7891e-05 - val_loss: 1.4893e-05

Epoch 00006: val_loss did not improve from 0.00001
Epoch 00006: early stoppin

NameError: name 'model_reg_relu' is not defined

In [29]:
# Predict stars
pred_relu_2l = model_relu.predict(x_test)
print("Shape: {}".format(pred_relu_2l.shape))


Shape: (1318, 1)


In [31]:
# Measure RMSE error.  RMSE is common for regression.
score_relu_2l = np.sqrt(mean_squared_error(y_test,pred_relu_2l))
print("Final score (RMSE): {}".format(score_relu_2l))
print('R2 score: %.2f' % r2_score(y_test, pred_relu_2l))

Final score (RMSE): 0.00341482344083488
R2 score: 1.00


### ReLU, adam, 3 layer, early stopping and Model checkpoint  - score_relu_3l

In [32]:
# set up checkpointer
checkpointer_relu = ModelCheckpoint(filepath="./best_weights_relu_3l.hdf5", verbose=1, save_best_only=True)

In [33]:
for i in range(10):
    print(i)
    
    # Build network
    model_reg_relu = Sequential()

    model_relu.add(Dense(60, input_dim=x_train.shape[1], activation='relu'))  
    model_relu.add(Dense(30, activation='relu')) # Hidden 2
    model_relu.add(Dense(10, activation='relu')) # Hidden 3
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='adam')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 
    
print('Training finished...Loading the best model') 
print()
model_reg_relu.load_weights('./best_weights_relu_3l.hdf5')

0
Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 1s - loss: 0.0538 - val_loss: 2.0855e-04

Epoch 00001: val_loss improved from inf to 0.00021, saving model to ./best_weights_relu_3l.hdf5
Epoch 2/100
 - 0s - loss: 6.9398e-05 - val_loss: 2.4523e-05

Epoch 00002: val_loss improved from 0.00021 to 0.00002, saving model to ./best_weights_relu_3l.hdf5
Epoch 3/100
 - 0s - loss: 2.0924e-05 - val_loss: 1.6575e-05

Epoch 00003: val_loss improved from 0.00002 to 0.00002, saving model to ./best_weights_relu_3l.hdf5
Epoch 4/100
 - 0s - loss: 1.7167e-05 - val_loss: 1.9958e-05

Epoch 00004: val_loss did not improve from 0.00002
Epoch 5/100
 - 0s - loss: 1.8122e-05 - val_loss: 1.4409e-05

Epoch 00005: val_loss improved from 0.00002 to 0.00001, saving model to ./best_weights_relu_3l.hdf5
Epoch 6/100
 - 0s - loss: 1.6714e-05 - val_loss: 1.3740e-05

Epoch 00006: val_loss improved from 0.00001 to 0.00001, saving model to ./best_weights_relu_3l.hdf5
Epoch 00006: early stopping
1
Train on 30

ValueError: You are trying to load a weight file containing 35 layers into a model with 0 layers.

In [34]:
# Predict stars
pred_hl_3 = model_relu.predict(x_test)

In [35]:
# Measure RMSE error.  RMSE is common for regression.
score_3l = np.sqrt(mean_squared_error(y_test,pred_hl_3))
print("Final score (RMSE): {}".format(score_3l))
print('R2 score: %.2f' % r2_score(y_test, pred_hl_3))

Final score (RMSE): 0.004518930334597826
R2 score: 1.00


### ReLU, adam, 4 layer, early stopping and Model checkpoint  - score_relu_4l

In [40]:
# set up checkpointer
checkpointer_relu = ModelCheckpoint(filepath="./best_weights_relu_4l.hdf5", verbose=1, save_best_only=True)

In [41]:
for i in range(10):
    print(i)
    
    # Build network
    model_relu = Sequential()

    model_relu.add(Dense(80, input_dim=x_train.shape[1], activation='relu'))  
    model_relu.add(Dense(60, activation='relu')) # Hidden 2
    model_relu.add(Dense(20, activation='relu')) # Hidden 3
    model_relu.add(Dense(10, activation='relu')) # Hidden 4
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='adam')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 
    
print('Training finished...Loading the best model') 
print()
model_reg_relu.load_weights('./best_weights_relu_4l.hdf5')

0
Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 2s - loss: 0.0099 - val_loss: 1.8032e-05

Epoch 00001: val_loss improved from inf to 0.00002, saving model to ./best_weights_relu_4l.hdf5
Epoch 2/100
 - 0s - loss: 1.5572e-05 - val_loss: 1.4469e-05

Epoch 00002: val_loss improved from 0.00002 to 0.00001, saving model to ./best_weights_relu_4l.hdf5
Epoch 3/100
 - 0s - loss: 1.4089e-05 - val_loss: 1.4323e-05

Epoch 00003: val_loss improved from 0.00001 to 0.00001, saving model to ./best_weights_relu_4l.hdf5
Epoch 4/100
 - 0s - loss: 1.5336e-05 - val_loss: 2.0596e-05

Epoch 00004: val_loss did not improve from 0.00001
Epoch 5/100
 - 0s - loss: 1.5336e-05 - val_loss: 2.8708e-05

Epoch 00005: val_loss did not improve from 0.00001
Epoch 6/100
 - 0s - loss: 1.3487e-05 - val_loss: 1.0825e-05

Epoch 00006: val_loss improved from 0.00001 to 0.00001, saving model to ./best_weights_relu_4l.hdf5
Epoch 00006: early stopping
1
Train on 3074 samples, validate on 1318 samples
Epoch 1/100


ValueError: You are trying to load a weight file containing 5 layers into a model with 0 layers.

In [53]:
# Predict stars
pred_hl4 = model_relu.predict(x_test)

In [55]:
# Measure RMSE error.  RMSE is common for regression.
score_4l = np.sqrt(mean_squared_error(y_test,pred_hl4))
print("Final score (RMSE): {}".format(score_4l))
print('R2 score: %.2f' % r2_score(y_test, pred_hl4))

Final score (RMSE): 0.021769022569060326
R2 score: 0.99


### ReLU, adam, 4 layer and dropout - score_relu_4l_do

In [56]:
    # Build network
    model_relu = Sequential()

    model_relu.add(Dense(80, input_dim=x_train.shape[1]))  
    model_relu.add(Dropout(0.1))
    model_relu.add(Dense(60, activation='relu')) # Hidden 2
    model_relu.add(Dense(20, activation='relu')) # Hidden 3
    model_relu.add(Dense(10, activation='relu')) # Hidden 4
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='adam')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 


Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 4s - loss: 0.0069 - val_loss: 1.1118e-04

Epoch 00001: val_loss did not improve from 0.00002
Epoch 2/100
 - 0s - loss: 4.9394e-04 - val_loss: 5.0593e-05

Epoch 00002: val_loss did not improve from 0.00002
Epoch 3/100
 - 0s - loss: 3.9193e-04 - val_loss: 1.8051e-04

Epoch 00003: val_loss did not improve from 0.00002
Epoch 4/100
 - 0s - loss: 3.1749e-04 - val_loss: 1.5043e-04

Epoch 00004: val_loss did not improve from 0.00002
Epoch 5/100
 - 0s - loss: 2.5651e-04 - val_loss: 4.7390e-04

Epoch 00005: val_loss did not improve from 0.00002
Epoch 6/100
 - 0s - loss: 1.9608e-04 - val_loss: 5.0287e-04

Epoch 00006: val_loss did not improve from 0.00002
Epoch 00006: early stopping


<keras.callbacks.History at 0x1b39621bc18>

In [57]:
# Predict stars
pred_hl4_do = model_relu.predict(x_test)

In [58]:
# Measure RMSE error.  RMSE is common for regression.
score_4l_do = np.sqrt(mean_squared_error(y_test,pred_hl4_do))
print("Final score (RMSE): {}".format(score_4l_do))
print('R2 score: %.2f' % r2_score(y_test, pred_hl4_do))

Final score (RMSE): 0.022424694150686264
R2 score: 0.99


### ReLU, sgd, 3 layers, early stopping and Model checkpoint  - score_relu_3l

In [64]:
# set up checkpointer
checkpointer_relu = ModelCheckpoint(filepath="./best_weights_relu_3l_sgd.hdf5", verbose=1, save_best_only=True)

In [None]:
for i in range(10):
    print(i)
    
    # Build network
    model_reg_relu = Sequential()

    model_relu.add(Dense(60, input_dim=x_train.shape[1], activation='relu'))  
    model_relu.add(Dense(30, activation='relu')) # Hidden 2
    model_relu.add(Dense(10, activation='relu')) # Hidden 3
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='sgd')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 
    
print('Training finished...Loading the best model') 
print()
model_reg_relu.load_weights('./best_weights_relu_3l_sgd.hdf5')

0
Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 16s - loss: 0.0681 - val_loss: 0.0641

Epoch 00001: val_loss improved from inf to 0.06412, saving model to ./best_weights_relu_3l_sgd.hdf5
Epoch 2/100
 - 2s - loss: 0.0641 - val_loss: 0.0645

Epoch 00002: val_loss did not improve from 0.06412
Epoch 3/100
 - 2s - loss: 0.0641 - val_loss: 0.0645

Epoch 00003: val_loss did not improve from 0.06412
Epoch 4/100
 - 2s - loss: 0.0641 - val_loss: 0.0641

Epoch 00004: val_loss did not improve from 0.06412
Epoch 5/100
 - 2s - loss: 0.0641 - val_loss: 0.0641

Epoch 00005: val_loss improved from 0.06412 to 0.06412, saving model to ./best_weights_relu_3l_sgd.hdf5
Epoch 6/100
 - 2s - loss: 0.0641 - val_loss: 0.0641

Epoch 00006: val_loss did not improve from 0.06412
Epoch 00006: early stopping
1
Train on 3074 samples, validate on 1318 samples
Epoch 1/100
 - 18s - loss: 0.0721 - val_loss: 0.0641

Epoch 00001: val_loss did not improve from 0.06412
Epoch 2/100
 - 2s - loss: 0.0641 - val_l

In [None]:
# Predict stars
pred_relu_3l_sgd = model_relu.predict(x_test)
print("Shape: {}".format(pred_relu_3l_sgd.shape))


In [None]:
# Measure RMSE error.  RMSE is common for regression.
score_relu_3l_sgd = np.sqrt(mean_squared_error(y_test,pred_relu_3l_sgd))
print("Final score (RMSE): {}".format(score_relu_3l_sgd))
print('R2 score: %.2f' % r2_score(y_test, pred_relu_3l_sgd))

### ReLU, sgd, 3 layer and dropout - score_relu_3l_do

In [None]:
    # Build network
    model_relu = Sequential()

    model_relu.add(Dense(50, input_dim=x_train.shape[1]))  
    model_relu.add(Dropout(0.1))
    model_relu.add(Dense(25, activation='relu')) # Hidden 2
    model_relu.add(Dense(10, activation='relu')) # Hidden 3
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='sgd')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 


In [None]:
# Predict stars
pred_hl3_do = model_relu.predict(x_test)

In [None]:
# Measure RMSE error.  RMSE is common for regression.
score_3l_do = np.sqrt(mean_squared_error(y_test,pred_hl3_do))
print("Final score (RMSE): {}".format(score_3l_do))
print('R2 score: %.2f' % r2_score(y_test, pred_hl3_do))

### ReLU, rmsprop, 3 layers, early stopping and Model checkpoint  - score_relu_3l_rms

In [None]:
# set up checkpointer
checkpointer_relu = ModelCheckpoint(filepath="./best_weights_relu_3l_rms.hdf5", verbose=1, save_best_only=True)

In [None]:
for i in range(10):
    print(i)
    
    # Build network
    model_reg_relu = Sequential()

    model_relu.add(Dense(60, input_dim=x_train.shape[1], activation='relu'))  
    model_relu.add(Dense(30, activation='relu')) # Hidden 2
    model_relu.add(Dense(10, activation='relu')) # Hidden 3
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='rmsprop')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 
    
print('Training finished...Loading the best model') 
print()
model_reg_relu.load_weights('./best_weights_relu_3l_rms.hdf5')

In [None]:
# Predict stars
pred_relu_3l_rms = model_relu.predict(x_test)
print("Shape: {}".format(pred_relu_3l_rms.shape))


In [None]:
# Measure RMSE error.  RMSE is common for regression.
score_relu_3l_rms = np.sqrt(mean_squared_error(y_test,pred_relu_3l_rms))
print("Final score (RMSE): {}".format(score_relu_3l_rms))
print('R2 score: %.2f' % r2_score(y_test, pred_relu_3l_rms))

### ReLU, rmsprop, 3 layer and dropout - score_relu_3l_do

In [None]:
    # Build network
    model_relu = Sequential()

    model_relu.add(Dense(50, input_dim=x_train.shape[1]))  
    model_relu.add(Dropout(0.1))
    model_relu.add(Dense(25, activation='relu')) # Hidden 2
    model_relu.add(Dense(10, activation='relu')) # Hidden 3
    model_relu.add(Dense(1)) # Output
    model_relu.compile(loss='mean_squared_error', optimizer='rmsprop')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model_relu.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer_relu],verbose=2,epochs=100) 


In [None]:
# Predict stars
pred_hl3_rms_do = model_relu.predict(x_test)

In [None]:
# Measure RMSE error.  RMSE is common for regression.
score_3l_rms_do = np.sqrt(mean_squared_error(y_test,pred_hl3_rms_do))
print("Final score (RMSE): {}".format(score_3l_rms_do))
print('R2 score: %.2f' % r2_score(y_test, pred_hl3_rms_do))