In [10]:
%matplotlib inline
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility
from sklearn import decomposition
import matplotlib.pyplot as plt
from ggplot import *
import pandas as pd
import sklearn.preprocessing as skpr

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
from keras.layers.convolutional import Convolution2D, MaxPooling2D, Convolution1D

In [11]:
# Load the labelled frame (key "train") and the unlabelled submission frame
# (key "test") from their HDF5 files.
# NOTE(review): both paths are absolute and machine-specific; the notebook
# will not run elsewhere until they are made configurable.
train = pd.read_hdf(
    "/Users/Anuar_The_Great/desktop/project/3/train.h5",
    key="train"
)
test = pd.read_hdf(
    "/Users/Anuar_The_Great/desktop/project/3/test.h5",
    key="test"
)

In [12]:
# Show (rows, columns) of the labelled frame, then of the submission frame,
# one per line.
print(train.shape, test.shape, sep="\n")

(45324, 101)
(8137, 100)


In [13]:
# Split the labelled frame into a training part (first N_TRAIN rows) and a
# held-out part (all remaining rows). Column 0 is taken as the label and every
# other column as a feature.
N_TRAIN = 40000

# Convert the frame once instead of once per slice.
train_values = np.array(train.values)

X_train = train_values[:N_TRAIN, 1:]
# reshape(-1, 1) yields a column vector whatever the number of rows, so the
# split size only has to be changed in N_TRAIN.
Y_train = train_values[:N_TRAIN, 0].reshape(-1, 1)
X_test = train_values[N_TRAIN:, 1:]
Y_test = train_values[N_TRAIN:, 0].reshape(-1, 1)

# The unlabelled frame has no label column, so it is used whole.
X_test_submit = np.array(test.values)

# Labels are cast to integers (they are later passed to to_categorical).
Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')
print(X_train.shape)
print(Y_test.shape)
print(X_test_submit.shape)

(40000, 100)
(5324, 1)
(8137, 100)


First we tried to build the ANN on the data with the zero columns removed, but later we discovered that a PCA decomposition produces superior results.
We tried several values of n_components; 73 and 89 gave the best results.
A single ANN did not beat the hard baseline, so we decided that an "average" (ensemble) of several neural nets with different architectures and parameters might produce better results.

In [15]:
# Reduce the feature matrices with PCA. The projection is fitted on the
# training rows only and then applied unchanged to the held-out rows and to
# the submission rows.
N_COMPONENTS = 89

# Pass n_components to the constructor rather than patching the attribute on
# an already-built estimator.
pca = decomposition.PCA(n_components=N_COMPONENTS)
Xtrain_reduced = pca.fit_transform(X_train)  # this is the one we train on
Xtest_reduced = pca.transform(X_test)  # held-out rows of the train dataset, for testing
Xtest_submit_reduced = pca.transform(X_test_submit)
# X_test_submit is np.array(test.values), so projecting test.values again
# would only repeat the transform above; reuse its result under the old name.
testdata_forprediction = Xtest_submit_reduced
print('Size of Xtrain_reduced = ', Xtrain_reduced.shape)
print('Size of Xtest_reduced = ', Xtest_reduced.shape)
print('Size of testdata_forprediction = ', testdata_forprediction.shape)

Size of Xtrain_reduced =  (40000, 89)
Size of Xtest_reduced =  (5324, 89)
Size of testdata_forprediction =  (8137, 89)


In [16]:
# Number of target classes and the mini-batch size used by the fit cells.
nb_classes, batch_size = 5, 128

# Replace the integer label columns with their one-hot encodings.
# NOTE(review): this rebinds Y_train / Y_test, so the cell must be run exactly
# once after the split cell.
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes)
    for labels in (Y_train, Y_test)
)

In [17]:
# Ensemble member 1: two 512-unit ReLU hidden layers, each followed by
# dropout of 0.4, and a softmax output layer.
# The input width is read from the PCA-reduced training matrix and the output
# width from nb_classes, so changing the PCA setting or the class count does
# not require editing this cell.
model = Sequential()
model.add(Dense(512, input_shape=(Xtrain_reduced.shape[1],), init='lecun_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(512, init='lecun_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(nb_classes, init='lecun_uniform'))
model.add(Activation('softmax'))

model.summary()

____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
dense_4 (Dense)                    (None, 512)         46080       dense_input_2[0][0]              
____________________________________________________________________________________________________
activation_4 (Activation)          (None, 512)         0           dense_4[0][0]                    
____________________________________________________________________________________________________
dropout_3 (Dropout)                (None, 512)         0           activation_4[0][0]               
____________________________________________________________________________________________________
dense_5 (Dense)                    (None, 512)         262656      dropout_3[0][0]                  
___________________________________________________________________________________________

In [18]:
# Ensemble member 2: a narrower net with 400 then 200 ReLU units, each hidden
# layer followed by dropout of 0.4, and a softmax output layer.
# The input width is read from the PCA-reduced training matrix and the output
# width from nb_classes, so changing the PCA setting or the class count does
# not require editing this cell.
model2 = Sequential()
model2.add(Dense(400, input_shape=(Xtrain_reduced.shape[1],), init='lecun_uniform'))
model2.add(Activation('relu'))
model2.add(Dropout(0.4))
model2.add(Dense(200, init='lecun_uniform'))
model2.add(Activation('relu'))
model2.add(Dropout(0.4))
model2.add(Dense(nb_classes, init='lecun_uniform'))
model2.add(Activation('softmax'))

model2.summary()

____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
dense_7 (Dense)                    (None, 400)         36000       dense_input_3[0][0]              
____________________________________________________________________________________________________
activation_7 (Activation)          (None, 400)         0           dense_7[0][0]                    
____________________________________________________________________________________________________
dropout_5 (Dropout)                (None, 400)         0           activation_7[0][0]               
____________________________________________________________________________________________________
dense_8 (Dense)                    (None, 200)         80200       dropout_5[0][0]                  
___________________________________________________________________________________________

In [19]:
# Ensemble member 3: a wider net with 1000 then 400 ReLU units and heavier
# dropout (0.7 after the first hidden layer, 0.6 after the second), followed
# by a softmax output layer.
# The input width is read from the PCA-reduced training matrix and the output
# width from nb_classes, so changing the PCA setting or the class count does
# not require editing this cell.
model3 = Sequential()
model3.add(Dense(1000, input_shape=(Xtrain_reduced.shape[1],), init='lecun_uniform'))
model3.add(Activation('relu'))
model3.add(Dropout(0.7))
model3.add(Dense(400, init='lecun_uniform'))
model3.add(Activation('relu'))
model3.add(Dropout(0.6))
model3.add(Dense(nb_classes, init='lecun_uniform'))
model3.add(Activation('softmax'))

model3.summary()

____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
dense_10 (Dense)                   (None, 1000)        90000       dense_input_4[0][0]              
____________________________________________________________________________________________________
activation_10 (Activation)         (None, 1000)        0           dense_10[0][0]                   
____________________________________________________________________________________________________
dropout_7 (Dropout)                (None, 1000)        0           activation_10[0][0]              
____________________________________________________________________________________________________
dense_11 (Dense)                   (None, 400)         400400      dropout_7[0][0]                  
___________________________________________________________________________________________

In [21]:
nb_epoch = 10

# Train ensemble member 1 with RMSprop on categorical cross-entropy, tracking
# accuracy, and validate on the held-out rows after every epoch.
model.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# verbose=1 has a bug in the Jupyter notebook, so verbose=2 is used instead.
history = model.fit(
    Xtrain_reduced,
    Y_train,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    shuffle=True,
    validation_data=(Xtest_reduced, Y_test),
    verbose=2
)

# score[0] is the loss and score[1] the accuracy on the held-out rows.
score = model.evaluate(Xtest_reduced, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 40000 samples, validate on 5324 samples
Epoch 1/10
4s - loss: 0.3320 - acc: 0.8898 - val_loss: 0.2581 - val_acc: 0.9097
Epoch 2/10
4s - loss: 0.2683 - acc: 0.9097 - val_loss: 0.2158 - val_acc: 0.9269
Epoch 3/10
6s - loss: 0.2342 - acc: 0.9220 - val_loss: 0.1977 - val_acc: 0.9294
Epoch 4/10
5s - loss: 0.2106 - acc: 0.9297 - val_loss: 0.1806 - val_acc: 0.9391
Epoch 5/10
4s - loss: 0.1942 - acc: 0.9347 - val_loss: 0.1697 - val_acc: 0.9448
Epoch 6/10
5s - loss: 0.1782 - acc: 0.9403 - val_loss: 0.1603 - val_acc: 0.9450
Epoch 7/10
5s - loss: 0.1690 - acc: 0.9437 - val_loss: 0.1552 - val_acc: 0.9452
Epoch 8/10
5s - loss: 0.1599 - acc: 0.9461 - val_loss: 0.1529 - val_acc: 0.9478
Epoch 9/10
4s - loss: 0.1501 - acc: 0.9506 - val_loss: 0.1462 - val_acc: 0.9502
Epoch 10/10
4s - loss: 0.1442 - acc: 0.9521 - val_loss: 0.1440 - val_acc: 0.9498
Test score: 0.143979247659
Test accuracy: 0.949849737085


In [22]:
nb_epoch = 10

# Train ensemble member 2 with RMSprop on categorical cross-entropy, tracking
# accuracy, and validate on the held-out rows after every epoch.
model2.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model2.fit(
    Xtrain_reduced,
    Y_train,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    shuffle=True,
    validation_data=(Xtest_reduced, Y_test),
    verbose=2
)

# score[0] is the loss and score[1] the accuracy on the held-out rows.
score = model2.evaluate(Xtest_reduced, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 40000 samples, validate on 5324 samples
Epoch 1/10
2s - loss: 0.5453 - acc: 0.8123 - val_loss: 0.3267 - val_acc: 0.8894
Epoch 2/10
2s - loss: 0.3339 - acc: 0.8868 - val_loss: 0.2547 - val_acc: 0.9087
Epoch 3/10
2s - loss: 0.2807 - acc: 0.9067 - val_loss: 0.2231 - val_acc: 0.9198
Epoch 4/10
2s - loss: 0.2479 - acc: 0.9170 - val_loss: 0.2001 - val_acc: 0.9324
Epoch 5/10
2s - loss: 0.2274 - acc: 0.9253 - val_loss: 0.1833 - val_acc: 0.9348
Epoch 6/10
2s - loss: 0.2097 - acc: 0.9306 - val_loss: 0.1781 - val_acc: 0.9386
Epoch 7/10
1s - loss: 0.1960 - acc: 0.9353 - val_loss: 0.1676 - val_acc: 0.9395
Epoch 8/10
2s - loss: 0.1877 - acc: 0.9382 - val_loss: 0.1668 - val_acc: 0.9450
Epoch 9/10
2s - loss: 0.1778 - acc: 0.9402 - val_loss: 0.1593 - val_acc: 0.9455
Epoch 10/10
1s - loss: 0.1727 - acc: 0.9440 - val_loss: 0.1566 - val_acc: 0.9446
Test score: 0.156554451063
Test accuracy: 0.944590533478


In [23]:
nb_epoch = 10

# Train ensemble member 3 with RMSprop on categorical cross-entropy, tracking
# accuracy, and validate on the held-out rows after every epoch.
model3.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model3.fit(
    Xtrain_reduced,
    Y_train,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    shuffle=True,
    validation_data=(Xtest_reduced, Y_test),
    verbose=2
)

# score[0] is the loss and score[1] the accuracy on the held-out rows.
score = model3.evaluate(Xtest_reduced, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 40000 samples, validate on 5324 samples
Epoch 1/10
5s - loss: 0.5763 - acc: 0.7931 - val_loss: 0.3394 - val_acc: 0.8854
Epoch 2/10
5s - loss: 0.3839 - acc: 0.8677 - val_loss: 0.2710 - val_acc: 0.9100
Epoch 3/10
5s - loss: 0.3281 - acc: 0.8886 - val_loss: 0.2377 - val_acc: 0.9160
Epoch 4/10
5s - loss: 0.2938 - acc: 0.9032 - val_loss: 0.2141 - val_acc: 0.9251
Epoch 5/10
5s - loss: 0.2736 - acc: 0.9092 - val_loss: 0.2009 - val_acc: 0.9305
Epoch 6/10
5s - loss: 0.2558 - acc: 0.9156 - val_loss: 0.1905 - val_acc: 0.9324
Epoch 7/10
5s - loss: 0.2423 - acc: 0.9211 - val_loss: 0.1836 - val_acc: 0.9361
Epoch 8/10
5s - loss: 0.2293 - acc: 0.9261 - val_loss: 0.1784 - val_acc: 0.9405
Epoch 9/10
5s - loss: 0.2201 - acc: 0.9284 - val_loss: 0.1695 - val_acc: 0.9423
Epoch 10/10
6s - loss: 0.2073 - acc: 0.9316 - val_loss: 0.1693 - val_acc: 0.9421
Test score: 0.169306696654
Test accuracy: 0.942148760375


In [24]:
# Class-probability predictions of each net for the PCA-reduced submission
# rows, in the order model, model2, model3.
probs1, probs2, probs3 = (
    net.predict_proba(Xtest_submit_reduced, batch_size=32)
    for net in (model, model2, model3)
)

# Ensemble of the 3 neural nets: average the three probability matrices
# element-wise, then pick the class with the highest mean probability per row.
probs = (probs1 + probs2 + probs3) / 3
output = probs.argmax(axis=1)



In [25]:
# Max ensemble: for every row and class keep the largest probability any of
# the three nets assigned, then pick the class holding the largest such value.
#
# np.fmax is a binary ufunc whose third positional argument is the `out`
# buffer. Calling np.fmax(probs1, probs2, probs3) therefore compares only
# probs1 and probs2 and writes the result over probs3, discarding the third
# net's predictions. Nesting the calls compares all three and leaves every
# input untouched.
#
# NOTE: this rebinds `output`, replacing the averaged-probability prediction
# assigned by the previous cell.
max_probs = np.fmax(np.fmax(probs1, probs2), probs3)
output = np.argmax(max_probs, axis=1)

In [26]:
# Write the ensemble predictions to 'ensemble2.csv' as integer rows of
# (id, predicted class), with no header line.
# Ids start at 45324 and increase by one per prediction.
# NOTE(review): 45324 matches the row count printed for the training frame,
# so ids presumably continue where the training rows end -- confirm.
first_id = 45324

# Size everything from the prediction array itself instead of hard-coding the
# number of submission rows.
output = output.reshape(-1, 1)
range1 = np.arange(first_id, first_id + output.shape[0]).reshape(-1, 1)
submission = np.hstack((range1, output))
np.savetxt('ensemble2.csv', submission, delimiter=',', fmt='%d')

In [27]:
# Write the predictions of the single net `model` to 'huge_ann8.csv' as
# integer rows of (id, predicted class), with no header line.
# Ids start at 45324 and increase by one per prediction.
# NOTE(review): 45324 matches the row count printed for the training frame,
# so ids presumably continue where the training rows end -- confirm.
first_id = 45324

classes = model.predict_classes(Xtest_submit_reduced, batch_size=32)
# Size everything from the prediction array itself instead of hard-coding the
# number of submission rows.
classes = classes.reshape(-1, 1)
range1 = np.arange(first_id, first_id + classes.shape[0]).reshape(-1, 1)
submission = np.hstack((range1, classes))
np.savetxt('huge_ann8.csv', submission, delimiter=',', fmt='%d')

