In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle
from scipy.ndimage import convolve
from sklearn.model_selection import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn import linear_model, datasets, metrics
from sklearn.decomposition import PCA

import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [6]:
def nudge_dataset(X, Y, image_shape=(8, 8)):
    """
    Augment a dataset of flattened images with one-pixel shifted copies.

    Every row of X is viewed as an image of ``image_shape`` and convolved
    with four 3x3 one-hot kernels, which moves it by 1px to left, right,
    down, up. Pixels shifted in from outside the image are zero
    (``mode='constant'``). The result is 5 times bigger than the original:
    the untouched rows come first, followed by one full shifted copy per
    direction kernel.

    Parameters
    ----------
    X : array-like of shape (n_samples, height * width)
        Flattened images, one per row.
    Y : array-like with n_samples entries along axis 0
        Labels matching the rows of X; repeated once per copy of X.
    image_shape : tuple of two ints, default (8, 8)
        (height, width) each row is reshaped to before shifting.

    Returns
    -------
    X, Y : numpy.ndarray
        The augmented samples and the correspondingly repeated labels.

    Raises
    ------
    ValueError
        If X is not 2-D or its row length is not height * width.
    """
    direction_vectors = [
        [[0, 1, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [1, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 1],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 0],
         [0, 1, 0]]
    ]

    X = np.asarray(X)
    height, width = image_shape
    if X.ndim != 2 or X.shape[1] != height * width:
        raise ValueError(
            "X must have shape (n_samples, %d) to be reshaped to %r, got %r"
            % (height * width, tuple(image_shape), X.shape)
        )

    # Shift the whole batch at once: the kernel gets a leading axis of
    # length 1, so each image is convolved independently of its neighbours.
    images = X.reshape((X.shape[0], height, width))
    shifted = [
        convolve(images, weights=np.asarray(vector)[np.newaxis],
                 mode='constant').reshape(X.shape)
        for vector in direction_vectors
    ]

    X = np.concatenate([X] + shifted)
    # One copy of the labels for the originals plus one per shift direction.
    Y = np.concatenate([Y] * (len(direction_vectors) + 1), axis=0)

    return X, Y


In [7]:
# Load Data
data = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

# Features are every column except 'label'; 'label' is the target.
X = data.drop('label', axis=1)
target = data['label']

# PCA: project onto 64 whitened components. The randomized SVD solver is
# stochastic, so random_state is fixed to make the projection reproducible
# across kernel restarts.
n_components = 64
pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True,
          random_state=0).fit(X)

# The projection is fitted on the training features only and then applied
# to both sets.
X_pca = pca.transform(X)
test_pca = pca.transform(test)

# One-hot encoding; cast to float32 so the targets have a numeric dtype
# whatever dtype get_dummies defaults to.
target = pd.get_dummies(target).astype(np.float32)

In [8]:
# Augment the data: nudge_dataset appends four 1px-shifted copies of every
# row, treating each 64-value row as an 8x8 grid.
# NOTE(review): the rows passed here are PCA components, not pixels, so the
# grid has no spatial meaning -- consider nudging the raw images before PCA.
X, Y = nudge_dataset(X_pca, target)

# 0-1 scaling, per feature: subtract the minimum and divide by the range
# (max - min). Dividing by the max alone does not map data with negative
# minima into [0, 1]. The small constant avoids division by zero.
feature_min = np.min(X, 0)
feature_span = np.max(X, 0) - feature_min
X = (X - feature_min) / (feature_span + 0.0001)
# The test set reuses the training statistics so both sets go through the
# same transform; test values outside the training range fall outside [0, 1].
test = (test_pca - feature_min) / (feature_span + 0.0001)

# split: keep 10% aside as a held-out set. test_size must be non-zero --
# recent scikit-learn releases reject test_size=0.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=0)

In [9]:
# Initializing Neural Network: a fully connected stack taking 64 inputs,
# with tanh hidden layers of 128, 64 and 32 units and a 10-unit softmax output.
classifier = Sequential([
    Dense(128, input_dim=64, activation='tanh', kernel_initializer='uniform'),
    Dense(64, activation='tanh', kernel_initializer='uniform'),
    Dense(32, activation='tanh', kernel_initializer='uniform'),
    Dense(10, activation='softmax', kernel_initializer='uniform'),
])


In [41]:
# Compiling Neural Network
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 30 epochs in batches of 150. When the held-out split is not
# empty it is passed as validation data so each epoch also reports
# held-out loss/accuracy; with an empty split the call is unchanged.
validation = (X_test, Y_test) if len(X_test) > 0 else None
classifier.fit(X_train, Y_train, batch_size=150, epochs=30, validation_data=validation)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2940fda0>

In [50]:
# Predicting the Test set: take the index of the highest-scoring output
# per row and store the predictions as a plain Python list.
class_scores = classifier.predict(test)
y_pred = class_scores.argmax(axis=1).tolist()

In [51]:
# Edit submission file: ImageId is the 1-based row number of the test set,
# Label is the predicted class for that row.
li = list(range(1, len(test) + 1))

submission = pd.DataFrame({"ImageId": li, "Label": y_pred})
submission.to_csv('submission.csv', index=False)