In [197]:
# First Kaggle project using FFN: Digit Recognizer

# href: https://www.kaggle.com/c/digit-recognizer

import pandas as pd
import keras.callbacks as cb
from keras.regularizers import l1, l2
from keras.layers.core import Activation, Dense, Dropout

from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import np_utils

import numpy as np
import time

In [200]:
def PreprocessDataset():
    # The competition datafiles are in the directory ../input
    # Read competition data files:
    train = pd.read_csv("./input/train.csv")
    test  = pd.read_csv("./input/test.csv")

    # Write to the log:
    print("Training set has {0[0]} rows and {0[1]} columns".format(train.shape))
    print("Test set has {0[0]} rows and {0[1]} columns".format(test.shape))
    # Any files you write to the current directory get shown as outputs
    
    if type(train) is pd.core.frame.DataFrame:
        train = train.as_matrix()


    if type(test) is pd.core.frame.DataFrame:
        test = test.as_matrix()

    x_train = train[:,1:]
    y_train = train[:,:1]
    y_label = y_train

    x_test = test
    
    ## Transform labels to one-hot encoding
    ## i.e., from '7' to [0,0,0,0,0,0,0,1,0,0]
    y_train = np_utils.to_categorical(y_train, 10)
    
    ## Process features. Set numeric type
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # Activity 1 (Pre-processing):
    # Group A: w/o pre-processing datasets.
    #
    # Group B: Min-Max Normalize value to [0, 1]
    # x_train /= 255
    # x_test /= 255
    #
    # Group C: proceed w/ standardizing datasets by z-scoring (de-mean, uni-variance).
    # x_train = preprocessing.scale(x_train)
    # x_test = preprocessing.scale(x_test)
    ################################################################  
    ## YOUR TURN: CHANGE HERE
    x_train /= 255
    x_test /= 255
    return x_train, y_train, y_label, x_test 

In [201]:
x_train, y_train, y_label, x_test = PreprocessDataset()
print("x_train type: " + str(x_train.shape))
print("x_test type: " + str(x_test.shape))
print("y_train type: " + str(y_train.shape))

Training set has 42000 rows and 785 columns
Test set has 28000 rows and 784 columns
x_train type: (42000, 784)
x_test type: (28000, 784)
y_train type: (42000, 10)


In [202]:
## Show part of training data: features and labels
## Each row is a sample, and each column represents a feature.
print("{:^43}".format("x"), "|", "{:^4}".format("y"))
print("="*50)
for sample_id in range(10):
    print("{:.2f} {:.2f} ... {:.2f} {:.2f} {:.2f} ...  {:.2f} {:.2f}".format(
            x_train[sample_id][0], x_train[sample_id][1],
            x_train[sample_id][156], x_train[sample_id][157], x_train[sample_id][158],
            x_train[sample_id][-2], x_train[sample_id][-1]), "| ",
           "{:.0f}".format(y_train[sample_id][0]))

                     x                      |  y  
0.00 0.00 ... 0.00 0.00 0.00 ...  0.00 0.00 |  0
0.00 0.00 ... 0.96 0.59 0.13 ...  0.00 0.00 |  1
0.00 0.00 ... 0.00 0.00 0.00 ...  0.00 0.00 |  0
0.00 0.00 ... 0.00 0.04 0.30 ...  0.00 0.00 |  0
0.00 0.00 ... 0.99 0.99 0.45 ...  0.00 0.00 |  1
0.00 0.00 ... 0.98 0.96 0.28 ...  0.00 0.00 |  1
0.00 0.00 ... 0.00 0.00 0.00 ...  0.00 0.00 |  0
0.00 0.00 ... 0.00 0.00 0.00 ...  0.00 0.00 |  0
0.00 0.00 ... 0.70 1.00 1.00 ...  0.00 0.00 |  0
0.00 0.00 ... 0.99 0.49 0.00 ...  0.00 0.00 |  0


In [203]:
def DefineModel():

    ################################################################
    # Activity 2 (Network Structure):
    # Group A: uses only 1 layer
    # second_layer_width = 0
    #
    # Group B: uses 2 layers of a tower-shaped (same width) network.
    # second_layer_width = 128
    #
    # Group C: uses 2 layers of a pyramid-shaped (shrink width) network.
    # second_layer_width = 64
    ################################################################
    first_layer_width = 128
    second_layer_width = 64   
    
    ################################################################
    # Activity 3 (Activation Function):
    # Group A uses ReLU.
    # activation_func = 'relu' 
    # 
    # Group B uses Sigmoid.
    # activation_func = 'sigmoid'
    #
    # Group C uses Tanh.
    # activation_func = 'tanh'
    ################################################################
    activation_func = 'relu' 

    ################################################################    
    # Activity 4 (Loss Function):
    # Group A uses cross entropy.
    # loss_function = 'categorical_crossentropy'
    # 
    # Group B uses cross entropy.
    # loss_function = 'categorical_crossentropy'
    # 
    # Group C uses squared error.
    # loss_function = 'mean_squared_error'
    ################################################################    
    loss_function = 'categorical_crossentropy'
    
    #################################################################    
    # Activity 5 (Dropout):
    # Group A uses 0% dropout.
    #
    # Group B uses 50% dropout.
    # dropout_rate = 0.5
    #
    # Group C uses 90% dropout.
    # dropout_rate = 0.9
    #################################################################    
    dropout_rate = 0
    
    ################################################################    
    # Activity 6 (Regularization):
    # Group A uses L1 regularizer
    # weight_regularizer = l1(0.01)
    #
    # Group B uses L2 regularizer
    # weight_regularizer = l2(0.01)
    # 
    # Group C uses no regularizer
    # weight_regularizer = None
    ################################################################
    weight_regularizer = l2(0.01)

    ################################################################    
    # Activity 8 (Learning Rate):
    # Group A uses learning rate of 0.1.
    # learning_rate = 0.1
    # 
    # Group B uses learning rate of 0.01.
    # learning_rate = 0.01
    #
    # Group C uses learning rate of 0.5.    
    # learning_rate = 0.5
    ################################################################
    learning_rate = 0.1
    
    ## Initialize model.
    model = Sequential()

    ## First hidden layer with 'first_layer_width' neurons. 
    ## Also need to specify input dimension.
    ## 'Dense' means fully-connected.
    model.add(Dense(first_layer_width, input_dim=784, W_regularizer=weight_regularizer))
    model.add(Activation(activation_func))
    if dropout_rate > 0:
        model.add(Dropout(0.5))

    ## Second hidden layer.
    if second_layer_width > 0:
        model.add(Dense(second_layer_width))
        model.add(Activation(activation_func))
        if dropout_rate > 0:
            model.add(Dropout(0.5))         
    
    ## Last layer has the same dimension as the number of classes
    ## For classification, the activation is softmax
    model.add(Dense(10, activation='softmax'))
    ## Define optimizer. In this tutorial/codelab, we select SGD.
    ## You can also use other methods, e.g., opt = RMSprop()
    opt = SGD(lr=learning_rate, clipnorm=5.)
    ## Define loss function = 'categorical_crossentropy' or 'mean_squared_error'
    model.compile(loss=loss_function, optimizer=opt, metrics=["accuracy"])

    return model

In [204]:
def TrainModel(data=None, epochs=20):
    ################################################################
    # Activity 7 (Mini-batch):
    # Group A uses mini-batch of size 128.
    # batch = 128
    #
    # Group B uses mini-batch of size 256.
    # batch = 256
    # 
    # Group C uses mini-batch of size 512.
    # batch = 512
    ################################################################
    batch=128
    start_time = time.time()
    model = DefineModel()
    if data is None:
        print("Must provide data.")
        return
    x_train, x_test, y_train, y_test = data
    print('Start training.')
    ## Use the first 55,000 (out of 60,000) samples to train, last 5,500 samples to validate.
    history = model.fit(x_train[:55000], y_train[:55000], nb_epoch=epochs, batch_size=batch,
              validation_data=(x_train[55000:], y_train[55000:]))
    print("Training took {0} seconds.".format(time.time() - start_time))
    return model, history

In [205]:
trained_model, training_history = TrainModel(data=[x_train, x_test, y_train, y_test])

Start training.
Train on 42000 samples, validate on 0 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training took 40.62459897994995 seconds.


In [206]:
def TestModel(model=None, x_test=None):
    if model is None:
        print("Must provide a trained model.")
        return
    if x_test is None:
        print("Must provide data.")
        return
    scores = model.predict(x_test)
    return scores

In [207]:
test_score = TestModel(model=trained_model, x_test=x_test)
train_score = TestModel(model=trained_model, x_test=x_train)


In [208]:
labels = np.array(list(map(lambda item: np.argmax(item), test_score)))
labels_trains = np.array(list(map(lambda item: np.argmax(item), train_score)))
labels_y_train = np.array(list(map(lambda item: np.argmax(item), y_train)))

In [217]:
print(labels[:10])
print('Predicted label: ' + str(labels_trains[:20]))
print('1hotencoding la: ' + str(labels_y_train[:20]))
print('Actual label   : ' + str(y_label[:20,0]))

[2 0 8 2 3 2 0 3 0 3]
Predicted label: [1 0 1 6 0 2 7 3 5 3 8 8 1 3 3 1 2 0 7 5]
1hotencoding la: [1 0 1 4 0 0 7 3 5 3 8 9 1 3 3 1 2 0 7 5]
Actual label   : [1 0 1 4 0 0 7 3 5 3 8 9 1 3 3 1 2 0 7 5]


In [218]:
output=np.array([np.array(list(range(1, len(labels)+1))), labels])

In [219]:
output=np.transpose(output)

In [220]:
output.shape
output[:10,:]

array([[ 1,  2],
       [ 2,  0],
       [ 3,  8],
       [ 4,  2],
       [ 5,  3],
       [ 6,  2],
       [ 7,  0],
       [ 8,  3],
       [ 9,  0],
       [10,  3]])

In [221]:
table = pd.DataFrame(data=output[:,1], index=output[:,0], columns=['Label']) 

In [222]:
# Save to csv
table.index.name='ImageId'
table.to_csv('out.csv')

In [223]:

table

Unnamed: 0_level_0,Label
ImageId,Unnamed: 1_level_1
1,2
2,0
3,8
4,2
5,3
6,2
7,0
8,3
9,0
10,3
