In [1]:
import random

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Read the three CSV files; all paths are relative to the working directory.
train = pd.read_csv('train.csv')  # labelled rows: a 'label' column plus the feature columns
test = pd.read_csv('test.csv')  # rows to predict; later scaled and fed to the trained network
predicted_results = pd.read_csv('sample_submission.csv')  # template frame that later receives the predictions

In [3]:
# Separate the target column from the features and divide every feature by 255
# (presumably 8-bit pixel intensities, which would map them into [0, 1] -- confirm).
train_y = train['label']
train_X = train.drop(columns='label').div(255)

# The frame to predict on gets exactly the same scaling as the training features.
samples = test.div(255)

samples.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Hold out 20% of the labelled rows for evaluation; random_state pins the split
# so the same rows land in each part on every run.
# (train_test_split is imported in the notebook's import cell.)
x_train, x_test, y_train, y_test = train_test_split(
    train_X, train_y, test_size=.20, random_state=1
)
print("number of test samples :", x_test.shape[0])
print("number of training samples:",x_train.shape[0])

number of test samples : 8400
number of training samples: 33600


In [5]:
def vectorized_result(j, num_classes=10):
    """Return a one-hot column vector for class index ``j``.

    Args:
        j: index of the entry to set to 1.0.
        num_classes: length of the vector; defaults to 10, the value that was
            previously hard-coded.

    Returns:
        A ``(num_classes, 1)`` float array of zeros with a 1.0 at row ``j``.

    Raises:
        IndexError: if ``j`` is out of bounds for ``num_classes`` rows.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

In [6]:
def _as_column_vectors(frame):
    """Return one (784, 1) array per row of ``frame``, in row order.

    The whole frame is reshaped in a single step instead of building a Series
    per row with ``iterrows()``. Pinning the first dimension to ``len(frame)``
    keeps the reshape failing when the frame does not have exactly 784 columns.
    """
    return list(frame.values.reshape(len(frame), 784, 1))


# Training pairs: (input column vector, one-hot target column vector).
training_inputs = _as_column_vectors(x_train)
training_results = [vectorized_result(y) for y in y_train]
training_data = list(zip(training_inputs, training_results))

# Evaluation pairs keep the raw label, which Network.evaluate compares
# against the argmax of the network output.
test_inputs = _as_column_vectors(x_test)
test_data = list(zip(test_inputs, y_test))

# Inputs without labels, used only for the final predictions.
sample_inputs = _as_column_vectors(samples)

In [7]:
class Network(object):
    """Fully connected feed-forward network with sigmoid units, trained by mini-batch SGD.

    Attributes:
        num_layers: number of layers, input layer included.
        sizes: the list of layer widths passed to the constructor.
        biases: one ``(n_out, 1)`` array per non-input layer.
        weights: one ``(n_out, n_in)`` array per pair of consecutive layers.
    """

    def __init__(self, sizes):
        """Create a network whose layer widths are given by ``sizes``.

        Biases and weights are drawn from a standard normal distribution via
        ``np.random.randn``, so results depend on NumPy's global RNG state.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    @staticmethod
    def sigmoid(z):
        """Logistic function 1 / (1 + exp(-z)); element-wise for arrays."""
        return 1.0/(1.0+np.exp(-z))

    @staticmethod
    def sigmoid_prime(z):
        """Derivative of the logistic function at ``z``: s(z) * (1 - s(z))."""
        s = Network.sigmoid(z)
        return s*(1-s)

    def feedforward(self, a):
        """Return the output-layer activation for the input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: iterable of ``(x, y)`` pairs, where ``y`` has the
                same shape as the network output. It is copied into a list, so
                the caller's sequence is never reordered and one-shot
                iterables are accepted.
            epochs: number of passes over the training data.
            mini_batch_size: number of pairs per gradient step.
            eta: learning rate.
            test_data: optional iterable of ``(x, label)`` pairs. When given
                and non-empty, the number of correct predictions is printed
                after every epoch; otherwise only a completion line is printed.
        """
        training_data = list(training_data)
        n = len(training_data)
        if test_data is not None:
            test_data = list(test_data)
        n_test = len(test_data) if test_data else 0
        for j in range(epochs):
            # Shuffles the local copy only; uses the stdlib global RNG.
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                # Evaluate once per epoch and reuse the count for both fields.
                n_correct = self.evaluate(test_data)
                print ("Epoch {}: {} / {} --> {:2f}%".format(j, n_correct, n_test, (100*n_correct/n_test)))
            else:
                print ("Epoch {} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using the pairs in ``mini_batch``.

        The per-example gradients from ``backprop`` are summed, and weights and
        biases are moved by ``eta / len(mini_batch)`` times that sum.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = eta/len(mini_batch)
        self.weights = [w-step*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-step*nb for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the gradient for one ``(x, y)`` pair.

        Both results are lists of arrays shaped like ``self.biases`` and
        ``self.weights``. The output error term is
        ``(activation - y) * sigmoid_prime(z)``, i.e. the gradient of the
        quadratic cost ``0.5 * ||activation - y||**2``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: record every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # Backward pass, starting from the output layer.
        delta = (activations[-1]-y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Negative indices: l = 2 is the second-to-last layer, and so on.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return how many ``(x, label)`` pairs have ``argmax(output) == label``."""
        return sum(int(np.argmax(self.feedforward(x)) == y) for (x, y) in test_data)

    def predict(self, samples):
        """Return a list with the index of the strongest output unit for each input."""
        return [np.argmax(self.feedforward(x)) for x in samples]
    
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)); applied element-wise when ``z`` is an array."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
    
def sigmoid_prime(z):
    """Derivative of the logistic function at ``z``: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    return activation * (1 - activation)

In [8]:
# Seed both global RNGs before building the network: the weights and biases come
# from np.random.randn, and training shuffles with random.shuffle. Without a seed
# a fresh "Restart & Run All" gives different numbers on every run.
np.random.seed(1)
random.seed(1)

# Layer widths: 784 inputs, two hidden layers of 64 units, 10 outputs.
mindbrain = Network([784,64,64,10])

In [9]:
# Train, reporting accuracy on the held-out split after every epoch.
mindbrain.SGD(
    training_data,
    epochs=30,
    mini_batch_size=7,
    eta=1.2,
    test_data=test_data,
)

Epoch 0: 6772 / 8400 --> 80.619048%
Epoch 1: 6983 / 8400 --> 83.130952%
Epoch 2: 7069 / 8400 --> 84.154762%
Epoch 3: 7108 / 8400 --> 84.619048%
Epoch 4: 7150 / 8400 --> 85.119048%
Epoch 5: 7867 / 8400 --> 93.654762%
Epoch 6: 7850 / 8400 --> 93.452381%
Epoch 7: 7891 / 8400 --> 93.940476%
Epoch 8: 7925 / 8400 --> 94.345238%
Epoch 9: 7923 / 8400 --> 94.321429%
Epoch 10: 7947 / 8400 --> 94.607143%
Epoch 11: 7978 / 8400 --> 94.976190%
Epoch 12: 7964 / 8400 --> 94.809524%
Epoch 13: 7968 / 8400 --> 94.857143%
Epoch 14: 7979 / 8400 --> 94.988095%
Epoch 15: 7989 / 8400 --> 95.107143%
Epoch 16: 7973 / 8400 --> 94.916667%
Epoch 17: 7986 / 8400 --> 95.071429%
Epoch 18: 7972 / 8400 --> 94.904762%
Epoch 19: 8009 / 8400 --> 95.345238%
Epoch 20: 7980 / 8400 --> 95.000000%
Epoch 21: 7996 / 8400 --> 95.190476%
Epoch 22: 8005 / 8400 --> 95.297619%
Epoch 23: 8010 / 8400 --> 95.357143%
Epoch 24: 8003 / 8400 --> 95.273810%
Epoch 25: 8018 / 8400 --> 95.452381%
Epoch 26: 8009 / 8400 --> 95.345238%
Epoch 27: 8

In [10]:
# One predicted class index per prepared input vector, in input order.
results = mindbrain.predict(samples=sample_inputs)

In [11]:
# How often each class index 0-9 occurs among the predictions.
print('Results, frequency per number: \n')
for digit in range(10):
    print('{}:'.format(digit), results.count(digit))

Results, frequency per number: 

0: 2794
1: 3176
2: 2819
3: 2743
4: 2765
5: 2485
6: 2804
7: 2916
8: 2750
9: 2748


In [12]:
# Item assignment always writes a DataFrame column. Attribute-style assignment
# (`frame.Label = ...`) only does so when the column already exists; otherwise it
# sets a plain Python attribute and the column is silently missing from the CSV.
predicted_results['Label'] = results

In [13]:
# Preview the first rows of the frame that will be written out.
predicted_results.head(n=20)

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
5,6,7
6,7,0
7,8,3
8,9,0
9,10,3


In [14]:
# Write the frame to disk; index=False leaves the row index out of the file.
predicted_results.to_csv(path_or_buf='submission.csv', index=False)