In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# Running this cell (click Run or press Shift+Enter) prints the names of the files in that directory.

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [None]:
# Read the train/test CSVs, then separate the 'label' column from the training pixels.
df_train = pd.read_csv('../input/train.csv')
df_test = pd.read_csv('../input/test.csv')
df_train_label = df_train['label']
df_train = df_train.drop(columns=['label'])

In [None]:
# Count of rows per label; the author's observation is that the data set is balanced.
df_train_label.value_counts() 

In [None]:
# Draw the training row at position 1 as a 28x28 grayscale image, titled with its label.
plt.imshow(
    np.reshape(df_train.iloc[1].values, (28, 28)),
    cmap='gray',
)
plt.title(df_train_label.iloc[1])

In [None]:
# One-hot encode the labels.
# NOTE(review): Y_train is reassigned by the train_test_split call in the next cell
# (which is fed the raw label values), so this one-hot frame is not what the
# training code further down receives.
Y_train = pd.get_dummies(df_train_label)

In [None]:
# Hold out 20% of the labelled rows for validation; the fixed random_state makes the split repeatable.
X_train, X_val, Y_train, Y_val = train_test_split(
    df_train.values,
    df_train_label.values,
    test_size=0.2,
    random_state=1,
)

## MLP

In [None]:
# Sizes derived from the data: rows/columns of the training matrix and the number of distinct labels.
num_examples, nn_input_dim = X_train.shape          ## training set size, input layer dimensionality
nn_output_dim = np.unique(df_train_label).size      ## output layer dimensionality

In [None]:
# Display the number of distinct labels (the output layer width).
nn_output_dim

In [None]:
# Hyper-parameters looked up by key in the model-building, loss and training functions.
params = dict(
    lr=1e-5,         ## learning_rate
    max_iter=500,
    h_dimn=40,       ## hidden_layer_size
    regL1=1,
    regL2=1,
)

In [None]:
def build_model():
    """Create freshly initialised parameters for the two-layer network.

    The hidden width comes from ``params["h_dimn"]``; the input and output
    widths come from the module-level ``nn_input_dim`` and ``nn_output_dim``.
    NumPy's global random generator is reseeded with 0 on every call, so
    repeated calls return identical weights.

    Returns:
        dict: ``'W1'`` (input x hidden) and ``'W2'`` (hidden x output), drawn
        from a standard normal and divided by the square root of their number
        of input rows, plus all-zero row-vector biases ``'b1'`` and ``'b2'``.
    """
    n_hidden = params["h_dimn"]
    np.random.seed(0)
    # W1 is drawn before W2 so the random stream is consumed in a fixed order.
    input_weights = np.random.randn(nn_input_dim, n_hidden) / np.sqrt(nn_input_dim)
    output_weights = np.random.randn(n_hidden, nn_output_dim) / np.sqrt(n_hidden)
    return {
        'W1': input_weights,
        'b1': np.zeros((1, n_hidden)),
        'W2': output_weights,
        'b2': np.zeros((1, nn_output_dim)),
    }

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        x: array-like of shape (n_rows, n_classes).

    Returns:
        ndarray of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which previously produced inf/inf = nan for large scores).
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

def feedforward(model, x):
    """Forward pass: affine layer, tanh, affine layer, softmax.

    Args:
        model: dict holding the arrays 'W1', 'b1', 'W2', 'b2'.
        x: input rows to push through the network.

    Returns:
        tuple: ``(a1, probs)``, the tanh hidden activations and the softmax
        output computed from them.
    """
    hidden = np.tanh(x.dot(model['W1']) + model['b1'])
    scores = hidden.dot(model['W2']) + model['b2']
    return hidden, softmax(scores)

def backpropagation(model, x, y, a1, probs):
    """Gradients of the summed cross-entropy with respect to the parameters.

    Args:
        model: dict of parameters; only 'W2' is read here.
        x: input rows that produced ``a1`` and ``probs``.
        y: integer class index per row (used to index columns of ``probs``).
        a1: tanh hidden activations from the forward pass.
        probs: softmax outputs from the forward pass. Left unmodified.

    Returns:
        tuple: ``(dW2, db2, dW1, db1)``. ``db2`` keeps a leading axis of
        length 1; ``db1`` is one-dimensional.
    """
    W2 = model['W2']

    # Work on a copy: assigning `delta3 = probs` only aliases the array, so the
    # in-place subtraction below would overwrite the caller's probabilities.
    delta3 = probs.copy()
    delta3[range(y.shape[0]), y] -= 1
    dW2 = (a1.T).dot(delta3)
    db2 = np.sum(delta3, axis=0, keepdims=True)
    # 1 - a1**2 is the derivative of tanh expressed through its output.
    delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
    dW1 = np.dot(x.T, delta2)
    db1 = np.sum(delta2, axis=0)
    return dW2, db2, dW1, db1

def calculate_loss(model, x, y):
    """Mean regularised cross-entropy of the model on ``(x, y)``.

    Args:
        model: parameter dict with 'W1', 'b1', 'W2', 'b2'.
        x: input rows.
        y: integer class index per row (used to index columns of the
            predicted probabilities).

    Returns:
        The cross-entropy summed over rows, plus the L2 and L1 weight
        penalties scaled by ``params["regL2"]`` and ``params["regL1"]``,
        all divided by the number of rows.
    """
    W1, W2 = model['W1'], model['W2']

    # Forward propagation to calculate predictions
    _, probs = feedforward(model, x)

    # Cross-entropy: negative log-probability assigned to the true class.
    correct_logprobs = -np.log(probs[range(y.shape[0]), y])
    data_loss = np.sum(correct_logprobs)

    # Regularisation (weights only; biases are not penalised).
    # NOTE(review): these penalties affect only the reported value;
    # backpropagation() in this file returns gradients of the data term alone.
    data_loss += params["regL2"] / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    # np.linalg.norm(W, ord=1) on a 2-D array is the maximum absolute column
    # sum (a matrix norm), not the entrywise L1 penalty, so sum |w| explicitly.
    data_loss += params["regL1"] * (np.sum(np.abs(W1)) + np.sum(np.abs(W2)))

    return 1. / y.shape[0] * data_loss

def test(model, x, y):
    """Return the fraction of rows in ``x`` whose arg-max class equals ``y``."""
    probs = feedforward(model, x)[1]
    predicted = np.argmax(probs, axis=1)
    n_correct = np.count_nonzero(y == predicted)
    return n_correct / y.shape[0]

def train(model, X_train, X_test, Y_train, Y_test, print_loss=True):
    """Fit the network with gradient descent over the whole training set.

    Performs ``params["max_iter"]`` updates with step size ``params["lr"]``.
    The arrays stored in ``model`` are updated in place.

    Args:
        model: parameter dict with 'W1', 'b1', 'W2', 'b2'.
        X_train, Y_train: inputs and integer labels used for the updates.
        X_test, Y_test: inputs and labels used only for the accuracy report.
        print_loss: when true, print loss and accuracy every 50 iterations.

    Returns:
        dict: the trained parameters under the same four keys.
    """
    W1, b1, W2, b2 = (model[key] for key in ('W1', 'b1', 'W2', 'b2'))
    for step in range(params["max_iter"]):
        # Forward pass over the full training set, then its gradients.
        hidden, probs = feedforward(model, X_train)
        dW2, db2, dW1, db1 = backpropagation(model, X_train, Y_train, hidden, probs)

        # In-place steps: the arrays referenced by the dict move with them.
        for weights, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
            weights += -params["lr"] * grad

        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        if print_loss and step % 50 == 0:
            print("Loss after iteration %i: %f" %(step, calculate_loss(model, X_train, Y_train)),
                  ", Test accuracy:", test(model, X_test, Y_test), "\n")
    return model

In [None]:
# Initialise the weights and fit them on the training split; loss and validation
# accuracy are printed every 50 iterations.
model = train(build_model(), X_train, X_val, Y_train, Y_val)

In [None]:
# Write the submission file: a header, then one "ImageId,Label" row per test image.
# NOTE(review): this rebinds the name `test`, shadowing the test() accuracy helper
# defined earlier; re-running the training cell afterwards will fail until the
# function cell is re-executed. The binding is kept because later cells may use it.
test = df_test.values

# feedforward() already returns softmax probabilities, so no second softmax is
# needed, and one batched pass replaces the per-row loop.
_, test_probs = feedforward(model, test)
test_labels = np.argmax(test_probs, axis=1)

# `with` closes the file even if a write fails; ImageIds are numbered from the
# data (1..len(test)) instead of hard-coding the final id as 28000.
with open('sub.csv', 'w') as fil:
    fil.write('ImageId,Label\n')
    for image_id, label in enumerate(test_labels, start=1):
        fil.write('%d,%d\n' % (image_id, label))

In [None]:
ls

## CNN