In [4]:
import numpy as np
import os
import gzip
from scipy import sparse
from sklearn.model_selection import train_test_split
import pandas as pd

In [5]:
def load_data(path):
    train = pd.read_csv(path)
    
    train, val = train_test_split(train, test_size = 5/42)
    
    X_train, Y_train = train.iloc[:,1:785], train.iloc[:,0]
    X_val, Y_val = val.iloc[:,1:785], val.iloc[:,0]
    
    return X_train, Y_train, X_val, Y_val

In [1]:
def softmax(V):
    e_V = np.exp(V - np.max(V, axis = 0, keepdims = True))
    Z = e_V / e_V.sum(axis = 0)
    return Z

## One-hot coding
def convert_labels(y, C = 10):
    Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(len(y)))), shape = (C, len(y))).toarray()
    return Y

def loss(Y, Yhat):
    return -np.sum(Y*np.log(Yhat))/Y.shape[1]

def ReLU(X):
    Z = np.maximum(X, 0)
    return Z

In [2]:
def train_nn(X_train, Y_train):
    d0 = 784
    d1 = 784
    d2 = 10
    learning_rate = 0.1
    n_train = 37000
    n_val = 5000
    #num_batch = 16
    epoch = 20

    # initialize parameters randomly
    w1 = np.random.randn(d0, d1)
    #w1 = np.zeros((d0, d1))
    b1 = np.zeros((d1, 1))
    w2 = np.random.randn(d1, d2)
    #w2 = np.zeros((d1, d2))
    b2 = np.zeros((d2, 1))

    #feed forward
    for k in range(epoch):
        h1 = np.dot(np.transpose(w1), X_train) + b1
        H1 = ReLU(h1)
        h2 = np.dot(np.transpose(w2), H1) + b2
        H2 = softmax(h2)
            
        E2 = H2 - Y_train
        dw2 = np.dot(H1, E2.T)
        db2 = np.sum(E2, axis = 1, keepdims = True)
        E1 = np.dot(w2, E2)
        E1[h1 <= 0] = 0 # gradient of ReLU
        dw1 = np.dot(X_train, E1.T)
        db1 = np.sum(E1, axis = 1, keepdims = True)
        
        #update
        w1 = w1 - learning_rate * dw1
        w2 = w2 - learning_rate * dw2
        b1 = b1 - learning_rate * db1
        b2 = b2 - learning_rate * db2
        
        print("accuracy training data epoch: ", accuracy_val(X_train, Y_train0, w1, b1, w2, b2))
# mini-batch
#    for k in range(epoch):
#        i = 0
#        rand_idx = np.random.permutation(n_train).reshape(1, n_train)
#        while i < n_train:
            
            
#            h1 = np.dot(np.transpose(w1), X_train[:, rand_idx[0, i:i+num_batch]]) + b1
#            H1 = ReLU(h1)
#            h2 = np.dot(np.transpose(w2), H1) + b2
#            H2 = softmax(h2)
        
#            E2 = (H2 - Y_train[:, rand_idx[0, i:i+num_batch]])/num_batch
#            dw2 = np.dot(H1, E2.T)
#            db2 = np.sum(E2, axis = 1, keepdims = True)
#            E1 = np.dot(w2, E2)
#            E1[h1 <= 0] = 0 # gradient of ReLU
#            dw1 = np.dot(X_train[:, rand_idx[0, i:i+num_batch]], E1.T)
#            db1 = np.sum(E1, axis = 1, keepdims = True)
        
            #update
 #           w1 = w1 - learning_rate * dw1
 #           w2 = w2 - learning_rate * dw2
 #           b1 = b1 - learning_rate * db1
 #           b2 = b2 - learning_rate * db2
                
 #           i = i + num_batch
        
 #       print("accuracy training data epoch: ", accuracy_val(X_train, Y_train0, w1, b1, w2, b2))
        
    
    return w1, b1, w2, b2

In [5]:
def accuracy_val(X_val, Y_val, w1, b1, w2, b2):
    h1 = np.dot(np.transpose(w1), X_val) + b1
    H1 = ReLU(h1)
    h2 = np.dot(np.transpose(w2), H1) + b2
    H2 = softmax(h2)
    predicted_y = np.argmax(H2, axis=0)
   
    accuracy = np.mean(predicted_y == Y_val)
    return accuracy

In [6]:
# Read training data
X_train, Y_train, X_val, Y_val = load_data('/home/datphan/NN/data/train.csv')

In [7]:
Y_train0 = Y_train.to_numpy().reshape(1, 37000)
Y_train = convert_labels(Y_train)
X_train = X_train.to_numpy().reshape(784, 37000)
Y_train = Y_train.reshape(10, 37000)
X_val = X_val.to_numpy().reshape(784, 5000)
Y_val = Y_val.to_numpy().reshape(1, 5000)

print(X_train.shape, Y_train.shape, Y_train0.shape, X_val.shape, Y_val.shape)

(784, 37000) (10, 37000) (1, 37000) (784, 5000) (1, 5000)


In [8]:
w1, b1, w2, b2 = train_nn(X_train, Y_train)
print("accuracy validation: ", 100 * accuracy_val(X_val, Y_val, w1, b1, w2, b2))

accuracy training data epoch:  0.10305405405405406
accuracy training data epoch:  0.0997027027027027
accuracy training data epoch:  0.10513513513513513
accuracy training data epoch:  0.08894594594594595
accuracy training data epoch:  0.09756756756756757
accuracy training data epoch:  0.09889189189189189
accuracy training data epoch:  0.09908108108108109
accuracy training data epoch:  0.09862162162162162
accuracy training data epoch:  0.09754054054054054
accuracy training data epoch:  0.11145945945945945
accuracy training data epoch:  0.10305405405405406
accuracy training data epoch:  0.0997027027027027
accuracy training data epoch:  0.10513513513513513
accuracy training data epoch:  0.08894594594594595
accuracy training data epoch:  0.09756756756756757
accuracy training data epoch:  0.09889189189189189
accuracy training data epoch:  0.09908108108108109
accuracy training data epoch:  0.09862162162162162
accuracy training data epoch:  0.09754054054054054
accuracy training data epoch:  0.

In [12]:
# Read testing_data
test = pd.read_csv("/home/datphan/NN/data/test.csv")
test = test.to_numpy().reshape(784, 28000)
print(test.shape)

(784, 28000)


In [None]:
h1 = np.dot(np.transpose(w1), test) + b1
H1 = ReLU(h1)
h2 = np.dot(np.transpose(w2), H1) + b2
H2 = softmax(h2)
predicted_y = np.argmax(H2, axis=0)

results = pd.Series(predicted_y,name="Label")
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)
submission.to_csv("predict3.csv",index=False)