In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

In [None]:
# Read the training table from disk, then show its first five rows as the
# cell output.
data = pd.read_csv(filepath_or_buffer='data/train.csv')
data.head(n=5)

In [None]:
import scipy.io

# Load the two MATLAB files: one holding the 'X' / 'y' arrays and one holding
# the 'Theta1' / 'Theta2' weight matrices.
# NOTE(review): presumably reference data and pre-trained weights used to
# sanity-check the network below — confirm.
mat_data = scipy.io.loadmat('data/ex4data1.mat')
mat_weights = scipy.io.loadmat('data/ex4weights.mat')

X_course, y_course = mat_data['X'], mat_data['y']

# Unroll Theta1 followed by Theta2 into a single flat 1-D weight vector.
weights_course = np.concatenate(
    [mat_weights[key].flatten() for key in ('Theta1', 'Theta2')],
    axis=0,
)

In [None]:
# Training split: the first 5000 rows of `data`. Column 0 becomes the target
# `y`; every remaining column becomes a feature in `X`.
X, y = data.iloc[:5000, 1:], data.iloc[:5000, 0]

In [None]:
# Held-out split: row positions 5002 up to (not including) 15052 of `data`,
# with the same column layout as the training split (column 0 is the target).
# NOTE(review): rows 5000 and 5001 end up in neither split — confirm the gap
# is intentional.
X_test, y_test = data.iloc[5002:15052, 1:], data.iloc[5002:15052, 0]

In [None]:
class NeuralNetwork():
    """Fully connected feed-forward network with sigmoid activations.

    All weights travel as ONE flat 1-D vector so they can be handed to an
    optimizer. For the connection feeding layer ``k`` (1-based) the vector
    holds ``lsize[k] * (lsize[k-1] + 1)`` values, laid out row-major as a
    ``(lsize[k], lsize[k-1] + 1)`` matrix whose first column contains the
    bias weights. ``Weights`` returns the transpose of that matrix, so in
    the matrices used for the forward pass the bias weights are row 0.

    Attributes:
        X: training inputs as a float32 array.
        y: one-hot encoding of the training labels, shape (samples, labels).
        labels: sorted unique label values; column j of ``y`` is ``labels[j]``.
        lsize: layer sizes, input layer first, output layer last.
        net_input: pre-activations of the hidden layers from the most
            recent forward pass (``None`` before the first pass).
        net_output: activations of every layer from the most recent forward
            pass; all but the last carry a leading bias column.
        net_weights_trained: placeholder for trained weights (not set by
            any method in this class).
    """

    def __init__(self, X_data, y_data, layers_size):
        """Store the training data and one-hot encode the labels.

        Args:
            X_data: array-like of samples, one row per sample.
            y_data: array-like of class labels, one per sample; a column
                vector of shape (samples, 1) is flattened first.
            layers_size: sequence of layer sizes (input, hidden..., output).

        Raises:
            ValueError: if fewer than two layers are given.
        """
        if len(layers_size) < 2:
            raise ValueError("layers_size needs at least an input and an output layer")

        self.X = np.array(X_data, dtype = 'float32')

        # Use the constructor argument (not notebook globals) and derive the
        # labels before they are needed for the one-hot encoding.
        y_flat = np.asarray(y_data).ravel()
        self.labels = np.unique(y_flat)
        self.y = (y_flat[:, None] == self.labels[None, :]).astype(int)

        self.lsize = layers_size
        self.net_input = None
        self.net_output = None
        self.net_weights_trained = None

    def AddBias(self, x, value):
        """Return a copy of ``x`` with ``value`` prepended.

        For input with more than one dimension a new first column is
        inserted; for 1-D input a new first element is inserted.
        """
        if x.ndim > 1:
            return np.insert(x, 0, value, axis = 1)
        return np.insert(x, 0, value, axis = 0)

    def Sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1/(1 + np.exp(-z))

    def SigmoidGrad(self, z):
        """Element-wise derivative of the logistic function at ``z``."""
        s = self.Sigmoid(z)
        return s * (1 - s)

    def RandomWeights(self):
        """Draw a flat vector of random initial weights for every layer.

        Each connection i -> i+1 is sampled uniformly from [-eps, eps) with
        eps = sqrt(6) / sqrt(lsize[i] + lsize[i+1]).

        Returns:
            1-D array with the weights of all connections concatenated in
            layer order.
        """
        net_weights = []
        # Iterate over connections by index: comparing layer *sizes* would
        # skip every layer that happens to be as wide as the output layer.
        for i in range(len(self.lsize) - 1):
            fan_in, fan_out = self.lsize[i], self.lsize[i + 1]
            eps = np.sqrt(6)/np.sqrt(fan_in + fan_out)
            # Shape matches the flat layout that Weights() reshapes from.
            weights = np.random.rand(fan_out, fan_in + 1) * 2 * eps - eps
            net_weights.append(weights.flatten())
        return np.concatenate(net_weights, axis = 0)

    def Weights(self, net_weights, layer):
        """Extract the weight matrix feeding ``layer`` from the flat vector.

        Args:
            net_weights: flat 1-D numpy array holding all weights.
            layer: 1-based index of the receiving layer
                (1 .. len(lsize) - 1).

        Returns:
            Array of shape (lsize[layer-1] + 1, lsize[layer]); row 0 holds
            the bias weights.

        Raises:
            IndexError: if ``layer`` is outside 1 .. len(lsize) - 1.
        """
        if not 1 <= layer < len(self.lsize):
            raise IndexError("layer must be in 1..%d, got %r"
                             % (len(self.lsize) - 1, layer))

        n_in = self.lsize[layer - 1] + 1
        n_out = self.lsize[layer]
        # Offset of this block = total size of all preceding blocks.
        start = sum((self.lsize[i] + 1) * self.lsize[i + 1]
                    for i in range(layer - 1))
        stop = start + n_in * n_out
        return net_weights[start:stop].reshape(n_out, n_in).T

    def ForwardPropagation(self, net_weights, X):
        """Run a forward pass and cache the per-layer values on ``self``.

        After the call ``self.net_output[k]`` is the activation of layer
        ``k`` (with a leading bias column for every layer except the last)
        and ``self.net_input[k-1]`` is the pre-activation of hidden layer
        ``k``. Nothing is returned.
        """
        net_in = []
        net_out = [self.AddBias(X, 1)]

        N = len(self.lsize)
        for layer in range(1, N):
            z = np.matmul(net_out[-1], self.Weights(net_weights, layer))
            if layer == N - 1:
                # Output layer: no bias column, pre-activation not cached.
                net_out.append(self.Sigmoid(z))
            else:
                net_in.append(z)
                net_out.append(self.AddBias(self.Sigmoid(z), 1))

        self.net_input = net_in
        self.net_output = net_out

    def CostFunction(self, net_weights, X, y, lmbda):
        """Regularised cross-entropy cost of the network on (X, y).

        Args:
            net_weights: flat 1-D weight vector.
            X: samples, one per row.
            y: targets that broadcast against the network output, e.g. the
                one-hot matrix ``self.y``.
            lmbda: L2 regularisation strength; bias weights are excluded.

        Returns:
            Scalar cost, averaged over the rows of ``X``.
        """
        self.ForwardPropagation(net_weights, X)

        m = X.shape[0]
        h = self.net_output[-1]

        nonreg_term = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h))/m

        # Weights() puts the bias weights in row 0, so drop that row (not a
        # column) before squaring.
        weights_sq = sum(np.sum(self.Weights(net_weights, layer)[1:, :]**2)
                         for layer in range(1, len(self.lsize)))
        reg_term = lmbda * weights_sq/(2*m)
        return nonreg_term + reg_term

    def BackwardPropagation(self, net_weights, X, y, lmbda):
        """Gradient of ``CostFunction`` with respect to ``net_weights``.

        A forward pass is performed first, so the result never depends on
        activations cached for a different weight vector.

        Args:
            net_weights: flat 1-D weight vector.
            X: samples, one per row.
            y: targets with the same shape as the network output.
            lmbda: L2 regularisation strength; bias weights are excluded.

        Returns:
            1-D array with the same layout and length as ``net_weights``.
        """
        self.ForwardPropagation(net_weights, X)

        m = X.shape[0]
        N = len(self.lsize)

        net_grad = []
        delta = None
        for layer in range(N-1, 0, -1):
            if layer == N - 1:
                delta = self.net_output[layer] - y
            else:
                # Push the error back through the next layer's weights,
                # dropping the bias row, then through this layer's sigmoid.
                weights = self.Weights(net_weights, layer+1)
                # net_input[k] belongs to layer k+1, hence the -1.
                grad = self.SigmoidGrad(self.net_input[layer-1])
                delta = np.matmul(delta, weights.T[:,1:]) * grad

            layer_output = self.net_output[layer-1]

            # Regularisation term in the flat (outputs, inputs + 1) layout;
            # column 0 is the bias and is not penalised.
            reg_weights = self.Weights(net_weights, layer).T.copy()
            reg_weights[:, 0] = 0

            layer_grad = (np.matmul(delta.T, layer_output)
                          + lmbda * reg_weights)/m
            net_grad.insert(0, layer_grad)

        return np.concatenate([g.flatten() for g in net_grad], axis = 0)
        
        
#     def Fit(self, lmbda):
#         random_weights = self.RandomWeights()
#         random_weights = np.concatenate([i.flatten() for i in random_weights],
#                                         axis = 0)
#         args_ = (self.X, self.y, lmbda)
#         res = minimize(fun = self.CostFunction, args = args_, 
#                        x0=random_weights, method = 'TNC',
#                        jac = self.BackwardPropagation)
        
#         self.net_weights_trained = res.x
#         print(res)
        
#     def Predict(self, x, y):
#         x = np.array(x, dtype = 'float32')
#         self.ForwardPropagation(self.net_weights_trained, x)
#         pred_mat = self.net_output[-1]
#         ids_of_max = np.argmax(pred_mat, axis = 1)
#         pred = np.array([self.labels[i] for i in ids_of_max]) 
#         print(pred)     