In [None]:
from IPython.core.debugger import set_trace
import scipy.io
from scipy.special import expit #sigmoid function
from scipy.stats import logistic #for sigmoid gradient
from scipy.optimize import minimize
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Class labels of the network. Position i in `alphabet` is the label used for
# class index i; the '_lc' / '_uc' suffix marks the lowercase / uppercase form.
_greek_letter_names = (
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
)

# Build both case variants per letter, then order the labels lexicographically.
alphabet = sorted(
    name + suffix
    for name in _greek_letter_names
    for suffix in ('_lc', '_uc')
)

In [None]:
class NeuralNet():
    """Feed-forward neural network with one hidden layer and sigmoid activations.

    The network is trained by minimizing the regularized cross-entropy cost with
    scipy's conjugate-gradient optimizer; gradients come from backpropagation.

    Parameters
    ----------
    n_input : int
        Number of input features (without the bias unit).
    n_output : int
        Number of output units / classes.
    n_hidden : int
        Number of hidden units (without the bias unit).
    lambda_ : float
        L2 regularization strength; bias weights are never regularized.
    epsilon : float
        Initial weights are drawn uniformly from [-epsilon, epsilon).
    random_state : int or None
        Seed handed to ``np.random.seed``.
    max_iter : int
        Maximum number of optimizer iterations used by ``fit``.
    """

    def __init__(self, n_input, n_output, n_hidden=25, lambda_=0, epsilon=0.10, random_state=None, max_iter=1000):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.lambda_ = lambda_
        self.epsilon = epsilon
        # NOTE: this seeds NumPy's *global* generator, so every later np.random
        # call in the same process is affected as well.
        np.random.seed(random_state)
        self.max_iter = max_iter
        self.theta1, self.theta2 = self._initialize_weights()

    def _unroll_weights(self, theta1, theta2):
        """Flatten theta1 and theta2 (column-major) into one new 1-D vector.

        The optimizer works on a single parameter vector, so both weight
        matrices are concatenated; ``_reshape_weights`` is the inverse.
        """
        flat1 = theta1.reshape(theta1.size, order='F')
        flat2 = theta2.reshape(theta2.size, order='F')

        return np.concatenate((flat1, flat2), axis=0)

    def _reshape_weights(self, weights):
        """Split a flat weight vector back into (theta1, theta2).

        Inverse of ``_unroll_weights``. The returned matrices may be views into
        ``weights``, so callers must not modify them in place.
        """
        split = self.n_hidden * (self.n_input + 1)
        theta1 = np.reshape(weights[:split], (self.n_hidden, self.n_input + 1), order="F")
        theta2 = np.reshape(weights[split:], (self.n_output, self.n_hidden + 1), order="F")

        return theta1, theta2

    def _initialize_weights(self):
        """Initialize theta1 and theta2 with uniform random values in [-epsilon, epsilon).

        theta1 has shape (n_hidden, n_input + 1),
        theta2 has shape (n_output, n_hidden + 1);
        the extra column holds the weights of the bias unit.
        """
        theta1 = np.random.rand(self.n_hidden, self.n_input + 1) * 2 * self.epsilon - self.epsilon
        theta2 = np.random.rand(self.n_output, self.n_hidden + 1) * 2 * self.epsilon - self.epsilon

        return theta1, theta2

    def _propagate_forward(self, X, theta1, theta2):
        """Run the feed-forward pass for all rows of X.

        Returns
        -------
        (a1, z2, a2, a3, z3)
            a1: inputs with a leading bias column, z2 / a2: hidden pre-activations
            and activations (a2 with a leading bias column), z3 / a3: output
            pre-activations and activations.
        """
        m = X.shape[0]
        bias = np.ones((m, 1))

        a1 = np.concatenate((bias, X), axis=1)
        z2 = a1.dot(theta1.T)
        a2 = np.concatenate((bias, expit(z2)), axis=1)
        z3 = a2.dot(theta2.T)
        a3 = expit(z3)

        return a1, z2, a2, a3, z3

    def _propagate_backward(self, weights, a1, z2, a2, a3, y):
        """Backpropagate the output error and return the unrolled gradient.

        ``weights`` is left untouched: the regularization terms are built on
        fresh arrays instead of zeroing columns of the (view) weight matrices.
        """
        m = y.shape[0]
        theta1, theta2 = self._reshape_weights(weights)

        d3 = a3 - y
        # derivative of the sigmoid: s(z) * (1 - s(z))
        sigmoid_z2 = expit(z2)
        d2 = np.dot(d3, theta2[:, 1:]) * sigmoid_z2 * (1 - sigmoid_z2)

        Delta1 = np.dot(d2.T, a1)
        Delta2 = np.dot(d3.T, a2)

        # Regularization terms; multiplying creates new arrays, so clearing the
        # bias column (column 0 in BOTH matrices) does not write into `weights`.
        reg1 = self.lambda_ * theta1
        reg2 = self.lambda_ * theta2
        reg1[:, 0] = 0
        reg2[:, 0] = 0

        theta1_grad = (Delta1 + reg1) / m
        theta2_grad = (Delta2 + reg2) / m

        return self._unroll_weights(theta1_grad, theta2_grad)

    def _compute_cost(self, weights, X, y):
        """Return (cost, gradient) of the regularized cross-entropy for X, y.

        Performs forward propagation, evaluates the cost and runs
        backpropagation to obtain the gradient with respect to ``weights``.
        """
        m = X.shape[0]

        theta1, theta2 = self._reshape_weights(weights)

        a1, z2, a2, a3, z3 = self._propagate_forward(X, theta1, theta2)

        # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) == logaddexp(0, z).
        # Working on z3 directly keeps the cost finite when the sigmoid saturates
        # (np.log(a3) / np.log(1 - a3) would produce -inf and 0 * inf = nan).
        losses = np.multiply(y, np.logaddexp(0, -z3)) + np.multiply(1 - y, np.logaddexp(0, z3))
        J_unreg = np.sum(losses) / m

        # bias columns are excluded from the penalty
        penalty = np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:]))
        J = J_unreg + self.lambda_ / (2 * m) * penalty

        grad = self._propagate_backward(weights, a1, z2, a2, a3, y)

        return (J, grad)

    def fit(self, X, y):
        """Train the network on X (m x n_input) and one-hot targets y (m x n_output).

        Stores the learned weights in ``theta1`` / ``theta2`` and returns self.
        """
        weights = self._unroll_weights(self.theta1, self.theta2)

        # jac=True: _compute_cost returns the cost and its gradient together
        result = minimize(fun=self._compute_cost, x0=weights, args=(X, y), method='CG', jac=True,
                          options={'maxiter': self.max_iter, 'disp': True})

        self.theta1, self.theta2 = self._reshape_weights(result.x)

        print('Cost in final computation: %f' % result.fun)

        return self

    def predict(self, X):
        """Return index of predicted class (highest probability) and array of prediction scores for all classes"""
        a1, z2, a2, a3, z3 = self._propagate_forward(X, self.theta1, self.theta2)
        p = a3.argmax(axis=1)

        return p, a3

    def save(self):
        """Save learned weights to make them available for the web app"""
        np.save('theta1.npy', self.theta1)
        np.save('theta2.npy', self.theta2)

In [None]:
# one output unit per label: 24 lowercase + 24 uppercase letters
output_layer_size = 48

# load drawings
X = np.load('pixel_data.npy')
y_data = np.load('label_data.npy')

# One-hot encode the labels: each entry of y_data selects the matching row of
# the identity matrix, which gives one row per example and one column per class.
identity = np.eye(output_layer_size)
y = identity[y_data]

# hyper-parameters of the network and the optimizer
hyperparameters = dict(
    n_input=400,
    n_output=output_layer_size,
    n_hidden=45,
    lambda_=2,
    epsilon=0.15,
    max_iter=3000,
    random_state=9,
)
neural_net = NeuralNet(**hyperparameters)

# train, then write out learned weights for the web app to use for predictions
neural_net.fit(X, y).save()

In [None]:
# Classify every training example: `predictions` receives the arg-max class index
# of each row, `scores` the output-layer activations that predict() returns with it.
predictions, scores = neural_net.predict(X)

In [None]:
# share of training examples whose predicted class index equals the stored label
m = len(X)
n_correct = (predictions == y_data).sum()
print('Training accuracy: ', n_correct*100 / m, "%")

In [None]:
# choose a random letter, display and show predictions

index = np.random.randint(0,X.shape[0],1)
x = X[index]  # indexing with a length-1 array keeps x two-dimensional (a single row)
x_predict = neural_net.predict(x)[0]
w, h = 20, 20  # width / height used to turn the flat pixel row back into an image
image = x.reshape(w,h)

# predict() returns a length-1 array for a single row; take element 0 explicitly,
# because int() on an array with ndim > 0 is deprecated in recent NumPy versions
x_predict_label = alphabet[int(x_predict[0])]
x_actual_label = alphabet[y[index].argmax()]

print('Predicted %s, labelled as %s' % (x_predict_label, x_actual_label))
plt.imshow(image,cmap='gray_r')

In [None]:
# Hidden layer visualization (disabled): the snippet below is kept inside a string
# literal so it is not executed; remove the triple quotes to enable it for debugging.
# It draws the weights of each row of theta1 (without the bias weight) as an image.
# NOTE(review): the 5x5 subplot grid only has room for 25 rows of theta1, while the
# training cell builds the network with n_hidden = 45, and `plt.show` is missing its
# call parentheses - both need fixing before this is enabled.
"""
fig = plt.figure(figsize=(10, 10), dpi=200)
for i,e in enumerate(neural_net.theta1):
    a=fig.add_subplot(5,5,i+1)
    image = e[1:].reshape(w,h)
    plt.imshow(image, cmap='gray_r')
plt.show
"""

In [None]:
# choose a random misclassification, display and show predictions

failures = np.asarray(np.where(predictions!=y_data)[0])

if failures.shape[0] == 0:
    # np.random.randint(0, 0, 1) would raise, so handle a perfect training fit explicitly
    print('No misclassified examples to display.')
else:
    index = np.random.randint(0,failures.shape[0],1)
    index = failures[index]
    x = X[index]  # a single row, still two-dimensional
    image = x.reshape(w,h)
    x_predict, x_predict_scores = neural_net.predict(x)
    # predict() returns length-1 / single-row arrays here; index them explicitly
    # instead of relying on the deprecated int()/float() conversion of arrays
    x_predict_label = alphabet[int(x_predict[0])]
    x_actual_label = alphabet[y[index].argmax()]

    print('Predicted %s, labelled as %s\n' % (x_predict_label, x_actual_label))

    # pair every label with its score for this example; confidence value descending
    results = sorted(zip(alphabet, x_predict_scores[0]),
                     key=lambda pair: float(pair[1]),
                     reverse=True)
    for label, score in results:
        print(label, '_______', score)

    plt.imshow(image,cmap='gray_r')