# Problem 1. American Handwriting

# Part 1: Derivative

*f*(x)$_i$ = $\frac{e_i^x}{\sum_je_j^x}$ 

gradient = $\begin{cases} f(x)_i(1-f(x)_i) & j = i\\-f(x)_if(x)_j & j\ne i \end{cases}$

In [86]:
import math
import numpy as np
# help from: https://slowbreathing.github.io/articles/2019-05/softmax-and-its-gradient
def softmax(vector):
  ''' Takes a vector (np.array) and returns softmax matrix '''
  result = []
  for row in vector:
    vals = np.asarray(row)
    result.append(np.exp(vals)/float(sum(np.exp(vals))))

  return np.array(result)

def softmax_gradient(vector):
  ''' 
  Takes a softmax vector (np.array) and returns the softmax gradient matrix 
  '''
  diag = np.asarray(np.diag(vector))
  size = len(diag)
  matrix = np.empty((size, size), dtype=float)
  for i in range(len(matrix)):
    for j in range(len(matrix)):
      if i == j:
        matrix[i][j] = vector[i] * (1-vector[i])
      else:
        matrix[i][j] = -vector[i]*vector[j]
  return matrix

In [78]:
# Test run of softmax and gradient
x = np.array([[1,3,5,7]])
sm = softmax(x)
print(sm[0])
sm_grad = softmax_gradient(sm[0])
print()
print(sm_grad)

[0.00214401 0.0158422  0.11705891 0.86495488]
<class 'numpy.float64'>
[0.00214401 0.0158422  0.11705891 0.86495488]

[[ 2.13941201e-03 -3.39658185e-05 -2.50975338e-04 -1.85447085e-03]
 [-3.39658185e-05  1.55912258e-02 -1.85447085e-03 -1.37027892e-02]
 [-2.50975338e-04 -1.85447085e-03  1.03356124e-01 -1.01250678e-01]
 [-1.85447085e-03 -1.37027892e-02 -1.01250678e-01  1.16807938e-01]]


# Part 2: Simple


%%time gives the current time, use to determine which is faster
Python Engineer on YouTube has good from scratch vids

make sure keras input layer dimension should correspond with your scratch program

follow up on learning rate for keras in 3. keep it simple

In later hidden layers, could be smaller, eventually getting into the 10 node output layer

Below is the code for the neural network structure.

In [90]:
# help from: https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65

class Layer:
  def __init__(self, activ_function, activ_prime_function, in_size, out_size):
    '''
    Takes as parameters:
      activ_function: activation function
      activ_prime_function: the gradient of activation function
      in_size: input size
      out_size: output size
    Opting for a more complex layer class than found in link to keep together.
    '''
    self.input = None
    self.input_adjusted = None # used for holding adjusted input
    self.output = None
    self.activ = activ_function
    self.activ_prime = activ_prime_function
    self.weights = np.random.rand(in_size, out_size) - 0.5
    self.bias = np.random.rand(1, out_size) - 0.5

  def forward_prop(self, input_data):
    '''
    Takes input_data as a parameter, sets self.output as:
      activation of dot product of input_data and weights + bias
    Returns self.output.
    '''
    self.input = input_data
    self.input_adjusted = np.dot(self.input, self.weights) + self.bias
    self.output = self.activ(self.input_adjusted)
    return self.output   

  def back_prop(self, out_err, learn_rate):
    '''
    Takes as parameters:
      out_err: a given output error
      learn_rate: a given learning rate
    Returns:
      input error from element-wise multiplication of matrices (hidden layer)
    '''
    adjusted_err = self.activ_prime(self.input_adjusted[0]) * out_err

    in_err = np.dot(adjusted_err, self.weights.T)
    print(self.weights.shape)
    weight_err = np.dot(self.input.T, adjusted_err)
    self.weights -= learn_rate * weight_err
    self.bias -= learn_rate * adjusted_err
    return in_err

class NeuralNet:
  def __init__(self):
    self.layers = []
    self.loss = None
    self.loss_prime = None
    self.err = []
  
  def add_layer(self, layer):
    self.layers.append(layer)

  def set_loss(self, loss, loss_prime):
    self.loss = loss
    self.loss_prime = loss_prime
  
  def predict(self, input_data):
    '''
    Returns predicted output for elements in input_data.
    '''
    result = []
    
    for element in input_data:
      for layer in self.layers:
        element = layer.forward_prop(element)
      result.append(element)
    
    return result

  def fit(self, x_train, y_train, iterations, learn_rate):
    tot_count = len(x_train)
    for i in range(iterations):
      err = 0
      for j in range(tot_count):
        # train NN
        x = x_train[j]
        for layer in self.layers:
          x = layer.forward_prop(x)

        y = y_train[j]
        pred_err = self.loss_prime(y, x)
        for layer in reversed(self.layers):
          pred_err = layer.back_prop(pred_err, learn_rate)
        
        # compute loss to store
        err += self.loss(y, x)
      err /= tot_count
      self.err.append(err)

In [49]:
# sigmoid functions
def sigmoid(x):
  return 1/(1+np.exp(-x))

def sigmoid_prime(x):
  return sigmoid(x) * (1 - sigmoid(x))

# L2 error functions
def mse(x, x_pred):
  return np.mean((np.power(x-x_pred, 2)))

def mse_prime(x, x_pred):
  return 2*(x_pred-x)/x.size;

In [None]:
# Test run of NN for debugging and validation
def tanh(x):
    return np.tanh(x);

def tanh_prime(x):
    return 1-np.tanh(x)**2;

x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

nn = NeuralNet()
nn.add_layer(Layer(tanh, tanh_prime, 2, 3))
nn.add_layer(Layer(tanh, tanh_prime, 3, 1))

nn.set_loss(mse, mse_prime)
nn.fit(x_train, y_train, 1000, 0.1)

print(nn.err[-1])

In [44]:
from keras.datasets import mnist
from keras.utils import np_utils

(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [45]:
# reshape and normalize input data, copied from earlier mentioned article
train_X = train_X.reshape(train_X.shape[0], 1, 28*28)
train_X = train_X.astype('float32')
train_X /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
train_y = np_utils.to_categorical(train_y)

# same for test data : 10000 samples
test_X = test_X.reshape(test_X.shape[0], 1, 28*28)
test_X = test_X.astype('float32')
test_X /= 255
test_y = np_utils.to_categorical(test_y)

# end of copied normalization

In [91]:
nn_mnist = NeuralNet()
input_layer = Layer(sigmoid, sigmoid_prime, 28*28, 128)
output_layer = Layer(softmax, softmax_gradient, 128, 10)
nn_mnist.add_layer(input_layer)
nn_mnist.add_layer(output_layer)
nn_mnist.set_loss(mse, mse_prime)
nn_mnist.fit(train_X, train_y, 10, 0.1)

(128, 10)


ValueError: ignored

In [81]:
temp = np.array([[2.11068126,  1.47989292,  3.84471379,  1.01363238, -2.13620242,  1.11731586,
  -2.33209269,  3.35144424, -1.04378665, -0.36827289]])
check = temp[0]*1-temp[0]
print(type(check))

<class 'numpy.ndarray'>
