<a href="https://colab.research.google.com/github/Wasabi-Bobby/MachineLearningHomework/blob/master/LogisticRegressionWithNumpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from keras.datasets import mnist

# Load the raw MNIST arrays; Keras downloads the archive on first use.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

# Flatten every 28x28 image into a 784-element row and rescale the pixel
# values by 1/255. The number of rows is inferred (-1) instead of being
# hard-coded to 60000 / 10000, so the same cell also works on a subset.
train_images = train_images_original.reshape((-1, 28 * 28)).astype('float32') / 255
test_images = test_images_original.reshape((-1, 28 * 28)).astype('float32') / 255

# Labels become column vectors of shape (n_samples, 1).
train_labels = train_labels_original.reshape((-1, 1))
test_labels = test_labels_original.reshape((-1, 1))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [12]:
import numpy as np

# Shuffle the integers 0..9, then use argmax to find where the largest
# value sits; indexing back with that position recovers the maximum itself.
a = np.random.permutation(10)
arg_max = a.argmax()
print(a, arg_max, a[arg_max], sep="\n")

[3 2 6 5 4 0 7 1 8 9]
9
9


In [0]:
import numpy as np
import math


# Make 10 trainers
# Each handle a digit from 0 - 9
# Each will train on 9 digits
# For testing we make all of them test on the number and argmax the highest option
# If the highest option is 50 percent or below then it will say that the item isn't a number

def sigmoid_double(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) without overflowing.

    The naive formula evaluates ``np.exp(-x)``, which overflows to ``inf``
    (with a RuntimeWarning) once ``x`` is a large negative number. This
    version only ever exponentiates a non-positive value.

    Parameters:
        x: a scalar or a NumPy array of real values.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``. For ``x >= 0``
        the arithmetic is the same as in the naive formula.
    """
    # exp(-|x|) always lies in [0, 1], so it can underflow to 0 but never
    # overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x)).  x < 0: the algebraically equal
    # exp(x) / (1 + exp(x)).  Both share the denominator 1 + exp(-|x|).
    numerator = np.where(x >= 0, 1.0, decay)
    return numerator / (1.0 + decay)


def sigmoid_prime_double(x):
    """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
    activation = sigmoid_double(x)
    complement = 1 - activation
    return activation * complement


def sigmoid(z):
    """Apply the sigmoid element-wise to a scalar or an array.

    ``sigmoid_double`` is built from NumPy ufuncs and therefore already
    broadcasts over arrays, so it is called directly. The previous
    ``np.vectorize`` wrapper ran a Python-level loop over every element and
    raised ``ValueError`` on empty input.

    Parameters:
        z: a scalar or array-like of real numbers; it is converted to a
           float64 array before evaluation.

    Returns:
        numpy.ndarray with the shape of ``z`` (0-d for a scalar, as the
        ``np.vectorize`` wrapper also returned).
    """
    values = np.asarray(z, dtype=float)
    return np.asarray(sigmoid_double(values))


def sigmoid_prime(z):
    """Apply the sigmoid derivative element-wise to a scalar or an array.

    ``sigmoid_prime_double`` is plain NumPy arithmetic and already
    broadcasts, so it is called directly. The previous ``np.vectorize``
    wrapper ran a Python-level loop over every element and raised
    ``ValueError`` on empty input.

    Parameters:
        z: a scalar or array-like of real numbers; it is converted to a
           float64 array before evaluation.

    Returns:
        numpy.ndarray with the shape of ``z`` (0-d for a scalar, as the
        ``np.vectorize`` wrapper also returned).
    """
    values = np.asarray(z, dtype=float)
    return np.asarray(sigmoid_prime_double(values))

class Layer:
  """A bank of one-vs-rest logistic classifiers for the MNIST digits.

  Row ``j`` of ``self.weight`` scores "is this image digit ``j``?" through a
  sigmoid, and ``test`` predicts the digit whose classifier scores highest.
  Training is per-sample gradient descent on either squared error ("MSE") or
  binary cross-entropy ("BCE"), selected by ``self.option``.

  The class reads the module-level arrays ``train_images``, ``train_labels``,
  ``test_images`` and ``test_labels`` and the module-level ``sigmoid`` helper.

  NOTE(review): one scalar ``self.bias`` is shared by every classifier; a
  conventional one-vs-rest model keeps a separate bias per classifier.
  """

  def __init__(self, number_classifiers, output_dim):
    """Create the classifiers with random weights and a random shared bias.

    Parameters:
      number_classifiers: how many one-vs-rest classifiers (weight rows) to
        create. It is now stored as given; it used to be overwritten with a
        literal 10, which disagreed with the weight matrix for other values.
      output_dim: stored on the instance; not used by any method here.
    """
    self.number_classifiers = number_classifiers
    self.output_dim = output_dim
    self.loss_points = np.array([])
    # forward_prop visits `epoch` consecutive chunks of `mini_batch_size`
    # samples, updating the parameters after every single sample.
    self.mini_batch_size = 5000
    self.epoch = 10
    # Change this to either MSE or BCE
    self.option = "MSE"
    # Rate of learning
    self.rate = 1

    # Random order in which the training samples are visited.
    self.s = np.random.permutation(len(train_labels))

    # Weights are drawn from a standard normal distribution.
    # NOTE(review): an earlier comment here described Xavier initialisation
    # (dividing by sqrt(input size)), but no such scaling is applied.
    self.weight = np.random.randn(number_classifiers, 28 * 28)
    self.bias = np.random.random_sample()

  def test(self):
    """Score the classifiers on the test set, print and return the accuracy.

    Every test image is scored by all classifiers at once; the predicted
    digit is the index of the highest sigmoid output.

    Returns:
      The percentage (0-100) of test images classified correctly. 0.0 is
      returned for an empty test set.
    """
    total = len(test_labels)
    correct_tally = 0

    if total > 0:
      # (n_samples, n_classifiers) matrix of activations.
      scores = sigmoid(test_images.dot(self.weight.T) + self.bias)
      predicted = scores.argmax(axis=1)
      correct_tally = int(np.sum(predicted == test_labels.reshape(-1)))

    if correct_tally == 0:
      print("The classifiers managed to have an accurate guess rate of : 0% ")
      return 0.0

    accuracy = correct_tally / total * 100
    print("The classifiers managed to have an accurate guess rate of : " + str(accuracy))
    return accuracy

  def forward_prop(self):
    """Train every classifier with one gradient step per visited sample.

    ``self.epoch * self.mini_batch_size`` samples are visited in the order
    given by ``self.s``. If that product exceeds the number of training
    samples the position wraps around to the start of the permutation
    instead of raising IndexError.
    """
    sample_count = len(self.s)
    if sample_count == 0:
      return

    for epoch_index in range(self.epoch):
      for i in range(self.mini_batch_size):
        position = (epoch_index * self.mini_batch_size + i) % sample_count
        sample = self.s[position]
        current_feature = train_images[sample]
        current_label = train_labels[sample]

        # The classifiers are updated one after another because they share
        # self.bias: each update changes the bias the next classifier sees.
        for j in range(len(self.weight)):
          # Classifier j is trained towards 1 on its own digit and towards 0
          # on every other digit.
          label_for_weight = 1 if j == current_label else 0

          z = current_feature.T.dot(self.weight[j]) + self.bias
          a = sigmoid(z)

          self.backward_prop(j, a, label_for_weight, z, current_feature)

  def backward_prop(self, current_training_classifier, prediction, label, z, feature):
    """Apply one gradient step to the given classifier's weights, then to the bias."""
    self.update_weights(current_training_classifier, prediction, label, z, feature)
    self.update_bias(prediction, label, z, feature)

  def update_weights(self, current_training_classifier, prediction, label, z, feature):
    """Take one gradient-descent step on a single classifier's weight row.

    Parameters:
      current_training_classifier: index of the weight row to update.
      prediction: sigmoid output of that classifier for ``feature``.
      label: 1 when the sample is this classifier's digit, otherwise 0.
      z: pre-activation value (forwarded to the MSE gradient helper).
      feature: the flattened input image.

    Any ``self.option`` other than "MSE" or "BCE" leaves the weights unchanged.
    """
    if self.option == "MSE":
      gradient = self.squared_error_gradient(prediction, label, z, feature)
    elif self.option == "BCE":
      # The cross-entropy gradient with respect to the weights is
      # (prediction - label) * feature. The feature factor used to be
      # missing, so every weight component was shifted by the same scalar.
      gradient = self.binary_cross_entropy_deriv(prediction, label) * feature
    else:
      return

    self.weight[current_training_classifier] = self.weight[current_training_classifier] - self.rate * gradient

  def update_bias(self, prediction, label, z, feature):
    """Take one gradient-descent step on the shared bias.

    ``z`` and ``feature`` are accepted for symmetry with ``update_weights``
    but are not used.
    """
    if self.option == "MSE":
      # Chain rule: dL/da * da/dz, with da/dz = a * (1 - a) for a sigmoid.
      self.bias = self.bias - self.rate * self.squared_error_loss_deriv(prediction, label) * (prediction * (1 - prediction))
    elif self.option == "BCE":
      self.bias = self.bias - self.rate * self.binary_cross_entropy_deriv(prediction, label)

  def squared_error_loss(self, prediction, label):
    """Return the halved squared error 0.5 * (prediction - label) ** 2."""
    return 0.5 * (prediction - label) * (prediction - label)

  def squared_error_loss_deriv(self, prediction, label):
    """Return the derivative of the halved squared error w.r.t. the prediction."""
    return prediction - label

  def squared_error_gradient(self, prediction, label, z, feature):
    """Return the squared-error gradient with respect to one weight row.

    ``prediction * (1 - prediction)`` is the sigmoid derivative written in
    terms of the activation, so ``z`` itself is not needed.
    """
    return (prediction - label) * (prediction * (1 - prediction)) * feature

  def binary_cross_entropy(self, prediction, label):
    """Return the binary cross-entropy of ``prediction`` against ``label``.

    The previous body referenced the undefined names ``y`` and ``a`` and
    raised NameError on every call. The prediction is clipped away from
    exactly 0 and 1 so that the logarithm stays finite.
    """
    eps = 1e-12
    clipped = np.clip(prediction, eps, 1 - eps)
    return -1 * label * np.log(clipped) - (1 - label) * np.log(1 - clipped)

  def binary_cross_entropy_deriv(self, prediction, label):
    """Return the cross-entropy derivative with respect to the pre-activation.

    The expression simplifies algebraically to ``prediction - label``; the
    sigmoid derivative is already folded in.
    """
    return -1 * label * (1 - prediction) + (1 - label) * prediction
    

In [34]:
# Build the ten digit classifiers, train them, then report test accuracy.
new_layer = Layer(number_classifiers=10, output_dim=1)
# Untrained baseline, kept disabled: new_layer.test()
new_layer.forward_prop()
new_layer.test()

The classifiers managed to have an accurate guess rate of : 42.120000000000005


In [0]:
 '''
 Comment / Removal section
class Classifier():
  def __init__(self, digit):
    self.current_image = 0
    self.train_images_personal = np.zeros((10000, 28*28))
    self.train_labels_personal  = np.zeros((10000, 1))
    for i in range(len(train_images)):
      current_index = 0
      if train_labels[i] == digit:
        self.train_images_personal[current_index] = train_images[i]
        self.train_labels_personal[current_index] = train_labels[i]

  def get_and_increment(self):
    self.current_image += 1
    return self.train_images_personal[self.current_image-1], self.train_labels_personal[self.current_image-1] 
'''
