<a href="https://colab.research.google.com/github/Wasabi-Bobby/MachineLearningHomework/blob/master/LogisticRegressionWithNumpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from keras.datasets import mnist

(train_images_original, train_labels_original), (test_images_original, test_labels) = mnist.load_data()

train_images = train_images_original.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255

train_labels = train_labels_original.reshape((60000, 1))

test_images = test_images_original.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

In [55]:
print(train_labels)

[[5]
 [0]
 [4]
 ...
 [5]
 [6]
 [8]]


In [0]:
import numpy as np
import math


# Make 10 trainers
# Each handle a digit from 0 - 9
# Each will train on 9 digits
# For testing we make all of them test on the number and argmax the highest option
# If the highest option is 50 percent or below then it will say the the item isn't a number

def sigmoid_double(x):
    # Simple implementation of the sigmoid function for double values.
    return 1.0 / (1.0 + np.exp(-x))


def sigmoid_prime_double(x):
    # Simple implementation of the derivative of of the sigmoid function for double values.
    return sigmoid_double(x) * (1 - sigmoid_double(x))


def sigmoid(z):
    # Simple implementation of the sigmoid function for vectors.
    return np.vectorize(sigmoid_double)(z)


def sigmoid_prime(z):
    # Simple implementation of the derivative of the sigmoid function for vectors.
    return np.vectorize(sigmoid_prime_double)(z)
  
class Classifier():
  def __init__(self, digit):
    self.current_image = 0
    self.train_images_personal = np.zeros((10000, 28*28))
    self.train_labels_personal  = np.zeros((10000, 1))
    for i in range(len(train_images)):
      current_index = 0
      if train_labels[i] == digit:
        self.train_images_personal[current_index] = train_images[i]
        self.train_labels_personal[current_index] = train_labels[i]
    
  def get_and_increment(self):
    self.current_image += 1
    return self.train_images_personal[self.current_image-1], self.train_labels_personal[self.current_image-1] 

class Layer:
  
  def __init__(self, number_classifiers, output_dim):
    self.number_classifiers = 10
    self.output_dim = output_dim
    self.loss_points = np.array([])
    self.mini_batch_size = 1000
    # Change this to either MSE or BCE
    self.option = "MSE"
    # Rate of learning
    self.rate = 0.1
    
    self.classifiers = []
    
    for i in range(number_classifiers):
      self.classifiers.append(Classifier(i))

    # Added a division of sqrt(input) for initializaing weights
    # This is known as the Xavier Initialization and has less variance
    self.weight = np.random.randn(output_dim, number_classifiers) * 1/math.sqrt(number_classifiers)
    self.bias = np.random.randn(output_dim, 1)
    
  def test(self):
    correct_tally   = 0
    current_training_weight = np.zeros(10)
    for i in range(len(test_labels)):
      a_matrix = np.array([])
      current_feature = test_images_original[i]
      for j in range(number_classifiers):
        current_training_weight.fill(0)
        current_weight = self.weight[0][j]
        current_training_weight[j] = current_weight
        z = current_training_weight.transpose() * current_feature + self.bias
        a_matrix.append(sigmoid(z))
      maximum_arg = a_matrix.argmax()
      if maximum_arg == test_labels[i]:
        correct_tally += 1
    print("The classifiers managed to have an accurate guess rate of : " + str(len(test_labels) / correct_tally))
      

  def forward_prop(self):
    # This is for what classifier should be training with the data atm
    # So we will give 100 examples to each classifier for later use
    # This will be a matrix of [10, 1]
    current_training_weight = np.zeros(10)
    for i in range(self.mini_batch_size):
      current_index = i % 10
      current_training_weight.fill(0)
      current_weight = self.weight[0][current_index]
      # current_training_weight[current_index] = current_weight
      current_feature, current_label = self.classifiers[current_index].get_and_increment()
      z = current_weight * current_feature + self.bias
      a = sigmoid(z)
      self.backward_prop(current_index, a, current_label, z, current_feature)
      
  def backward_prop(self, current_training_classifier, prediction, label, z, feature):
    self.update_weights(current_training_classifier, prediction, label, z, feature)
    # update_bias()
    
  
  def update_weights(self, current_training_classifier, prediction, label, z, feature):
    # Have to make this change all weights, so either for loop or make squared error give a matrix to transpose
    # Note: Even though I'm passing label, thinking about it logically this should always be one
    # This is because any time I am testing a number on a classifier it should be equal to that number that I am passing
    # Therefore, it should always be one for this scenario and we are merely training it to see if it can accurately detect that the number is for that classifier or not
    # Also, 1 means that it is that number while 0 means it isn't since sigmoid gives a value from 0-1 with how we calculate it
    if self.option == "MSE":
      new_value = self.weight[0][current_training_classifier] - self.rate * self.squared_error_gradient(prediction, 1, z, feature)
      print(new_value)
      self.weight[0][current_training_classifier] = new_value
    elif self.option == "BCE":
      self.weight[0][current_training_classifier] = self.weight[0][current_training_classifier] - self.rate * self.binary_cross_entropy_deriv(prediction, 1)
   
  def update_bias(self):
    if self.option == "MSE":
      self.bias - self.rate * self.squared_error_gradient(prediction, label, z, feature)
    elif self.option == "BCE":
      self.bias - self.rate * self.binary_cross_entropy_deriv(prediction, label)
  
  def squared_error_loss(self, prediction, label):
    return 0.5 * (prediction - label) * (prediction - label)
    
  def squared_error_loss_deriv(self, prediction, label):
    return prediction - label
  
  def squared_error_gradient(self, prediction, label, z, feature):
    return (prediction - label) * sigmoid_prime(z) * feature
  
  def binary_cross_entropy(self, prediction, label):
    return -1 * label * math.log(prediction) - (1 - y) * math.log(1-a)
  
  def binary_cross_entropy_deriv(self, prediction, label):
    return -1 * label * (1 - prediction) + (1 - label) * prediction
    

In [87]:
new_layer = Layer(10, 1)
new_layer.forward_prop()
new_layer.test()

[[0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.60426268 0.60426268
  0.60426268 0.60426268 0.60426268 0.60426268 0.

ValueError: ignored