<a href="https://colab.research.google.com/github/c-daly/Notebooks/blob/main/PerceptronAndNeuralNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Basic, naive implementation of perceptron to classify 0s in the MNIST dataset.

In [142]:
import numpy as np
import keras.datasets as kd
from keras.datasets import mnist
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

(x_train, y_train), (x_test, y_test) = mnist.load_data()
zeros_y_train = np.ones_like(y_train) * -1
zeros_y_test = np.ones_like(y_test) * -1
zeros = np.where(y_train == 0)
zeros_y_train[zeros] = 1 
zeros = np.where(y_test == 0)
zeros_y_test[zeros] = 1

w = None
epoch = 0
while epoch < 2:
  for example in zip(x_train, zeros_y_train): #, y_train):
    y = example[1]
    x = example[0].flatten()
    
    if w is None:
      w = np.ones_like(x)
    y_pred = np.dot(w,x)
    if (np.dot(w,x)) * y < 0:
       w = w + .1 *(np.dot(example[1], x))
  epoch += 1

  y_pred = []

  for example in x_test:
    prediction = np.sign(np.dot(w, example.flatten()))
    y_pred.append(prediction)

print(classification_report(zeros_y_test, y_pred, labels=[1, -1]))
print(confusion_matrix(zeros_y_test, y_pred))

              precision    recall  f1-score   support

           1       0.98      0.88      0.93       980
          -1       0.99      1.00      0.99      9020

    accuracy                           0.99     10000
   macro avg       0.99      0.94      0.96     10000
weighted avg       0.99      0.99      0.99     10000

[[9006   14]
 [ 117  863]]


Now code is modified to transition from perceptron to logistic regression.  First attempt does not use a bias term.

In [176]:
import numpy as np
import keras.datasets as kd
from keras.datasets import mnist
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

class LogisticRegression:  

  def __init__(self, X, y, epochs=5, alpha=0.1):
    self.X = X
    self.y = y
    self.m = X.shape[0]
    self.epochs = epochs
    self.alpha = alpha
    self.w = None

  def sigmoid(self, z):
    s = 1 / (1 + np.exp(-z))
    return s 

  def compute_loss(self, y, y_pred):
    return (y_pred - y) ** 2 

  def fit(self):
    epoch = 0
    total_cost = 0
    w = None
    i = 0
    while epoch < self.epochs:
      for example in zip(self.X,self.y):
        x = example[0]
        y = example[1]
        if w is None:
          #w = np.zeros_like(x)
          w = np.random.randn(len(x))
        z = np.dot(x,w)   
        a = self.sigmoid(z)

        cost = self.compute_loss(y, a)
        total_cost += cost/self.m
        dW = (1/self.m) * np.dot((y-a), x) 
        w =  w + (self.alpha * dW)
      print(f"total cost for epoch: {total_cost}")
      total_cost = 0
      epoch += 1
    self.w = w
    return w

  def predict(self, X):
    y_pred = []
    for x in X:
      z = np.dot(x, self.w)
      a = self.sigmoid(z)
      if a >= 0.5:
        y_pred.append(1)
      else:
        y_pred.append(0)
    return y_pred  

(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train_zeros = np.zeros_like(y_train)
y_train_zeros[np.where(y_train == 0)] = 1

y_test_zeros = np.zeros_like(y_test)
y_test_zeros[np.where(y_test == 0)] = 1

train_x_flatten = x_train.reshape(x_train.shape[0], -1)   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = x_test.reshape(x_test.shape[0], -1)
lr = LogisticRegression(train_x_flatten, y_train_zeros)
lr.fit()
y_pred = lr.predict(test_x_flatten)
print(classification_report(y_test_zeros, y_pred))
print(confusion_matrix(y_test_zeros, y_pred))

#for y in zip(y_pred, y_test_zeros, y_test):
#  print(f"Prediction: {y[0]}, zero_y: {y[1]}, actual y: {y[2]}")



total cost for epoch: 0.08229424695301314
total cost for epoch: 0.04710463512085162
total cost for epoch: 0.037909295191461405
total cost for epoch: 0.032439208360717925
total cost for epoch: 0.028860075322790387
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      9020
           1       0.86      0.87      0.87       980

    accuracy                           0.97     10000
   macro avg       0.92      0.93      0.93     10000
weighted avg       0.97      0.97      0.97     10000

[[8883  137]
 [ 124  856]]


Now with full ten node output, still no hidden layers

In [37]:
import numpy as np
import keras.datasets as kd
from keras.datasets import mnist
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
w = None
(x_train, y_train), (x_test, y_test) = mnist.load_data()

train_x_flatten = x_train.reshape(x_train.shape[0], -1)   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = x_test.reshape(x_test.shape[0], -1).T
epoch = 0
while epoch < 2:
  for example in zip(x_train, y_train): #, y_train):
    y = example[1]
    x = example[0].flatten().T
    if w is None:
      w = np.random.randn(len(x), 10)
      print(x.shape)
    y_pred = np.dot(x,w)
    print(y_pred)
    print(y)
    #if y_pred * y < 0:
    #   w = w + .1 *(np.dot(example[1], x))
  epoch += 1

  y_pred = []

  for example in x_test:
    prediction = np.sign(np.dot(w, example.flatten()))
    y_pred.append(prediction)

print(classification_report(zeros_y_test, y_pred, labels=[1, -1]))
print(confusion_matrix(zeros_y_test, y_pred))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
   758.72128293 -2416.64440314]
6
[ 4045.86031748  1204.72447205 -1245.51380002 -2521.35277343
 -2287.40777013 -2246.33479907  1789.70697426 -1286.4552517
 -2872.74691275 -2411.55487906]
8
[ 1877.91635642 -1291.72151371 -2876.07982749   264.55907487
 -2737.57529833   217.01753043   481.07358187  2896.87485999
  -924.20484449  1357.08331449]
3
[ 2437.75125762  -570.85168002 -2105.72815953 -1948.99866867
  2748.87676601  2304.36993041 -1657.11524561  -137.46941571
 -1448.50110474  1325.96168924]
9
[ 2495.07227203  -748.24544905  -815.43529701  1385.80694151
  -969.18095994  1500.17223874   179.47862486  2794.860362
   879.30780151 -2121.64165074]
5
[ 2390.72193149  1065.64846982  2777.53378142 -1800.82590599
 -1908.99189551  -994.84549622  2099.00953777 -3551.63667584
 -2852.40304526 -1099.02061887]
8
[ 4880.41635352   318.11017649   353.7523764    853.91645723
  1323.82582367 -1642.34458449   770.16814093 -2342.5881132
 -4

KeyboardInterrupt: ignored

In [None]:
import numpy as np
from keras.datasets import mnist
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

class Layer:
  def __init__(self, input, activation_function, size):
    # TODO: check X shape matches first element of size
    self.input = input 
    self.weights = np.random.random(input.shape)
    self.activation = activation_function
  
  def linear_output(self):
    return np.dot(self.input, self.weights)

  def activation_output(self):
    return self.activation(self.linear_output()) 

class NeuralNet:
  def __init__(self, input, layers, activation_function):
    self.input = input
    self.layers = layers
  
  def feed_forward(self):
    """
      Basic idea: loop over each layer
      and 
    """
    for layer in self.layers:
      layer.input = output
      output = layer.activation_output()
    pass

#(x_train, y_train), (x_test, y_test) = mnist.load_data()
#print(x_train.flatten().shape)

x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
x = (x/255).astype('float32')
y = to_categorical(y)

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)
weights = np.random.random(784)
print(np.dot(x_train, weights))

[71.76854826 61.32235731 39.45199643 ... 26.002135   62.49394216
 76.59350175]
