<a href="https://colab.research.google.com/github/harperd/machine-learning/blob/master/notebooks/multiclass-logistic-regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multiclass Logistic Regression

Use logistic regression to recognize hand-written digits (0 to 9).

## Imports

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
import pandas as pd
import google.colab as colab
import io

from scipy.io import loadmat
from scipy.optimize import minimize

# Allow saving our graphs in the notebook
%matplotlib inline

style.use('dark_background')

## Read Sample Data

In [5]:
mat_file = colab.files.upload()
!ls -l

Saving ex3data1.mat to ex3data1.mat
total 7340
-rw-r--r-- 1 root root 7511764 Aug  5 23:57 ex3data1.mat
drwxr-xr-x 1 root root    4096 Aug  2 16:06 sample_data


In [6]:
mat_data = loadmat('ex3data1.mat')
mat_data

{'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [7]:
X = mat_data['X']
# Add bias column
X = np.hstack((np.ones(X.shape[0])[:, np.newaxis], X))

y = mat_data['y']
theta = np.array(np.zeros(X.shape[1]), ndmin = 2)

print(f'X Shape: {X.shape}')
print(f'y Shape: {y.shape}')
print(f'Theta Shape: {theta.shape}')

X Shape: (5000, 401)
y Shape: (5000, 1)
Theta Shape: (1, 401)


![Hand written numbers](https://github.com/harperd/machine-learning/blob/master/images/ex3-1.png?raw=1)

In [0]:
def sigmoid(z):
  return 1 / ( 1 + np.exp(-z) )

In [0]:
def compute_hypothesis(theta, X):
  # Compute our hypothesis
  z = X @ theta.T
  
  # Scale our hypothesis using Sigmoid.
  # Here, if the parameter is zero then the sigmoid value will be 0.5.
  h = sigmoid(z)
  
  return h

In [0]:
def compute_cost(theta, X, y, alpha):
  # Fix for minimize function
  theta = np.array(theta, ndmin = 2)
  X = np.array(X, ndmin = 2)
  y = np.array(y, ndmin = 2)
  
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)
  
  first = np.log(h) * -y
  second = np.log(1 - h) * (1 - y)
  
  # The number of examples
  m = len(y)
  
  # Implement Ridge Regression (L2 Regularization)
  #
  # Get all theta values except theta0 out
  # intercept term
  num_params = theta.shape[1]
  params = theta[:, 1:num_params]
  
  x = 10 if np.sum(params ** 2) == 0 else np.round(np.sum(params ** 2))
  
  # Set our lambda alue
  lamb = alpha / x
  
  print(lamb)
  
  # Complete our regularization term
  reg = lamb * np.sum(params ** 2)
  
  # Compute our cost with regularization
  cost = (( np.sum(first - second) / m ) + reg )
  
  return cost

In [0]:
def compute_gradient(theta, X, y, alpha):
  # Fix for minimize function
  theta = np.array(theta, ndmin = 2)
  X = np.array(X, ndmin = 2)
  y = np.array(y, ndmin = 2)
  
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)

  # Get the error
  error = h - y

  # The number of examples
  m = len(X)
  
  # Calculate the gradient
  gradient = (theta * (alpha / m)) + ((error.T @ X) / m)
  
  return gradient

In [121]:
def train_model(theta, X, y, alpha):
  num_classes = len(np.unique(y))
  num_params = theta.shape[1]
  theta_min = np.zeros((num_classes, num_params))
  
  for k in range(1, num_classes + 1):
    print(f'Optimizing theta values for class {k}... ', end = '')

    y_train = [ 1 if K[0] == k else 0 for K in y ]
    y_train = np.array(y_train, ndmin = 2).T

    result = minimize(
        method = 'TNC',
        fun = compute_cost,
        jac = compute_gradient,
        x0 = theta,
        args = ( X, y_train, alpha ))
    
    theta_min[k - 1] = result.x
    
    cost = compute_cost(result.x, X, y_train, alpha)
    
    print(f'Iterations = {result.nit}, cost = {cost}')
  
  return theta_min

theta_min = train_model(theta, X, y, .1)

Optimizing theta values for class 1... 0.01
inf
inf
inf
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan




nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0.01
0.01
Iterations = 0, cost = 0.6931471805599454
Optimizing theta values for class 2... 0.01
inf
inf
0.1
0.004545454545454546
0.014285714285714287
0.014285714285714287
0.014285714285714287
0.014285714285714287
0.014285714285714287
0.00625
0.011111111111111112
0.011111111111111112
0.011111111111111112


KeyboardInterrupt: ignored

In [107]:
def make_predictions(theta, X):
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)
  
  # Get the index of each max probability for each
  # of the 5k examples where index number is the 
  # classifier.
  h = ( np.argmax(h, axis = 1) ) + 1
  
  return h

def compute_accuracy(predictions, y):
  # Get the correct predictions where correct is 1 and
  # incorrect is 0.
  correct = [ 
      1 if p_val == y_val else 0 
      # The purpose of zip() is to map the similar index of multiple 
      # containers so that they can be used just using as single entity.
      for (p_val, y_val) in zip(predictions, y)
  ]
  
  # Calculate the overall accuracy.
  accuracy = sum(correct) / len(correct)
  
  return accuracy

h = make_predictions(theta_min, X)
accuracy = compute_accuracy(h, y)

print(f'Model accuracy: {accuracy * 100}%')

Model accuracy: 96.48%
