<a href="https://colab.research.google.com/github/harperd/machine-learning/blob/master/notebooks/multiclass-logistic-regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multiclass Logistic Regression

Use logistic regression to recognize hand-written digits (0 to 9).

## Imports

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
import pandas as pd
import scipy.optimize as opt
import google.colab as colab
import io

from scipy.io import loadmat

# Allow saving our graphs in the notebook
%matplotlib inline

Chart Dark Theme For Google Colab Dark Mode

In [0]:
style.use('dark_background')

## Read Sample Data

In [3]:
mat_file = colab.files.upload()
!ls -l

Saving ex3data1.mat to ex3data1.mat
total 7340
-rw-r--r-- 1 root root 7511764 Aug  4 14:31 ex3data1.mat
drwxr-xr-x 1 root root    4096 Aug  1 16:08 sample_data


In [4]:
mat_data = loadmat('ex3data1.mat')
mat_data

{'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [5]:
X = mat_data['X']
# Add bias column
X = np.hstack((np.ones(X.shape[0])[:, np.newaxis], X))

y = mat_data['y']
theta = np.array(np.zeros(X.shape[1]), ndmin = 2)

print(f'X Shape: {X.shape}')
print(f'y Shape: {y.shape}')
print(f'Theta Shape: {theta.shape}')

X Shape: (5000, 401)
y Shape: (5000, 1)
Theta Shape: (1, 401)


![Hand written numbers](https://github.com/harperd/machine-learning/blob/master/images/ex3-1.png?raw=1)

In [0]:
def sigmoid(z):
  return 1 / ( 1 + np.exp(-z) )

In [0]:
def compute_hypothesis(theta, X):
  # Compute our hypothesis
  z = X @ theta.T
  
  # Scale our hypothesis using Sigmoid
  # Here, if the parameter is zero then the sigmoid value will be 0.5.
  h = sigmoid(z)
  
  return h

In [0]:
def compute_cost(theta, X, y):
  # Fix for minimize function
  theta = np.array(theta, ndmin = 2)
  X = np.array(X, ndmin = 2)
  y = np.array(y, ndmin = 2)
  
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)
  
  first = np.log(h) * -y
  second = np.log(1 - h) * (1 - y)
  
  # The number of examples
  m = len(y)
  
  cost = np.sum(first - second) / m
  
  return cost

In [0]:
def compute_gradient(theta, X, y):
  # Fix for minimize function
  theta = np.array(theta, ndmin = 2)
  X = np.array(X, ndmin = 2)
  y = np.array(y, ndmin = 2)
  
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)

  # Get the error
  error = h - y

  # The number of examples
  m = len(y)
  
  # Calculate the gradient
  gradient = (( error.T @ X ) / m) + ((.1 / m) * theta)
  
  return gradient

In [63]:
def train_model(theta, X, y):
  theta_min = {}
  
  for k in range(1, 11):
    print(f'> Optimizing theta values for class {k}...')

    y_train = [ 1 if K[0] == k else 0 for K in y ]
    y_train = np.array(y_train, ndmin = 2).T

    result = opt.fmin_tnc(
      # Initial guess.
      x0 = theta,
      # Objective function to be minimized.
      func = compute_cost,
      # Gradient of func.
      fprime = compute_gradient,
      # Extra arguments passed to f and fprime.
      args = ( X, y_train ))
    
    theta_min[k - 1] = result[0]

    iterations = result[1]
    cost = compute_cost(result[0], X, y_train)
    
    print(f'  Iterations = {iterations}, cost = {cost}')

  return theta_min

theta_min = train_model(theta, X, y)

> Optimizing theta values for class 1...
  Iterations = 123, cost = 0.008345230609298671
> Optimizing theta values for class 2...
  Iterations = 215, cost = 0.04443323146861598
> Optimizing theta values for class 3...
  Iterations = 187, cost = 0.052502878719436374
> Optimizing theta values for class 4...
  Iterations = 171, cost = 0.026052862922637393
> Optimizing theta values for class 5...
  Iterations = 233, cost = 0.04710706870711914
> Optimizing theta values for class 6...
  Iterations = 178, cost = 0.011827903837127233
> Optimizing theta values for class 7...
  Iterations = 192, cost = 0.024533886541116552
> Optimizing theta values for class 8...
  Iterations = 182, cost = 0.0740679645728465
> Optimizing theta values for class 9...
  Iterations = 181, cost = 0.065474591260563
> Optimizing theta values for class 10...
  Iterations = 155, cost = 0.005009637890079448


In [64]:
def make_predictions(theta, X):
  # Compute our hypothesis
  h = compute_hypothesis(theta, X)
  
  # Make prediction of 1 or zero based on threshold.
  predictions = [ 1 if s >= 0.5 else 0 for s in h ]

  return predictions

def compute_accuracy(predictions, y):
  # Get the correct predictions where correct is 1 and
  # incorrect is 0.
  correct = [ 
      1 if p_val - y_val == 0 else 0 
      # The purpose of zip() is to map the similar index of multiple 
      # containers so that they can be used just using as single entity.
      for (p_val, y_val) in zip(predictions, y)
  ]
  
  # Calculate the overall accuracy.
  accuracy = sum(correct) / len(correct)
  
  return accuracy

def compute_model_accuracy(theta_min, X, y):
  accuracy = 0

  for k in theta_min.keys():
    theta = theta_min[k]
    correct = [ 1 if K[0] == k else 0 for K in y ]
    predictions = make_predictions(theta, X)
    accuracy = accuracy + compute_accuracy(predictions, correct)

  return ( accuracy / len(theta_min.keys()) ) * 100

accuracy = compute_model_accuracy(theta_min, X, y)

print(f'Model accuracy: {accuracy}%')

Model accuracy: 81.46%
