# Multi-Class Classification with Perceptron

Lab Assignment from [AI for Beginners Curriculum](https://github.com/microsoft/ai-for-beginners).

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
import os

import pylab
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets

import random

# Sampling during training goes through the stdlib `random` module, so seed it
# alongside NumPy's generator; seeding NumPy alone leaves runs unreproducible.
np.random.seed(1)
random.seed(1)

You can use the following perceptron training code from the lecture:

In [None]:
def train(positive_examples, negative_examples, num_iterations = 100, report_frequency = 10):
    """Train a perceptron that separates positive from negative examples.

    Each iteration draws one random positive and one random negative row and
    applies the perceptron rule: add a positive row scored below zero, subtract
    a negative row scored at or above zero.

    Args:
        positive_examples: 2-D array, one example per row.
        negative_examples: 2-D array with the same number of columns.
        num_iterations: number of (positive, negative) draws to perform.
        report_frequency: print per-class accuracy every this many iterations;
            a falsy value (0 or None) disables reporting.

    Returns:
        Weight column vector of shape (num_features, 1).

    Raises:
        IndexError: if an example set is empty while iterations are requested.
    """
    num_dims = positive_examples.shape[1]
    weights = np.zeros((num_dims, 1))  # initialize weights

    pos_count = positive_examples.shape[0]
    neg_count = negative_examples.shape[0]
    if num_iterations > 0 and (pos_count == 0 or neg_count == 0):
        raise IndexError("Cannot train on an empty set of examples")

    for i in range(num_iterations):
        # Draw rows by index instead of random.choice(array): early Python 3.11
        # releases test the truth value of the sequence inside choice(), which
        # raises ValueError for multi-element NumPy arrays. randrange consumes
        # the random stream the same way choice does.
        pos = positive_examples[random.randrange(pos_count)]
        neg = negative_examples[random.randrange(neg_count)]

        # A positive example must score >= 0; otherwise pull weights toward it.
        z = np.dot(pos, weights)
        if z < 0:
            weights = weights + pos.reshape(weights.shape)

        # A negative example must score < 0; otherwise push weights away from it.
        z = np.dot(neg, weights)
        if z >= 0:
            weights = weights - neg.reshape(weights.shape)

        if report_frequency and i % report_frequency == 0:
            pos_out = np.dot(positive_examples, weights)
            neg_out = np.dot(negative_examples, weights)
            pos_correct = (pos_out >= 0).sum() / float(pos_count)
            neg_correct = (neg_out < 0).sum() / float(neg_count)
            print("Iteration={}, pos correct={}, neg correct={}".format(i,pos_correct,neg_correct))

    return weights

In [None]:
def train_graph(positive_examples, negative_examples, num_iterations = 100, report_frequency = 15):
    """Train a perceptron and record periodic snapshots for plotting.

    Uses the same update rule as a plain perceptron: add a positive row that
    scores below zero, subtract a negative row that scores at or above zero.

    Args:
        positive_examples: 2-D array, one example per row.
        negative_examples: 2-D array with the same number of columns.
        num_iterations: number of (positive, negative) draws to perform.
        report_frequency: take a snapshot every this many iterations; a falsy
            value (0 or None) disables snapshots.

    Returns:
        A tuple ``(weights, snapshots)``. ``weights`` has shape
        (num_features, 1). ``snapshots`` is an object array of shape
        (num_snapshots, 2): column 0 holds a copy of the weights at that
        moment, column 1 the mean of the positive and negative accuracy.

    Raises:
        IndexError: if an example set is empty while iterations are requested.
    """
    num_dims = positive_examples.shape[1]
    weights = np.zeros((num_dims, 1))  # initialize weights

    pos_count = positive_examples.shape[0]
    neg_count = negative_examples.shape[0]
    if num_iterations > 0 and (pos_count == 0 or neg_count == 0):
        raise IndexError("Cannot train on an empty set of examples")

    snapshots = []

    for i in range(num_iterations):
        # Draw rows by index instead of random.choice(array): early Python 3.11
        # releases test the truth value of the sequence inside choice(), which
        # raises ValueError for multi-element NumPy arrays.
        pos = positive_examples[random.randrange(pos_count)]
        neg = negative_examples[random.randrange(neg_count)]

        z = np.dot(pos, weights)
        if z < 0:
            weights = weights + pos.reshape(weights.shape)

        z = np.dot(neg, weights)
        if z >= 0:
            weights = weights - neg.reshape(weights.shape)

        if report_frequency and i % report_frequency == 0:
            pos_out = np.dot(positive_examples, weights)
            neg_out = np.dot(negative_examples, weights)
            pos_correct = (pos_out >= 0).sum() / float(pos_count)
            neg_correct = (neg_out < 0).sum() / float(neg_count)
            snapshots.append((np.copy(weights), (pos_correct + neg_correct) / 2.0))

    # Each snapshot pairs an array with a scalar, so np.array(snapshots) is a
    # ragged request that NumPy >= 1.24 rejects. Fill an object array by hand
    # to always get the (num_snapshots, 2) layout that callers index into.
    snapshot_table = np.empty((len(snapshots), 2), dtype=object)
    for row, (weights_copy, mean_accuracy) in enumerate(snapshots):
        snapshot_table[row, 0] = weights_copy
        snapshot_table[row, 1] = mean_accuracy

    return weights, snapshot_table

In [None]:
def accuracy(weights, test_x, test_labels):
    """Return the fraction of samples a perceptron classifies correctly.

    A sample is predicted positive when its score is >= 0, the same decision
    rule the training loop uses, so all-zero weights are not scored as
    correct on negative samples.

    Args:
        weights: weight vector with either one entry per feature, or one per
            feature plus a trailing bias weight.
        test_x: samples, one per row (a 1-D input is treated as one feature).
        test_labels: one label per sample; values > 0 mean the positive class.

    Returns:
        Fraction of correctly classified samples.

    Raises:
        ValueError: if the weight count matches neither layout.
    """
    weights = np.asarray(weights)
    test_x = np.asarray(test_x)
    test_labels = np.asarray(test_labels)
    if test_x.ndim == 1:
        test_x = test_x[:, None]

    num_features = test_x.shape[1]
    num_weights = weights.shape[0]
    if num_weights == num_features + 1:
        # Weights carry a bias term: feed it a constant 1 input.
        inputs = np.c_[test_x, np.ones(len(test_x))]
    elif num_weights == num_features:
        # Weights trained on the raw features, no bias column to add.
        inputs = test_x
    else:
        raise ValueError(
            "weights has {} entries but samples have {} features".format(
                num_weights, num_features))

    scores = np.dot(inputs, weights).reshape(test_labels.shape)
    predicted_positive = scores >= 0
    actual_positive = test_labels > 0
    return (predicted_positive == actual_positive).sum() / float(len(test_labels))

# accuracy(wts, test_x, test_labels)

### Reading the Dataset

This code downloads the dataset from a repository on the internet. You can also manually copy the dataset from the `/data` directory of the AI Curriculum repo.

In [None]:
# !rm *.pkl
# https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/data/mnist.pkl.gz
# !gzip -d mnist.pkl.gz

In [None]:
# NOTE: unpickling can execute arbitrary code; only load a dataset file you trust.
file = '../mnist.pkl'
with open(file, "rb") as fh:
    MNIST = pickle.load(fh, encoding="latin1")

In [None]:
# Index helpers for the nested dataset, used as MNIST[split][part].
# NOTE(review): if the pickle is a (train, validation, test) triple, index 1
# is the validation split rather than the test split — confirm.
Train, Test = 0, 1
Features, Labels = 0, 1

In [None]:
# Peek at the raw data: a slice of the first training image and its label.
labels = MNIST[Train][Labels]
print(MNIST[Train][Features][0][130:180])
print(labels[0])

# NOTE(review): assumes raw pixel values span 0-255; if the pickle already
# stores floats in [0, 1] this scales them down a second time — confirm.
features = MNIST[Train][Features].astype(np.float32) / 256.0

# Show the first ten training images side by side.
fig = plt.figure(figsize=(10,5))
for i in range(10):
    ax = fig.add_subplot(1, 10, i + 1)
    ax.imshow(features[i].reshape(28, 28))
plt.show()

The code below creates a *one-vs-other* dataset for two-digit classification. You need to modify it to create a *one-vs-all* dataset.

In [None]:
# One-vs-other
def set_mnist_pos_neg(positive_label, negative_label):
    """Split the training images into two classes by label.

    Returns a tuple ``(positive_images, negative_images)``: the training rows
    labelled ``positive_label`` and those labelled ``negative_label``. Rows
    with any other label are left out.
    """
    training = MNIST[Train]
    images = training[Features]
    digit_labels = np.asarray(training[Labels])

    # Row positions of each class, in their original order.
    positive_rows = np.flatnonzero(digit_labels == positive_label)
    negative_rows = np.flatnonzero(digit_labels == negative_label)

    return images[positive_rows], images[negative_rows]

In [None]:
# One-vs-all
def set_mnist_pos_neg_ONEvALL(positive_label):
    """Split the training images into one label versus all the others.

    Returns a tuple ``(positive_images, negative_images)``: the training rows
    labelled ``positive_label`` and every remaining training row.
    """
    training = MNIST[Train]
    images = training[Features]
    digit_labels = np.asarray(training[Labels])

    # Row positions of the chosen label and of everything else, in order.
    positive_rows = np.flatnonzero(digit_labels == positive_label)
    negative_rows = np.flatnonzero(digit_labels != positive_label)

    return images[positive_rows], images[negative_rows]

In [None]:
# One-vs-all split for digit 1: images labelled 1 versus all other images.
pos1_vALL,neg1_vALL = set_mnist_pos_neg_ONEvALL(1)

In [None]:
# neg1

In [None]:
# Train the digit-1 perceptron with the default iteration count, keeping the
# periodic snapshots returned by train_graph for the plots below.
wts_1, snapshots_1 = train_graph(pos1_vALL,neg1_vALL)
# print(wts_1.transpose())

In [None]:
# IMPLEMENT THIS

def accuracy(weights, test_x, test_labels):
    """Return the fraction of samples a perceptron classifies correctly.

    A sample is predicted positive when its score is >= 0, the same decision
    rule the training loop uses, so all-zero weights are not scored as
    correct on negative samples.

    Args:
        weights: weight vector with either one entry per feature, or one per
            feature plus a trailing bias weight.
        test_x: samples, one per row (a 1-D input is treated as one feature).
        test_labels: one label per sample; values > 0 mean the positive class.

    Returns:
        Fraction of correctly classified samples.

    Raises:
        ValueError: if the weight count matches neither layout.
    """
    weights = np.asarray(weights)
    test_x = np.asarray(test_x)
    test_labels = np.asarray(test_labels)
    if test_x.ndim == 1:
        test_x = test_x[:, None]

    num_features = test_x.shape[1]
    num_weights = weights.shape[0]
    if num_weights == num_features + 1:
        # Weights carry a bias term: feed it a constant 1 input.
        inputs = np.c_[test_x, np.ones(len(test_x))]
    elif num_weights == num_features:
        # Weights trained on the raw features, no bias column to add.
        inputs = test_x
    else:
        raise ValueError(
            "weights has {} entries but samples have {} features".format(
                num_weights, num_features))

    scores = np.dot(inputs, weights).reshape(test_labels.shape)
    predicted_positive = scores >= 0
    actual_positive = test_labels > 0
    return (predicted_positive == actual_positive).sum() / float(len(test_labels))

In [None]:
def plotit2(snapshots_mn,step):
    """Plot one training snapshot next to the whole accuracy curve.

    Args:
        snapshots_mn: 2-D array of snapshots; column 0 holds weight vectors
            with 784 entries (reshaped to 28x28 here), column 1 holds a value
            plotted on a 0..1 axis (the mean accuracy when the array comes
            from train_graph).
        step: row index of the snapshot to display.
    """
    fig = pylab.figure(figsize=(10,4))
    # Left panel: the selected weight vector drawn as a 28x28 image.
    ax = fig.add_subplot(1, 2, 1)
    pylab.imshow(snapshots_mn[step][0].reshape(28, 28), interpolation='nearest')
    ax.set_xticks([])
    ax.set_yticks([])
    pylab.colorbar()
    # Right panel: column 1 across all snapshots, y-axis fixed to [0, 1].
    ax = fig.add_subplot(1, 2, 2)
    ax.set_ylim([0,1])
    pylab.plot(np.arange(len(snapshots_mn[:,1])), snapshots_mn[:,1])
    # Mark the currently selected snapshot with a blue dot.
    pylab.plot(step, snapshots_mn[step,1], "bo")
    pylab.show()
def pl3(step):
    """Plot snapshot number ``step`` from the digit-1 training snapshots."""
    plotit2(snapshots_1, step)
# def pl4(step): plotit2(snapshots_mn2,step)    

In [None]:
# Slider over the valid snapshot indices (0 .. len - 1); pl3 draws the chosen one.
interact(pl3, step=widgets.IntSlider(value=0, min=0, max=len(snapshots_1) - 1))

In [None]:
# Rebuild the digit-1 one-vs-all split and train its perceptron; this
# overwrites pos1_vALL, neg1_vALL, wts_1 and snapshots_1.
pos1_vALL,neg1_vALL = set_mnist_pos_neg_ONEvALL(1)
wts_1, snapshots_1 = train_graph(pos1_vALL,neg1_vALL)

In [None]:
# Same procedure for digit 2: one-vs-all split, then train its perceptron.
pos2_vALL,neg2_vALL = set_mnist_pos_neg_ONEvALL(2)
wts_2, snapshots_2 = train_graph(pos2_vALL,neg2_vALL)

In [None]:
# Train = 0
# Test = 1
# Features = 0
# Labels = 1

Now you need to:
1. Create 10 *one-vs-all* datasets for all digits
1. Train 10 perceptrons
1. Define `classify` function to perform digit classification
1. Measure the accuracy of classification and print *confusion matrix*
1. [Optional] Create improved `classify` function that performs the classification using one matrix multiplication.