# Perceptron Classifier with Implementation

## Introduction

## Perceptron Classifier

## Numpy Implementation of Perceptron Classifier

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import numpy as np

np.random.seed(71)

In [31]:
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])
a * b

array([1, 4, 9])

In [50]:
class PerceptronClassifier(object):
    """Numpy implementation of Perceptron."""

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Get dataset and information."""
        self.X_train = X_train
        self.y_train = y_train

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = self.X_train.shape

        if shuffle:
            idx = list(range(self.n_examples))
            random.shuffle(idx)
            self.X_train = self.X_train[idx]
            self.y_train = self.y_train[idx]

    def _create_weights(self):
        """Create model weights and bias."""
        self.w = np.zeros(self.n_inputs).reshape(self.n_inputs, 1)
        self.b = np.zeros(1).reshape(1, 1)

    def _model(self, X):
        """Perceptron linear regression model.
        
        y = sign(Xw + b), where 
        - y = 1 if Xw + b > 0
        - y = -1 if Xw + b < 0
        """
        self.weighted_sum = np.matmul(X, self.w) + self.b
        return np.sign(self.weighted_sum)

    def _loss(self, y, y_):
        """Hinge loss.

        # hinge_loss(y, y_) 
        #   = 1/n * \sum_{i=1}^n (y_i * (Xw + b)) (y_i != y__i)
        """
        self.hinge_loss = y * self.weighted_sum * self.is_mismatch
        return np.mean(self.hinge_loss)

    def _optimize(self, X, y):
        """Optimize by stochastic gradient descent."""
        m = X.shape[0]

        y_ = self._model(X)
        self.is_mismatch = np.not_equal(y, y_)
        dw = 1 / m * np.matmul(X.T, self.is_mismatch * y)
        db = np.mean(self.is_mismatch)

        for (param, grad) in zip([self.w, self.b], [dw, db]):
            param[:] = param + self.lr * grad

    def _fetch_batch(self):
        """Fetch batch dataset."""
        idx = list(range(self.n_examples))
        for i in range(0, self.n_examples, self.batch_size):
            idx_batch = idx[i:min(i + self.batch_size, self.n_examples)]
            yield (self.X_train.take(idx_batch, axis=0), 
                   self.y_train.take(idx_batch, axis=0))

    def fit(self):
        """Fit model."""
        self._create_weights()

        for epoch in range(self.n_epochs):
            total_loss = 0
            for X_train_b, y_train_b in self._fetch_batch():
                y_train_b = y_train_b.reshape((y_train_b.shape[0], -1))
                self._optimize(X_train_b, y_train_b)
                batch_loss = self._loss(y_train_b, self._model(X_train_b))
                total_loss += batch_loss * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('epoch {0}: training loss {1}'
                      .format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Get model coefficients."""
        return self.b, self.w.reshape((-1,))

    def predict(self, X):
        """Predict for new data."""
        return self._model(X).reshape((-1,))

## Data Preparation and Preprocessing

In [51]:
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression as LogisticRegressionSklearn

import sys
sys.path.append('../numpy/')
from metrics import accuracy

In [52]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
# Read breast cancer data.
X, y = load_breast_cancer(return_X_y=True)

In [54]:
X.shape, y.shape

((569, 30), (569,))

In [55]:
X[:3]

array([[1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
        3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
        8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
        3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
        1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, 1.326e+03, 8.474e-02, 7.864e-02,
        8.690e-02, 7.017e-02, 1.812e-01, 5.667e-02, 5.435e-01, 7.339e-01,
        3.398e+00, 7.408e+01, 5.225e-03, 1.308e-02, 1.860e-02, 1.340e-02,
        1.389e-02, 3.532e-03, 2.499e+01, 2.341e+01, 1.588e+02, 1.956e+03,
        1.238e-01, 1.866e-01, 2.416e-01, 1.860e-01, 2.750e-01, 8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, 1.203e+03, 1.096e-01, 1.599e-01,
        1.974e-01, 1.279e-01, 2.069e-01, 5.999e-02, 7.456e-01, 7.869e-01,
        4.585e+00, 9.403e+01, 6.150e-03, 4.006e-02, 3.832e-02, 2.058e-02,
        2.250e-02, 4.571e-03, 2.357e

In [56]:
y[:3]

array([0, 0, 0])

In [57]:
np.unique(y)

array([0, 1])

In [58]:
y = y * 2 - 1

In [59]:
np.unique(y)

array([-1,  1])

In [60]:
# Split data into training and test datasets.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=71, shuffle=True, stratify=y)

In [61]:
print(X_train_raw.shape, y_train.shape)
print(X_test_raw.shape, y_test.shape)

(426, 30) (426,)
(143, 30) (143,)


In [62]:
# Feature engineering for standardizing features by min-max scaler.
min_max_scaler = MinMaxScaler()

X_train = min_max_scaler.fit_transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

In [63]:
# Convert arrays to float32.
X_train, X_test, y_train, y_test = (
    np.float32(X_train), np.float32(X_test), 
    np.float32(y_train), np.float32(y_test))

In [64]:
X_train.dtype, y_train.dtype

(dtype('float32'), dtype('float32'))

## Fitting Perceptron Classifier in NumPy

In [160]:
# Fit Perceptron Linear Classfier.
perceptron = PerceptronClassifier(batch_size=64, lr=0.01, n_epochs=1000)

In [161]:
# Get datasets and build graph.
perceptron.get_data(X_train, y_train, shuffle=True)

In [162]:
perceptron.fit()

epoch 0: training loss 0.0008866159369027201
epoch 100: training loss -0.0007857102767283937
epoch 200: training loss -0.0015027830668259063
epoch 300: training loss -0.0022024071171934515
epoch 400: training loss -0.0029043668498415012
epoch 500: training loss -0.0036208848776723523
epoch 600: training loss -0.004348338183782957
epoch 700: training loss -0.005053166245825901
epoch 800: training loss -0.0057505283119160784
epoch 900: training loss -0.0064534287864013


<__main__.PerceptronClassifier at 0x7f93e60db3c8>

In [163]:
# Get coefficient.
perceptron.get_coeff()

(array([[2.54125]]),
 array([-0.55529043, -0.59290708, -0.52942659, -0.35006117, -0.42639994,
        -0.17166483, -0.07923539, -0.21111116, -0.38296862, -0.11653929,
        -0.16367682, -0.1966969 , -0.14609987, -0.09415943, -0.17666762,
        -0.00944617,  0.02240947, -0.18020348, -0.07492088,  0.06488046,
        -0.50357032, -0.6535921 , -0.46371634, -0.28376743, -0.42778206,
        -0.17448865, -0.18052559, -0.46123491, -0.26020508, -0.12042708]))

In [164]:
# Predicted probabilities for training data.
y_train_ = perceptron.predict(X_train)
y_train_[:10]

array([ 1., -1., -1.,  1.,  1.,  1., -1., -1.,  1., -1.])

In [165]:
# Prediction accuracy for training data.
accuracy(y_train, y_train_)

0.9647887323943662

In [166]:
# Predicted label correctness for test data.
y_test_ = perceptron.predict(X_test)
print(y_test_[:10])

[-1.  1. -1.  1. -1.  1. -1.  1. -1. -1.]


In [167]:
# Prediction accuracy for test data.
accuracy(y_test, y_test_)

0.9370629370629371

## Benchmark with Sklearn's Perceptron Classifier

In [73]:
# Fit sklearn's Perceptron Classifier.
from sklearn.linear_model import Perceptron as PerceptronSklearn

perceptron_sk = PerceptronSklearn(max_iter=1000)

perceptron_sk.fit(X_train, y_train.reshape(y_train.shape[0], ))

Perceptron()

In [74]:
# Get coefficients.
perceptron_sk.intercept_, perceptron_sk.coef_

(array([15.]),
 array([[-0.72899743, -3.01555639, -0.7359542 , -2.43953361, -1.77268198,
          3.4363058 , -4.36588551, -5.86158106, -1.44860323,  2.08235057,
         -8.09292061, -0.0142108 , -6.45478977, -4.94032835,  2.64935014,
          4.17988278,  2.22296718,  1.77882101,  1.68285472,  3.90993372,
         -5.3023118 , -4.69802772, -4.73340373, -5.25614419, -3.47223126,
         -0.16961402, -4.70317868, -4.79027496, -3.04435263, -1.81434456]]))

In [75]:
# Predicted labels for training data.
y_train_ = perceptron_sk.predict(X_train)
y_train_[:3]

array([ 1., -1., -1.], dtype=float32)

In [76]:
# Prediction accuracy for training data.
accuracy(y_train, y_train_)

0.9835680751173709

In [77]:
# Predicted label correctness for test data.
y_test_ = perceptron_sk.predict(X_test)
y_test_[:3]

array([-1.,  1., -1.], dtype=float32)

In [78]:
# # Prediction accuracy for test data.
accuracy(y_test, y_test_)

0.972027972027972