<a href="https://colab.research.google.com/github/fboldt/aulasann/blob/main/aula06c_real_word_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [2]:
def sign(a):
  return (a>=0)*2-1

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score

In [4]:
import numpy as np
include_bias = lambda X: np.c_[np.ones(X.shape[0]), X]

In [5]:
from scipy.special import expit

def tanh(a):
  return expit(a)*2-1

In [6]:
class BackPropagation():
  def __init__(self, max_iter=1000, learning_rate=0.01, n_hidden=[2]):
    self.max_iter = max_iter
    self.learning_rate = learning_rate
    self.n_hidden = n_hidden

  def foward(self, X):
    self.A = []
    self.Z = []
    AUX = X.copy()
    for W in self.Ws:
      self.A.append(include_bias(AUX))
      self.Z.append(self.A[-1] @ W)
      AUX = tanh(self.Z[-1])
    return AUX

  def backward(self, Xb, y, ypred):
    grads = []
    output_delta = y - ypred
    grads.insert(0, self.A[-1].T @ output_delta)
    for i in range(len(self.Ws)-1, 0, -1):
      tanh_grad = (1 - np.square(tanh(self.Z[i-1])))
      input_delta = (output_delta @ self.Ws[i][1:, :].T) * tanh_grad
      grads.insert(0, self.A[i-1].T @ input_delta)
      output_delta = input_delta.copy()
    for i in range(len(self.Ws)-1, -1, -1):
      self.Ws[i] += grads[i] * self.learning_rate

  def getWs(self, X, y):
    self.Ws = []
    previous_output = X.shape[1]
    for layer_size in self.n_hidden:
      self.Ws.append(np.random.uniform(-1, 1, size=(previous_output+1, layer_size)))
      previous_output = layer_size
    if len(y.shape) == 1:
      y = y.reshape(-1, 1)
    self.Ws.append(np.random.uniform(-1, 1, size=(previous_output+1, y.shape[1])))
    for _ in range(self.max_iter):
      ypred = self.foward(X)
      self.backward(X, y, ypred)
    return self.Ws



In [7]:
from sklearn.preprocessing import LabelBinarizer

class MLP(BaseEstimator, ClassifierMixin):
  def __init__(self, trainingAlgorithm=BackPropagation()):
    self.trainingAlgorithm = trainingAlgorithm

  def fit(self, X, y):
    self.labelBinarizer = LabelBinarizer()
    y = self.labelBinarizer.fit_transform(y)
    y = y * 2 - 1
    self.w = self.trainingAlgorithm.getWs(X, y)
    return self

  def predict(self, X):
    ypred = self.trainingAlgorithm.foward(X)
    if ypred.shape[1] == 1:
      ypred = sign(ypred)
    else:
      tmp = np.zeros(ypred.shape)
      idx = np.argmax(ypred, axis=1)
      tmp[np.arange(ypred.shape[0]), idx] = 1
      ypred = tmp
    ypred = self.labelBinarizer.inverse_transform(ypred)
    return ypred



In [8]:
model = MLP(trainingAlgorithm=BackPropagation(max_iter=1000, learning_rate=0.0001, n_hidden=[128]))
model.fit(X_train, y_train)
ypred = model.predict(X_train)
print(accuracy_score(y_train, ypred))


0.9986082115518441


In [9]:
ypred = model.predict(X_test)
print(accuracy_score(y_test, ypred))


0.9722222222222222


In [10]:
from sklearn.datasets import fetch_olivetti_faces
X, y = fetch_olivetti_faces(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [11]:
model = MLP(trainingAlgorithm=BackPropagation(max_iter=1000, learning_rate=0.0001, n_hidden=[]))
model.fit(X_train, y_train)
ypred = model.predict(X_train)
print(accuracy_score(y_train, ypred))

0.990625


In [12]:
ypred = model.predict(X_test)
print(accuracy_score(y_test, ypred))

0.7125
