In [999]:
# TODO: Fix imports
from typing import List
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris, load_breast_cancer, load_wine

In [1001]:
np.set_printoptions(precision=2)

In [1004]:
def column_matrix(array: List[float]):
  return np.array(array, ndmin=2).T

def random_matrix(rows: int, columns: int):
  return np.random.rand(rows, columns)

In [1008]:
class MultilayerPerception:
  def __init__(self, max_iter=200, learning_rate=0.001, tol=0.01, hidden_layer_sizes=(100,)):
    self.max_iterations = max_iter
    self.learning_rate = learning_rate
    self.error_tolerance = tol
    self.hidden_layer_sizes = hidden_layer_sizes

  def fit(self, dataset, targets):
    num_outputs, num_inputs = dataset.shape
    layers = [num_inputs] + list(self.hidden_layer_sizes) + [num_outputs]

    self.weights = [np.random.rand(layers[i], layers[i + 1]) for i in range(len(layers) - 1)]
    self.derivatives = [np.zeros((layers[i], layers[i + 1])) for i in range(len(layers) - 1)]
    self.activations = [np.zeros(layers[i]) for i in range(len(layers))]
    self.targets = targets

    error = 1
    training_iteration = 0
    while error > self.error_tolerance and training_iteration < self.max_iterations:
      for index, sample in enumerate(dataset):
        target = targets[index]
        output = self._feed_forward(sample)
        error = target - output

        self._back_propagate(error)
        self._gradient()

        error = self._mse(target, output)
      training_iteration += 1

  def predict(self, X):
    labels = list(set(self.targets))
    predictions = self._feed_forward(X)
    labeled_predictions = []

    for prediction in predictions:
      avg = np.average(prediction)
      labeled_predictions.append(min(labels, key=lambda x: abs(x - avg)))

    return labeled_predictions


  def _feed_forward(self, sample):
    activations = sample
    self.activations[0] = activations

    for index, weight in enumerate(self.weights):
        net_inputs = np.dot(activations, weight)
        activations = self._activate(net_inputs)
        self.activations[index + 1] = activations

    return activations

  def _back_propagate(self, error):
    for i in reversed(range(len(self.derivatives))):
      activations = self.activations[i+1]
      delta = error * self._activate(activations, derivate=True)
      delta_re = delta.reshape(delta.shape[0], -1).T
      
      current_activations = self.activations[i]
      current_activations = current_activations.reshape(current_activations.shape[0],-1)

      self.derivatives[i] = np.dot(current_activations, delta_re)

      error = np.dot(delta, self.weights[i].T)

  def _gradient(self):
    # TODO: Enumerate
    for i in range(len(self.weights)):
      derivatives = self.derivatives[i]
      self.weights[i] += derivatives * self.learning_rate
  
  def _activate(self, sample, derivate=False):
    sigmoid = lambda x: 1/(1 + np.exp(-x))
    dsigmoid = lambda y: y * (1 - y)

    return sigmoid(sample) if derivate == False else dsigmoid(sample)

  def _mse(self, target, output):
    return np.average((target - output) ** 2)


In [1013]:
def test(dataset, dataset_name: str):
  X_train, X_test, y_train, y_test = train_test_split(dataset['data'], dataset["target"], random_state=42, test_size=0.3)

  test_hidden_layer_sizes = [(100,), (100, 200), (50, 50, 50)]

  for hidden_layer_sizes in test_hidden_layer_sizes:
    mlp = MultilayerPerception(hidden_layer_sizes=hidden_layer_sizes)
    mlp.fit(X_train, y_train)

    predictions = mlp.predict(X_test)
    correct_count = 0
    for index, p in enumerate(predictions):
      if p == y_test[index]:
        correct_count += 1

    print(f'{dataset_name} -> score is {correct_count / X_test.shape[0]} with hidden layers {hidden_layer_sizes}')

In [1017]:
iris_data = load_iris()
wine_data = load_wine()
cancer_data = load_breast_cancer()
datasets = [(iris_data, 'Iris dataset'), (wine_data, 'Wine dataset'), (cancer_data, 'Breast cancer dataset')]

for (dataset, dataset_name) in datasets:
  test(dataset, dataset_name)


Iris dataset -> score is 0.28888888888888886 with hidden layers (100,)
Iris dataset -> score is 0.28888888888888886 with hidden layers (100, 200)
Iris dataset -> score is 0.28888888888888886 with hidden layers (50, 50, 50)
Wine dataset -> score is 0.3888888888888889 with hidden layers (100,)
Wine dataset -> score is 0.3888888888888889 with hidden layers (100, 200)
Wine dataset -> score is 0.3888888888888889 with hidden layers (50, 50, 50)
Breast cancer dataset -> score is 0.631578947368421 with hidden layers (100,)
Breast cancer dataset -> score is 0.631578947368421 with hidden layers (100, 200)
Breast cancer dataset -> score is 0.631578947368421 with hidden layers (50, 50, 50)
