# Imports

In [1]:
import pandas as pd
import math
import numpy as np
from sklearn.metrics import accuracy_score

# Question 3: Neural Networks

## Part A: Custom Implementation

Let's begin with the dataset given in the PDF:

In [2]:
# Toy dataset from the assignment: ten points, two features, labels in {-1, +1}.
feat1 = np.array([4.0, 4.0, 1.0, 2.5, 4.9, 1.9, 3.5, 0.5, 2.0, 4.5])
feat2 = np.array([2.9, 4.0, 2.5, 1.0, 4.5, 1.9, 4.0, 1.5, 2.1, 2.5])
labels = np.array([1, 1, -1, -1, 1, -1, 1, -1, -1, 1])

# One column per feature plus the ground-truth label.
df = pd.DataFrame({"x1": feat1, "x2": feat2, "y_true": labels})

# Last expression of the cell, so the notebook renders all ten rows.
df.head(10)

Unnamed: 0,x1,x2,y_true
0,4.0,2.9,1
1,4.0,4.0,1
2,1.0,2.5,-1
3,2.5,1.0,-1
4,4.9,4.5,1
5,1.9,1.9,-1
6,3.5,4.0,1
7,0.5,1.5,-1
8,2.0,2.1,-1
9,4.5,2.5,1


In [None]:
from dataclasses import dataclass, field


def sigmoid(x: np.ndarray) -> np.ndarray:
    """
    Map activation values element-wise onto probabilities in [0, 1].

    The outputs are independent of each other, so they will NOT
    necessarily add up to 1. Intended for (binary) classification.

    Args:
        x: scalar or array of pre-activation values.

    Returns:
        Array with the same shape as ``x`` holding ``1 / (1 + exp(-x))``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) always lies in (0, 1], so it can never overflow; the naive
    # exp(-x) overflows (and warns) for large negative inputs.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)        x < 0: e^x / (1 + e^x)  (same value)
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)


@dataclass
class BinaryClassificationMLP:
    """Fully connected network for two-class problems, trained by batch gradient descent.

    Hidden layers use tanh and the last layer uses a sigmoid, so the last
    layer must contain exactly one unit. Training minimises the mean binary
    cross-entropy between the sigmoid output and the 0/1-encoded labels.

    Attributes:
        num_layers: number of weight layers (hidden layers plus the output layer).
        units_per_layer: width of each layer; the first ``num_layers`` entries are used.
        num_features: input dimensionality; overwritten by ``define_model``.
        threshold: probability at or above which the positive class is predicted.
        classes: ``[negative_label, positive_label]``.
        layers: one ``(weights, bias, activation)`` tuple per layer, where
            ``weights`` is ``(fan_in, fan_out)`` and ``bias`` is ``(fan_out, 1)``.
    """

    num_layers: int
    units_per_layer: np.ndarray
    num_features: int = None
    threshold: float = 0.5
    # default_factory gives every instance its own array; a bare ndarray
    # default is shared between instances and is rejected by Python 3.11+.
    classes: np.ndarray = field(default_factory=lambda: np.array([0, 1]))
    layers: list = field(default_factory=list, init=False, repr=False, compare=False)

    @staticmethod
    def _glorot_limit(fan_in: int, fan_out: int) -> float:
        """Return the Glorot-uniform bound sqrt(6 / (fan_in + fan_out))."""
        scale = 1.0 / max(1.0, (fan_in + fan_out) / 2.0)
        return math.sqrt(3.0 * scale)

    def define_model(self, num_features: int) -> None:
        """(Re)initialise all layers: Glorot-uniform weights, zero biases.

        Args:
            num_features: number of input columns feeding the first layer.

        Raises:
            ValueError: if ``units_per_layer`` has fewer than ``num_layers``
                entries, or the last layer is not a single unit.
        """
        widths = [int(units) for units in self.units_per_layer[:self.num_layers]]
        if len(widths) != self.num_layers:
            raise ValueError("units_per_layer must provide a width for each of the num_layers layers")
        if widths and widths[-1] != 1:
            raise ValueError("the last layer must have exactly one unit for binary classification")

        self.num_features = num_features
        self.layers = []
        fan_in = num_features
        for layer_index, fan_out in enumerate(widths):
            limit = self._glorot_limit(fan_in, fan_out)
            weights = np.random.uniform(low=-limit, high=limit, size=(fan_in, fan_out))
            bias = np.zeros((fan_out, 1))
            # tanh for hidden layers, sigmoid for the output layer
            is_output = layer_index == self.num_layers - 1
            self.layers.append((weights, bias, sigmoid if is_output else np.tanh))
            # this layer's outputs are the next layer's inputs
            fan_in = fan_out

    def forward(self, X: np.ndarray) -> list:
        """Propagate samples through the network.

        Args:
            X: array of shape ``(num_samples, num_features)``.

        Returns:
            ``[A0, A1, ..., AL]`` where ``A0`` is ``X`` transposed to
            ``(num_features, num_samples)`` and ``Ak`` is the output of layer
            ``k`` with shape ``(units_k, num_samples)``.

        Raises:
            RuntimeError: if the model has no layers yet.
        """
        if not self.layers:
            raise RuntimeError("model has no layers; call fit() or define_model() first")
        # columns are samples, so that the (fan_out, 1) biases broadcast
        activations = [np.atleast_2d(np.asarray(X, dtype=float)).T]
        for weights, bias, activation in self.layers:
            activations.append(activation(weights.T @ activations[-1] + bias))
        return activations

    def backward(self, activations: list, targets: np.ndarray) -> list:
        """Back-propagate the mean binary cross-entropy loss.

        Args:
            activations: the list returned by ``forward``.
            targets: 0/1 labels of shape ``(1, num_samples)``.

        Returns:
            One ``(d_weights, d_bias)`` pair per layer, in the same order and
            with the same shapes as ``self.layers``.
        """
        num_samples = targets.shape[1]
        # sigmoid + cross-entropy: the gradient w.r.t. the output
        # pre-activation collapses to (prediction - target)
        delta = (activations[-1] - targets) / num_samples
        gradients = [None] * len(self.layers)
        for layer_index in reversed(range(len(self.layers))):
            layer_input = activations[layer_index]
            gradients[layer_index] = (
                layer_input @ delta.T,
                delta.sum(axis=1, keepdims=True),
            )
            if layer_index > 0:
                # layer_input is a tanh output, and tanh'(z) = 1 - tanh(z)^2
                weights = self.layers[layer_index][0]
                delta = (weights @ delta) * (1.0 - layer_input ** 2)
        return gradients

    def fit(self, X_train: np.ndarray, y_train: np.ndarray,
            epochs=1000, learning_rate=0.0001) -> None:
        """Train with full-batch gradient descent.

        The network is re-initialised on every call.

        Args:
            X_train: array of shape ``(num_samples, num_features)``.
            y_train: ``num_samples`` labels drawn from ``self.classes``.
            epochs: number of full passes over the training set.
            learning_rate: gradient-descent step size.

        Raises:
            ValueError: if ``X_train`` is not 2-D or the number of labels
                differs from the number of samples.
        """
        X_train = np.asarray(X_train, dtype=float)
        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (num_samples, num_features)")

        # A: initial state of the network
        num_samples, num_features = X_train.shape
        self.define_model(num_features)

        # encode labels as 0/1 so they are comparable with the sigmoid output
        _, positive_class = self.classes
        targets = (np.asarray(y_train).ravel() == positive_class).astype(float).reshape(1, -1)
        if targets.shape[1] != num_samples:
            raise ValueError("X_train and y_train must contain the same number of samples")

        # B: training!
        for _ in range(epochs):
            activations = self.forward(X_train)
            gradients = self.backward(activations, targets)
            self.layers = [
                (weights - learning_rate * d_weights, bias - learning_rate * d_bias, activation)
                for (weights, bias, activation), (d_weights, d_bias)
                in zip(self.layers, gradients)
            ]

    def predict(self, X) -> np.ndarray:
        """Return the predicted class label for every row of ``X``.

        Returns:
            1-D array of length ``num_samples`` holding ``classes[1]`` where
            the output probability is at least ``threshold``, else ``classes[0]``.
        """
        probabilities = self.forward(X)[-1].ravel()
        negative_class, positive_class = self.classes
        return np.where(probabilities >= self.threshold, positive_class, negative_class)


In [None]:
# Model hyperparameters: two input features feed a 5-unit hidden layer,
# followed by a single output unit.
num_features = 2
units_per_layer = [5, 1]
num_layers = len(units_per_layer)
# Labels used by the dataset, ordered as (negative, positive).
classes = np.array([-1, 1])