# Personal Information
- Name: **Fernando Martinez**
- Student ID: **A19737173**

## 1. Implement Neural Network from scratch

In [11]:
# Implementation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings;   warnings.filterwarnings("ignore")

class Layer:
    # Initialize weights. By default I used a normal distribution
    def __init__(self, input_size, out_size, activation_func):
        self.activation_func = activation_func
        self.W = np.random.randn(out_size, input_size)
        self.b = np.random.randn(self.W.shape[0],1)
    # List of activation functions with their corresponding gradients
    def activation_function(self, z): 
        if self.activation_func == 'relu':
            result = np.maximum(0, z)
            dresult = np.where(z <= 0, 0, 1)
        if self.activation_func == 'tanh':
            result = np.tanh(z)
            dresult = 1 - result**2
        if self.activation_func == 'softmax':
            result = np.exp(z)/np.sum(np.exp(z),axis=0)
            dresult = None
        if self.activation_func == 'sigmoid':
            result = 1/(1 + np.exp(-z))
            dresult = result * (1 - result)
        return result, dresult
    # Forward propagation process in layer
    def forward_propagation(self, input):
        self.input = input
        self.z = np.dot(self.W, np.transpose(self.input)) + self.b
        self.a, _ = self.activation_function(self.z)
        self.a = np.transpose(self.a)
        return self.a
    # Backward propagation process in layer
    def backward_propagation(self, gradients_input, learning_rate):
      self.gradients_input = gradients_input
      if self.activation_func == 'softmax':
          self.dw = np.dot(self.gradients_input.T, self.input)
          self.db = np.sum(self.gradients_input, axis=0).reshape(-1,1)
          gradients_output = np.dot(self.gradients_input, self.W)
      else:
          _, self.dz = self.activation_function(self.z)
          self.dz = np.transpose(self.gradients_input) * self.dz
          self.dw = np.dot(self.dz, self.input)
          self.db = np.sum(self.dz, axis=1).reshape(-1,1)
          gradients_output = np.dot(np.transpose(self.dz), self.W)
      self.W -= learning_rate * self.dw
      self.b -= learning_rate * self.db
      return gradients_output
        
class NN:
    def __init__(self, architecture, loss_type='CrossEntropy', metric='Accuracy',learning_rate_pool=[0.001,0.002,0.0015]):
        self.architecture = architecture # NN architecture to use
        self.loss_type = loss_type # Type of loss function
        self.learning_rate = np.random.choice(learning_rate_pool) # random sampling across list of different learning rates
        self.metric = metric # By default accuracy
        self.loss_log = []; self.metric_log = [] # Store loss and metric results

    # To make predictions we will run forward prop
    def predict(self, input):
        output = input
        for layer in self.architecture:
            output = layer.forward_propagation(output)
        return output
    # Training process
    def train(self, X_train, y_train, epochs, batch_size):
        for epoch in range(epochs):
            print(f"Epoch: {epoch}...")
            
            for i in range(0,X_train.shape[0],batch_size):
                data, target = X_train[i:i+batch_size],y_train[i:i+batch_size]
                # forward propagation
                output = self.predict(data)
                # error
                _, gradient = self.loss(y=target, yhat=output)

                #backpropagation
                for Layer in reversed(self.architecture):
                    gradient = Layer.backward_propagation(gradient, self.learning_rate)

            loss = self.loss(y_train, self.predict(X_train))[0]
            self.loss_log.append(loss)
            metric_result = self.accuracy(X_train, y_train)
            self.metric_log.append(metric_result)
            print(f"Train {self.metric}: {metric_result}")

    # Loss function
    def loss(self, y, yhat):
        # Multiclass problems 
        if self.loss_type == 'CrossEntropy':
            CrossEntropy = -np.sum(y * np.log(yhat), axis=1)
            gradient =  (yhat - y)/len(yhat)
            total_CrossEntropy = np.mean(CrossEntropy)
        # For Binary problems (0 or 1) 
        if self.loss_type == 'BinaryCrossEntropy':
            CrossEntropy = np.sum(-np.expand_dims(y, axis=1)*np.log(yhat) - (1-np.expand_dims(y, axis=1))*np.log(1-yhat))
            gradient =  (-np.expand_dims(y, axis=1)/yhat + (1-np.expand_dims(y, axis=1))/(1-yhat))/len(yhat)
            total_CrossEntropy = np.mean(CrossEntropy)
        return total_CrossEntropy, gradient
    # Accuracy metric by hand    
    def accuracy(self, X, y):
        return np.mean(np.round(self.predict(X)) == np.round(np.expand_dims(y, axis=1)))

In [16]:
# Standard Scaler made from scratch
class StandardScaler:
    # Standard Scaler
    def fit(self, X):
        self.sample_mean = np.mean(X, axis=0).values
        self.sample_std = np.std(X, axis=0).values

    def fit_transform(self, X):
        self.sample_mean = np.mean(X, axis=0).values
        self.sample_std = np.std(X, axis=0).values
        return np.array((X - self.sample_mean)/self.sample_std)

data_path = "data.csv"
data = pd.read_csv(data_path, header=None)
X = data.iloc[:,0:2]
y = data.iloc[:,-1]

scaler_HandCrafted = StandardScaler()
X_scaled = scaler_HandCrafted.fit_transform(X)
X_train, x_validation, y_train, y_validation = train_test_split(X_scaled, y,test_size = 0.2)

# # training neural network
nn = NN(architecture=[Layer(2, 64, activation_func='relu'), 
                      Layer(64, 1, activation_func='sigmoid')], 
        loss_type="BinaryCrossEntropy")
nn.train(X_train, y_train, epochs=50, batch_size=1) #Stochastic Gradient descent

Epoch: 0...
Train Accuracy: 0.6125
Epoch: 1...
Train Accuracy: 0.7125
Epoch: 2...
Train Accuracy: 0.7375
Epoch: 3...
Train Accuracy: 0.75
Epoch: 4...
Train Accuracy: 0.7875
Epoch: 5...
Train Accuracy: 0.8
Epoch: 6...
Train Accuracy: 0.825
Epoch: 7...
Train Accuracy: 0.8375
Epoch: 8...
Train Accuracy: 0.8625
Epoch: 9...
Train Accuracy: 0.8625
Epoch: 10...
Train Accuracy: 0.8875
Epoch: 11...
Train Accuracy: 0.8875
Epoch: 12...
Train Accuracy: 0.9
Epoch: 13...
Train Accuracy: 0.9
Epoch: 14...
Train Accuracy: 0.9
Epoch: 15...
Train Accuracy: 0.9125
Epoch: 16...
Train Accuracy: 0.9125
Epoch: 17...
Train Accuracy: 0.9125
Epoch: 18...
Train Accuracy: 0.9125
Epoch: 19...
Train Accuracy: 0.9125
Epoch: 20...
Train Accuracy: 0.925
Epoch: 21...
Train Accuracy: 0.925
Epoch: 22...
Train Accuracy: 0.925
Epoch: 23...
Train Accuracy: 0.925
Epoch: 24...
Train Accuracy: 0.925
Epoch: 25...
Train Accuracy: 0.925
Epoch: 26...
Train Accuracy: 0.925
Epoch: 27...
Train Accuracy: 0.9375
Epoch: 28...
Train Accur

### Results

In [17]:
# Accuracy on test/validation data
print(f"My implementation's accuracy: {nn.accuracy(x_validation, y_validation)}")

My implementation's accuracy: 0.9


# 2. TensorFlow/Keras Implementation

In [22]:
import tensorflow as tf
model_tf = tf.keras.Sequential([tf.keras.layers.Dense(64, activation="relu"),
                            tf.keras.layers.Dense(1, activation="sigmoid")])
model_tf.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                optimizer=tf.keras.optimizers.SGD(),
                metrics=["accuracy"])
history = model_tf.fit(X_train, np.squeeze(y_train), epochs=50, validation_data=(x_validation, np.squeeze(y_validation)))

Epoch 1/50

2022-12-10 18:09:46.707944: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50

2022-12-10 18:09:46.963355: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Results

In [23]:
# Accuracy
print(f"TensorFlow accuracy: {model_tf.evaluate(x_validation, y_validation)[1]}")

TensorFlow accuracy: 0.9000000357627869


**Results**:

Both models, the architecture made from scratch and the one from the Tensorflow/Keras implementation, with the same configuration, achieve the same validation performance, resulting in 90% accuracy. 