### Imports

In [None]:
import os
import sys

# Make the project's source directory importable from this notebook.
p = os.path.abspath('../src/')
# Register the directory only once so re-running the cell adds no duplicates.
if sys.path.count(p) == 0:
    sys.path.append(p)

In [None]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

### Data preprocessing

In [None]:
# Read the training split from disk into a DataFrame
train_csv_path = '../data/iris_train.csv'
data_train = pd.read_csv(train_csv_path)

# Echo the loaded frame as a quick sanity check
print(data_train)

In [None]:
# Convert string target values to numeric values
#       class 0: Iris-setosa
#       class 1: Iris-versicolor
#       class 2: Iris-virginica
species_to_class = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

# Only encode while the column still holds labels. Re-running this cell on an
# already-encoded column would otherwise map every value to NaN, because the
# numeric codes are not keys of the mapping.
if not pd.api.types.is_numeric_dtype(data_train['species']):
    encoded_species = data_train['species'].map(species_to_class)
    # Labels missing from the mapping come back as NaN; fail loudly here rather
    # than letting NaN targets flow into the tensors built later.
    unmapped = encoded_species.isna()
    if unmapped.any():
        unknown = sorted(set(data_train.loc[unmapped, 'species'].astype(str)))
        raise ValueError(f"Unrecognised species labels: {unknown}")
    data_train['species'] = encoded_species

# also convert all string numeric values to float ['2' -> 2.0]
data_train = data_train.apply(pd.to_numeric)

# extract frequency of each species class, ordered by class id; classes that do
# not occur get a count of 0 so the list always lines up with the bar positions
class_ids = list(range(len(species_to_class)))
class_freq = data_train['species'].value_counts()
class_freq = list(class_freq.reindex(class_ids, fill_value=0))

# Visualise the class balance of the training data
graph = plt.bar(class_ids, class_freq)
plt.xticks(class_ids)
plt.ylabel('Frequency')
plt.xlabel('Species')
plt.title('Training Data')

plt.show()

In [None]:
# Turn the training frame into tensors for the network

# DataFrame -> plain 2-D array
data_train_array = data_train.to_numpy()

# First four columns are the features, the fifth column is the target
x_array, y_array = data_train_array[:, :4], data_train_array[:, 4]

# Features as 32-bit floats, targets as 64-bit integer class ids
X = torch.tensor(x_array, dtype=torch.float32)
Y = torch.tensor(y_array, dtype=torch.int64)

print(X.shape)

### Network Setup

In [None]:
# Layer sizes for the MLP (a single hidden layer).
# The number of hidden layers is fixed at one for iris; it is meant to become a
# hyperparameter once a larger dataset is used.
INPUT_NEURONS, OUTPUT_NEURONS = 4, 3  # width of the input layer, width of the output layer
hidden_neurons_range = [10]  # candidate widths for hidden layer 1

In [None]:
# Network architecture: activation functions

# Normal network with sigmoid
def create_standard_sigmoid_network(hidden_neurons, input_neurons=None, output_neurons=None):
    """Build a one-hidden-layer MLP with a sigmoid activation.

    Args:
        hidden_neurons: Width of the hidden layer.
        input_neurons: Width of the input layer. When omitted, the
            module-level ``INPUT_NEURONS`` is used.
        output_neurons: Width of the output layer. When omitted, the
            module-level ``OUTPUT_NEURONS`` is used.

    Returns:
        torch.nn.Sequential: ``Linear -> Sigmoid -> Linear``. The last layer is
        a plain linear layer, so the outputs are unnormalised scores.
    """
    # Resolve the defaults at call time so the notebook-level constants are
    # only needed when the caller does not pass explicit sizes.
    if input_neurons is None:
        input_neurons = INPUT_NEURONS
    if output_neurons is None:
        output_neurons = OUTPUT_NEURONS

    return torch.nn.Sequential(
        torch.nn.Linear(input_neurons, hidden_neurons),
        torch.nn.Sigmoid(),
        torch.nn.Linear(hidden_neurons, output_neurons)
    )

# Normal network with relu
def create_standard_relu_network(hidden_neurons, input_neurons=None, output_neurons=None):
    """Build a one-hidden-layer MLP with a ReLU activation.

    Args:
        hidden_neurons: Width of the hidden layer.
        input_neurons: Width of the input layer. When omitted, the
            module-level ``INPUT_NEURONS`` is used.
        output_neurons: Width of the output layer. When omitted, the
            module-level ``OUTPUT_NEURONS`` is used.

    Returns:
        torch.nn.Sequential: ``Linear -> ReLU -> Linear``. The last layer is a
        plain linear layer, so the outputs are unnormalised scores.
    """
    # Resolve the defaults at call time so the notebook-level constants are
    # only needed when the caller does not pass explicit sizes.
    if input_neurons is None:
        input_neurons = INPUT_NEURONS
    if output_neurons is None:
        output_neurons = OUTPUT_NEURONS

    return torch.nn.Sequential(
        torch.nn.Linear(input_neurons, hidden_neurons),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_neurons, output_neurons)
    )

# Registry of (label, factory) pairs; each factory takes the hidden-layer width
network_funs = []
network_funs.append(("standard_sigmoid", create_standard_sigmoid_network))
network_funs.append(("standard_relu", create_standard_relu_network))

### Train and Plot Losses

In [None]:
# Settings for the training sweep
NUM_EPOCH = 500  # number of passes over the training tensors per configuration

# Five learning rates, logarithmically spaced between 1e-4 and 1e-1
lr_range = list(np.logspace(-4, -1, num=5, base=10.0))

# Training criterion -> could be treated as a hyperparameter as well
loss_func = torch.nn.CrossEntropyLoss()

In [None]:
# Train each network configuration and plot its loss curve.
# For every (architecture, hidden width, learning rate) combination a fresh
# network is trained on the full training tensors for NUM_EPOCH epochs.

# Fixed seed: makes Restart & Run All reproducible, and lets every learning
# rate start from the same initial weights for a given architecture and width.
SEED = 0

for name, network_fun in network_funs:
    for hidden_neurons in hidden_neurons_range:
        for lr in lr_range:
            print(f"Network: {name}, HIDDEN_NEURONS: {hidden_neurons}, LR: {lr} \n=========")

            # Re-seed right before construction so weight initialisation is
            # identical across learning rates and across notebook runs.
            torch.manual_seed(SEED)

            # Create new instance of network
            network = network_fun(hidden_neurons)

            # Optimiser
            optimiser = torch.optim.SGD(network.parameters(), lr=lr)

            # store all losses for visualisation
            all_losses = []

            # Full-batch training: one forward/backward pass over X per epoch
            for epoch in range(NUM_EPOCH):
                # Perform forward pass
                Y_pred = network(X)
                # Compute loss
                loss = loss_func(Y_pred, Y)
                all_losses.append(loss.item())

                # print progress every 50 epochs
                if epoch % 50 == 0:
                    # Metrics only: no gradient tracking needed. The predicted
                    # class is the index of the largest score; softmax is
                    # monotonic, so applying it first would not change that.
                    with torch.no_grad():
                        predicted = Y_pred.argmax(dim=1)
                        correct = (predicted == Y).sum().item()
                    total = Y.size(0)
                    print('Epoch [%d / %d] Loss: %.4f  Accuracy: %.2f %%'
                        % (epoch + 1, NUM_EPOCH, loss.item(), 100 * correct / total))

                # Clear the gradients of the optimised parameters before the backward pass
                optimiser.zero_grad()
                # Perform backward pass
                loss.backward()
                # Step optimiser
                optimiser.step()

            # Plot the loss curve, titled so each figure identifies its configuration
            fig, ax = plt.subplots()
            ax.plot(all_losses)
            ax.set_xlabel("Epoch")
            ax.set_ylabel("Loss")
            ax.set_title(f"{name} | hidden={hidden_neurons} | lr={lr:.2e}")
            plt.show()
            # Release the figure; one is created per configuration
            plt.close(fig)