# Test for Zeroth Order Methods

This file tests the zeroth order methods to ensure they are functioning.

Notes:
- How to initialise all algorithms with the same weights
- Hyperparameters: hidden neurons, N, Mu
- Algorithms: standard/zeroth, sigmoid/ReLU
- Figures (with colours), to be saved

### Imports

In [None]:
import os
import sys

# Make the project's `src` directory (one level up from the working directory)
# importable. The membership check keeps repeated runs of this cell from
# appending the same entry more than once.
p = os.path.abspath(os.path.join(os.pardir, 'src'))
if p not in sys.path:
    sys.path.append(p)

In [None]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

from difference_methods import one_point_estimate, two_point_estimate, coordinate_estimate
from linear_zeroth_function import Linear_Zeroth
from sigmoid_zeroth_function import Sigmoid_Zeroth
from relu_zeroth_function import ReLU_Zeroth

## Test Difference Methods

Test using `torch.nn.functional.linear`, `torch.sigmoid` and `torch.nn.functional.relu` as the functions to differentiate:

In [None]:
# Setup testing functions

# Fixed affine map with a 3x2 weight matrix, so it takes inputs with 2 features
# and produces 3 outputs per row.
weight = torch.tensor([[1.0, -2.0], [-3.0, 4.0], [5.0, -6.0]])
bias = torch.tensor([[2.5, 0.0, -2.5]])


def test_f_0(x):
    """Apply the fixed affine map defined by `weight` and `bias` to `x`."""
    return torch.nn.functional.linear(x, weight, bias)


test_f_1 = torch.sigmoid

test_f_2 = torch.nn.functional.relu

# Setup values
fs = [test_f_0, test_f_1, test_f_2]
# One single-row input and one two-row batch, both with 2 features per row
xs = [torch.tensor([[0.0, 0.0]]), torch.tensor([[0.0, 0.0], [1.0, -2.5]])]
methods = [('one', one_point_estimate), ('two', two_point_estimate), ('coord', coordinate_estimate)]
mus = [0.1]  # optionally also 2.0
ns = [1000]  # optionally also 100

# Iterate over values: every function x input x estimator x mu (x n)
for i, f in enumerate(fs):
    print(f"test_f_{i}\n========")
    for x in xs:
        print(f"{x = }\n---------")
        for name, method in methods:
            for mu in mus:
                if name == 'coord':
                    # The coordinate estimate is called without a sample count
                    estimate = method(f, x, mu)
                    print(f"{name}({mu}) = {estimate}")
                else:
                    for n in ns:
                        estimate = method(f, x, mu, n)
                        print(f"{name}({mu},{n}) = {estimate}")
        print("---------")
    # Blank line between functions, but not after the last one; derived from
    # len(fs) so the separator stays correct if functions are added or removed.
    if i < len(fs) - 1:
        print("")

## Test Zeroth Order Modules and Functions

We create a small and simple neural network to test the functionality of the zeroth order variants of the linear, sigmoid, and ReLU modules/functions. We run a quick test on the Iris flower dataset.

### Data

In [None]:
# Read the Iris training split; the path is relative to the working directory.
data_train = pd.read_csv(filepath_or_buffer='../data/iris_train.csv')

# Echo the frame so its columns and size can be checked by eye.
print(data_train)

In [None]:
# convert string target values to numeric values
#       class 0: Iris-setosa
#       class 1: Iris-versicolor
#       class 2: Iris-virginica
# NOTE: `Series.map` yields NaN for any label that is not a key of the mapping,
# which includes the already-converted codes if this cell is run a second time.
SPECIES_CODES = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
data_train['species'] = data_train['species'].map(SPECIES_CODES)

# coerce every column to a numeric dtype (numeric strings such as '2' become numbers)
data_train = data_train.apply(pd.to_numeric)

# extract frequency of each species class, ordered by class code.
# `value_counts` omits classes that never occur, so reindex over every class
# code (filling with 0) to keep the counts aligned with the bar positions.
class_ids = sorted(SPECIES_CODES.values())
class_freq = data_train['species'].value_counts()
class_freq = list(class_freq.reindex(class_ids, fill_value=0))

# Visual data
graph = plt.bar(class_ids, class_freq)
plt.xticks(class_ids)
plt.ylabel('Frequency')
plt.xlabel('Species')
plt.title('Training Data')

plt.show()

In [None]:
# Prepare tensors for training

# Raw array backing the DataFrame
data_train_array = data_train.to_numpy()

# The first four columns are the features, the fifth column is the target
x_array = data_train_array[:, 0:4]
y_array = data_train_array[:, 4]

# Features as floating point values, targets as integer class indices
X = torch.tensor(x_array, dtype=torch.float)
Y = torch.tensor(y_array, dtype=torch.long)

# Sanity check on the feature tensor's shape
print(X.shape)

### Network Setup

In [None]:
# Layer widths shared by every network built below
INPUT_NEURONS, HIDDEN_NEURONS, OUTPUT_NEURONS = 4, 10, 3

# Settings passed to every zeroth order layer as `difference_method`, `mu`, `n`.
# NOTE(review): 'coord' presumably selects the coordinate estimate; MU and N are
# presumably the perturbation size and sample count - confirm in `src`.
DIFFERENCE_METHOD = 'coord'
MU, N = 0.001, 100

In [None]:
# Build the networks to compare.
#
# For each family (linear only, sigmoid hidden layer, ReLU hidden layer) there
# is a standard PyTorch network and zeroth order counterparts. The networks and
# their layers are created in a fixed order because torch.nn.Linear draws its
# initial weights from the global random number generator.
#
# NOTE(review): the zeroth order linear layers are created with bias=False while
# torch.nn.Linear keeps its default bias, so the paired networks do not have
# the same parameter count - confirm this is intended.

# Keyword arguments shared by every zeroth order layer
zeroth_kwargs = dict(difference_method=DIFFERENCE_METHOD, mu=MU, n=N)


def _zeroth_linear(n_in, n_out):
    """Return ``Linear_Zeroth(n_in, n_out)`` with ``bias=False`` and the shared zeroth order settings."""
    return Linear_Zeroth(n_in, n_out, bias=False, **zeroth_kwargs)


# --- Linear ---------------------------------------------------------------

# Standard
standard_linear = torch.nn.Sequential(
    torch.nn.Linear(INPUT_NEURONS, OUTPUT_NEURONS),
)
# Zeroth order
zeroth_linear = torch.nn.Sequential(
    _zeroth_linear(INPUT_NEURONS, OUTPUT_NEURONS),
)

# --- Sigmoid --------------------------------------------------------------

# Standard
standard_sigmoid = torch.nn.Sequential(
    torch.nn.Linear(INPUT_NEURONS, HIDDEN_NEURONS),
    torch.nn.Sigmoid(),
    torch.nn.Linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)
# Zeroth order activation only
partial_sigmoid = torch.nn.Sequential(
    torch.nn.Linear(INPUT_NEURONS, HIDDEN_NEURONS),
    Sigmoid_Zeroth(**zeroth_kwargs),
    torch.nn.Linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)
# Zeroth order throughout
zeroth_sigmoid = torch.nn.Sequential(
    _zeroth_linear(INPUT_NEURONS, HIDDEN_NEURONS),
    Sigmoid_Zeroth(**zeroth_kwargs),
    _zeroth_linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)

# --- ReLU -----------------------------------------------------------------

# Standard
standard_relu = torch.nn.Sequential(
    torch.nn.Linear(INPUT_NEURONS, HIDDEN_NEURONS),
    torch.nn.ReLU(),
    torch.nn.Linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)
# Zeroth order activation only
partial_relu = torch.nn.Sequential(
    torch.nn.Linear(INPUT_NEURONS, HIDDEN_NEURONS),
    ReLU_Zeroth(**zeroth_kwargs),
    torch.nn.Linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)
# Zeroth order throughout
zeroth_relu = torch.nn.Sequential(
    _zeroth_linear(INPUT_NEURONS, HIDDEN_NEURONS),
    ReLU_Zeroth(**zeroth_kwargs),
    _zeroth_linear(HIDDEN_NEURONS, OUTPUT_NEURONS),
)

# (label, model) pairs in the order they are trained and plotted
networks = [
    ("standard_linear", standard_linear),
    ("zeroth_linear", zeroth_linear),

    ("standard_sigmoid", standard_sigmoid),
    ("partial_sigmoid", partial_sigmoid),
    ("zeroth_sigmoid", zeroth_sigmoid),

    ("standard_relu", standard_relu),
    ("partial_relu", partial_relu),
    ("zeroth_relu", zeroth_relu),
]

### Train and Plot Losses

In [None]:
# Training Constants
LR = 0.01
NUM_EPOCH = 500
# Progress is printed on every LOG_EVERY-th epoch (starting with the first)
LOG_EVERY = 50

# Loss function
loss_func = torch.nn.CrossEntropyLoss()

# Train each network on the full training set and plot its loss curve
for name, network in networks:
    print(f"Network: {name}\n=========")
    # Optimiser
    optimiser = torch.optim.SGD(network.parameters(), lr=LR)

    # store all losses for visualisation
    all_losses = []

    # train a neural network; each epoch is one pass over all of X
    for epoch in range(NUM_EPOCH):
        # Perform forward pass
        Y_pred = network(X)
        # Compute loss
        loss = loss_func(Y_pred, Y)
        all_losses.append(loss.item())

        # print progress
        if epoch % LOG_EVERY == 0:
            # Accuracy is only reported, never trained on, so keep it out of
            # the autograd graph.
            with torch.no_grad():
                # Softmax is monotonic, so the predicted class is simply the
                # index of the largest output in each row.
                predicted = Y_pred.argmax(dim=1)
                total = predicted.size(0)
                correct = (predicted == Y).sum().item()
            # Print
            print('Epoch [%d / %d] Loss: %.4f  Accuracy: %.2f %%'
                  % (epoch + 1, NUM_EPOCH, loss.item(), 100 * correct / total))

        # Clear the gradients before running the backward pass.
        network.zero_grad()
        # Perform backward pass
        loss.backward()
        # Step optimiser
        optimiser.step()

    # Plot; title and axis labels identify which network each figure belongs to
    plt.figure()
    plt.plot(all_losses)
    plt.title(name)
    plt.xlabel('Epoch')
    plt.ylabel('Cross-entropy loss')
    plt.show()