# Test of bespoke neural network

Import necessary modules:

In [None]:
from classes.MyModel import MyModel
import torch
import torch.nn as nn
import math as math
from pathlib import Path
from torchviz import make_dot

from modules.helper_functions_tsp import (find_problem_size,
                                          find_distances_array,
                                          cost_fn_fact, 
                                          cost_fn_tensor, 
                                          hot_start, 
                                          hot_start_list_to_string)

from modules.config import GRAPH_DIR
from modules.helper_ML_functions import (find_device, 
                                         get_ready_to_train,
                                         train_model)

from modules.graph_functions import (plot_sine_activation,
                                     plot_model_training)

Set up constants:

In [None]:
# --- Problem definition ---
# Number of locations in the problem.
LOCATIONS = 8
# Decoding formulation; options: 'original', 'new'.
DECODING_FORMULATION = 'original'
# Whether Gray code is used.
GRAY = False

# --- Model and input ---
# Standard deviation for weight randomization.
STD_DEV = 0.5
# Number of layers in the model.
NUM_LAYERS = 2
# Size of input tensor; larger values reduce randomness.
SHOTS = 64

# --- Training ---
# Number of epochs for training.
NUM_EPOCHS = 50
# Learning rate.
LR = 0.0001
# Optimizer to use; options: 'Adam', 'SGD', 'RMSprop'.
OPTIMIZER = 'SGD'
# Momentum for the optimizer.
MOMENTUM = 0.0
# Importance of L2 regularization in the optimizer.
WEIGHT_DECAY = 0.0002

# --- Reporting ---
# Controls how much output the model produces.
VERBOSE = False
     

Read in data, report on data read and validate distance array:

In [None]:
# Load the distance data for the configured number of locations.  The helper
# returns the distance array together with the best known distance;
# print_comments=True asks it to report on what it read.
distance_array, best_dist = find_distances_array(
    LOCATIONS,
    print_comments=True,
)

Evaluate cost function and clear cache:

In [None]:
# Build the cost function for this problem instance from the factory helper.
cost_fn = cost_fn_fact(
    LOCATIONS,
    distance_array,
    GRAY,
    method=DECODING_FORMULATION,
    verbose=VERBOSE,
)
# Clear the cost function's cache before it is used.
cost_fn.clear_cache()

Check if CUDA is available and set the device

In [None]:
# Ask the project helper which torch device to use (per the notebook text it
# checks for CUDA availability -- confirm in helper_ML_functions) and report it.
device = find_device()
print(f"Using device: {device}")

Find problem size:

In [None]:
# Number of qubits required to encode LOCATIONS locations under the chosen
# decoding formulation; also passed to MyModel when the model is built.
qubits = find_problem_size(LOCATIONS, DECODING_FORMULATION)
print(f'There are {qubits} qubits needed for {LOCATIONS} locations in the {DECODING_FORMULATION} formulation.')

Find a hot start using a lazy classical algorithm and find the quality of the hot start:

In [None]:
# 1. Classical hot start: an ordered list of locations.
hot_start_list = hot_start(distance_array, LOCATIONS)
print(f'The hot start location list is {hot_start_list}')

# 2. Encode that route in the binary representation of the chosen formulation.
bin_hot_start_list = hot_start_list_to_string(
    hot_start_list,
    LOCATIONS,
    GRAY,
    DECODING_FORMULATION,
)
print(f'This is equivalent to a binary list: {bin_hot_start_list}')

# 3. Wrap the encoding in an outer list so the tensor has a leading batch
#    dimension of size one.
bin_hot_start_list_tensor = torch.tensor([bin_hot_start_list])
print(f'bin_hot_start_list_tensor = {bin_hot_start_list_tensor}')

# 4. Cost of the hot start, copied and detached from any existing graph and
#    then marked as requiring gradients.
distance_tensor = cost_fn_tensor(bin_hot_start_list_tensor, cost_fn)
distance_tensor = distance_tensor.clone().detach().requires_grad_(True)
print(f'The hot start distance is {float(distance_tensor):.2f}, compared to a best distance of {best_dist:.2f}.')

Set up input as repeated hot start tensor calculated above:

In [None]:
# Convert the single hot-start row to floating point, then move it to the
# selected device.
unrepeated_input = bin_hot_start_list_tensor.float()
unrepeated_input = unrepeated_input.to(device)

# Stack SHOTS copies of the row along the first dimension and mark the
# resulting tensor as requiring gradients.
my_input = unrepeated_input.repeat(SHOTS, 1)
my_input = my_input.requires_grad_(True)

Set up model with gradient required:

In [None]:

# Instantiate the bespoke network, then move it to the selected device and
# switch gradient tracking on for every parameter in one call.
model = MyModel(qubits, NUM_LAYERS, STD_DEV, cost_fn)
model = model.to(device).requires_grad_(True)

Report on model parameters:

In [None]:
# Collect (element count, trainable?) for every parameter in a single pass,
# then derive both totals from that list.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)

print(f'There are {total_params} parameters in total, of which {trainable_params} are trainable')

# List each named parameter with its gradient flag.
for name, param in model.named_parameters():
    print(f"Parameter {name} requires_grad: {param.requires_grad}")

Test the sine activation function and print out a graph:

Plot the sine activation function:

In [None]:
# Draw the sine activation function using the project's graph helper.
plot_sine_activation()

Print out model details, including the computation graph:

In [None]:
# Run one forward pass on the repeated hot-start input.
output = model(my_input)
# float() only works on a single-element tensor, so this also confirms that
# the model returns one scalar value.
lowest_cost = float(output)
print(f'Output = {output}')

# Base path (no extension) for the rendered graph inside GRAPH_DIR.
filename = Path(GRAPH_DIR).joinpath('torchviz')

param_dict = dict(model.named_parameters())
print(param_dict)  # Debugging: print the parameters to ensure they seem reasonable

# Build the autograd graph once and reuse it: first written to disk as a PNG,
# then left as the final expression so the notebook displays it inline.
graph = make_dot(output, params=param_dict)
graph.render(filename, format="png")
graph

Set up criterion, optimizer and target ready to train model.

In [None]:
# Obtain the training target, loss criterion and optimizer from the project
# helper, configured by the constants defined near the top of the notebook.
target, criterion, optimizer = get_ready_to_train(
    model,
    OPTIMIZER,
    LR,
    WEIGHT_DECAY,
    momentum=MOMENTUM,
)

Train model and print out results:

In [None]:
# Train for NUM_EPOCHS epochs.  The helper returns the lowest cost, the epoch
# at which it occurred, and three history sequences.
(
    lowest_cost,
    epoch_lowest_cost,
    epoch_hist,
    loss_hist,
    lowest_history,
) = train_model(
    NUM_EPOCHS, model, my_input, target, criterion, optimizer,
    print_results=True,
)

In [None]:
# Summarise training: the lowest cost and its epoch, set against the best
# known distance and the cost of the hot start the model was seeded with.
print(f'The lowest cost found was {lowest_cost:.3f} at epoch {epoch_lowest_cost}.')
print(f'The best known cost is {best_dist:.3f} and the hot start cost was {float(distance_tensor):.3f}.')

Plot loss ratio by epoch:

In [None]:
# Plot the epoch and loss histories returned by train_model
# (presumably loss against epoch -- confirm in graph_functions).
plot_model_training(epoch_hist, loss_hist)


Print out parameters:

In [None]:
# Walk every submodule rather than only the direct children, so that Linear
# layers held inside containers (e.g. nn.Sequential or nn.ModuleList) are
# reported too.  Direct children are still visited in the same order.
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        print('weight:', layer.weight)
        print('bias:', layer.bias)