# Testing av koden

For å se om alt fungerer som det skal, har vi implementert tester av forskjellige lag og funksjoner.
Vi bruker blandt annet den innebgydde funksjonen assert som sjekker om påstanden til venstre er korrekt, dersom den ikke er det returnerer den AssertionError med kommentaren til høyre for komma. 

For eksempel har vi:
assert X.shape == (b,m,n), f"X.shape={X.shape}, expected {(b,m,n)}"
Dersom X har dimensjon (b,m,n) vil koden kjøre normalt, dersom den ikke har det vil den returnere AssertionError og "X.shape = {X.shape}, expected {(b,m,n)}"

In [1]:
from layers import *
from neural_network import NeuralNetwork
from utils import _jit_onehot, onehot
import numpy as np
from optimizer import Adam

In [2]:
# We choose some arbitrary values for the dimensions
b = 6
n_max = 7
m = 8
n = 5

d = 10
k = 5
p = 20

#r = 3
#L_1 = 2

# Create an arbitrary dataset
x = np.random.randint(0, m, (b,n))
y = np.random.randint(0, m, (b,n_max))

# initialize the layers
feed_forward = FeedForward(d,p)
self_attention = SelfAttention(d,k)
embed_pos = EmbedPosition(n_max,m,d)
un_embed = LinearLayer(d,m, True)
softmax = Softmax()
transformerblock = TransformerBlock(d,k,p)


# a manual forward pass
X = _jit_onehot(x, m)  # (b, m, n)
z0 = embed_pos.forward(X)
z1 = feed_forward.forward(z0)
z2 = self_attention.forward(z1)
z3 = un_embed.forward(z2)
Z = softmax.forward(z3) 

print(f'z0 dim: {z0.shape}')
print(f'z1 dim: {z1.shape}')
print(f'z2 dim: {z2.shape}')
print(f'z3 dim: {z3.shape}')
print(f'Z dim: {Z.shape}')


# Check the shapes
assert X.shape == (b,m,n), f'X.shape={X.shape}, expected {(b,m,n)}'
assert z0.shape == (b,d,n), f'z0.shape={z0.shape}, expected {(b,d,n)}'
assert z1.shape == (b,d,n), f'z1.shape={z1.shape}, expected {(b,d,n)}'
assert z2.shape == (b,d,n), f'z2.shape={z2.shape}, expected {(b,d,n)}'
assert z3.shape == (b,m,n), f'z3.shape={z3.shape}, expected {(b,m,n)}'
assert Z.shape == (b,m,n), f'Z.shape={Z.shape}, expected {(b,m,n)}'

#is X one-hot?
assert X.sum() == b*n, f'X.sum()={X.sum()}, expected {b*n}'


assert np.allclose(Z.sum(axis=1), 1), f'Z.sum(axis=1)={Z.sum(axis=1)}, expected {np.ones(b)}'
assert np.abs(Z.sum() - b*n) < 1e-5, f'Z.sum()={Z.sum()}, expected {b*n}'
assert np.all(Z>=0), f'Z={Z}, expected all entries to be non-negative'


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


z0 dim: (6, 10, 5)
z1 dim: (6, 10, 5)
z2 dim: (6, 10, 5)
z3 dim: (6, 8, 5)
Z dim: (6, 8, 5)


In [7]:
# y_pred: (batch, m, n)
# y_true: (batch, m, n)
# m = number of classes

# test the forward pass

x = np.random.randint(0, m, (b,n))
print(f'shape of x {x.shape}')
X = jit_onehot(x, m) # dim (b,m,n)
print(f'The dimension of X is: {X.shape}')

# we test with a y that is shorter than the maximum length
n_y = n_max - 1
y = np.random.randint(0, m, (b, n_y))
print(f'The dimension of y is: {y.shape}')


# initialize a neural network based on the layers above
network = NeuralNetwork(r=2, d=10, m=2, L=2, p=20, k=5)

# and a loss function
loss = CrossEntropy()

# do a forward pass
Z = network.forward(X)
print(f'The dimension of Z is: {Z.shape}')

# compute the loss
L = loss.forward(Z, y)

# get the derivative of the loss wrt Z
grad_Z = loss.backward()

# Does not work on our network because we did not save our gradient in a array and perform a backward pass
_ = network.backward(grad_Z)

#and and do a gradient descent step

_ = network.step_gd(Adam())


shape of x (6, 5)
The dimension of X is: (6, 8, 5)
The dimension of y is: (6, 6)


AssertionError: 

In [2]:
"""
Here you may add additional tests to for example:

- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass
- Check if the parameters change when you perform a gradient descent step
- Check if the loss decreases when you perform a gradient descent step

This is voluntary, but could be useful.
"""

import numpy as np
from tqdm import trange
from typing import Any
import layers as l
from optimizer import Adam
from utils import *
from training import make_model, training_sorting, training_addition
from data_generators import get_train_test_sorting


def get_test_data():
   data_set = get_train_test_sorting(length=5, num_ints=2, samples_per_batch=250, n_batches_train=10, n_batches_test=2)
   train_set = list(zip(data_set['x_train'], data_set['y_train']))
   return train_set


def test_forward_shape():
   model = make_model()

   train_set = get_test_data()

   batch_x = onehot(train_set[0][0], 2)
   out = model.forward(batch_x)
   assert out.shape == (250, 2, 9)


def test_backward():
   model = make_model()
   grad_loss = np.random.randn(250)
   train_set = get_test_data()

   batch_x = jit_onehot(train_set[0][0], 2)
   Y_pred = model.forward(batch_x)
   loss_function = l.CrossEntropy()
   y_true = train_set[1][0]
   Y_true = jit_onehot(y_true, 2)
   Y_true_pad = np.pad(batch_x, ((0, 0), (0, 0), (Y_pred.shape[2] - batch_x.shape[2], 0)))

   loss_function.forward(Y_pred, Y_true_pad)
   grad_loss = loss_function.backward()
   model.backward(grad_loss)
  

def test_adam():
   np.seterr(all='raise')

   # Initialize model and optimizer
   model = make_model()
   optimizer = Adam()
   # Overfit on a single example

   # Get all training data
   train_set = get_test_data()

   loss_function = l.CrossEntropy()

   m = 2

   # First input value from training set
   input = train_set[0][0]
   output = train_set[0][1]
   batch_x = jit_onehot(input, m)

   for _ in range(1000):
      y_hat = model.forward(batch_x)
      # y_hat: (b, m, n)
      
      y_hat_indices = np.argmax(y_hat, axis=1)
      Y_pred

      Y_pred_slice = y_hat[:,:,-Y_true.shape[2]:]
      correct = y_hat_indices == output
      accuracy = np.mean(correct)

      # y_true is not one-hot encoded, but instead class indices
      loss_value = loss_function.forward(y_hat, y_true=train_set[1][0]).mean()


      # dLdY: (b, m, n)
      grad_loss = loss_function.backward()


      model.backward(grad_loss)


      model.step_gd(optimizer)


      print(f'{accuracy=:.5f}, {loss_value=:.5f}')


def module_backward_works(input, out_shape: tuple, module):
   # Not done, work in progress


   # Want dY/dX of this value
   grad_output = np.ones(out_shape)


   # Compute the forward pass
   forward_result = module.forward(input)


   # Now do backward with this in mind
   dL_dx = module.backward(grad_output)


   perturbed = input + delta_input
   forward_perturbed = module.forward(perturbed)
   # print(forward_perturbed)
   print(((forward_perturbed - forward_result).sum() / delta))
   # print(grad_output)
   # assert np.allclose((forward_perturbed - forward_result) / delta, grad_output, atol=1e-6)

def test_backward_correct():
   batch_size = 10
   in_dims = 2
   out_dims = 3
   seq_len = 5
   module = l.LinearLayer(in_dims, out_dims, has_bias=False)
   input = np.random.randn(batch_size, in_dims, seq_len)
   module_backward_works(input, (batch_size, out_dims, seq_len), module)

if __name__ == '__main__':
   test_forward_shape()
   test_backward()
   test_adam()
   #test_backward_correct()

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m('wrong tuple length for $6load_attr.1: ', 'expected 2, got 3')[0m
[0m[1mDuring: typing of exhaust iter at /Users/hanna/Library/CloudStorage/OneDrive-NTNU/TMA4320 - Vitber/Vitber-indmat-11/utils.py (71)[0m
[1m
File "utils.py", line 71:[0m
[1mdef _jit_onehot(x, m):
    <source elided>
    '''
[1m    b, n = x.shape
[0m    [1m^[0m[0m


In [None]:
#check if loss is non-negative
assert L >= 0, f"L={L}, expected L>=0"
assert grad_Z.shape == Z.shape, f"grad_Z.shape={grad_Z.shape}, expected {Z.shape}"

#check if onehot(y) gives zero loss
Y = onehot(y, m)
L = loss.forward(Y, y)
assert L < 1e-5, f"L={L}, expected L<1e-5"
