# Lab 8.1: nodes modification
by Domrachev Ivan, B20-Ro-01

In [1]:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras import layers

from nn_from_scratch.neurons import Convolution
from nn_from_scratch.nodes import ReLU, Vectorization

2023-11-13 23:41:48.794824: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-13 23:41:48.844324: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-13 23:41:48.845027: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Part 1. Batched & Biased Convolution

The convolutional layer was extended to support batched inputs and an optional bias.
> Note: the solution is far from general; e.g., it lacks padding and stride settings, as well as support for batches of pictures.

In [2]:
# Fix NumPy's global seed so the random tensors below are identical on every
# Restart & Run All.
# NOTE(review): this also pins the layer's initial weights only if Convolution
# draws them from NumPy's global RNG — confirm.
np.random.seed(0)

# The input is indexed batch-first with channels on axis 1 in the cells below.
input_dim = (10, 3, 7, 5)
kernel_size = 2
output_layers = 20
conv = Convolution(input_dim, kernel_size, output_layers=output_layers, use_bias=False)
output_dim = conv._output_dim

# Random input values (x itself, and assumed partial derivative)
x_input = np.random.random(input_dim).astype(dtype=np.float32)
dL_dy = np.random.random(output_dim).astype(dtype=np.float32)

# Forward pass, then backward pass on the same layer instance.
output = conv.forward(x_input)
dL_dx = conv.backward(dL_dy)
dL_dw = conv._W_pd
# The layer is built with use_bias=False, so no bias or bias gradient is read here.

In [3]:
# Shapes of the layer output, the layer weights and the input batch, in that order.
output.shape, conv._W.shape, x_input.shape

((10, 20, 6, 4), (20, 3, 2, 2), (10, 3, 7, 5))

In [4]:
# b: batch size, (m, n): spatial size of the input, (p, q): spatial size of the kernel
b, m, n = input_dim[0], input_dim[2], input_dim[3]
p = q = kernel_size

# Every output element must equal the sum of the element-wise product of the
# matching input window (all channels) with the corresponding kernel.
assert all(
    np.allclose(
        output[i, kern, j, k],
        np.sum(x_input[i, :, j:j + p, k:k + q] * conv._W[kern]),
    )
    for i, kern, j, k in np.ndindex(b, output_layers, m - p + 1, n - q + 1)
)

As before, let's compare its results with the TensorFlow implementation:

In [5]:
# Shape of the layer weights before they are reordered for Keras.
conv.W.shape

(20, 3, 2, 2)

In [6]:
# The Keras layer below runs in channels_last mode, so the channel axis (1)
# of the input is moved to the end.
nhwc_input = np.moveaxis(x_input, source=1, destination=-1)
x_input_batched = tf.constant(nhwc_input, dtype=tf.float32)

# Reorder the weight axes (0, 1, 2, 3) -> (2, 3, 1, 0) before handing them to
# the Keras kernel initializer.
keras_kernel = conv.W.transpose(2, 3, 1, 0)
weights_reshaped = tf.constant(keras_kernel, dtype=tf.float32)

2023-11-13 23:41:51.534355: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-13 23:41:51.535228: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [7]:
# Reference Keras layer. It is configured from the same hyper-parameters as the
# custom layer (instead of repeating the literals 20 and 2) so the two cannot
# drift apart, and it starts from the custom layer's own weights.
conv_keras = layers.Conv2D(
    filters=output_layers,
    kernel_size=kernel_size,
    input_shape=x_input_batched.shape[1:],
    use_bias=False,
    kernel_initializer=tf.keras.initializers.Constant(weights_reshaped),
    data_format='channels_last'
)

In [8]:
# Shape of the input after the channel axis was moved to the end.
x_input_batched.shape

TensorShape([10, 7, 5, 3])

In [9]:
# persistent=True because the tape is queried twice later on
# (once for the input gradient, once for the weight gradient).
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x_input_batched)  # Watch the input tensor for gradient computation
    conv_output = conv_keras(x_input_batched)

# NumPy copy of the Keras result, still with channels on the last axis.
conv_output_np = conv_output.numpy()

In [10]:
# Always start from the Keras tensor rather than from conv_output_np itself:
# re-running this cell must not move the channel axis a second time.
conv_output_nhwc = conv_output.numpy()
print(conv_output_nhwc.shape)
# Bring the channel axis back to position 1 to match the custom layer's layout.
conv_output_np = np.moveaxis(conv_output_nhwc, -1, 1)
print(conv_output_np.shape)

(10, 6, 4, 20)
(10, 20, 6, 4)


In [11]:
# Shape of the custom layer's output, for comparison with the Keras shape above.
output.shape

(10, 20, 6, 4)

1. Check that layer output is correct

In [12]:
# First output channel of the first sample, as computed by the custom layer.
output[0, 0]

array([[ 0.04081991,  0.00420363,  0.09719883, -0.00735995],
       [ 0.06143697,  0.07457685,  0.00036471,  0.07781924],
       [ 0.00496632,  0.04221429,  0.06450655, -0.00169559],
       [ 0.00320771,  0.03251834, -0.03337364,  0.08637109],
       [ 0.00873353, -0.00916717,  0.12397933, -0.0313285 ],
       [-0.00855705,  0.0378824 ,  0.00410559,  0.16866183]],
      dtype=float32)

In [13]:
# The same slice taken from the Keras result, for a side-by-side comparison.
conv_output_np[0, 0]

array([[ 0.04081991,  0.00420364,  0.09719883, -0.00735995],
       [ 0.06143697,  0.07457686,  0.00036472,  0.07781925],
       [ 0.00496632,  0.04221429,  0.06450653, -0.00169559],
       [ 0.00320773,  0.03251833, -0.03337364,  0.0863711 ],
       [ 0.00873353, -0.00916717,  0.12397933, -0.0313285 ],
       [-0.00855705,  0.0378824 ,  0.00410559,  0.16866183]],
      dtype=float32)

In [14]:
# Largest absolute element-wise deviation between the Keras and the custom output.
np.abs(conv_output_np - output).max() < 1e-6

True

2. Check that backpropagation for both input and weights is correct

In [15]:
# Keras runs in channels_last mode, so the upstream gradient gets its channel
# axis (1) moved to the end before it is passed to the tape.
dL_dy_keras = tf.constant(np.moveaxis(dL_dy, 1, -1), dtype=tf.float32)

dL_dx_keras = tape.gradient(
    conv_output, x_input_batched, output_gradients=dL_dy_keras
)
# trainable_variables is a list, so the result is a list of gradients as well.
dL_dw_keras = tape.gradient(
    conv_output, conv_keras.trainable_variables, output_gradients=dL_dy_keras
)

# No squeeze() here: it would silently drop any size-1 axis (batch of one,
# single channel, 1x1 kernel) and moveaxis would then act on the wrong axes.
dL_dx_keras_np = np.moveaxis(dL_dx_keras.numpy(), -1, 1)
dL_dw_keras_np = np.moveaxis(dL_dw_keras[0].numpy(), -1, 0)

In [16]:
# Both input gradients must have the same shape before they are compared.
dL_dx_keras_np.shape, dL_dx.shape

((10, 3, 7, 5), (10, 3, 7, 5))

In [17]:
# Mean absolute deviation between the Keras and the custom input gradient.
np.abs(dL_dx_keras_np - dL_dx).mean() < 1e-6

True

In [18]:
# Move axis 1 of the custom weight gradient to the end so that its layout
# matches the reordered Keras gradient, then compare the mean absolute deviation.
dL_dw_channels_last = np.moveaxis(dL_dw, 1, -1)
np.abs(dL_dw_keras_np - dL_dw_channels_last).mean() < 1e-5

True

## Support for tensor inputs

### ReLU

It's quite trivial to add 4D tensor support to ReLU, but anyway, let's validate its functionality:

In [19]:
# Second convolution run on a differently shaped input. kernel_size and
# output_layers are reused from the first convolution cell.
# NOTE(review): use_bias is left at its default here; presumably that enables
# the bias, since _B and _B_pd are read below — confirm against Convolution.
input_dim = (5, 3, 10, 10)
conv = Convolution(input_dim, kernel_size, output_layers=output_layers)
output_dim = conv._output_dim

# Random input values (x itself, and assumed partial derivative)
x_input = np.random.random(input_dim)
dL_dy = np.random.random(output_dim)

# Forward pass, then backward pass on the same layer instance.
output = conv.forward(x_input)
dL_dx = conv.backward(dL_dy)
dL_dw = conv._W_pd
bias = conv._B
dL_db = conv._B_pd


In [20]:
input_dim = (2, 2, 2, 2)

# Random input values (x itself, and assumed partial derivative)
x_input = np.random.randint(-10, 10, size=input_dim)
dL_dy = np.random.randint(-10, 10, size=input_dim)
print(x_input)
relu = ReLU(input_dim)

# Forward call
y_value = relu.forward(x_input)

# Backpropagation
dL_dx = relu.backward(dL_dy)

# Check against the reference definition instead of relying on visual inspection.
assert np.array_equal(y_value, np.maximum(x_input, 0))
# Entries with x == 0 are left out of the gradient check: the derivative
# there is a matter of convention.
positive = x_input > 0
negative = x_input < 0
assert np.array_equal(dL_dx[positive], dL_dy[positive])
assert not np.any(dL_dx[negative])

y_value, dL_dx

[[[[-10   7]
   [ -1   4]]

  [[  2   4]
   [ -2  -4]]]


 [[[  7   6]
   [  4  -4]]

  [[ -9  -1]
   [  1  -2]]]]


(array([[[[0, 7],
          [0, 4]],
 
         [[2, 4],
          [0, 0]]],
 
 
        [[[7, 6],
          [4, 0]],
 
         [[0, 0],
          [1, 0]]]]),
 array([[[[ 0, -2],
          [ 0,  5]],
 
         [[-2, -9],
          [ 0,  0]]],
 
 
        [[[-5, -5],
          [-9,  0]],
 
         [[ 0,  0],
          [ 2,  0]]]]))

As one can see, the functionality works.

## Vectorization

Another handy node is `Vectorization` — it flattens each sample of the input into a vector. It's very handy, since it allows us to utilize the existing code for CNNs as well!

In [21]:
input_dim = (2, 2, 2, 2)

# Random input values (x itself, and assumed partial derivative)
x_input = np.random.randint(-10, 10, size=input_dim)

vectorize = Vectorization(input_dim)
output_dim = vectorize._output_dim
dL_dy = np.random.randint(-10, 10, size=output_dim)

# Forward call
y_value = vectorize(x_input)

# Backpropagation
dL_dx = vectorize.backward(dL_dy)

# The forward pass keeps the batch axis and flattens the rest; the backward
# pass must restore the original input shape.
assert y_value.shape == (input_dim[0], int(np.prod(input_dim[1:])))
assert dL_dx.shape == input_dim

y_value.shape, dL_dx.shape

((2, 8), (2, 2, 2, 2))