In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
class GradientDescent():
    """Plain gradient-descent update rule with a step-size based stop test.

    Attributes:
        lr: learning rate that scales every gradient.
        eps: accumulated step sizes below this value count as converged.
        delta: running sum of ``lr * ||grad||`` over every gradient passed to
            ``optimize``. This class never resets it; a caller that wants a
            per-iteration convergence test has to reset ``delta`` itself.
    """

    # Accumulated step sizes at or below this are treated as "no update applied yet".
    _MIN_DELTA = 1e-9

    def __init__(self, lr = 1e-3, eps = 1e-4):
        self.lr = lr
        self.eps = eps
        self.delta = 0

    def optimize(self, target, gradients):
        """Apply one descent step to each target and record the step size.

        Args:
            target: iterable of arrays to update.
            gradients: iterable of gradients, paired positionally with ``target``.

        Returns:
            A new list holding ``t - lr * grad`` for every pair; the inputs are
            not modified in place.
        """
        optimized = []
        for t, grad in zip(target, gradients):
            optimized.append(t - self.lr * grad)
            # With no ord/axis, norm() is the 2-norm of the flattened gradient.
            self.delta += self.lr * np.linalg.norm(grad)
        return optimized

    def stop(self):
        """Report whether the accumulated step size signals convergence.

        Returns:
            True when at least one non-negligible update has been recorded
            (``delta`` above ``_MIN_DELTA``) and ``delta`` is still below ``eps``.

        The previous condition, ``not(delta > 1e-9 or delta < eps)``, was
        unsatisfiable for any ``eps`` larger than 1e-9, so it never returned True.
        """
        return bool(self._MIN_DELTA < self.delta < self.eps)

In [3]:
class Node:
    """Base class holding the input/output shape bookkeeping of a layer.

    Attributes:
        n_input: input shape as a tuple.
        input_dim: number of axes in ``n_input``.
        n_output: output shape as a tuple.
        output_dim: number of axes in ``n_output``.
        inner_dim: number of axes the layer works with internally.
        input: initialised to None here.
        labels: initialised to None here.
    """

    def __init__(self, input_dim, output_dim, inner_ndim):
        """Store the shapes and their ranks.

        Args:
            input_dim: input shape, either a single integer or an iterable of integers.
            output_dim: output shape, either a single integer or an iterable of integers.
            inner_ndim: number of axes used internally by the layer.
        """
        self.n_input = self._as_shape(input_dim)
        self.input_dim = len(self.n_input)
        self.n_output = self._as_shape(output_dim)
        # The rank comes from the output shape itself; it used to be keyed on
        # the type of input_dim, which gave 1 for an int input with a tuple output.
        self.output_dim = len(self.n_output)
        self.inner_dim = inner_ndim
        self.input = None
        self.labels = None

    @staticmethod
    def _as_shape(dim):
        """Return ``dim`` as a tuple; a lone integer becomes a 1-tuple."""
        return (dim, ) if isinstance(dim, (int, np.integer)) else tuple(dim)

    def change_dims(self, x, dim):
        """Return ``x`` with exactly ``dim`` axes.

        Args:
            x: NumPy array.
            dim: requested number of axes.

        Returns:
            ``x`` itself when it already has ``dim`` axes. With more axes it is
            reshaped to its last ``dim`` axes, which raises ValueError from
            NumPy unless the dropped leading axes all have length 1. With
            fewer axes, length-1 axes are inserted at the front.
        """
        if x.ndim > dim:
            return np.reshape(x, x.shape[-dim:])
        if x.ndim == dim:
            return x
        return np.expand_dims(x, tuple(range(dim - x.ndim)))

In [4]:
class Convolution(Node):
    """Single-filter, stride-1 2-D convolution layer without bias.

    The kernel is applied without flipping (cross-correlation) and without
    padding in the forward pass, so the output is one channel of size
    ``(H - kh + 1, W - kw + 1)``.

    Attributes:
        W: kernel array; created with shape ``conv_dim`` unless one is supplied.
        input_values: input of the last ``forward`` call, reshaped to 3 axes.
        output_values: raw ``(1, out_h, out_w)`` result of the last ``forward`` call.
        W_pd: gradient with respect to ``W`` computed by the last ``backward`` call.
    """

    def __init__(self, input_dim, conv_dim, W = None):
        """Set up shapes and the kernel.

        Args:
            input_dim: input shape indexed as ``(channels, height, width)``.
            conv_dim: kernel shape indexed as ``(channels, height, width)``.
            W: optional pre-built kernel; when None the kernel is drawn
                uniformly from [0.4, 0.6) with shape ``conv_dim``.
        """
        # Height uses axis 1 and width uses axis 2. Width was previously
        # computed from axis 1 too, which is only right for square shapes.
        out_height = input_dim[1] - conv_dim[1] + 1
        out_width = input_dim[2] - conv_dim[2] + 1
        super().__init__(input_dim, (1, out_height, out_width), 3)
        self.W = np.random.uniform(0.4, 0.6, conv_dim) if W is None else W
        self.input_values = None
        self.output_values = None
        self.labels = None
        self.W_pd = None

    def convolve(self, T, W, add_padding = False):
        """Slide ``W`` over ``T`` and sum the element-wise products per position.

        Args:
            T: 2-D or 3-D array; a 2-D array gets a leading axis of length 1.
            W: 2-D or 3-D kernel; a 2-D kernel gets a leading axis of length 1.
            add_padding: when True, ``T`` is zero-padded by ``kernel - 1`` on
                both sides of each spatial axis, giving an output of size
                ``T + W - 1`` per axis; otherwise the output is ``T - W + 1``.

        Returns:
            Array of shape ``(1, out_h, out_w)``; the leading axes of ``T`` and
            ``W`` are multiplied with broadcasting and summed away.
        """
        T = np.expand_dims(T, axis=0) if T.ndim == 2 else T
        W = np.expand_dims(W, axis=0) if W.ndim == 2 else W
        kernel_h, kernel_w = W.shape[1], W.shape[2]

        if add_padding:
            output_shape = (1, T.shape[1] + kernel_h - 1, T.shape[2] + kernel_w - 1)
            T = np.pad(T, pad_width=[(0, 0), (kernel_h - 1, kernel_h - 1), (kernel_w - 1, kernel_w - 1)])
        else:
            output_shape = (1, T.shape[1] - kernel_h + 1, T.shape[2] - kernel_w + 1)

        convolution = np.zeros(output_shape)
        for row in range(output_shape[1]):
            for col in range(output_shape[2]):
                window = T[:, row: row + kernel_h, col: col + kernel_w]
                convolution[0, row, col] = np.sum(window * W)
        return convolution

    def forward(self, input, labels = None):
        """Run the layer on ``input`` and cache input and output for ``backward``.

        Args:
            input: array reshaped internally to ``inner_dim`` axes.
            labels: accepted for interface compatibility; not used here.

        Returns:
            The convolution result reshaped to ``output_dim`` axes.
        """
        self.input_values = self.change_dims(input, self.inner_dim)
        self.output_values = self.convolve(self.input_values, self.W)
        return self.change_dims(self.output_values, self.output_dim)

    def backward(self, input_pd):
        """Back-propagate the gradient of the loss with respect to the output.

        Must be called after ``forward``, whose cached input is used. Stores
        the kernel gradient in ``W_pd``.

        Args:
            input_pd: gradient with respect to this layer's output.

        Returns:
            Gradient with respect to the layer input, one slice per input
            channel, reshaped to ``input_dim`` axes.
        """
        # Loop-invariant: reshape the upstream gradient once for both passes.
        upstream = self.change_dims(input_pd, self.inner_dim)
        channels = range(self.n_input[0])

        # dL/dW[c] is the input channel c correlated with the upstream gradient.
        self.W_pd = np.concatenate([self.convolve(self.input_values[i], upstream) for i in channels], axis=0)

        # dL/dx[c] is the padded upstream gradient correlated with the
        # spatially flipped kernel slice of channel c.
        input_gradient = np.concatenate([self.convolve(upstream, self.W[i, ::-1, ::-1], True) for i in channels], axis=0)
        # This gradient has the shape of the input, so use the input rank.
        return self.change_dims(input_gradient, self.input_dim)

    def optimize_weights(self, gd):
        """Update ``W`` from ``W_pd`` using ``gd.optimize``.

        Args:
            gd: optimizer exposing ``optimize(targets, gradients)`` that
                returns the list of updated targets.
        """
        self.W = gd.optimize([self.W], [self.W_pd])[0]

# Validation

In [5]:
# Shapes are indexed as (channels, height, width) by Convolution.
input = (3, 5, 5)
conv_dim = (3, 2, 2)
conv = Convolution(input, conv_dim)

# Random input and random upstream gradient dL/dy, run through the hand-written layer.
x_input = np.random.random(input)
dL_dy = np.random.random(conv.n_output)
output = conv.forward(x_input)
dL_dx = conv.backward(dL_dy)
dL_dw = conv.W_pd

# Reference Keras layer fed the same data: a batch axis is added and the
# channel axis is moved last for both the input and the kernel.
x = tf.constant(np.moveaxis(np.expand_dims(x_input, axis=0), 1, -1), dtype=tf.float32)
weights = tf.constant(np.moveaxis(conv.W, 0, -1), dtype=tf.float32)
# Kernel size comes from conv_dim so the reference layer cannot drift from the layer under test.
conv_keras = layers.Conv2D(1, conv_dim[1:], input_shape=x.shape[1:], use_bias=False, kernel_initializer=tf.keras.initializers.Constant(weights))

In [6]:
# Forward-pass check: run the Keras reference layer under a gradient tape and
# print its output next to the hand-written layer's output.
# persistent=True because the tape is queried twice afterwards (input and kernel gradients).
with tf.GradientTape(persistent=True) as tape:
    # x is a tf.constant, so it has to be watched explicitly to get dL/dx.
    tape.watch(x)
    conv_output = conv_keras(x)

# Move the Keras channel axis from last to second position, then drop length-1 axes.
print(f"Keras: {conv_output.numpy().transpose(0, 3, 1, 2).squeeze()}")
print()
print(f"Mine: {output[0]}")

Keras: [[2.2348945 3.4954233 4.0519085 3.838164 ]
 [2.5698576 3.3950186 3.454504  3.4474192]
 [3.3045144 3.2550535 3.350473  3.3015566]
 [2.8904407 2.8494146 3.6963587 3.9793224]]

Mine: [[2.23489443 3.4954233  4.05190854 3.83816412]
 [2.56985745 3.39501872 3.45450437 3.44741934]
 [3.30451437 3.2550536  3.3504725  3.30155665]
 [2.89044066 2.84941454 3.69635859 3.97932248]]


In [7]:
# Backward-pass check: feed the same upstream gradient dL/dy to the Keras tape
# and print its input and kernel gradients next to the hand-written ones.
# A trailing axis is appended to dL_dy before it is passed as output_gradients.
dL_dy_keras = tf.constant(np.expand_dims(dL_dy, axis=-1), dtype=tf.float32)
dL_dx_keras = tape.gradient(conv_output, x, output_gradients=dL_dy_keras)
# Gradients are requested for a list of variables, so element [0] is used below.
dL_dw_keras = tape.gradient(conv_output, conv_keras.trainable_variables, output_gradients=dL_dy_keras)

# Input gradient: take batch item 0 and move the last axis to the front before printing.
print(f"Keras: {np.moveaxis(dL_dx_keras[0].numpy().squeeze(), -1, 0)}")
print()
print(f"Mine: {dL_dx}")
print()
print()
# Kernel gradient: drop length-1 axes and move the last remaining axis to the front.
print(f"Keras: {np.moveaxis(dL_dw_keras[0].numpy().squeeze(), -1, 0)}")
print()
print(f"Mine: {dL_dw}")

Keras: [[[0.37909093 0.77021277 0.6416171  0.6713644  0.41283754]
  [0.73011065 1.3666209  0.9097757  0.994215   0.755394  ]
  [0.6286058  1.0950552  0.7051432  0.66914517 0.45234478]
  [0.6090616  0.98327136 0.9517509  1.01633    0.42039365]
  [0.32755104 0.51317054 0.4733592  0.6506156  0.308841  ]]

 [[0.46209475 0.8300403  0.6655717  0.7461585  0.37989756]
  [0.8924444  1.4468099  0.93077147 1.1316056  0.7097536 ]
  [0.76867914 1.1437469  0.736063   0.7396288  0.4259026 ]
  [0.74422693 1.0163168  1.0580063  1.0656393  0.39159   ]
  [0.40154198 0.52121985 0.54028285 0.6895323  0.29355246]]

 [[0.446396   0.938094   0.78887504 0.81122065 0.52142346]
  [0.78528917 1.5282522  1.0040628  1.0671905  0.87655234]
  [0.6668273  1.2415341  0.8267333  0.75031567 0.5201904 ]
  [0.6627463  1.1197139  1.0645967  1.1774385  0.505856  ]
  [0.3172918  0.54953426 0.4779792  0.6827619  0.34051234]]]

Mine: [[[0.37909093 0.77021273 0.64161714 0.67136443 0.41283753]
  [0.73011066 1.36662101 0.9097757  