In [1]:
import sys

import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as layers
from tensorflow.keras import Model
from tqdm import trange

In [2]:
# Directory added to the import path right before the `base` / `cnn` imports;
# presumably the local checkout that provides those modules.
# NOTE(review): this absolute path is machine-specific - adjust it (or make it
# configurable) before running the notebook on another machine.
CNN_FROM_SCRATCH_DIR = "/Users/gabrielmisrachi/Job Search/Deepsense/cnn-from-scratch"

# Guard the append so re-running this cell in the same kernel does not pile up
# duplicate entries in sys.path.
if CNN_FROM_SCRATCH_DIR not in sys.path:
    sys.path.append(CNN_FROM_SCRATCH_DIR)

In [3]:
from base import Batcher
from cnn import Conv2D, Identity, MSE, SGD

In [4]:
# Make Keras default to float64 so the TensorFlow layer works in the same
# precision as the float64 NumPy arrays it is compared against below.
tf.keras.backend.set_floatx('float64')

In [5]:
# Seed NumPy's global generator so the random images are the same on every
# Restart & Run All.
# NOTE(review): this also fixes the layer's initial weights only if the
# project's Conv2D draws them from np.random - confirm.
SEED = 0
np.random.seed(SEED)

# Toy regression problem: random single-channel 3x3 images, all-zero targets.
batch_size = 20
input_shape = (3, 3, 1)
n_samples = 1000
images = np.random.rand(n_samples, *input_shape)

kernel_size = 2
n_filters = 1

# Flattened size of the convolution output, derived from the settings above
# instead of being hard-coded. Evaluates to 4 for the values used here.
# NOTE(review): assumes `full=False` means no padding with stride 1 (the Keras
# layer used for comparison later is built with padding="valid") - confirm.
output_size = (
    (input_shape[0] - kernel_size + 1)
    * (input_shape[1] - kernel_size + 1)
    * n_filters
)
targets = np.zeros((n_samples, output_size))

batcher = Batcher(images, targets, batch_size)
conv = Conv2D(input_shape, kernel_size, n_filters, activation=Identity(), full=False)
conv.optimizer = SGD(lr=0.01)
loss = MSE()

In [6]:
def compute_grads(conv, images_batch, targets_batch):
    """Run one forward/backward pass of the from-scratch conv layer.

    Args:
        conv: layer object exposing ``forward(images)`` and
            ``compute_grads(deltas)``.
        images_batch: batch of input images passed to ``conv.forward``.
        targets_batch: targets with one flattened row per sample.

    Returns:
        Tuple ``(loss_value, grads, preds)`` where ``loss_value`` is whatever
        the module-level ``loss.forward`` returns, ``grads`` is the first value
        returned by ``conv.compute_grads`` and ``preds`` is the layer output
        flattened to one row per sample.

    Note:
        Relies on the module-level ``loss`` object, looked up at call time.
    """
    conv_output = conv.forward(images_batch)

    # Flatten everything but the batch axis. The sizes are taken from the
    # actual layer output rather than hard-coded, so the helper works for any
    # input / kernel / filter configuration.
    flat_size = int(np.prod(conv_output.shape[1:]))
    preds = conv_output.reshape(-1, flat_size)

    loss_value = loss.forward(preds, targets_batch)

    # Give the loss gradient the same layout as the layer output before
    # handing it back to the layer.
    deltas = loss.gradient(preds, targets_batch).reshape(conv_output.shape)

    # The second return value (presumably the deltas to propagate to a previous
    # layer) is not needed: only this layer's gradients are returned.
    grads, _ = conv.compute_grads(deltas)
    return loss_value, grads, preds

In [8]:
# Train the from-scratch conv layer against all-zero targets for 100 epochs,
# showing the mean batch loss of the latest epoch in the progress bar.
# NOTE(review): the recorded run ended with a loss of about 1.42 and non-zero
# weights. Confirm whether `SGD.step` updates `conv.weights` in place or
# returns the new weights (its return value is discarded here), and whether it
# expects per-sample or batch-reduced gradients.
loss_values = np.inf
t = trange(100, desc="Loss = {}".format(np.mean(loss_values)), leave=True)
for _ in t:
    loss_list = []
    # One epoch: keep pulling batches until the batcher reports it is done.
    while True:
        done, images_batch, targets_batch = batcher.next()
        loss_values, grads, _ = compute_grads(conv, images_batch, targets_batch)
        conv.optimizer.step(conv.weights, grads)
        loss_list.append(loss_values.mean())
        if done:
            break
    t.set_description("Loss = {}".format(np.mean(loss_list)))

# With zero targets and no bias the optimum is an all-zero kernel; report it
# when training did not get there.
converged_to_zero = np.allclose(conv.weights, np.zeros_like(conv.weights))
if not converged_to_zero:
    print(
        "Conv layer weights {} did not converge to 0.".format(conv.weights)
    )

Loss = 1.419637016567881: 100%|██████████| 100/100 [00:06<00:00, 15.74it/s]

Conv layer weights [[[[0.63576771]
   [0.54199641]]

  [[0.65191169]
   [0.48363533]]]] did not converge to 0.





In [9]:
# Keras reference layer configured to mirror the from-scratch layer:
# same filter count and kernel size, no padding, linear activation, no bias.
conv_tf = layers.Conv2D(
    filters=n_filters,
    kernel_size=kernel_size,
    padding="valid",
    activation="linear",
    use_bias=False,
)

In [10]:
class MyModel(Model):
    """Minimal Keras model: the shared ``conv_tf`` layer followed by a flatten."""

    def __init__(self):
        super().__init__()
        # Reuse the module-level layer (rather than building a new one) so its
        # weights can be set and inspected from outside the model.
        self.conv1 = conv_tf
        self.flatten = layers.Flatten()

    def call(self, x):
        """Apply the convolution, then flatten each sample to one row."""
        return self.flatten(self.conv1(x))

# Create an instance of the model
model = MyModel()

In [11]:
def compute_grads_tf(model, images_batch, targets_batch):
    """Run one forward/backward pass of the Keras model.

    The loss function is the module-level ``loss_object``, looked up at call
    time, so it must be defined before this function is first called.

    Args:
        model: callable Keras model exposing ``trainable_variables``.
        images_batch: batch of input images.
        targets_batch: targets passed to ``loss_object`` as the first argument.

    Returns:
        Tuple ``(loss_value, gradients, predictions)``. ``gradients`` holds one
        entry per trainable variable. When ``loss_object`` returns a
        non-scalar (e.g. one loss per sample), ``tape.gradient`` sums the
        gradients over its elements.
    """
    with tf.GradientTape() as tape:
        # training=True only matters for layers that behave differently in
        # training and inference (e.g. Dropout).
        outputs = model(images_batch, training=True)
        batch_loss = loss_object(targets_batch, outputs)
    variable_grads = tape.gradient(batch_loss, model.trainable_variables)
    return batch_loss, variable_grads, outputs

In [13]:
# Train the Keras reference model for a single epoch on the same batcher.
# `loss_object` is the global that compute_grads_tf reads at call time.
loss_object = tf.keras.losses.mean_squared_error
optimizer = tf.keras.optimizers.SGD()  # Keras' default learning rate

loss_values = np.inf
t = trange(1, desc="Loss = {}".format(np.mean(loss_values)), leave=True)
for _ in t:
    loss_list = []
    # One epoch: keep pulling batches until the batcher reports it is done.
    while True:
        done, images_batch, targets_batch = batcher.next()
        loss_value, gradients, _ = compute_grads_tf(model, images_batch, targets_batch)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        loss_list.append(np.mean(loss_value))
        if done:
            break
    t.set_description("Loss = {}".format(np.mean(loss_list)))

Loss = 0.009134009948617015: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


# Check gradients

In [14]:
# Weights of the from-scratch layer after its training loop; these are the
# values copied into the Keras layer in the next cell.
conv.weights

array([[[[0.63576771],
         [0.54199641]],

        [[0.65191169],
         [0.48363533]]]])

In [15]:
# Load the from-scratch weights into the Keras layer so both layers hold the
# same kernel. The plain reshape to (2, 2, 1, 1) only reorders nothing because
# there is a single input channel and a single filter.
# NOTE(review): with more channels/filters the axes would presumably need a
# transpose instead - confirm against the project Conv2D's weight layout.
conv_tf.set_weights([conv.weights.reshape(2, 2, 1, 1)])

In [16]:
# Check that the Keras model (which wraps conv_tf) now holds the copied kernel.
model.weights

[<tf.Variable 'my_model/conv2d/kernel:0' shape=(2, 2, 1, 1) dtype=float64, numpy=
 array([[[[0.63576771]],
 
         [[0.54199641]]],
 
 
        [[[0.65191169]],
 
         [[0.48363533]]]])>]

In [17]:
# Push the same batch through both implementations so their losses,
# predictions and gradients can be compared.
done, images_batch, targets_batch = batcher.next()
loss_value, gradients, predictions = compute_grads(conv, images_batch, targets_batch)
loss_value_tf, gradients_tf, predictions_tf = compute_grads_tf(model, images_batch, targets_batch)

# Fail loudly if the two implementations disagree instead of relying on
# eyeballing the printed arrays in the cells below.
np.testing.assert_allclose(predictions, predictions_tf.numpy(), atol=1e-12)
np.testing.assert_allclose(loss_value, loss_value_tf.numpy(), atol=1e-12)

# TensorFlow returns a single kernel gradient already summed over the batch;
# the from-scratch gradients are summed over axis 0 and reshaped to the Keras
# kernel layout (same plain reshape used when the weights were copied over).
_kernel_grad_tf = gradients_tf[0].numpy()
np.testing.assert_allclose(
    gradients.sum(axis=0).reshape(_kernel_grad_tf.shape),
    _kernel_grad_tf,
    atol=1e-12,
)

In [32]:
# Sum the from-scratch gradients over their leading axis (presumably one
# gradient per sample) to compare with TensorFlow's gradient, which is already
# aggregated over the batch.
gradients.sum(axis=0)

array([[[[24.2096953 ],
         [23.93038234]],

        [[24.67887866],
         [24.71300484]]]])

In [19]:
# TensorFlow gradient for the model's first trainable variable (the conv
# kernel; the layer was built with use_bias=False).
gradients_tf[0]

<tf.Tensor: id=2015, shape=(2, 2, 1, 1), dtype=float64, numpy=
array([[[[24.2096953 ]],

        [[23.93038234]]],


       [[[24.67887866]],

        [[24.71300484]]]])>

In [30]:
# Full list of TensorFlow gradients, one tensor per trainable variable.
gradients_tf

[<tf.Tensor: id=2015, shape=(2, 2, 1, 1), dtype=float64, numpy=
 array([[[[24.2096953 ]],
 
         [[23.93038234]]],
 
 
        [[[24.67887866]],
 
         [[24.71300484]]]])>]

In [20]:
# Flattened prediction of the from-scratch layer for the first sample of the batch.
predictions[0]

array([1.0898142 , 1.18667946, 0.80355518, 0.70998429])

In [22]:
# Same sample taken straight from the layer's forward pass, before flattening.
conv.forward(images_batch)[0]

array([[[1.0898142 ],
        [1.18667946]],

       [[0.80355518],
        [0.70998429]]])

In [23]:
# Flattened prediction of the Keras model for the first sample of the batch.
predictions_tf[0]

<tf.Tensor: id=2019, shape=(4,), dtype=float64, numpy=array([1.0898142 , 1.18667946, 0.80355518, 0.70998429])>

In [24]:
# Raw output of the Keras conv layer for the first sample, before flattening.
conv_tf(images_batch)[0]

<tf.Tensor: id=2026, shape=(2, 2, 1), dtype=float64, numpy=
array([[[1.0898142 ],
        [1.18667946]],

       [[0.80355518],
        [0.70998429]]])>

In [25]:
# Loss values returned by the from-scratch MSE for this batch.
loss_value

array([0.93642044, 1.862848  , 2.00968683, 2.45707922, 1.47717618,
       1.0090872 , 0.66258847, 2.24115293, 1.59150963, 1.80737672,
       1.83669153, 0.61724609, 1.28671836, 1.51763524, 1.68438726,
       0.76260523, 0.24047109, 1.26616325, 1.88606276, 1.0483213 ])

In [26]:
# Loss values returned by tf.keras.losses.mean_squared_error for the same batch.
loss_value_tf

<tf.Tensor: id=1991, shape=(20,), dtype=float64, numpy=
array([0.93642044, 1.862848  , 2.00968683, 2.45707922, 1.47717618,
       1.0090872 , 0.66258847, 2.24115293, 1.59150963, 1.80737672,
       1.83669153, 0.61724609, 1.28671836, 1.51763524, 1.68438726,
       0.76260523, 0.24047109, 1.26616325, 1.88606276, 1.0483213 ])>