# Cognition & Computation - Lab 2

In [None]:
# 🔧 Setup for Google Colab
import os
import sys

# Check if running on Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("🔧 Running on Google Colab - setting up environment...\n")
    
    # Clone repository if not already done
    if not os.path.exists('groundeep-unimodal-training'):
        print("📥 Cloning repository...")
        !git clone https://github.com/francesco-cal98/dbn-training.git groundeep-unimodal-training
    
    # Change to repo directory
    os.chdir('groundeep-unimodal-training')
    print(f"📂 Working directory: {os.getcwd()}")
    
    # Install minimal dependencies
    print("\n📦 Installing dependencies...")
    !pip install -q torch torchvision numpy matplotlib scikit-learn tqdm
    
    print("\n✅ Setup complete! Ready to run the notebook.\n")
else:
    print("💻 Running locally")

As usual, let's first make sure that the scripts implementing a DBN in PyTorch are available (they ship with the GitHub repository, so nothing has to be downloaded), and load some useful Python libraries.

In [None]:
def get_dbn_library():
  """
  Check that the local DBN implementation is available.

  The notebook imports the wrapper classes from DBN.py and RBM.py, which are
  expected to sit in the current working directory (the repository root).
  According to the original notebook these wrappers build on
  src/classes/gdbn_model.py; that file is not checked here.

  No download needed - files are already in the repository!

  Raises:
    FileNotFoundError: if DBN.py or RBM.py is missing from the current
      working directory (DBN.py is checked first).
  """
  import os

  # Check if wrapper files exist
  for wrapper_file in ('DBN.py', 'RBM.py'):
    if not os.path.exists(wrapper_file):
      raise FileNotFoundError(
        f"{wrapper_file} not found. Make sure you're running from the repository root."
      )

  print("✅ Using local DBN implementation")
  print("   - Wrapper classes: DBN.py, RBM.py")
  print("   - Core implementation: src/classes/gdbn_model.py")

In [None]:
%%capture
# Verify that DBN.py / RBM.py are present before importing from them.
# NOTE(review): %%capture silences this cell's output, so the confirmation
# messages printed by get_dbn_library() are not shown.
get_dbn_library()

In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision as tv

from DBN import DBN

Let's choose the kind of device used for computations (CPU or GPU).

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
print(cuda_available)
device = torch.device("cuda" if cuda_available else "cpu")
print(device)

Let's download and normalize the MNIST dataset as in the previous Lab.

In [None]:
%%capture
# Load the MNIST train and test splits through torchvision, downloading them
# into ../mnist when needed. %%capture keeps the download output out of the notebook.
mnist_tr = tv.datasets.MNIST(root="../mnist", train=True, download=True)
mnist_te = tv.datasets.MNIST(root="../mnist", train=False, download=True)

In [None]:
# Scale pixel values by 1/255 and move images and labels to the compute device.
# The dtype guard makes this cell idempotent: on a re-run the images are
# already floating point, so they are not divided by 255 a second time.
for mnist_split in (mnist_tr, mnist_te):
  if not mnist_split.data.is_floating_point():  # still raw integer pixels
    mnist_split.data = mnist_split.data / 255
  mnist_split.data = mnist_split.data.to(device)
  mnist_split.targets = mnist_split.targets.to(device)

We now create a hierarchical generative model (Deep Belief Network) and train it in an unsupervised way on the MNIST dataset.

In [None]:
# Build the DBN: 28*28 visible units (one per MNIST pixel) and three stacked
# hidden layers of 400, 500 and 800 units.
# NOTE(review): the exact meaning of the remaining hyper-parameters is defined
# in DBN.py, which is not visible here — confirm there before tuning them.
dbn_mnist = DBN(visible_units=28*28,
                hidden_units=[400, 500, 800],
                k=1,
                learning_rate=0.1,
                learning_rate_decay=False,
                initial_momentum=0.5,
                final_momentum=0.95,
                weight_decay=0.0001,
                xavier_init=False,
                increase_to_cd_k=False,
                use_gpu=torch.cuda.is_available())

In [None]:
num_epochs = 50
batch_size = 125

# Train the DBN on the training images. The labels are passed along as well;
# NOTE(review): how train_static uses them is defined in DBN.py — confirm there.
dbn_mnist.train_static(
    mnist_tr.data,
    mnist_tr.targets,
    num_epochs,
    batch_size
)

## Linear read-out

As in the previous lab, we can now extract the hidden representations of the data by propagating the neurons' activations from the sensory (visible) layer in a bottom-up fashion, and then try to linearly decode the content of the representations in a supervised way.

In [None]:
def get_kth_layer_repr(input, k, device):
  """Compute the hidden representation of `input` at the k-th RBM of `dbn_mnist`.

  Args:
    input: tensor whose first axis indexes the samples; all remaining axes
      are flattened into a single feature axis.
    k: index into `dbn_mnist.rbm_layers` selecting the RBM to apply.
    device: torch device on which the computation is run.

  Returns:
    The first of the two values returned by the RBM's `to_hidden`.
    NOTE(review): presumably the hidden activations — confirm in RBM.py.
  """
  # Cast and move in a single step. `.type(torch.FloatTensor)` would first copy
  # a GPU tensor back to the CPU, only for `.to(device)` to move it to the GPU again.
  flattened_input = input.view((input.shape[0], -1)).to(device=device, dtype=torch.float32)
  hidden_repr, __ = dbn_mnist.rbm_layers[k].to_hidden(flattened_input)  # here we access the RBM object
  return hidden_repr

In [None]:
# Propagate the training images bottom-up through the DBN: the representation
# computed at each layer is the input of the next one.
hidden_repr_1 = get_kth_layer_repr(mnist_tr.data, 0, device)
hidden_repr_2 = get_kth_layer_repr(hidden_repr_1, 1, device)
hidden_repr_3 = get_kth_layer_repr(hidden_repr_2, 2, device)

In [None]:
class LinearModel(torch.nn.Module):
  """Linear read-out: a single fully connected layer producing one score per class.

  Args:
    last_layer_size: number of input features (size of the representation
      being decoded).
    num_classes: number of output scores; defaults to 10 (the MNIST digits).
  """

  def __init__(self, last_layer_size, num_classes=10):
    super().__init__()
    self.linear = torch.nn.Linear(last_layer_size, num_classes)

  def forward(self, x):
    """Return the raw class scores (logits) for the batch `x`."""
    return self.linear(x)

In [None]:
# One linear read-out per DBN layer; W.shape[1] of each RBM is used as the
# read-out's input size.
# NOTE(review): assumes W is laid out as (visible, hidden) — confirm in RBM.py.
layer_size = dbn_mnist.rbm_layers[0].W.shape[1]
linear1 = LinearModel(layer_size).to(device)

layer_size = dbn_mnist.rbm_layers[1].W.shape[1]
linear2 = LinearModel(layer_size).to(device)

layer_size = dbn_mnist.rbm_layers[2].W.shape[1]
linear3 = LinearModel(layer_size).to(device)

In [None]:
def train_supervised(network, input_data, epochs=1000, targets=None, lr=0.05):
  """Train `network` in place with full-batch SGD and a cross-entropy loss.

  Args:
    network: torch module mapping `input_data` to one row of class scores
      per sample.
    input_data: tensor whose first axis indexes the samples; the whole tensor
      is fed to the network at every epoch.
    epochs: number of optimisation steps (one per pass over `input_data`).
    targets: class labels, one per sample. Defaults to the MNIST training
      labels (`mnist_tr.targets`).
    lr: SGD learning rate.

  The loss is printed every 100 epochs. Nothing is returned.
  """
  if targets is None:
    targets = mnist_tr.targets  # here are the labels
  # One label per input row. This is loop-invariant, so it is done once up front.
  targets = targets.reshape(input_data.shape[0])

  optimizer = torch.optim.SGD(network.parameters(), lr=lr)
  loss_fn = torch.nn.CrossEntropyLoss()

  for epoch in range(epochs):
    optimizer.zero_grad()
    # No .squeeze() here: it would drop the batch axis of a single-sample
    # batch and break the loss computation.
    predictions = network(input_data)
    loss = loss_fn(predictions, targets)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
      print("epoch : {:3d}/{}, loss = {:.4f}".format(epoch + 1, epochs, loss.item()))

In [None]:
# Train each read-out for 1000 epochs on the representations of its own DBN layer.
train_supervised(linear1, hidden_repr_1, 1000)
train_supervised(linear2, hidden_repr_2, 1000)
train_supervised(linear3, hidden_repr_3, 1000)

In [None]:
# Bottom-up pass through the DBN for the held-out test images.
hidden_repr_1_test = get_kth_layer_repr(mnist_te.data, 0, device)
hidden_repr_2_test = get_kth_layer_repr(hidden_repr_1_test, 1, device)
hidden_repr_3_test = get_kth_layer_repr(hidden_repr_2_test, 2, device)

In [None]:
# compute the classifier predictions:
# (inference only, so no autograd graph needs to be built or kept in memory)
with torch.no_grad():
  predictions_test1 = linear1(hidden_repr_1_test)
  predictions_test2 = linear2(hidden_repr_2_test)
  predictions_test3 = linear3(hidden_repr_3_test)

In [None]:
def compute_accuracy(predictions_test, targets):
  """Return the fraction of rows whose highest-scoring class equals the target.

  Args:
    predictions_test: 2-D tensor with one row of class scores per sample.
    targets: tensor of true class indices, one per sample.

  Returns:
    The accuracy as a Python float.
  """
  predicted_classes = torch.argmax(predictions_test, dim=1)  # best-scoring class per row
  num_correct = torch.sum(predicted_classes == targets)
  return (num_correct / len(targets)).item()

In [None]:
# Test accuracy of the read-out trained on the first hidden layer.
compute_accuracy(predictions_test1, mnist_te.targets)

In [None]:
# Test accuracy of the read-out trained on the second hidden layer.
compute_accuracy(predictions_test2, mnist_te.targets)

In [None]:
# Test accuracy of the read-out trained on the third hidden layer.
compute_accuracy(predictions_test3, mnist_te.targets)

## Comparison with a feed-forward neural network

Let's now train a simple feed-forward neural network with the same structure as the DBN. This lets us compare a non-linear model trained end-to-end on the classification task with a simple linear classifier that solves the same task using representations of the input data learned in an unsupervised way by the DBN.

In [None]:
class Feedforward(torch.nn.Module):
  """Fully connected classifier: 784 inputs, three ReLU hidden layers, 10 output scores."""

  def __init__(self, first_hidden_layer_size, second_hidden_layer_size, third_hidden_layer_size):
    super().__init__()
    # Layers are created in input-to-output order.
    self.first_hidden = torch.nn.Linear(784, first_hidden_layer_size)
    self.second_hidden = torch.nn.Linear(first_hidden_layer_size, second_hidden_layer_size)
    self.third_hidden = torch.nn.Linear(second_hidden_layer_size, third_hidden_layer_size)
    self.output = torch.nn.Linear(third_hidden_layer_size, 10)

  def forward(self, input):
    """Return the raw class scores for a batch of flattened images."""
    activations = input
    for hidden_layer in (self.first_hidden, self.second_hidden, self.third_hidden):
      activations = torch.relu(hidden_layer(activations))
    return self.output(activations)

In [None]:
# Same hidden-layer sizes as the DBN (400, 500, 800).
ffnn = Feedforward(400, 500, 800).to(device)

In [None]:
# Train end-to-end on the training images, flattened to 60000 rows of 784 pixels.
# NOTE(review): 1050 epochs here vs. 1000 for the read-outs — the rationale is not stated in the notebook.
train_supervised(ffnn, mnist_tr.data.reshape((60000, 784)), 1050)

In [None]:
# Forward pass on the flattened test images. This is inference only, so no
# autograd graph is built; the row count is taken from the data instead of
# being hard-coded.
with torch.no_grad():
  predictions_ffnn = ffnn(mnist_te.data.reshape((mnist_te.data.shape[0], -1)))

In [None]:
# Test accuracy of the feed-forward network.
compute_accuracy(predictions_ffnn, mnist_te.targets)

## Robustness to noise

We will now inject some noise into the input images and see how robust the representations learned by the DBN and by the feed-forward network are to perturbations in the sensory signal.

Similarly to what happens in psychophysical experiments, this will allow us to create a psychometric curve describing the decrease in classification accuracy as a function of the noise level.

In [None]:
def inject_noise(mnist_data, noise_level):
  """Return a copy of `mnist_data` with zero-mean Gaussian noise added.

  Args:
    mnist_data: tensor of images; it is not modified.
    noise_level: standard deviation of the added noise (0 returns the data unchanged).

  Returns:
    A new tensor of the same shape as `mnist_data`. Values are not clipped,
    so they may fall outside the range of the input.
  """
  ### TASK: create a very simple function that adds some Gaussian noise (see torch.randn function) to the MNIST data
  # Draw the noise on the same device as the data, so the function does not
  # depend on the notebook-level `device` variable.
  random_gaussian_tensor = torch.randn(mnist_data.shape, device=mnist_data.device) * noise_level
  return mnist_data + random_gaussian_tensor

Let's see what a noisy image looks like:

In [None]:
noise_level = 0.3
mnist_test_with_noise = inject_noise(mnist_te.data, noise_level)
# Show the first noisy test image; assigning the result to __ keeps the
# returned image object out of the cell output.
__ = plt.imshow(mnist_test_with_noise[0].reshape(28, 28).to("cpu"), cmap="gray")

We will now compute the hidden representations for the noisy images using the DBN. Then, we will use the read-out classifiers that we trained on the representations without noise to classify the noisy stimuli.

In [None]:
def get_accuracy_values_at_noise_level(noise_level):
  """Evaluate the three DBN read-outs and the feed-forward network on noisy test images.

  Args:
    noise_level: standard deviation of the Gaussian noise added to the test images.

  Returns:
    A 4-tuple of accuracies (Python floats), in this order: first hidden
    layer read-out, second hidden layer read-out, third hidden layer
    read-out, feed-forward network.
  """
  mnist_test_with_noise = inject_noise(mnist_te.data, noise_level)  # first, let's create noisy test images
  num_images = mnist_test_with_noise.shape[0]

  # Evaluation only: no gradients are needed, so skip building the autograd graph.
  with torch.no_grad():
    hidden_repr_1_noisy = get_kth_layer_repr(mnist_test_with_noise, 0, device)  # here we compute the DBN representations
    hidden_repr_2_noisy = get_kth_layer_repr(hidden_repr_1_noisy, 1, device)
    hidden_repr_3_noisy = get_kth_layer_repr(hidden_repr_2_noisy, 2, device)

    predictions_first_hidden_noisy = linear1(hidden_repr_1_noisy)  # here we use the previously-trained read-out classifiers
    predictions_second_hidden_noisy = linear2(hidden_repr_2_noisy)
    predictions_third_hidden_noisy = linear3(hidden_repr_3_noisy)

    ### TASK: repeat a similar process for the feed-forward model (NB: make sure you reshape the input data appropriately!)
    # The feed-forward model expects one flattened image per row; the row
    # count is taken from the data instead of being hard-coded.
    predictions_ffnn_noisy = ffnn(mnist_test_with_noise.reshape((num_images, -1)))

  accuracy_first_hidden = compute_accuracy(predictions_first_hidden_noisy, mnist_te.targets)
  accuracy_second_hidden = compute_accuracy(predictions_second_hidden_noisy, mnist_te.targets)
  accuracy_third_hidden = compute_accuracy(predictions_third_hidden_noisy, mnist_te.targets)
  accuracy_ffnn = compute_accuracy(predictions_ffnn_noisy, mnist_te.targets)

  return accuracy_first_hidden, accuracy_second_hidden, accuracy_third_hidden, accuracy_ffnn

In [None]:
# Accuracy of every model at a single noise level, printed one model per line.
acc = get_accuracy_values_at_noise_level(0.3)
report_lines = (
  "Accuracy of H1 read-out: %.3f" % acc[0],
  "Accuracy of H2 read-out: %.3f" % acc[1],
  "Accuracy of H3 read-out: %.3f" % acc[2],
  "Accuracy of FF network : %.3f" % acc[3],
)
print("\n".join(report_lines))

Let's create the psychometric curves for the DBN (at different levels of internal representations) and for the feed-forward network:

In [None]:
def plot_noise_robustness_curves(noise_levels):
  """Plot classification accuracy against noise level for every model.

  One curve is drawn for each of the three DBN read-outs and one for the
  feed-forward network.

  Args:
    noise_levels: sequence of noise standard deviations to evaluate.
  """
  noise_levels = list(noise_levels)  # iterated twice below
  curve_labels = ["First hidden", "Second hidden", "Third hidden", "FFNN"]
  accuracy_curves = [[] for _ in curve_labels]

  for noise_level in noise_levels:
    acc = get_accuracy_values_at_noise_level(noise_level)
    for curve, accuracy in zip(accuracy_curves, acc):
      curve.append(accuracy)

  # Plot against the actual noise values (in percent) rather than their index,
  # so that unevenly spaced noise levels are not drawn as if evenly spaced.
  noise_percent = [100 * float(noise_level) for noise_level in noise_levels]

  fig, ax = plt.subplots()
  for label, curve in zip(curve_labels, accuracy_curves):
    ax.plot(noise_percent, curve, label=label)

  ax.set_title("Robustness to noise")
  ax.set_xlabel("Noise level (%)")
  ax.set_ylabel("Accuracy")
  ax.set_xticks(noise_percent)
  # round() instead of int(): truncation would mislabel values such as 28.999... as 28.
  ax.set_xticklabels([int(round(percent)) for percent in noise_percent])
  ax.legend()

In [None]:
# 10 evenly spaced noise levels (standard deviations) from 0 to 2, inclusive.
noise_levels = np.linspace(0,2,10)
plot_noise_robustness_curves(noise_levels)

## Reference paper
- [Testolin et al. (2017) - Letter perception emerges from unsupervised deep learning and recycling of natural image features](https://www.nature.com/articles/s41562-017-0186-2)

## Contacts

- 📧 flavio.petruzzellis@phd.unipd.it
- 💬 [Moodle forum](https://stem.elearning.unipd.it/mod/forum/view.php?id=600538)