# Problem Set 1 (Take Home) - 40 points

## PS 1.A - 20 points

In [this video](https://www.youtube.com/watch?v=ho6JXE3EbZ8) the author explains how to extract various visualizations of what CNNs learn. [Your course site](https://pantelis.github.io/artificial-intelligence/aiml-common/lectures/cnn/cnn-example-architectures/visualizing-what-convnets-learn.html) also covers the topic. 

Using the [CIFAR-10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html), train a ResNet-50 based CNN on the classification task of $K=9$ classes (filter out the class `ship`) and create the following visualizations for first, middle and last blocks of ResNet-50. You are free to select a class to showcase such visualizations.

* Visualizing intermediate convnet outputs (“intermediate activations”). This is useful to understand how successive convnet layers transform their input.

* Visualizing convnets filters. This is useful to understand precisely what visual pattern or concept each filter in a convnet is receptive to.

* Visualizing heatmaps of class activation in an image. This is useful to understand which parts of an image were identified as belonging to a given class, and thus allows us to localize objects in images.


In [None]:
#RESNET 50 WITHOUT SHIPS
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
import matplotlib.pyplot as plt
from skimage.transform import resize
from keras.datasets import cifar10
from keras import backend as K

NUM_CLASSES = 9
BATCH_SIZE = 32
NUM_EPOCHS = 10
SHIP_LABEL = 8  # CIFAR-10 label that is filtered out of this experiment

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Row indices of every sample whose label is not `ship`.
train_mask = np.where(y_train != SHIP_LABEL)[0]
test_mask = np.where(y_test != SHIP_LABEL)[0]

X_train = X_train[train_mask] #image RGB values excluding 8 (ships)
y_train = y_train[train_mask] #label values excluding 8 (ships)

X_test = X_test[test_mask]  #image RGB values excluding 8 (ships)
y_test = y_test[test_mask] #label values excluding 8 (ships)

# Dropping label 8 leaves the ids {0..7, 9}. A NUM_CLASSES-way sparse
# categorical loss needs contiguous ids in [0, NUM_CLASSES), so every label
# above the removed class is shifted down by one (9 -> 8).
y_train[y_train > SHIP_LABEL] -= 1
y_test[y_test > SHIP_LABEL] -= 1

# Scale pixel values from [0, 255] to [0, 1].
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# `include_top=False` returns only the convolutional backbone, so a
# NUM_CLASSES-way classification head has to be attached before the model can
# be trained with a categorical loss.
# The head is attached with the functional API (instead of nesting the
# backbone inside a Sequential) so that `model.layers` still lists the
# individual ResNet50 layers, which the visualization cells index into.
backbone = tf.keras.applications.ResNet50(include_top=False, weights='imagenet')
pooled_features = tf.keras.layers.GlobalAveragePooling2D(name='head_avg_pool')(backbone.output)
class_probs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax',
                                    name='head_predictions')(pooled_features)
model = tf.keras.models.Model(inputs=backbone.input, outputs=class_probs)

model.compile(loss="sparse_categorical_crossentropy",
                    optimizer="sgd",
                    metrics=["accuracy"])

model.summary()
# Training is left disabled; uncomment to (re)train.
#history = model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_data=(X_test, y_test), batch_size=BATCH_SIZE)


# Recorded output of an earlier 10-epoch run (kept verbatim, not re-recorded).
# NOTE(review): the loss plateaus around 7.62 ~= ln(2048) with accuracy near
# 0.10, which is what one would expect if the headless backbone output
# (presumably 2048 channels) was being scored as class logits -- i.e. this log
# most likely predates the classification head above. Re-run to refresh it.
"""
Compiled on 10/22/2023

Epoch 1/10
1407/1407 [==============================] - 437s 308ms/step - loss: 8.1892 - accuracy: 0.1011 - val_loss: 8.0799 - val_accuracy: 0.0883
Epoch 2/10
1407/1407 [==============================] - 433s 307ms/step - loss: 7.9276 - accuracy: 0.1049 - val_loss: 8.2936 - val_accuracy: 0.0973
Epoch 3/10
1407/1407 [==============================] - 455s 323ms/step - loss: 7.8094 - accuracy: 0.1060 - val_loss: 8.3251 - val_accuracy: 0.0905
Epoch 4/10
1407/1407 [==============================] - 511s 363ms/step - loss: 7.7946 - accuracy: 0.1061 - val_loss: 7.7418 - val_accuracy: 0.1069
Epoch 5/10
1407/1407 [==============================] - 509s 361ms/step - loss: 7.7918 - accuracy: 0.1063 - val_loss: 7.8850 - val_accuracy: 0.0997
Epoch 6/10
1407/1407 [==============================] - 463s 329ms/step - loss: 7.6843 - accuracy: 0.1065 - val_loss: 7.6634 - val_accuracy: 0.1075
Epoch 7/10
1407/1407 [==============================] - 442s 314ms/step - loss: 7.6665 - accuracy: 0.1069 - val_loss: 7.6485 - val_accuracy: 0.1075
Epoch 8/10
1407/1407 [==============================] - 433s 308ms/step - loss: 7.6425 - accuracy: 0.1071 - val_loss: 7.6217 - val_accuracy: 0.1101
Epoch 9/10
1407/1407 [==============================] - 434s 308ms/step - loss: 7.6235 - accuracy: 0.1071 - val_loss: 7.6292 - val_accuracy: 0.1077
Epoch 10/10
1407/1407 [==============================] - 433s 308ms/step - loss: 7.6222 - accuracy: 0.1071 - val_loss: 7.6446 - val_accuracy: 0.1066

"""


In [None]:
#CREATING A JPEG FROM CIFAR10
from PIL import Image
plt.imshow(X_train[4])
# X_train holds float32 pixels scaled to [0, 1]. Multiplying back by 255 can
# land a hair below the original integer (e.g. 199.99998), and a bare
# astype(uint8) would truncate that to one level too dark, so round first.
array = np.rint(X_train[4] * 255)
array = array.astype(np.uint8)
img = Image.fromarray(array)
img.save("car.jpeg")

In [None]:
#VISUALIZING CONVNETS (intermediate activations)

img_path = 'car.jpeg'

import numpy as np
import matplotlib.pyplot as plt

# We preprocess the image into a 4D tensor (a batch holding one image)
img = tf.keras.utils.load_img(img_path, target_size=(32, 32))
img_tensor = tf.keras.utils.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
# Scale pixel values from [0, 255] to [0, 1], the same scaling that was
# applied to X_train / X_test.
img_tensor /= 255.

# Its shape is (1, 32, 32, 3)
print(img_tensor.shape)

plt.imshow(img_tensor[0])
plt.show()

# Inspect layers 1..7. Index 0 is skipped (presumably the InputLayer, whose
# output is just the image itself). The outputs and the names are taken from
# the SAME list so that every plot is titled with the layer that produced it.
inspected_layers = model.layers[1:8]
layer_outputs = [layer.output for layer in inspected_layers]
# Creates a model that will return these outputs, given the model input:
activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer_outputs)

activations = activation_model.predict(img_tensor)

first_layer_activation = activations[0]
print(first_layer_activation.shape)

plt.matshow(first_layer_activation[0, :, :, 1], cmap='viridis')
plt.show()

# These are the names of the layers, so can have them as part of our plot
layer_names = [layer.name for layer in inspected_layers]

images_per_row = 16

# Now let's display our feature maps
for layer_name, layer_activation in zip(layer_names, activations):
    # This is the number of features in the feature map
    n_features = layer_activation.shape[-1]

    # The feature map has shape (1, size, size, n_features)
    size = layer_activation.shape[1]

    # Number of grid rows needed to hold every channel. Ceil division keeps
    # layers with fewer than `images_per_row` channels (or a non-multiple of
    # it) from producing an empty, zero-height grid; unused tiles stay black.
    n_cols = -(-n_features // images_per_row)
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    # We'll tile each channel into this big grid
    for channel in range(n_features):
        col, row = divmod(channel, images_per_row)
        # astype() copies, so the normalization below does not overwrite
        # the values stored in `activations`.
        channel_image = layer_activation[0, :, :, channel].astype('float32')
        # Post-process the feature to make it visually palatable
        channel_image -= channel_image.mean()
        # The epsilon guards against constant (zero-variance) channels.
        channel_image /= (channel_image.std() + 1e-5)
        channel_image *= 64
        channel_image += 128
        channel_image = np.clip(channel_image, 0, 255).astype('uint8')
        display_grid[col * size : (col + 1) * size,
                     row * size : (row + 1) * size] = channel_image

    # Display the grid
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

plt.show()

In [None]:
#VISUALIZING CONVNET FILTERS

# Switch TensorFlow to graph mode before building the gradient graph below
# (presumably required by K.gradients / K.function).
tf.compat.v1.disable_eager_execution()

# NOTE: from here on `model` is rebound to an ImageNet-pretrained VGG16
# backbone; the `block*_conv*` layer names used below belong to that model.
model = tf.keras.applications.vgg16.VGG16(weights='imagenet', include_top=False)

layer_name = 'block3_conv1'
filter_index = 0

# Loss = mean activation of one filter of the chosen layer.
layer_output = model.get_layer(layer_name).output
loss = tf.keras.backend.mean(layer_output[:, :, :, filter_index])

# Gradient of that loss with respect to the input image ...
grads = K.gradients(loss, model.input)[0]

# ... divided by its RMS (plus a small epsilon to avoid dividing by zero).
grads_rms = tf.keras.backend.sqrt(
    tf.keras.backend.mean(tf.keras.backend.square(grads)))
grads = grads / (grads_rms + 1e-5)

# Callable mapping an input batch to [loss value, gradient value].
iterate = K.function([model.input], [loss, grads])

import numpy as np
# One evaluation on an all-zero image.
loss_value, grads_value = iterate([np.zeros((1, 150, 150, 3))])

# We start from a gray image with some noise
input_img_data = np.random.random((1, 150, 150, 3)) * 20 + 128.

# Run gradient ascent for 40 steps
step = 1.  # this is the magnitude of each gradient update
for _ in range(40):
    # Evaluate loss and gradient, then move the image along the gradient,
    # i.e. in the direction that increases the filter's mean activation.
    loss_value, grads_value = iterate([input_img_data])
    input_img_data += grads_value * step

def deprocess_image(x):
    """Turn an arbitrary numeric array into a displayable uint8 image.

    The values are standardized (zero mean, std 0.1), shifted to be centred
    on 0.5, clipped to [0, 1] and finally scaled to [0, 255].

    Args:
        x: array-like of numbers (any shape). It is NOT modified; the
            function works on a private copy. Integer input is accepted and
            is processed in float64, floating input keeps its own precision.

    Returns:
        A new `uint8` ndarray with the same shape as `x`.
    """
    x = np.asarray(x)
    work_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    # astype() always copies here, so the in-place arithmetic below cannot
    # leak back into the caller's array (or into a view of it).
    x = x.astype(work_dtype)

    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)  # epsilon keeps constant inputs from dividing by 0
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array; values are already within [0, 255] after the
    # clip above, so no second clip is needed
    x *= 255
    return x.astype('uint8')

def generate_pattern(layer_name, filter_index, size=150):
    """Build an image that strongly activates one filter of `model`.

    Starting from CIFAR-10 training image `X_train[4]`, runs 40 steps of
    gradient ascent on the mean activation of filter `filter_index` in layer
    `layer_name`, then converts the result with `deprocess_image`.

    Relies on the module-level names `model`, `K`, `X_train`, `resize`
    (skimage.transform) and `deprocess_image`.

    Args:
        layer_name: name of a layer of `model`.
        filter_index: index of the filter (last axis of the layer output).
        size: side length, in pixels, of the square image to produce. The
            seed image is resized to `size` x `size` when it is not already
            that size.

    Returns:
        The value returned by `deprocess_image` for the optimized image,
        which has shape (size, size, 3).
    """
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])

    grads = K.gradients(loss, model.input)[0]

    # Divide the gradient by its RMS; epsilon avoids dividing by zero.
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

    iterate = K.function([model.input], [loss, grads])

    # ADDED ONE OF THE CIFAR10 IMAGES FOR THE VISUALIZATION
    # np.array() copies, so the in-place updates below never touch X_train.
    seed = np.array(X_train[4], dtype='float32')
    if seed.shape[:2] != (size, size):
        # Honour the requested size instead of silently returning 32x32.
        seed = resize(seed, (size, size)).astype('float32')
    input_img_data = seed[np.newaxis, ...]

    step = 1.
    for _ in range(40):
        _, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step

    return deprocess_image(input_img_data[0])

plt.imshow(generate_pattern('block3_conv1', 0))
plt.show()

for layer_name in ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']:
    size = 32
    margin = 5

    # This a empty (black) image where we will store our results: an 8x8
    # grid of `size`-pixel tiles separated by `margin`-pixel gaps. The tiles
    # are uint8 images in [0, 255], so the canvas uses uint8 as well.
    grid_side = 8 * size + 7 * margin
    results = np.zeros((grid_side, grid_side, 3), dtype=np.uint8)

    for i in range(8):  # iterate over the rows of our results grid
        for j in range(8):  # iterate over the columns of our results grid
            # Generate the pattern for filter `i + (j * 8)` in `layer_name`
            filter_img = generate_pattern(layer_name, i + (j * 8), size=size)

            # Put the result in the square `(i, j)` of the results grid.
            # Each tile gets its own offset; writing every tile to the same
            # corner would leave only the last filter visible.
            horizontal_start = i * size + i * margin
            horizontal_end = horizontal_start + size
            vertical_start = j * size + j * margin
            vertical_end = vertical_start + size
            results[horizontal_start: horizontal_end,
                    vertical_start: vertical_end, :] = filter_img

    # Display the results grid. The values are already 0..255, so they are
    # shown as-is (rescaling by 255 would overflow uint8).
    plt.figure(figsize=(5, 5))
    plt.imshow(results)
    plt.show()

## PS 1.B - 20 points

In [this notebook](https://pantelis.github.io/artificial-intelligence/aiml-common/lectures/transfer-learning/transfer_learning_tutorial.html) we showcase _transfer learning_ using a pre-trained CNN model. 

Perform the fine-tuning and feature extraction methods of transfer learning using the same model as in PS-1A, for the class `ship`. 

Repeat the visualization of PS-1.A before and after  transfer learning and write a conclusive summary as to the relative value of the two methods.

In [None]:
#RESNET 50 WITH ONLY SHIPS
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
import matplotlib.pyplot as plt
from skimage.transform import resize
from keras.datasets import cifar10
from keras import backend as K

NUM_CLASSES = 1
BATCH_SIZE = 32
NUM_EPOCHS = 10
SHIP_LABEL = 8  # CIFAR-10 label that is kept in this experiment

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Row indices of every sample whose label is `ship`.
train_mask = np.where(y_train == SHIP_LABEL)[0]
test_mask = np.where(y_test == SHIP_LABEL)[0]

X_train = X_train[train_mask] #image RGB values excluding all classes (except ships)
y_train = y_train[train_mask] #label values excluding all classes (except ships)

X_test = X_test[test_mask]  #image RGB values excluding all classes (except ships)
y_test = y_test[test_mask] #label values excluding all classes (except ships)

# Every remaining label equals 8, which is out of range for a
# NUM_CLASSES-way output; map it to class id 0.
y_train = y_train - SHIP_LABEL
y_test = y_test - SHIP_LABEL

# Scale pixel values from [0, 255] to [0, 1].
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# `include_top=False` returns only the convolutional backbone, so a
# classification head is attached before compiling with a categorical loss.
# The functional API is used (instead of nesting the backbone inside a
# Sequential) so that `model.layers` still lists the individual ResNet50
# layers, which the visualization cells index into.
# NOTE(review): with a single class the softmax output is always 1.0, so
# training on ships alone cannot learn anything. The transfer-learning task
# described above (fine-tuning / feature extraction "using the same model as
# in PS-1A") presumably needs `ship` added alongside the other classes --
# confirm the intended setup.
backbone = tf.keras.applications.ResNet50(include_top=False, weights='imagenet')
pooled_features = tf.keras.layers.GlobalAveragePooling2D(name='head_avg_pool')(backbone.output)
class_probs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax',
                                    name='head_predictions')(pooled_features)
model = tf.keras.models.Model(inputs=backbone.input, outputs=class_probs)

model.compile(loss="sparse_categorical_crossentropy",
                    optimizer="sgd",
                    metrics=["accuracy"])

model.summary()
# Training is left disabled; uncomment to (re)train.
#history = model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_data=(X_test, y_test), batch_size=BATCH_SIZE)


In [None]:
#CREATING A SHIP JPEG FROM CIFAR10
from PIL import Image
plt.imshow(X_train[4])
# X_train holds float32 pixels scaled to [0, 1]. Multiplying back by 255 can
# land a hair below the original integer (e.g. 199.99998), and a bare
# astype(uint8) would truncate that to one level too dark, so round first.
array = np.rint(X_train[4] * 255)
array = array.astype(np.uint8)
img = Image.fromarray(array)
img.save("ship.jpeg")

In [None]:
#VISUALIZING CONVNETS (intermediate activations)

img_path = 'ship.jpeg'

import numpy as np
import matplotlib.pyplot as plt

# We preprocess the image into a 4D tensor (a batch holding one image)
img = tf.keras.utils.load_img(img_path, target_size=(32, 32))
img_tensor = tf.keras.utils.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
# Scale pixel values from [0, 255] to [0, 1], the same scaling that was
# applied to X_train / X_test.
img_tensor /= 255.

# Its shape is (1, 32, 32, 3)
print(img_tensor.shape)

plt.imshow(img_tensor[0])
plt.show()

# Inspect layers 1..7. Index 0 is skipped (presumably the InputLayer, whose
# output is just the image itself). The outputs and the names are taken from
# the SAME list so that every plot is titled with the layer that produced it.
inspected_layers = model.layers[1:8]
layer_outputs = [layer.output for layer in inspected_layers]
# Creates a model that will return these outputs, given the model input:
activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer_outputs)

activations = activation_model.predict(img_tensor)

first_layer_activation = activations[0]
print(first_layer_activation.shape)

plt.matshow(first_layer_activation[0, :, :, 1], cmap='viridis')
plt.show()

# These are the names of the layers, so can have them as part of our plot
layer_names = [layer.name for layer in inspected_layers]

images_per_row = 16

# Now let's display our feature maps
for layer_name, layer_activation in zip(layer_names, activations):
    # This is the number of features in the feature map
    n_features = layer_activation.shape[-1]

    # The feature map has shape (1, size, size, n_features)
    size = layer_activation.shape[1]

    # Number of grid rows needed to hold every channel. Ceil division keeps
    # layers with fewer than `images_per_row` channels (or a non-multiple of
    # it) from producing an empty, zero-height grid; unused tiles stay black.
    n_cols = -(-n_features // images_per_row)
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    # We'll tile each channel into this big grid
    for channel in range(n_features):
        col, row = divmod(channel, images_per_row)
        # astype() copies, so the normalization below does not overwrite
        # the values stored in `activations`.
        channel_image = layer_activation[0, :, :, channel].astype('float32')
        # Post-process the feature to make it visually palatable
        channel_image -= channel_image.mean()
        # The epsilon guards against constant (zero-variance) channels.
        channel_image /= (channel_image.std() + 1e-5)
        channel_image *= 64
        channel_image += 128
        channel_image = np.clip(channel_image, 0, 255).astype('uint8')
        display_grid[col * size : (col + 1) * size,
                     row * size : (row + 1) * size] = channel_image

    # Display the grid
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

plt.show()

In [None]:
#VISUALIZING CONVNET FILTERS 

# Switch TensorFlow to graph mode before building the gradient graph below
# (presumably required by K.gradients / K.function).
tf.compat.v1.disable_eager_execution()

# NOTE: from here on `model` is rebound to an ImageNet-pretrained VGG16
# backbone; the `block*_conv*` layer names used below belong to that model.
model = tf.keras.applications.vgg16.VGG16(weights='imagenet', include_top=False)

layer_name = 'block3_conv1'
filter_index = 0

# Loss = mean activation of one filter of the chosen layer.
layer_output = model.get_layer(layer_name).output
loss = tf.keras.backend.mean(layer_output[:, :, :, filter_index])

# Gradient of that loss with respect to the input image ...
grads = K.gradients(loss, model.input)[0]

# ... divided by its RMS (plus a small epsilon to avoid dividing by zero).
grads_rms = tf.keras.backend.sqrt(
    tf.keras.backend.mean(tf.keras.backend.square(grads)))
grads = grads / (grads_rms + 1e-5)

# Callable mapping an input batch to [loss value, gradient value].
iterate = K.function([model.input], [loss, grads])

import numpy as np
# One evaluation on an all-zero image.
loss_value, grads_value = iterate([np.zeros((1, 150, 150, 3))])

# We start from a gray image with some noise
input_img_data = np.random.random((1, 150, 150, 3)) * 20 + 128.

# Run gradient ascent for 40 steps
step = 1.  # this is the magnitude of each gradient update
for _ in range(40):
    # Evaluate loss and gradient, then move the image along the gradient,
    # i.e. in the direction that increases the filter's mean activation.
    loss_value, grads_value = iterate([input_img_data])
    input_img_data += grads_value * step

def deprocess_image(x):
    """Turn an arbitrary numeric array into a displayable uint8 image.

    The values are standardized (zero mean, std 0.1), shifted to be centred
    on 0.5, clipped to [0, 1] and finally scaled to [0, 255].

    Args:
        x: array-like of numbers (any shape). It is NOT modified; the
            function works on a private copy. Integer input is accepted and
            is processed in float64, floating input keeps its own precision.

    Returns:
        A new `uint8` ndarray with the same shape as `x`.
    """
    x = np.asarray(x)
    work_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    # astype() always copies here, so the in-place arithmetic below cannot
    # leak back into the caller's array (or into a view of it).
    x = x.astype(work_dtype)

    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)  # epsilon keeps constant inputs from dividing by 0
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array; values are already within [0, 255] after the
    # clip above, so no second clip is needed
    x *= 255
    return x.astype('uint8')

def generate_pattern(layer_name, filter_index, size=150):
    """Build an image that strongly activates one filter of `model`.

    Starting from CIFAR-10 training image `X_train[4]`, runs 40 steps of
    gradient ascent on the mean activation of filter `filter_index` in layer
    `layer_name`, then converts the result with `deprocess_image`.

    Relies on the module-level names `model`, `K`, `X_train`, `resize`
    (skimage.transform) and `deprocess_image`.

    Args:
        layer_name: name of a layer of `model`.
        filter_index: index of the filter (last axis of the layer output).
        size: side length, in pixels, of the square image to produce. The
            seed image is resized to `size` x `size` when it is not already
            that size.

    Returns:
        The value returned by `deprocess_image` for the optimized image,
        which has shape (size, size, 3).
    """
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])

    grads = K.gradients(loss, model.input)[0]

    # Divide the gradient by its RMS; epsilon avoids dividing by zero.
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

    iterate = K.function([model.input], [loss, grads])

    # ADDED ONE OF THE CIFAR10 IMAGES FOR THE VISUALIZATION
    # np.array() copies, so the in-place updates below never touch X_train.
    seed = np.array(X_train[4], dtype='float32')
    if seed.shape[:2] != (size, size):
        # Honour the requested size instead of silently returning 32x32.
        seed = resize(seed, (size, size)).astype('float32')
    input_img_data = seed[np.newaxis, ...]

    step = 1.
    for _ in range(40):
        _, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step

    return deprocess_image(input_img_data[0])

plt.imshow(generate_pattern('block3_conv1', 0))
plt.show()

for layer_name in ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']:
    size = 32
    margin = 5

    # This a empty (black) image where we will store our results: an 8x8
    # grid of `size`-pixel tiles separated by `margin`-pixel gaps. The tiles
    # are uint8 images in [0, 255], so the canvas uses uint8 as well.
    grid_side = 8 * size + 7 * margin
    results = np.zeros((grid_side, grid_side, 3), dtype=np.uint8)

    for i in range(8):  # iterate over the rows of our results grid
        for j in range(8):  # iterate over the columns of our results grid
            # Generate the pattern for filter `i + (j * 8)` in `layer_name`
            filter_img = generate_pattern(layer_name, i + (j * 8), size=size)

            # Put the result in the square `(i, j)` of the results grid.
            # Each tile gets its own offset; writing every tile to the same
            # corner would leave only the last filter visible.
            horizontal_start = i * size + i * margin
            horizontal_end = horizontal_start + size
            vertical_start = j * size + j * margin
            vertical_end = vertical_start + size
            results[horizontal_start: horizontal_end,
                    vertical_start: vertical_end, :] = filter_img

    # Display the results grid. The values are already 0..255, so they are
    # shown as-is (rescaling by 255 would overflow uint8).
    plt.figure(figsize=(5, 5))
    plt.imshow(results)
    plt.show()