# Transfer learning

## Try a pre-trained model on random (AI-generated) pics

In [None]:
from keras.utils import load_img, img_to_array
from keras.applications.resnet50 import ResNet50

# Load ResNet50 together with its ImageNet weights (classification head included).
pretrained_model = ResNet50(weights='imagenet')
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
pretrained_model.summary()

In [None]:
# Image size used for every picture fed to ResNet50 in this notebook.
size_ResNet = (224, 224)
img_path = "../data/leonardo_rabbit.jpg"
# Load the picture and resize it to size_ResNet while loading.
img = load_img(img_path, target_size=size_ResNet)
# Bare expression: the notebook renders the image as the cell output.
img

In [None]:
# Convert the loaded image into an array of pixel values.
x = img_to_array(img)
# Inspect the array shape (expected: height, width, channels).
x.shape

In [None]:
# The model predicts on batches, so add a leading batch axis: the result is
# one sample that itself has 3 dimensions (height, width, channels).
import numpy as np

x = np.expand_dims(x, axis=0)
# Inspect the shape again; it now starts with the batch size of 1.
x.shape

In [None]:
from keras.applications.resnet50 import preprocess_input

# The ImageNet weights expect inputs transformed by preprocess_input
# (RGB -> BGR plus per-channel mean subtraction); feeding raw 0-255 RGB
# pixels degrades the predictions. preprocess_input may overwrite its
# argument, so pass a copy to keep `x` intact when this cell is re-run.
preds = pretrained_model.predict(preprocess_input(x.copy()))
# One row per image in the batch, one column per class.
preds.shape

In [None]:
from keras.applications.resnet50 import decode_predictions

# Translate the raw class probabilities into the three most likely labels.
decoded_preds = decode_predictions(preds, top=3)
# Only one image was predicted, so look at the first (and only) result list.
top_predictions = decoded_preds[0]
# Each entry is (class id, readable label, probability).
for class_id, class_name, prob in top_predictions:
    print(f"{class_name} with prob {prob:.2%}")

## Apply transfer learning for a "real" task

Fun [story](https://youtu.be/vIci3C4JkL0) from HBO's Silicon Valley series from 2017: Shazam for food.

Download the data from [Kaggle](https://www.kaggle.com/datasets/dansbecker/hot-dog-not-hot-dog).

In [None]:
from os import path

# Root folder of the downloaded data set; the class folders live below it.
folder_where_data_resides = "../data/hotdog/"

# Show one example picture of the hot_dog class, resized for ResNet50.
hot_dog_path = path.join(folder_where_data_resides, "train/hot_dog/1000288.jpg")
hot_dog = load_img(hot_dog_path, target_size=size_ResNet)
hot_dog

In [None]:
# Show one example picture of the not_hot_dog class, resized for ResNet50.
not_hot_dog_path = path.join(folder_where_data_resides, "train/not_hot_dog/100135.jpg")
not_hot_dog = load_img(not_hot_dog_path, target_size=size_ResNet)
not_hot_dog

In [21]:
def preprocess_image(img):
    """Turn a loaded image into a one-image batch ready for ResNet50.

    Args:
        img: Image object as returned by ``load_img``.

    Returns:
        Array with a leading batch axis of size 1, transformed with the
        ResNet50 ``preprocess_input`` function.
    """
    # Local import keeps this function self-contained within its cell.
    from keras.applications.resnet50 import preprocess_input

    batch = np.expand_dims(img_to_array(img), axis=0)
    # `batch` is a fresh array owned by this function, so it does not matter
    # that preprocess_input may overwrite its argument.
    return preprocess_input(batch)

def predict_top_classes(preprocessed_img, top=3):
    """Print the most likely classes for the first image of a batch.

    Args:
        preprocessed_img: Image batch to pass to ``pretrained_model.predict``,
            e.g. the output of ``preprocess_image``.
        top: Number of highest-probability classes to print (default 3).

    Returns:
        None. The class names and probabilities are printed.
    """
    preds = pretrained_model.predict(preprocessed_img)
    decoded_preds = decode_predictions(preds, top=top)
    # Only the first image of the batch is reported; each entry is
    # (class id, readable label, probability).
    for _class_id, class_name, prob in decoded_preds[0]:
        print(f"{class_name} with prob {prob:.2%}")


In [None]:
# Run the ImageNet classifier on both example pictures, one after the other.
samples = (
    ("Prediction for a sample hot dog:", hot_dog),
    ("\nPrediction for a sample not hot dog:", not_hot_dog),
)
for caption, sample_img in samples:
    print(caption)
    predict_top_classes(preprocess_image(sample_img))

### Load the whole data in batches

In [None]:
# Load data
from keras.utils import image_dataset_from_directory

# Training images, read in batches of 32 and resized for ResNet50.
train_dataset = image_dataset_from_directory(
    path.join(folder_where_data_resides, "train"),
    batch_size=32,
    image_size=size_ResNet,
)
# Held-out images must come from the "test" folder: reading "train" again
# would make every validation/test score measure memorisation of the
# training data instead of generalisation.
test_dataset = image_dataset_from_directory(
    path.join(folder_where_data_resides, "test"),
    batch_size=32,
    image_size=size_ResNet,
)

train_dataset

In [None]:
# Pull a single batch to check the shapes the loader produces.
first_batch = train_dataset.take(1)
for images, labels in first_batch:
    print("Batch shape:", images.shape, labels.shape)

In [None]:
import matplotlib.pyplot as plt
from keras.utils import array_to_img

# Preview grid: 3 rows x 5 columns, i.e. the first 15 images of one batch.
n_rows, n_cols = 3, 5
class_names = train_dataset.class_names

for images, labels in train_dataset.take(1):
    for i in range(n_rows * n_cols):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(array_to_img(images[i]))
        # The label is an integer index into the class-name list.
        plt.title(class_names[labels[i]])
        plt.axis("off")
    plt.show()

### Build a CNN for this task from scratch

**TODO**: Build a CNN model from scratch and evaluate its performance.

In [None]:
# Start from the model we ended up with in the digit recognition example
from keras.models import Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout

# Build the model

# Compile the model and print summary

# Fit the model (with early stopping) and look at validation accuracy


### Fine-tune a pre-trained model for our task

We can fine-tune a pre-trained model for our purposes by modifying the last few layers, and learn only the new parameters on new data (freezing the weights of the original network).

In [None]:
# Load the pre-trained ResNet50 without its top (classification) layer: we do
# not want to classify into 1000 classes, only make a simple binary decision.
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=size_ResNet + (3,),  # concatenating tuples to account for three channels
)

# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
base_model.summary()

Some of the parameters are non-trainable: these belong to the batch normalization layers, which apply a transformation that keeps the mean output close to 0 and the output standard deviation close to 1. The moving mean and variance these layers track are updated as running statistics during training rather than by backpropagation.

In [None]:
# Freeze the layers of the pre-trained model
base_model.trainable = False

# Sanity check: with the base model frozen, this is expected to be 0.
len(base_model.trainable_variables)

In [None]:
# Import from `keras` like the rest of the notebook (base_model was built with
# keras.applications), instead of mixing in `tensorflow.keras`.
from keras.applications.resnet50 import preprocess_input
from keras.layers import Dense, GlobalAveragePooling2D, Input, Lambda
from keras.models import Sequential

fine_tuned_model = Sequential([
    Input(shape=size_ResNet + (3,)),
    # The datasets deliver raw RGB pixel values, but the frozen ImageNet
    # weights expect inputs transformed by preprocess_input. Doing it inside
    # the model keeps training, validation and prediction consistent.
    Lambda(preprocess_input, name="resnet50_preprocessing"),
    base_model,
    # Collapse each feature map to a single value before the new dense head.
    GlobalAveragePooling2D(),
    Dense(256, activation="relu"),
    # Single sigmoid unit: probability of the class with label 1.
    Dense(1, activation="sigmoid"),
])

# Compile the model
fine_tuned_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# summary() prints the table itself and returns None, so no print() around it.
fine_tuned_model.summary()

In [None]:
from keras.callbacks import EarlyStopping

# Stop training once validation accuracy has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

# Fit the fine-tuned model (the batch size is already fixed by the datasets).
# NOTE(review): test_dataset doubles as validation data here, so the score
# reported below is not from fully unseen data; consider a separate split.
fine_tuned_model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    callbacks=[early_stopping],
)

# Evaluate the model
loss, accuracy = fine_tuned_model.evaluate(test_dataset)
print(f"\nTest accuracy: {accuracy:.2%}")

Modern LLMs also follow the transfer learning pattern: they are pre-trained on vast amounts of general data (basically: the internet) and are then fine-tuned on specific tasks.