# Setup env and load data

This is a test project for trying out different computer vision models (a CNN trained from scratch, transfer learning, etc.). It uses CIFAR-10 as the test dataset:
https://www.cs.toronto.edu/~kriz/cifar.html

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.18.0


In [None]:
# Mount Google Drive

from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Paths

In [None]:
from pathlib import Path
import json

path_to_google_drive_dir = Path("drive/MyDrive/ML-practice/computer-vision")

In [None]:
def get_path(name: str) -> Path:
  """
  Resolve `name` against the project's Google Drive directory.

  The parent directory of the resulting path is created when it is missing,
  so callers can write a file (or let a library create a directory) there
  right away. The path itself is never created by this function.

  Args:
    name: file or directory name relative to `path_to_google_drive_dir`.

  Returns:
    The joined path.
  """
  path = path_to_google_drive_dir / name

  # Only the parent is created: a path that does not exist yet cannot be
  # classified as file or directory, and an already existing directory needs
  # no creation at all.
  path.parent.mkdir(parents=True, exist_ok=True)

  return path

In [None]:
# Root folders of the CIFAR-10 images exported as PNG files (train / test split).
# Both are relative paths, so they resolve against the current working directory.
# NOTE(review): "CIRAF" looks like a typo for "CIFAR"; renaming means updating every usage.
CIRAF_TRAIN_DIR = "cifar10/train"
CIRAF_TEST_DIR = "cifar10/test"

# Data preprocessing

## Load dataset

In [None]:
# # Import dataset
# from tensorflow.keras.datasets import cifar10

# (X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [None]:
# from tensorflow.keras.utils import to_categorical

# # Normalize pixel values
# X_train, X_test = X_train / 255.0, X_test / 255.0

# # One-hot encode labels
# y_train_cat = to_categorical(y_train, 10)
# y_test_cat = to_categorical(y_test, 10)

## Save datasets locally

In [None]:
import os
from PIL import Image

def save_cifar10_to_disk(X, y, base_path) -> None:
    """
    Write every image of one dataset split to `base_path/<label>/<index>.png`.

    Args:
        X: iterable of image arrays accepted by `PIL.Image.fromarray`.
        y: label array; it is flattened and paired with `X` element by element.
        base_path: root directory of the split. It is created when missing,
            together with the ten class folders "0" .. "9".
    """
    os.makedirs(base_path, exist_ok=True)
    class_dirs = [os.path.join(base_path, str(class_id)) for class_id in range(10)]
    for class_dir in class_dirs:
        os.makedirs(class_dir, exist_ok=True)

    labels = y.flatten()
    for position, (pixels, label) in enumerate(zip(X, labels)):
        # The file name is the sample's position within the split.
        target_file = os.path.join(base_path, str(label), f"{position}.png")
        Image.fromarray(pixels).save(target_file)

In [None]:
import os
from tensorflow.keras.datasets import cifar10

def save_datasets() -> None:
    """
    Export CIFAR-10 to PNG folders unless both split directories already exist.

    The check looks only at the existence of `CIRAF_TRAIN_DIR` and
    `CIRAF_TEST_DIR`; their contents are not inspected.
    """
    already_exported = os.path.exists(CIRAF_TRAIN_DIR) and os.path.exists(CIRAF_TEST_DIR)

    if not already_exported:
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()

        # Train split first, then test split.
        for images, labels, split_dir in (
            (X_train, y_train, CIRAF_TRAIN_DIR),
            (X_test, y_test, CIRAF_TEST_DIR),
        ):
            save_cifar10_to_disk(images, labels, split_dir)

        print("Datasets is uploaded!")
        print(f"X_train: {len(X_train)}, y_train: {len(y_train)}, "
                f"X_test: {len(X_test)}, y_test: {len(y_test)}")

        # Drop the references to the in-memory arrays.
        del images, labels, X_train, y_train, X_test, y_test
    else:
        print("Datasets already exists!")


save_datasets()

Datasets already exists!


In [None]:
# Spatial size (height, width) of the images requested from the generators.
INPUT_SIZE: tuple[int, int] = (32, 32)
# Model input shape: the spatial size followed by the three colour channels.
INPUT_SHAPE: tuple[int, int, int] = (*INPUT_SIZE, 3)

## Get datagen

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, DirectoryIterator


def create_train_generator(
    target_size: tuple[int, int] = INPUT_SIZE,
    rotation_range: int | None = None,
    horizontal_flip: bool | None = None,
    width_shift_range: float | None = None,
    height_shift_range: float | None = None,
    rescale: float | None = None,
    batch_size: int = 32,
    class_mode: str | None = "categorical",
) -> DirectoryIterator:
    """
    Build a directory iterator over the training images with optional augmentation.

    Args:
        target_size: (height, width) every image is resized to.
        rotation_range: maximum random rotation in degrees; None disables it.
        horizontal_flip: whether to flip images horizontally at random; None disables it.
        width_shift_range: random horizontal shift; None disables it.
        height_shift_range: random vertical shift; None disables it.
        rescale: factor applied to the pixel values; None leaves them unscaled.
        batch_size: number of images per batch.
        class_mode: label encoding passed to `flow_from_directory`.

    Returns:
        An iterator reading from `CIRAF_TRAIN_DIR`.
    """
    requested_options = {
        "rotation_range": rotation_range,
        "horizontal_flip": horizontal_flip,
        "width_shift_range": width_shift_range,
        "height_shift_range": height_shift_range,
        "rescale": rescale,
    }
    # Forward only the options that were actually set. The Keras defaults are
    # numeric/boolean (0, False); handing over None instead can break the
    # random-transform sampling (e.g. `bool * None` for `horizontal_flip`).
    active_options = {
        option: value
        for option, value in requested_options.items()
        if value is not None
    }
    train_datagen = ImageDataGenerator(**active_options)

    return train_datagen.flow_from_directory(
        CIRAF_TRAIN_DIR,
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode
    )


def create_val_generator(
    target_size: tuple[int, int] = INPUT_SIZE,
    rescale: float | None = 1./255,
    batch_size: int = 32,
    class_mode: str | None = "categorical",
) -> DirectoryIterator:
    """
    Build a directory iterator over the test images without augmentation.

    Args:
        target_size: (height, width) every image is resized to.
        rescale: factor applied to the pixel values (None for no scaling).
        batch_size: number of images per batch.
        class_mode: label encoding passed to `flow_from_directory`.

    Returns:
        An iterator reading from `CIRAF_TEST_DIR`.
    """
    flow_options = {
        "target_size": target_size,
        "batch_size": batch_size,
        "class_mode": class_mode,
    }
    # The only preprocessing step for validation data is the pixel rescaling.
    return ImageDataGenerator(rescale=rescale).flow_from_directory(
        CIRAF_TEST_DIR,
        **flow_options,
    )

In [None]:
# CIFAR-10 class names; the position in the list is the integer class label.
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",   # labels 0-4
    "dog", "frog", "horse", "ship", "truck",           # labels 5-9
]

In [None]:
def show_images_from_generator(datagen: DirectoryIterator,
                               class_names: list[str] = class_names,
                               n: int = 5) -> None:
    """
    Display a row of images with their class labels using a data generator.

    Images are taken in order from the generator's batches; a further batch is
    only requested when the current one has fewer than the `n` images needed.

    Args:
        datagen (DirectoryIterator): data generator for loading images
        class_names (list): class name list for decoding
        n (int): number of images to show; nothing is drawn for n <= 0
    """
    if n <= 0:
        return

    fig, axes = plt.subplots(1, n, figsize=(15, 3), squeeze=False)
    shown = 0
    while shown < n:
        images, labels = next(datagen)
        if len(images) == 0:
            # Defensive stop: an empty batch would otherwise loop forever.
            break

        for image, label in zip(images, labels):
            ax = axes[0][shown]
            # imshow expects floats in [0, 1]; generators built without
            # `rescale` yield pixel values in [0, 255].
            ax.imshow(image / 255.0 if np.max(image) > 1.0 else image)
            # One-hot labels are decoded with argmax, scalar labels are used as is.
            label_index = int(np.argmax(label)) if np.ndim(label) else int(label)
            ax.set_title(class_names[label_index])
            ax.axis('off')

            shown += 1
            if shown == n:
                break

    # Hide panels that received no image (only after the defensive stop above).
    for unused_ax in axes[0][shown:]:
        unused_ax.axis('off')
    plt.show()


In [None]:
# Smoke test: build the default training generator (no augmentation, no rescaling).
datagen = create_train_generator()
# show_images_from_generator(datagen, n=5)

Found 50000 images belonging to 10 classes.


# Training

In [None]:
from typing import Callable
from pathlib import Path
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback, History
from tensorflow.keras.models import load_model
import json

def load_or_train_model_from_directory(create_fn: Callable[[], Model],
                                       model_path: Path,
                                       history_path: Path,
                                       callbacks: list[Callback],
                                       train_generator,
                                       val_generator,
                                       epochs: int = 30) -> tuple[Model, History]:
    """
    Load a saved model or train a new one using directory-based generators and save it.

    If either the model or its history cannot be loaded, a new model is built
    with `create_fn`, trained, and both artefacts are written to disk.

    Args:
        create_fn: function that returns a compiled Keras model.
        model_path: path to the saved model (.keras or .h5).
        history_path: path of the JSON file holding the training history.
        callbacks: list of Keras callbacks (e.g., EarlyStopping, ModelCheckpoint).
        train_generator: DirectoryIterator for training data.
        val_generator: DirectoryIterator for validation data.
        epochs: number of training epochs.

    Returns:
        A trained or loaded Keras model and its History. For a loaded model the
        History object is rebuilt from the JSON file, so `.history` holds the
        saved per-epoch metrics in both cases.
    """
    try:
        model = load_model(model_path)
        print(f"✅ Loaded saved model from {model_path}")

        # The context manager closes the file even if parsing fails.
        with open(history_path) as history_file:
            saved_metrics = json.load(history_file)

        # Wrap the raw dict so callers always get an object with `.history`.
        history = History()
        history.history = saved_metrics
        print(f"✅ Loaded history from {history_path}")

    except Exception as e:
        # Deliberate best effort: any load problem falls back to training.
        print(f"⚠️ Could not load model or history: {e}")
        model = create_fn()

        history = model.fit(
            train_generator,
            validation_data=val_generator,
            epochs=epochs,
            callbacks=callbacks
        )

        model.save(model_path)
        print(f"💾 Model is saved to {model_path}")

        # `default=float` converts NumPy scalars that json cannot serialise.
        history_path.write_text(json.dumps(history.history, default=float))
        print(f"💾 History is saved to {history_path}")

    return model, history


In [None]:
from typing import Callable
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback, History
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

def load_or_train_model(create_fn: Callable[[], Model],
                        model_path: Path,
                        history_path: Path,
                        callbacks: list[Callback],
                        X_train: tf.Tensor,
                        y_train_cat: tf.Tensor,
                        X_test: tf.Tensor,
                        y_test_cat: tf.Tensor,
                        datagen: ImageDataGenerator | None = None,
                        epochs: int = 30) -> tuple[Model, History]:
    """
    Load a saved model or train a new one and save it.

    If either the model or its history cannot be loaded, a new model is built
    with `create_fn`, trained, and both artefacts are written to disk.

    Args:
        create_fn: function that returns a compiled Keras model.
        model_path: path to the saved model (.keras or .h5).
        history_path: path of the JSON file holding the training history.
        callbacks: list of Keras callbacks (e.g., EarlyStopping, ModelCheckpoint).
        X_train, y_train_cat: training data (optionally augmented using datagen).
        X_test, y_test_cat: validation data.
        datagen: an ImageDataGenerator for data augmentation; when given it is
            fitted on `X_train` and training batches of 64 are drawn from it.
        epochs: number of training epochs.

    Returns:
        A trained or loaded Keras model and its History. For a loaded model the
        History object is rebuilt from the JSON file, so `.history` holds the
        saved per-epoch metrics in both cases.
    """
    try:
        model = load_model(model_path)
        print(f"✅ Loaded saved model from {model_path}")

        # The context manager closes the file even if parsing fails.
        with open(history_path) as history_file:
            saved_metrics = json.load(history_file)

        # Wrap the raw dict so callers always get an object with `.history`.
        history = History()
        history.history = saved_metrics
        print(f"✅ Loaded history from {history_path}")

    except Exception as e:
        # Deliberate best effort: any load problem falls back to training.
        print(f"⚠️ Could not load model or history: {e}")
        model = create_fn()

        if datagen is not None:
            # Compute any data-dependent statistics the generator needs.
            datagen.fit(X_train)

            history = model.fit(
                datagen.flow(X_train, y_train_cat, batch_size=64),
                validation_data=(X_test, y_test_cat),
                epochs=epochs,
                callbacks=callbacks
            )
        else:
            history = model.fit(
                X_train,
                y_train_cat,
                validation_data=(X_test, y_test_cat),
                epochs=epochs,
                callbacks=callbacks
            )

        model.save(model_path)
        print(f"💾 Model is saved to {model_path}")

        # `default=float` converts NumPy scalars that json cannot serialise.
        history_path.write_text(json.dumps(history.history, default=float))
        print(f"💾 History is saved to {history_path}")

    return model, history


# Evaluation

## TensorBoard

In [None]:
%load_ext tensorboard

In [None]:
import datetime

# Create a func to build a TensorBoard callback

def create_tensorboard_callback(path_to_logs: Path):
  """
  Build a TensorBoard callback that logs into a timestamped sub-directory.

  Args:
    path_to_logs: parent directory for the runs of one model.

  Returns:
    A `tf.keras.callbacks.TensorBoard` instance whose log directory is
    `path_to_logs/<YYYY_MM_DD-HH:MM:SS>` (local time at the moment of the call).
  """
  run_stamp = f"{datetime.datetime.now():%Y_%m_%d-%H:%M:%S}"
  return tf.keras.callbacks.TensorBoard(path_to_logs / run_stamp)

## Evaluation functions

In [None]:
# Registry of training histories keyed by model name. It is filled by the
# training helpers and read by `plot_training_histories` to compare models.
histories: dict[str, tf.keras.callbacks.History] = {}

In [None]:
def plot_confusion_matrix(model_name: str,
                          y_pred: np.ndarray,
                          y_true: np.ndarray) -> None:
    """
    Plot the confusion matrix of a model.

    Args:
        model_name: name shown in the figure title.
        y_pred: predicted class indices.
        y_true: true class indices.
    """
    # Fix the label set so the matrix always has one row/column per class,
    # even when a class is missing from both arrays; otherwise the tick labels
    # would no longer line up with the matrix cells.
    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    ax = sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
    # scikit-learn puts the true classes on the rows and the predictions on the columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    ax.set_title("Confusion Matrix for model: " + model_name)
    plt.show()

In [None]:
def plot_training_histories(histories: dict[str, tf.keras.callbacks.History],
                            metric: str = 'accuracy') -> None:
    """
    Plot the training and validation curves of one metric for several models.

    Args:
        histories: mapping of model name to its training history. Each value
            may be a Keras History object or the plain per-epoch metrics dict
            (e.g. one read back from a JSON file).
        metric: metric name; the validation curve is looked up as `val_<metric>`.
    """
    plt.figure(figsize=(10, 5))
    for name, history in histories.items():
        # Accept both representations: History exposes the dict as `.history`.
        epoch_metrics = getattr(history, "history", history)

        plt.plot(epoch_metrics[metric], label=f'{name} Train')
        # A model trained without validation data has no `val_` entry.
        if f'val_{metric}' in epoch_metrics:
            plt.plot(epoch_metrics[f'val_{metric}'], label=f'{name} Val')
    plt.title(f'{metric.title()} over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel(metric.title())
    plt.legend()
    plt.grid(True)
    plt.show()

#### Print general metrics

In [None]:
from sklearn.metrics import classification_report
import numpy as np
from tensorflow.keras.models import Model

def print_general_metrics(model: Model,
                          model_name: str,
                          model_description: str,
                          to_print_model_summary: bool = True,
                          to_print_classification_report: bool = True,
                          to_plot_confusion_matrix: bool = True) -> None:
    """
    Print a model's description and evaluate it on the validation generator.

    Args:
        model: trained Keras model to evaluate.
        model_name: name used in the printed header and the plot title.
        model_description: free-text description printed under the header.
        to_print_model_summary: whether to print `model.summary()`.
        to_print_classification_report: whether to print the per-class report.
        to_plot_confusion_matrix: whether to plot the confusion matrix.
    """
    print("\nMODEL:", model_name)
    print(model_description, "\n")

    if to_print_model_summary:
        model.summary()

    val_generator = create_val_generator()

    y_pred = []
    y_true = []

    # The generator loops forever, so stop once every sample was seen once.
    for X_batch, y_batch in val_generator:
        # verbose=0: one progress bar per batch would flood the cell output.
        preds = model.predict(X_batch, verbose=0)
        y_pred.extend(np.argmax(preds, axis=1))
        y_true.extend(np.argmax(y_batch, axis=1))
        if len(y_true) >= val_generator.samples:
            break

    y_pred = np.array(y_pred[:val_generator.samples])
    y_true = np.array(y_true[:val_generator.samples])

    if to_print_classification_report:
        # Explicit labels keep the report aligned with the class names even if
        # a class never occurs in the collected arrays.
        print(classification_report(
            y_true,
            y_pred,
            labels=list(range(len(class_names))),
            target_names=class_names,
            zero_division=0,
        ))

    if to_plot_confusion_matrix:
        plot_confusion_matrix(model_name, y_pred, y_true)


In [None]:
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.models import Model

def print_accuracy_and_f1_score(model: Model,
                                model_name: str) -> None:
    """
    Print accuracy and macro-averaged F1 of a model on the validation generator.

    Args:
        model: trained Keras model to evaluate.
        model_name: name used as prefix of the printed line.
    """
    val_generator = create_val_generator()

    y_pred = []
    y_true = []

    # The generator loops forever, so stop once every sample was seen once.
    for X_batch, y_batch in val_generator:
        # verbose=0: one progress bar per batch would flood the cell output.
        preds = model.predict(X_batch, verbose=0)
        y_pred.extend(np.argmax(preds, axis=1))
        y_true.extend(np.argmax(y_batch, axis=1))
        if len(y_true) >= val_generator.samples:
            break

    y_pred = np.array(y_pred[:val_generator.samples])
    y_true = np.array(y_true[:val_generator.samples])

    ac: float = accuracy_score(y_true, y_pred)
    f1: float = f1_score(y_true, y_pred, average='macro')  # Use 'macro' for multiclass

    print(f"{model_name} -> accuracy_score: {ac:.4f}, f1_score (macro): {f1:.4f}")


In [None]:
from sklearn.metrics import RocCurveDisplay, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.models import Model

def plot_roc_auc_score(model: Model, model_name: str, num_classes: int = 10) -> None:
    """
    Plot one-vs-rest ROC curves (one per class) for a model on the validation generator.

    Args:
        model: trained Keras model to evaluate.
        model_name: name shown in the figure title.
        num_classes: number of classes; must equal the width of the label arrays.

    Raises:
        ValueError: if the collected labels do not have `num_classes` columns.
    """
    val_generator = create_val_generator()

    y_true = []
    y_pred_prob = []

    # The generator loops forever, so stop once enough batches were collected.
    for X_batch, y_batch in val_generator:
        # verbose=0: one progress bar per batch would flood the cell output.
        preds = model.predict(X_batch, verbose=0)
        y_pred_prob.append(preds)
        y_true.append(y_batch)
        if len(y_true) * val_generator.batch_size >= val_generator.samples:
            break

    y_true = np.vstack(y_true)[:val_generator.samples]
    y_pred_prob = np.vstack(y_pred_prob)[:val_generator.samples]

    # Ensure one-hot encoded labels
    if y_true.shape[1] != num_classes:
        raise ValueError(f"y_true must be one-hot encoded for multiclass ROC. Got shape {y_true.shape}")

    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    # Each class is scored against all others using its own probability column.
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred_prob[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot ROC curves
    plt.figure(figsize=(10, 8))
    for i in range(num_classes):
        RocCurveDisplay(fpr=fpr[i], tpr=tpr[i], roc_auc=roc_auc[i], estimator_name=f'Class {i}').plot(ax=plt.gca())

    plt.title(f"{model_name} - ROC AUC Curves")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
from sklearn.metrics import roc_auc_score

def print_roc_auc_score(model: Model, model_name: str, num_classes: int = 10) -> None:
    """
    Print the macro-averaged one-vs-rest ROC AUC of a model on the validation generator.

    Args:
        model: trained Keras model to evaluate.
        model_name: name used as prefix of the printed line.
        num_classes: kept for signature parity with `plot_roc_auc_score`; not used here.
    """
    val_generator = create_val_generator()

    y_true = []
    y_pred_prob = []

    # The generator loops forever, so stop once enough batches were collected.
    for X_batch, y_batch in val_generator:
        # verbose=0: one progress bar per batch would flood the cell output.
        preds = model.predict(X_batch, verbose=0)
        y_pred_prob.append(preds)
        y_true.append(y_batch)
        if len(y_true) * val_generator.batch_size >= val_generator.samples:
            break

    y_true = np.vstack(y_true)[:val_generator.samples]
    y_pred_prob = np.vstack(y_pred_prob)[:val_generator.samples]

    score = roc_auc_score(y_true, y_pred_prob, average='macro', multi_class='ovr')
    print(f"{model_name} -> ROC AUC (macro-average OvR): {score:.4f}")

# Models

## 1. CNN from scratch

### Constructors

In [None]:
# Registry of the from-scratch CNN experiments; `train_model_cnn_scratch`
# appends one entry per run to this list by default.
models_to_evaluate_scratch_cnn: list[tuple[Model, str, str]] = []  # (model, model_name, description)

In [None]:
from dataclasses import dataclass, field

@dataclass
class ModelConfigCCNScratch:
    """Hyper-parameters of one from-scratch CNN experiment."""

    # Experiment identifier; also the stem of the saved model, history and log paths.
    model_name: str = "model_1_scratch_cnn"
    # Width of the softmax output layer.
    num_classes: int = len(class_names)
    # Metrics passed to `model.compile`.
    metrics: list[str] = field(default_factory=lambda: ["accuracy"])
    # Optimizer passed to `model.compile`.
    optimizer: str = "adam"
    # Loss passed to `model.compile`.
    loss_function: str = "categorical_crossentropy"
    # Activation of every Conv2D and hidden Dense layer (the output layer is always softmax).
    activation_function: str = "relu"
    # One (filters, kernel_height, kernel_width) tuple per Conv2D + MaxPooling2D stage.
    conv_layers: list[tuple[int, int, int]] = field(default_factory=lambda: [(32, 3, 3), (64, 3, 3)])
    # Number of units of each hidden Dense layer, in order.
    dense_layers: list[int] = field(default_factory=lambda: [64])
    # Requested number of training epochs.
    epochs: int = 30
    # Shape of the model's Input layer: (height, width, channels).
    input_shape: tuple[int, int, int] = INPUT_SHAPE
    # Optional replacement for the built-in model builder; must return a compiled model.
    create_model_fn: Callable[[], Model] | None = None
    # Label encoding for the data generators; presumably meant to match `loss_function` - TODO confirm.
    class_mode: str | None = "categorical"
    # Options for the image generators. The keys read by the training helper are
    # rotation_range, horizontal_flip, width_shift_range, height_shift_range and rescale.
    datagen_transformations: dict = field(default_factory=lambda: {
        "rotation_range": 15,
        "horizontal_flip": True,
        "width_shift_range": 0.1,
        "height_shift_range": 0.1,
        "rescale": 1./255,
    })

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


def train_model_cnn_scratch(model_config: ModelConfigCCNScratch,
                            models_to_evaluate: list[tuple[Model, str, str]] = models_to_evaluate_scratch_cnn) -> None:
    """
    Train (or load from disk) one from-scratch CNN and register it for evaluation.

    The model, its history and its TensorBoard logs are stored under paths
    derived from `model_config.model_name`. The resulting history is stored in
    the module-level `histories` dict and a `(model, name, description)` tuple
    is appended to `models_to_evaluate`.

    Args:
        model_config: hyper-parameters of the experiment.
        models_to_evaluate: list collecting the trained models; defaults to the
            shared registry of from-scratch experiments.
    """
    model_description = f"""
    Model with {len(model_config.conv_layers)} conv layers and {len(model_config.dense_layers)} dense layers.
    Data augmentation: {model_config.datagen_transformations}.
    Activation function: {model_config.activation_function}.
    Optimizer: {model_config.optimizer}.
    Loss function: {model_config.loss_function}.
    Metrics: {model_config.metrics}.
    """

    model_name = model_config.model_name
    input_shape = model_config.input_shape
    create_model_fn = model_config.create_model_fn
    activation_function = model_config.activation_function
    optimizer = model_config.optimizer
    conv_layers = model_config.conv_layers
    dense_layers = model_config.dense_layers
    loss_function = model_config.loss_function
    metrics = model_config.metrics
    num_classes = model_config.num_classes
    transformations = model_config.datagen_transformations

    # Training needs labels from the generators, and `class_mode=None` would
    # make them yield images only. Fall back to the encoding the loss expects:
    # integer indices for the sparse losses, one-hot vectors otherwise.
    class_mode = model_config.class_mode
    if class_mode is None:
        class_mode = "sparse" if loss_function.startswith("sparse_") else "categorical"

    def create_model_scratch_cnn() -> Model:
        """Build and compile the Sequential CNN described by the config."""
        layers = [
            tf.keras.layers.Input(shape=input_shape),
        ]

        # Each configured stage is a convolution followed by max pooling.
        for filters, kernel_height, kernel_width in conv_layers:
            layers.append(tf.keras.layers.Conv2D(filters, (kernel_height, kernel_width), activation=activation_function))
            layers.append(tf.keras.layers.MaxPooling2D())

        layers.append(tf.keras.layers.Flatten())

        for units in dense_layers:
            layers.append(tf.keras.layers.Dense(units, activation=activation_function))

        layers.append(tf.keras.layers.Dense(num_classes, activation="softmax"))

        model = tf.keras.Sequential(layers)
        model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)
        return model

    # Paths
    path_to_model = get_path(f"{model_name}.keras")
    path_to_model_history = get_path(f"{model_name}_history.json")
    path_to_model_logs = get_path(f"{model_name}_logs")

    # The generators must deliver images in the size the model was built for.
    target_size = tuple(input_shape[:2])

    train_generator = create_train_generator(
        target_size=target_size,
        rotation_range=transformations.get("rotation_range"),
        horizontal_flip=transformations.get("horizontal_flip"),
        width_shift_range=transformations.get("width_shift_range"),
        height_shift_range=transformations.get("height_shift_range"),
        rescale=transformations.get("rescale"),
        class_mode=class_mode,
    )
    # Validation data gets the same rescaling but no augmentation; `.get`
    # tolerates configurations without a "rescale" entry.
    val_generator = create_val_generator(
        target_size=target_size,
        rescale=transformations.get("rescale"),
        class_mode=class_mode,
    )

    # Train (load) model
    trained_model, model_history = load_or_train_model_from_directory(
        create_fn=create_model_fn or create_model_scratch_cnn,
        model_path=path_to_model,
        history_path=path_to_model_history,
        callbacks=[
            EarlyStopping(patience=5, restore_best_weights=True),
            ModelCheckpoint(str(path_to_model), save_best_only=True),
            create_tensorboard_callback(path_to_model_logs),
        ],
        train_generator=train_generator,
        val_generator=val_generator,
        epochs=model_config.epochs,
    )

    # Save history
    histories[model_name] = model_history
    models_to_evaluate.append((trained_model, model_name, model_description))


### 1.1. Base

In [None]:
# Baseline: every hyper-parameter keeps the dataclass default.
model_config = ModelConfigCCNScratch()

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_1_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_1_scratch_cnn_history.json


In [None]:
# %tensorboard --logdir 'drive/MyDrive/ML-practice/computer-vision/model_1_scratch_cnn_logs'

### 1.2. Base +1 Conv2D layer

In [None]:
# Variant: a third convolution stage with 128 filters is added to the baseline.
model_config = ModelConfigCCNScratch(
    model_name="model_2_scratch_cnn",
    conv_layers=[(32, 3, 3), (64, 3, 3), (128, 3, 3)],
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_2_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_2_scratch_cnn_history.json


In [None]:
# %tensorboard --logdir 'drive/MyDrive/ML-practice/computer-vision/model_2_scratch_cnn_logs'

### 1.3. Base -1 Conv2D layer

In [None]:
# Variant: only the first convolution stage (32 filters) of the baseline is kept.
model_config = ModelConfigCCNScratch(
    model_name="model_3_scratch_cnn",
    conv_layers=[(32, 3, 3)],
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_3_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_3_scratch_cnn_history.json


### 1.4. Base increased kernel_size (5, 5)

In [None]:
# Variant: baseline filter counts with 5x5 instead of 3x3 kernels.
model_config = ModelConfigCCNScratch(
    model_name="model_4_scratch_cnn",
    conv_layers=[(32, 5, 5), (64, 5, 5)],
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_4_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_4_scratch_cnn_history.json


### 1.5. Base with activation_function="tanh"

In [None]:
# Variant: tanh replaces relu in the convolution and hidden dense layers.
model_config = ModelConfigCCNScratch(
    model_name="model_5_scratch_cnn",
    activation_function="tanh",
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_5_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_5_scratch_cnn_history.json


### 1.6. Base with activation_function="leaky_relu"

In [None]:
# Variant: leaky_relu replaces relu in the convolution and hidden dense layers.
model_config = ModelConfigCCNScratch(
    model_name="model_6_scratch_cnn",
    activation_function="leaky_relu",
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_6_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_6_scratch_cnn_history.json


### 1.7. Base with optimizer="sgd"

In [None]:
# Variant: baseline architecture compiled with the sgd optimizer instead of adam.
model_config = ModelConfigCCNScratch(
    model_name="model_7_scratch_cnn",
    optimizer="sgd",
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_7_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_7_scratch_cnn_history.json


### 1.8. Base with loss_function="sparse_categorical_crossentropy"

In [None]:
# Variant: baseline architecture trained with the sparse cross-entropy loss.
# That loss expects integer class indices, which `flow_from_directory` yields
# for class_mode="sparse"; class_mode=None would request batches without labels.
model_config = ModelConfigCCNScratch(
    model_name="model_8_scratch_cnn",
    loss_function="sparse_categorical_crossentropy",
    class_mode="sparse",
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_8_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_8_scratch_cnn_history.json


### 1.9. Base with dense_layers=[32]

In [None]:
# Variant: the hidden dense layer is halved from 64 to 32 units.
model_config = ModelConfigCCNScratch(
    model_name="model_9_scratch_cnn",
    dense_layers=[32],
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_9_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_9_scratch_cnn_history.json


### 1.10. Base with dense_layers=[128]

In [None]:
# Variant: the hidden dense layer is doubled from 64 to 128 units.
model_config = ModelConfigCCNScratch(
    model_name="model_10_scratch_cnn",
    dense_layers=[128],
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_10_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_10_scratch_cnn_history.json


### 1.11. Base without datagen

In [None]:
# Variant: no augmentation options are configured; only the pixel rescaling is kept.
model_config = ModelConfigCCNScratch(
    model_name="model_11_scratch_cnn",
    datagen_transformations={"rescale": 1./255},
)

train_model_cnn_scratch(model_config=model_config)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_11_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_11_scratch_cnn_history.json


### Evaluate built models

In [None]:
# Print accuracy / macro F1 followed by the description of every registered
# from-scratch experiment, separated by a divider line.
for model, model_name, model_description in models_to_evaluate_scratch_cnn:
    # print_general_metrics(
    #     model=model,
    #     model_name=model_name,
    #     model_description=model_description,
    #     to_print_model_summary = False,
    #     to_print_classification_report = True,
    #     to_plot_confusion_matrix = False,
    #   )
    print_accuracy_and_f1_score(model, model_name)
    print(model_description)
    print("-------------------------------------------------------------------\n")


Found 10000 images belonging to 10 classes.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━

model_1_scratch_cnn -> accuracy_score: 0.7214, f1_score: 0.7176758024653712

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_2_scratch_cnn -> accuracy_score: 0.7531, f1_score: 0.7485525893585591

    Model with 3 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_3_scratch_cnn -> accuracy_score: 0.6319, f1_score: 0.6201943895053794

    Model with 1 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_4_scratch_cnn -> accuracy_score: 0.7174, f1_score: 0.7111854951786144

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_5_scratch_cnn -> accuracy_score: 0.7069, f1_score: 0.698806651692782

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: tanh.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_6_scratch_cnn -> accuracy_score: 0.7614, f1_score: 0.7578671028295253

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: leaky_relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_7_scratch_cnn -> accuracy_score: 0.6772, f1_score: 0.6723452922515232

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: sgd.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_8_scratch_cnn -> accuracy_score: 0.7347, f1_score: 0.7326892475544158

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: sparse_categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_9_scratch_cnn -> accuracy_score: 0.7194, f1_score: 0.7141690110899095

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_10_scratch_cnn -> accuracy_score: 0.7663, f1_score: 0.7636443831904395

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: true.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

model_11_scratch_cnn -> accuracy_score: 0.7003, f1_score: 0.6989571244610545

    Model with 2 conv layers and 1 dense layers.
    Data augmentation: false.
    Activation function: relu.
    Optimizer: adam.
    Loss function: categorical_crossentropy.
    Metrics: ['accuracy'].
    
-------------------------------------------------------------------

### Intermediate results

The intermediate results show that the best-performing models are:

1. Model 10 (accuracy: 0.7663, f1: 0.7636): Base configuration with 2 conv layers and a 128-unit dense layer
2. Model 6 (accuracy: 0.7614, f1: 0.7579): Similar to base but using LeakyReLU activation
3. Model 2 (accuracy: 0.7531, f1: 0.7486): Using 3 conv layers instead of 2

Key findings:
- Having 2-3 convolutional layers performs better than just 1 layer (Model 3: 0.63 accuracy)
- LeakyReLU activation slightly outperforms regular ReLU
- Adam optimizer works better than SGD (Model 7 with SGD: 0.68)

Potential improvements to explore:
- Combine LeakyReLU with 3 conv layers
- Try different learning rates with Adam optimizer
- Experiment with different conv layer filter sizes
- Add batch normalization layers
- Try deeper architectures with more dense layers

In [None]:
# Separate registry for the follow-up experiments, so they can be evaluated
# independently of the first series.
models_to_evaluate_scratch_cnn_upd: list[tuple[Model, str, str]] = []  # (model, model_name, description)

### 1.13. Updated base (1)

In [None]:
# Follow-up: three convolution stages, a 128-unit dense layer and leaky_relu combined.
model_config = ModelConfigCCNScratch(
    model_name="model_13_scratch_cnn",
    conv_layers=[(32, 3, 3), (64, 3, 3), (128, 3, 3)],
    dense_layers=[128],
    activation_function="leaky_relu",
)

train_model_cnn_scratch(
    model_config=model_config,
    models_to_evaluate=models_to_evaluate_scratch_cnn_upd,
)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_13_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_13_scratch_cnn_history.json


### 1.14. Updated base (2)

In [None]:
# Follow-up: as the previous experiment, plus a second hidden dense layer of 256 units.
model_config = ModelConfigCCNScratch(
    model_name="model_14_scratch_cnn",
    conv_layers=[(32, 3, 3), (64, 3, 3), (128, 3, 3)],
    dense_layers=[128, 256],
    activation_function="leaky_relu",
)

train_model_cnn_scratch(
    model_config=model_config,
    models_to_evaluate=models_to_evaluate_scratch_cnn_upd,
)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_14_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_14_scratch_cnn_history.json


### 1.15. Updated base (3)

In [None]:
# Follow-up: three convolution stages and a 128-unit dense layer with plain relu.
model_config = ModelConfigCCNScratch(
    model_name="model_15_scratch_cnn",
    conv_layers=[(32, 3, 3), (64, 3, 3), (128, 3, 3)],
    dense_layers=[128],
    activation_function="relu",
)

train_model_cnn_scratch(
    model_config=model_config,
    models_to_evaluate=models_to_evaluate_scratch_cnn_upd,
)

Found 50000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.
✅ Loaded saved model from drive/MyDrive/ML-practice/computer-vision/model_15_scratch_cnn.keras
✅ Loaded history from drive/MyDrive/ML-practice/computer-vision/model_15_scratch_cnn_history.json


### Evaluate updated models

In [None]:
# Print accuracy / macro F1 followed by the description of every follow-up
# experiment, separated by a divider line.
for model, model_name, model_description in models_to_evaluate_scratch_cnn_upd:
    # print_general_metrics(
    #     model=model,
    #     model_name=model_name,
    #     model_description=model_description,
    #     to_print_model_summary = False,
    #     to_print_classification_report = True,
    #     to_plot_confusion_matrix = False,
    #   )
    print_accuracy_and_f1_score(model, model_name)
    print(model_description)
    print("-------------------------------------------------------------------\n")


In [None]:
# Print the header, description and classification report of every follow-up
# experiment; the model summary and the confusion matrix are switched off.
for model, model_name, model_description in models_to_evaluate_scratch_cnn_upd:
    print_general_metrics(
        model=model,
        model_name=model_name,
        model_description=model_description,
        to_print_model_summary = False,
        to_print_classification_report = True,
        to_plot_confusion_matrix = False,
      )
    # print(model_description)
    print("-------------------------------------------------------------------\n")


## 2. Transfer learning

### Constructors

In [None]:
# Registry of the transfer-learning experiments; it is the default target list
# of `train_model_transfer_learning`.
models_to_evaluate_transfer_learning: list[tuple[Model, str, str]] = []  # (model, model_name, description)

In [None]:
# Edge length in pixels of the square images fed to MobileNetV2.
IMG_SIZE_MOBILE_NET = 96  # match MobileNetV2 expected input
# (height, width) pair built from that edge length; holds no channel dimension.
INPUT_SHAPE_MOBILE_NET = (IMG_SIZE_MOBILE_NET,) * 2

# # Resize training and test sets
# X_train_resized = tf.image.resize(X_train, INPUT_SHAPE_MOBILE_NET)
# X_test_resized = tf.image.resize(X_test, INPUT_SHAPE_MOBILE_NET)

In [None]:
from dataclasses import dataclass, field
from tensorflow.keras.applications import MobileNetV2

@dataclass
class ModelConfigTransferLearning:
    """
    Settings for a transfer-learning run on top of a pre-trained base network.

    The defaults describe a MobileNetV2 base fed with
    IMG_SIZE_MOBILE_NET x IMG_SIZE_MOBILE_NET RGB images.
    """
    # Stem used for the saved model, history and log paths.
    model_name: str = "model_transfer_mobilenetv2"
    # Number of units in the final classification layer.
    num_classes: int = len(class_names)
    # Arguments forwarded to `model.compile`.
    metrics: list[str] = field(default_factory=lambda: ["accuracy"])
    optimizer: str = "adam"
    loss_function: str = "categorical_crossentropy"
    # Activation of the final (classification) Dense layer.
    activation_function: str = "softmax"
    # NOTE(review): not read by the training functions in this section — confirm
    # where (or whether) the epoch count is applied.
    epochs: int = 20
    # Shape of the model's Input layer: (height, width, channels).
    input_shape: tuple[int, int, int] = (IMG_SIZE_MOBILE_NET, IMG_SIZE_MOBILE_NET, 3)
    # NOTE(review): not read by the training functions in this section.
    create_model_fn: Callable[[], Model] | None = None
    # NOTE(review): not read by the training functions in this section.
    class_mode: str | None = "categorical"
    # Keyword values handed to the train/validation generator factories.
    datagen_transformations: dict = field(default_factory=lambda: {
        "rotation_range": 15,
        "horizontal_flip": True,
        "width_shift_range": 0.1,
        "height_shift_range": 0.1,
        "rescale": 1./255,
    })
    # Zero-argument factory for the pre-trained base network. The input shape is
    # built from IMG_SIZE_MOBILE_NET (instead of a literal 96) so it cannot drift
    # away from the default `input_shape` above.
    base_model_fn: Callable[[], Model] = lambda: MobileNetV2(
        input_shape=(IMG_SIZE_MOBILE_NET, IMG_SIZE_MOBILE_NET, 3),
        include_top=False,
        weights='imagenet',
    )


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def train_model_transfer_learning(
        model_config: ModelConfigTransferLearning,
        models_to_evaluate: list[tuple[Model, str, str]] = models_to_evaluate_transfer_learning,
    ) -> None:
    """
    Train (or load) a frozen-base transfer-learning classifier and register it.

    The base network from `model_config.base_model_fn` is frozen and topped with
    global average pooling plus a Dense classification layer. The resulting
    history is stored in the global `histories` dict and the model is appended
    to `models_to_evaluate`.

    Args:
        model_config: Model, augmentation and file-name settings.
        models_to_evaluate: List receiving `(model, model_name, description)`.
            The default list is bound when this cell is executed, so re-run this
            cell after re-creating `models_to_evaluate_transfer_learning`.

    NOTE(review): `load_or_train_model_from_directory` presumably reuses a model
    already saved at the target path; delete that file to retrain with the
    corrected input scaling below.
    """
    augmentation = model_config.datagen_transformations
    rescale = augmentation["rescale"]
    # The generators must emit images of the size the model's Input layer expects.
    target_size = tuple(model_config.input_shape[:2])

    def create_transfer_model() -> Model:
        """Build and compile the frozen-base classifier."""
        base_model = model_config.base_model_fn()
        base_model.trainable = False

        inputs = Input(shape=model_config.input_shape)
        x = inputs
        if rescale:
            # `preprocess_input` expects raw 0-255 pixels and maps them to [-1, 1].
            # Assuming the generators multiply pixels by `rescale`
            # (ImageDataGenerator semantics — TODO confirm), undo that first;
            # otherwise the base network only sees values squashed near -1.
            x = tf.keras.layers.Rescaling(1.0 / rescale)(x)
        x = preprocess_input(x)
        # training=False keeps the base network's BatchNorm layers in inference mode.
        x = base_model(x, training=False)
        x = GlobalAveragePooling2D()(x)
        outputs = Dense(model_config.num_classes, activation=model_config.activation_function)(x)

        model = tf.keras.Model(inputs, outputs)
        model.compile(
            optimizer=model_config.optimizer,
            loss=model_config.loss_function,
            metrics=model_config.metrics,
        )
        return model

    path_to_model = get_path(f"{model_config.model_name}.keras")
    path_to_model_history = get_path(f"{model_config.model_name}_history.json")
    path_to_model_logs = get_path(f"{model_config.model_name}_logs")

    train_generator = create_train_generator(
        rotation_range=augmentation["rotation_range"],
        horizontal_flip=augmentation["horizontal_flip"],
        width_shift_range=augmentation["width_shift_range"],
        height_shift_range=augmentation["height_shift_range"],
        rescale=rescale,
        target_size=target_size,
    )
    val_generator = create_val_generator(
        rescale=rescale,
        target_size=target_size,
    )

    trained_model, model_history = load_or_train_model_from_directory(
        create_fn=create_transfer_model,
        model_path=path_to_model,
        history_path=path_to_model_history,
        callbacks=[
            EarlyStopping(patience=5, restore_best_weights=True),
            ModelCheckpoint(str(path_to_model), save_best_only=True),
            create_tensorboard_callback(path_to_model_logs),
        ],
        train_generator=train_generator,
        val_generator=val_generator,
    )

    # Keras application models are functional models, so their class name says
    # nothing about the architecture; the model's `name` does.
    # (This builds a throw-away base model just to read its name.)
    base_model_name = model_config.base_model_fn().name
    # The config has no `datagen` flag: augmentation counts as enabled when any
    # transformation other than plain rescaling is switched on.
    has_augmentation = any(
        value for key, value in augmentation.items() if key != "rescale"
    )

    model_description = f"""
    Transfer learning with {base_model_name} base.
    Data augmentation: {'true' if has_augmentation else 'false'}.
    Optimizer: {model_config.optimizer}. Loss: {model_config.loss_function}. Metrics: {model_config.metrics}
    """

    histories[model_config.model_name] = model_history
    models_to_evaluate.append((trained_model, model_config.model_name, model_description))

### 2.1. Base

In [None]:
# Baseline transfer-learning run: every ModelConfigTransferLearning default is used as-is.
model_config = ModelConfigTransferLearning()

train_model_transfer_learning(model_config=model_config)

## 3. Fine-Tuning

In [None]:
# Registry of trained fine-tuned models; each entry is
# (model, model_name, description). train_model_fine_tuning appends to it by default.
models_to_evaluate_fine_tuning: list[tuple[Model, str, str]] = []

In [None]:
@dataclass
class ModelConfigFineTuning(ModelConfigTransferLearning):
    """Transfer-learning settings plus the base-model layer index where unfreezing starts."""
    model_name: str = "model_transfer_mobilenetv2_finetuned"
    # Base-model layers with index below this value are frozen by
    # train_model_fine_tuning; layers from this index onward stay trainable.
    fine_tune_at: int = 100

In [None]:
def train_model_fine_tuning(
    model_config: ModelConfigFineTuning,
    models_to_evaluate: list[tuple[Model, str, str]] = models_to_evaluate_fine_tuning,
) -> None:
    """
    Train (or load) a partially unfrozen transfer-learning classifier and register it.

    Base-model layers before `model_config.fine_tune_at` are frozen, the rest are
    trainable. The resulting history is stored in the global `histories` dict and
    the model is appended to `models_to_evaluate`.

    Args:
        model_config: Model, augmentation, file-name and unfreezing settings.
        models_to_evaluate: List receiving `(model, model_name, description)`.
            The default list is bound when this cell is executed, so re-run this
            cell after re-creating `models_to_evaluate_fine_tuning`.

    NOTE(review): `load_or_train_model_from_directory` presumably reuses a model
    already saved at the target path; delete that file to retrain with the
    corrections below.
    """
    augmentation = model_config.datagen_transformations
    rescale = augmentation["rescale"]
    # The generators must emit images of the size the model's Input layer expects.
    target_size = tuple(model_config.input_shape[:2])

    def create_finetuned_model() -> Model:
        """Build and compile the classifier with the top of the base model unfrozen."""
        base_model = model_config.base_model_fn()
        base_model.trainable = True

        # Freeze all layers before `fine_tune_at`
        for layer in base_model.layers[:model_config.fine_tune_at]:
            layer.trainable = False

        inputs = Input(shape=model_config.input_shape)
        x = inputs
        if rescale:
            # `preprocess_input` expects raw 0-255 pixels and maps them to [-1, 1].
            # Assuming the generators multiply pixels by `rescale`
            # (ImageDataGenerator semantics — TODO confirm), undo that first;
            # otherwise the base network only sees values squashed near -1.
            x = tf.keras.layers.Rescaling(1.0 / rescale)(x)
        x = preprocess_input(x)
        # Keep training=False even while fine-tuning: a hard-wired training=True
        # would make the unfrozen BatchNorm layers use (and update) batch
        # statistics during evaluate/predict as well. The unfrozen weights are
        # still updated, because that is governed by `trainable`.
        x = base_model(x, training=False)
        x = GlobalAveragePooling2D()(x)
        outputs = Dense(model_config.num_classes, activation=model_config.activation_function)(x)

        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer=model_config.optimizer,
                      loss=model_config.loss_function,
                      metrics=model_config.metrics)
        return model

    path_to_model = get_path(f"{model_config.model_name}.keras")
    path_to_model_history = get_path(f"{model_config.model_name}_history.json")
    path_to_model_logs = get_path(f"{model_config.model_name}_logs")

    train_generator = create_train_generator(
        rotation_range=augmentation["rotation_range"],
        horizontal_flip=augmentation["horizontal_flip"],
        width_shift_range=augmentation["width_shift_range"],
        height_shift_range=augmentation["height_shift_range"],
        rescale=rescale,
        target_size=target_size,
    )

    val_generator = create_val_generator(
        rescale=rescale,
        target_size=target_size,
    )

    trained_model, model_history = load_or_train_model_from_directory(
        create_fn=create_finetuned_model,
        model_path=path_to_model,
        history_path=path_to_model_history,
        callbacks=[
            EarlyStopping(patience=5, restore_best_weights=True),
            ModelCheckpoint(str(path_to_model), save_best_only=True),
            create_tensorboard_callback(path_to_model_logs),
        ],
        train_generator=train_generator,
        val_generator=val_generator,
    )

    model_description = f"""
    Fine-tuned MobileNetV2 from layer {model_config.fine_tune_at}.
    All layers before this are frozen.
    Optimizer: {model_config.optimizer}, Loss: {model_config.loss_function}, Metrics: {model_config.metrics}
    """

    histories[model_config.model_name] = model_history
    models_to_evaluate.append((trained_model, model_config.model_name, model_description))


In [None]:
# Fine-tuning run; fine_tune_at is spelled out explicitly although 100 is also
# the ModelConfigFineTuning default.
model_config = ModelConfigFineTuning(fine_tune_at=100)
train_model_fine_tuning(model_config=model_config)
