# elec0135-assignment-cld

## Workings Notebook

I found it useful to develop in Kaggle (which provides 30 hours of free GPU time per week) and then split the code out into modules.

I've kept this notebook in the repo to show a track record of commits and for my own future reference.

### Kaggle Specific Code

In [5]:
# # Useful cleanups to reset status
# !rm -rf /kaggle/working/data
# !rm /kaggle/working/data.zip
# !rm results.csv
# !rm -rf /kaggle/working/artefacts

In [6]:
# also required in the `/interactive_runner.ipynb`
!pip install gdown



### `/model/util.py`

In [7]:
"""
Functions for creating and training models, used across the various tasks.
"""
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, NamedTuple, Tuple

import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.data import Dataset
from tensorflow.keras import layers, callbacks
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, Dense, Dropout, Flatten, GlobalAveragePooling2D, MaxPooling2D


class Params(NamedTuple):
    """
    Job Parameters Struct

    Immutable bundle of the settings that are threaded through dataset
    creation, model creation and training for a single experiment.
    """
    # Side length in pixels; model inputs are built as (image_size, image_size, 3).
    image_size: int
    # Batch size used when the image datasets are created.
    batch_size: int
    # Maximum number of epochs passed to `model.fit`.
    epochs: int
    # Passed to `Adam(epsilon=...)` in `train`; this is NOT the learning rate.
    epsilon: float
    # When True, `train` adds an EarlyStopping callback.
    early_stopping: bool
    # `patience` of that EarlyStopping callback.
    early_stopping_patience: int
    # When True, `train` adds a ReduceLROnPlateau callback.
    adjust_learning_rate: bool
        
        
class ResultCollector():
    """
    Utility class to collect up and output results from tasks.

    Two tables are accumulated across tasks: the per-epoch training history
    and the one-row-per-task test scores. Every addition re-writes the
    corresponding CSV file inside `path`, so partial results survive a
    crashed or interrupted session and can be reloaded with
    `restore_results`.
    """

    TRAIN_DETAILS_FILE = "train_details.csv"
    TEST_SCORES_FILE = "test_scores.csv"

    def __init__(
        self,
        path: Path
    ):
        """
        Args:
            path: Directory the CSV files are written to and restored from.
        """
        self.path = path
        # Real (empty) frames rather than the DataFrame class itself, so the
        # getters always return a DataFrame and `.empty` is a genuine bool.
        self.train_details = pd.DataFrame()
        self.test_scores = pd.DataFrame()

    def add_task_results(self, df_train, df_test) -> None:
        """Record both the training history and the test scores of one task."""
        self.add_train_details(df_train)
        self.add_test_scores(df_test)

    def add_train_details(self, df: pd.DataFrame) -> None:
        """Append a task's training history and persist the full table."""
        self.train_details = self._append(self.train_details, df)
        self._save(self.train_details, self.TRAIN_DETAILS_FILE)

    def get_train_details(self) -> pd.DataFrame:
        """Return every training history row collected so far."""
        return self.train_details

    def add_test_scores(self, df: pd.DataFrame) -> None:
        """Append a task's test scores and persist the full table."""
        self.test_scores = self._append(self.test_scores, df)
        self._save(self.test_scores, self.TEST_SCORES_FILE)

    def get_test_scores(self) -> pd.DataFrame:
        """Return every test score row collected so far."""
        return self.test_scores

    def restore_results(self) -> None:
        """
        Reload both tables from the CSV files previously written by `_save`.

        Raises:
            FileNotFoundError: if either file has not been written yet.
        """
        # Read from the same directory `_save` writes to, not the CWD.
        self.train_details = pd.read_csv(self.path / self.TRAIN_DETAILS_FILE)
        self.test_scores = pd.read_csv(self.path / self.TEST_SCORES_FILE)

    @staticmethod
    def _append(current: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame:
        """Return `new` stacked under `current` (or just `new` if nothing is held yet)."""
        if current.empty:
            return new
        return pd.concat([current, new])

    def _save(self, df: pd.DataFrame, name: str) -> None:
        """Write `df` to `<path>/<name>` without the index column."""
        self.path.mkdir(parents=True, exist_ok=True)
        df.to_csv(self.path / name, index=False)


@dataclass
class ModelWrapper():
    """
    Utility class to hold the "outer" model, and the inner base model
    so that training can be fine-tuned if required.
    """
    # Full network (backbone plus classification head); this is the model
    # that `run_task` trains and evaluates.
    model: keras.Model
    # Pre-trained backbone embedded in `model`, kept so callers can toggle
    # its `trainable` flag. The from-scratch builders in this file pass None.
    base_model: keras.Model

        
def create_convnext_base(params: Params) -> ModelWrapper:
    """
    Build the baseline model: an ImageNet ConvNeXtBase backbone with the
    custom head defined in `create_model`.

    Args:
        params: Job parameters; forwarded to `create_model` (previously the
            argument was ignored in favour of a module-level default).

    Returns:
        ModelWrapper with the full model and its backbone.
    """
    return create_model(tf.keras.applications.ConvNeXtBase, "baseline", params)


def create_model(base_model_fn: Callable[..., keras.Model], name: str, params: Params) -> ModelWrapper:
    """
    Create Keras application model, e.g.
        tf.keras.applications.EfficientNetV2B0
        tf.keras.applications.ConvNeXtBase
    with a custom top.

    Args:
        base_model_fn: Constructor of the backbone (a callable, not a
            string); it is invoked as
            `base_model_fn(weights='imagenet', include_top=False)`.
        name: Currently unused; kept so existing callers keep working.
        params: Only `image_size` is read, to size the input layer.

    Returns:
        ModelWrapper holding the full model and the (frozen) backbone.
    """
    inputs = keras.Input(shape=(params.image_size, params.image_size, 3))
    # Base
    base_model = base_model_fn(weights='imagenet', include_top=False)
    base_model.trainable = False
    # set training=F here per https://keras.io/guides/transfer_learning/
    x = base_model(inputs, training=False)
    # Head: two 1024-unit GELU layers, each followed by 50% dropout,
    # then a 5-way softmax.
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(1024, activation="gelu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation="gelu")(x)
    x = Dropout(0.5)(x)
    outputs = Dense(5, activation="softmax")(x)
    model = keras.Model(inputs, outputs)

    return ModelWrapper(model, base_model)


def create_model_ablations(base_model_fn: Callable[..., keras.Model], name: str, params: Params, fc_layers = 2, fc_neurons = 1024, bn = False) -> ModelWrapper:
    """
    Create Keras application model, e.g.
        tf.keras.applications.EfficientNetV2B0
        tf.keras.applications.ConvNeXtBase
    with a configurable custom top, for ablation studies.

    Args:
        base_model_fn: Constructor of the backbone (a callable, not a
            string); it is invoked as
            `base_model_fn(weights='imagenet', include_top=False)`.
        name: Currently unused; kept so existing callers keep working.
        params: Only `image_size` is read, to size the input layer.
        fc_layers: Number of Dense+Dropout pairs in the head (0 gives a
            head consisting of the softmax layer only).
        fc_neurons: Width of each of those Dense layers.
        bn: When True, a BatchNormalization layer is inserted directly
            after the global average pooling.

    Returns:
        ModelWrapper holding the full model and the (frozen) backbone.
    """
    inputs = keras.Input(shape=(params.image_size, params.image_size, 3))
    # Base
    base_model = base_model_fn(weights='imagenet', include_top=False)
    base_model.trainable = False
    # set training=F here per https://keras.io/guides/transfer_learning/
    x = base_model(inputs, training=False)
    # Head
    x = GlobalAveragePooling2D()(x)
    if bn:
        x = BatchNormalization()(x)
    x = Flatten()(x)

    for _ in range(fc_layers):
        x = Dense(fc_neurons, activation="relu")(x)
        x = Dropout(0.5)(x)

    outputs = Dense(5, activation="softmax")(x)
    model = keras.Model(inputs, outputs)

    return ModelWrapper(model, base_model)


def run_task(task_id: str, model_wrapper: ModelWrapper,
             ds_train: Dataset, ds_valid: Dataset, ds_test: Dataset,
             params: Params, weights = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Train, test and record one experiment.

    Args:
        task_id: Label attached to every output row and to the confusion
            matrix file name.
        model_wrapper: Wrapper whose `model` is trained and evaluated.
        ds_train: Training dataset.
        ds_valid: Validation dataset used during fitting.
        ds_test: Dataset used for the final evaluation and confusion matrix.
        params: Job parameters forwarded to `train`.
        weights: Optional class-weight mapping forwarded to `train`
            (and from there to `model.fit(class_weight=...)`).

    Returns:
        (training history frame, single-row test score frame).
    """
    model = model_wrapper.model
    # train -- `weights` must be passed on, otherwise the class weights a
    # caller supplies are silently ignored.
    df_train = train(task_id, model, ds_train, ds_valid, params, weights)
    # test
    test_result = model.evaluate(ds_test)
    df_test = create_test_record(task_id, test_result)
    # save CM too
    save_confusion_matrix(ds_test, model, task_id)
    return df_train, df_test


def train(task_id: str, model: Model,
             ds_train_: Dataset, ds_valid_: Dataset,
             params: Params, weights = None) -> pd.DataFrame:
    """
    Compile `model` with Adam / categorical cross-entropy and fit it.

    Args:
        task_id: Label written into the `task_id` column of the result.
        model: Model to compile and fit (modified in place).
        ds_train_: Training dataset.
        ds_valid_: Validation dataset.
        params: Supplies `epsilon`, `epochs` and the callback switches.
        weights: Optional mapping handed to `model.fit(class_weight=...)`.

    Returns:
        One row per epoch: the Keras history columns plus `task_id` and
        `epoch` (the zero-based row index).
    """
    adam = tf.keras.optimizers.Adam(epsilon=params.epsilon)
    model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=['accuracy'])

    # Only the callbacks switched on in `params` are instantiated.
    active_callbacks = []
    if params.early_stopping:
        print("Using EarlyStopping")
        active_callbacks.append(
            callbacks.EarlyStopping(
                min_delta=0.0001,
                patience=params.early_stopping_patience,
                restore_best_weights=True,
                verbose=1,
            )
        )
    if params.adjust_learning_rate:
        print("Using ReduceLROnPlateau")
        active_callbacks.append(
            callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_delta=0.0005,
                mode='min',
                verbose=1,
            )
        )

    fit_result = model.fit(
        ds_train_,
        validation_data=ds_valid_,
        epochs=params.epochs,
        verbose=1,
        callbacks=active_callbacks,
        class_weight=weights,
    )

    per_epoch = pd.DataFrame(fit_result.history).assign(task_id=task_id)
    return per_epoch.assign(epoch=per_epoch.index)


def create_test_record(task_id: str, result: list[float]):
    """
    Wrap an evaluation result in a one-row DataFrame.

    `result[0]` is stored as `test_loss` and `result[1]` as `test_accuracy`,
    alongside the `task_id` label.
    """
    loss, accuracy = result[0], result[1]
    row = {"task_id": task_id, "test_loss": loss, "test_accuracy": accuracy}
    return pd.DataFrame([row])


def save_confusion_matrix(ds: Dataset, model: Model, task_id: str) -> None:
    """
    Plot the confusion matrix of `model` on `ds` and save it as a PNG.

    The matrix is normalised per predicted class (`normalize='pred'`) over
    the five labels 0-4 and written to `artefacts/conf_mat_<task_id>.png`.

    Args:
        ds: Dataset yielding (images, one-hot labels) batches. It is
            iterated twice (once for predictions, once for labels), so it
            must yield elements in the same order on both passes.
        model: Trained model used for prediction.
        task_id: Used for the plot title and the file name.
    """
    # Local import: pyplot is only needed to release the figure afterwards.
    import matplotlib.pyplot as plt

    path = f"artefacts/conf_mat_{task_id}.png"
    probabilities = model.predict(ds)
    predictions = np.argmax(probabilities, axis=1)

    one_hot_labels = np.concatenate([y for x, y in ds], axis=0)
    labels = np.argmax(one_hot_labels, axis=1)

    class_ids = [0, 1, 2, 3, 4]
    result = confusion_matrix(labels, predictions, labels=class_ids, normalize='pred')
    disp = ConfusionMatrixDisplay(result, display_labels=class_ids)
    disp.plot()
    disp.ax_.set_title(task_id)

    print(f"Saving confusion matrix to {path}")
    # Do not rely on the caller having created the output directory.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    disp.figure_.savefig(path, dpi=300)
    # One figure is created per task; close it so they do not accumulate.
    plt.close(disp.figure_)
    
    
def create_vgg_like_model(params: Params) -> ModelWrapper:
    """
    Build a small VGG-style CNN trained from scratch.

    Three convolutional stages (2x32, 2x64 and 3x128 filters, all 3x3 ReLU
    convolutions with 'same' padding), each closed by 2x2 max-pooling and
    25% dropout, feed a 1024-unit dense layer with 50% dropout and a 5-way
    softmax. There is no pre-trained backbone, so the wrapper's
    `base_model` is None.
    """
    inputs = keras.Input(shape=(params.image_size, params.image_size, 3))

    # (filters, number of stacked convolutions) for each stage
    stages = ((32, 2), (64, 2), (128, 3))
    x = inputs
    for filters, conv_count in stages:
        for _ in range(conv_count):
            x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
        x = MaxPooling2D(pool_size=(2,2))(x)
        x = Dropout(0.25)(x)

    # classification layers
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(5, activation="softmax")(x)

    return ModelWrapper(keras.Model(inputs, outputs), None)



def create_simple_model(params: Params) -> ModelWrapper:
    """
    Build a from-scratch sequential CNN and wrap it.

    The network has three convolutional blocks (32, 64 and 128 filters),
    each ending in max-pooling and 20% dropout, followed by two 512-unit
    dense layers with 50% dropout and a 5-way softmax. The returned wrapper
    has no backbone (`base_model` is None).
    """
    m = keras.Sequential([
        
        tf.keras.Input(shape=(params.image_size, params.image_size, 3)),
        
        # First Convolutional Block (the only one that opens with a 5x5 kernel)
        layers.Conv2D(filters=32, kernel_size=5, activation="relu", padding='same'),
        layers.Conv2D(filters=32, kernel_size=3, activation="relu", padding='same'),
        layers.MaxPool2D(),
        layers.Dropout(0.2),

        # Second Convolutional Block
        layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding='same'),
        layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding='same'),
        layers.MaxPool2D(),
        layers.Dropout(0.2),

        # Third Convolutional Block
        layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
        layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
        layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
        layers.MaxPool2D(),
        layers.Dropout(0.2),

        # Classifier Head
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(units=5, activation="softmax"),
    ])
    # Wrapped so it can be passed to `run_task` like the transfer-learning models.
    return ModelWrapper(m, None)

### `/data/data_processing.py`

In [8]:
import gdown
import keras
import pandas as pd
import random
import shutil
import tensorflow as tf
import os
import zipfile

# handle different structure Kaggle (Notebook) vs. Colab (Modules)
# this wouldn't be kept in any "production" version.
try:
    from AMLS_II_assignment23_24.model.util import Params
except ModuleNotFoundError:
    pass

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.data import Dataset
from tensorflow.data.experimental import AUTOTUNE
from tensorflow.keras import layers, callbacks
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image_dataset_from_directory
from typing import Tuple


def data_preprocessing(path: Path,
                       params: Params,
                       force=False) -> Tuple[Dataset, Dataset, Dataset, dict]:
    """
    Download, extract, split and load the image data.

    Steps:
      1. Download `data.zip` into `path` (skipped if present unless `force`).
      2. Extract it to `path / "data"` if that directory does not exist.
      3. Down-sample label 3 to at most 2577 images; keep all other labels.
      4. Split 80/20 into train+valid / test, then 75/25 into train / valid
         (i.e. 60/20/20 overall), both with `random_state=12`.
      5. Move the images into `train/`, `valid/` and `test/` class folders
         and load each as a batched dataset (the test set is not shuffled).

    Args:
        path: Working directory holding the archive and the `data` folder.
        params: Supplies `image_size` and `batch_size` for the datasets.
        force: Re-download the archive and rebuild the `data` folder.

    Returns:
        (ds_train, ds_valid, ds_test, class_weights) where `class_weights`
        is computed over the whole down-sampled label column.
    """
    file = download_data(path, force)

    data_path = path / "data"
    # Guarded so that force=True on a clean machine does not raise.
    if force and data_path.exists():
        shutil.rmtree(data_path)

    if not data_path.exists():
        data_path.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(file, "r") as z:
            z.extractall(data_path)

    df_images = pd.read_csv((data_path / "train.csv"))

    # Down-sample the dominant class. A private, seeded generator makes the
    # selection reproducible without touching the global `random` state, and
    # the cap is clamped so a smaller dataset does not raise ValueError.
    majority_label = 3
    majority_cap = 2577
    majority_ids = df_images[df_images.label == majority_label].image_id.tolist()
    kept_majority = random.Random(12).sample(
        majority_ids, k=min(majority_cap, len(majority_ids))
    )
    other_ids = df_images[df_images.label != majority_label].image_id.tolist()

    df_images = df_images[df_images.image_id.isin(kept_majority + other_ids)].copy()

    X_train, X_test, y_train, y_test = train_test_split(df_images.image_id, df_images.label, test_size=0.2, random_state=12)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25, random_state=12)

    # NOTE: each tree is only built when its folder is missing, so an
    # existing tree from an earlier run is reused as-is.
    train_path = create_ds_tree(X_train, y_train, data_path, "train")
    valid_path = create_ds_tree(X_valid, y_valid, data_path, "valid")
    test_path = create_ds_tree(X_test, y_test, data_path, "test")

    ds_train = create_dataset(train_path, params.image_size, params.batch_size)
    ds_valid = create_dataset(valid_path, params.image_size, params.batch_size)
    # Test set keeps a fixed order (shuffle=False).
    ds_test = create_dataset(test_path, params.image_size, params.batch_size, False)

    return ds_train, ds_valid, ds_test, extract_class_weights(df_images)


def download_data(path: Path, force=False) -> Path:
    """
    Ensure the dataset archive is available locally.

    The archive is fetched from Google Drive into `path / "data.zip"` when
    it is missing, or unconditionally when `force` is truthy.

    Returns:
        Path of the archive.
    """
    archive = path / "data.zip"
    already_present = Path(archive).exists()
    if force or not already_present:
        source_url = "https://drive.google.com/uc?id=1TJBf1HZxAMpowZ92BcgS5N_NPHE7LPOT"
        gdown.download(source_url, str(archive), quiet=False)
    return archive


def create_ds_tree(x, y, path: Path, name: str) -> Path:
    """
    Creates the directory structure for the given dataset.

    Builds `path/name/<label>/` folders and moves each image listed in `x`
    from `path/train_images/` into the folder of its label in `y`. If
    `path/name` already exists nothing is touched.

    Returns:
        `path / name`.
    """
    target_root = path / name
    if target_root.exists():
        # Built by an earlier call; reuse as-is.
        return target_root

    target_root.mkdir(parents=True, exist_ok=True)
    for label in y.unique():
        (target_root / str(label)).mkdir(exist_ok=True)

    images_dir = path / "train_images"
    for image_name, label in zip(x, y):
        shutil.move(images_dir / image_name, target_root / str(label) / image_name)

    return target_root


def create_dataset(path: Path, img_size: int, batch_size: int, shuffle = True) -> Dataset:
    """
    Load a class-per-folder image directory as a batched dataset.

    Labels are inferred from the sub-folder names and one-hot encoded
    (`label_mode='categorical'`); images are resized to a square of side
    `img_size`, cropping to preserve the aspect ratio.
    """
    loader_options = dict(
        labels='inferred',
        label_mode='categorical',
        image_size=[img_size, img_size],
        batch_size=batch_size,
        seed=12345,
        shuffle=shuffle,
        crop_to_aspect_ratio=True,
    )
    return image_dataset_from_directory(path, **loader_options)


def extract_class_weights(df_data: pd.DataFrame) -> dict:
    """
    Compute 'balanced' class weights from the `label` column of `df_data`.

    Returns:
        Mapping of each distinct label to its weight, as produced by
        scikit-learn's `compute_class_weight(class_weight='balanced', ...)`.
    """
    distinct_labels = df_data.label.unique()
    balanced = compute_class_weight(
        class_weight='balanced',
        classes=distinct_labels,
        y=df_data.label,
    )
    return {label: weight for label, weight in zip(distinct_labels, balanced)}


def convert_dataset(ds: Dataset) -> Dataset:
    """
    Rescale image pixel values by 1/255 and make the pipeline cache-backed.

    Args:
        ds: Dataset of (image, label) pairs with pixel values in 0-255.

    Returns:
        Dataset of (float32 image divided by 255, label), cached after the
        first pass and prefetched.
    """
    def convert_to_float(image, label):
        # A plain cast changes the dtype only. `convert_image_dtype` would
        # additionally rescale integer inputs to [0, 1], and the division
        # below would then scale them a second time.
        image = tf.cast(image, tf.float32)
        image = image / 255.0
        return image, label

    return (
        ds
        .map(convert_to_float)
        .cache()
        .prefetch(buffer_size=AUTOTUNE)
    )
    

def augment_dataset(ds: Dataset, num_repeats: int) -> Dataset:
    """
    Repeat `ds` and apply random flips and a random brightness shift.

    Args:
        ds: Dataset of (image, label) pairs.
        num_repeats: How many times the source dataset is repeated before
            augmentation, i.e. the size multiplier of the result.

    Returns:
        Augmented, prefetched dataset. It is deliberately NOT cached:
        caching after a random map would store the first epoch's
        augmentations and replay them unchanged for every later epoch,
        while also holding `num_repeats` copies of the data in memory.
    """
    def augment(image, label):
        seed = 12345
        image = tf.image.random_flip_left_right(image, seed)
        image = tf.image.random_flip_up_down(image, seed)
        # NOTE(review): max_delta=0.2 is added to the raw pixel values; if
        # the images are still on a 0-255 scale this shift is tiny -- confirm.
        image = tf.image.random_brightness(image, 0.2, seed)
        return image, label

    return (
        ds
        .repeat(num_repeats)
        .map(augment)
        .prefetch(buffer_size=AUTOTUNE)
    )

### `/report.py`?

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_experiments_comp2(df_history: pd.DataFrame, task_ids: list, epoch_limit = 50) -> None:
    """
    Plot validation accuracy, validation loss and the train/validation loss
    gap per epoch, one line per task, in three side-by-side panels.

    Only rows whose `task_id` is in `task_ids` and whose `epoch` is at most
    `epoch_limit` are used; values are averaged per (epoch, task_id).
    """
    wanted = df_history.task_id.isin(task_ids)
    in_range = df_history.epoch <= epoch_limit
    selected = df_history[wanted & in_range].copy()
    selected["loss_gap"] = selected.val_loss - selected.loss

    metrics = ["val_accuracy", "val_loss", "loss_gap"]
    averaged = selected[["epoch", "task_id"] + metrics].groupby(["epoch", "task_id"]).mean()

    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(16, 8))
    for metric, axis in zip(metrics, (ax1, ax2, ax3)):
        sns.lineplot(data=averaged, x="epoch", y=metric, hue="task_id",  ax=axis)

### `/main.py`

In [10]:
import datetime
import os
import pandas as pd
import tensorflow as tf

# handle different structure Kaggle (Notebook) vs. Colab (Modules)
# this wouldn't be kept in any "production" version.
try:
    from AMLS_II_assignment23_24.data_processing import pre_processing as data
    from AMLS_II_assignment23_24.model import util as model_util
    from AMLS_II_assignment23_24.model.util import Params, ResultCollector
except ModuleNotFoundError:
    pass

from docopt import docopt
from pathlib import Path

# Fix TensorFlow's global seed for the whole session.
tf.random.set_seed(67890)

# Positional fields, in order: image_size, batch_size, epochs, epsilon,
# early_stopping, early_stopping_patience, adjust_learning_rate.
DEFAULT_PARAMS = Params(255, 196, 50, 0.005, True, 5, False)
## DEFAULT_PARAMS = model_util.Params(50, 196, 1, True, 5, False)
#DEFAULT_PARAMS = Params(50, 196, 1, 0.005, True, 5, False)
print(DEFAULT_PARAMS)

# All result CSVs are written below this directory.
ARTEFACTS_PATH = Path("artefacts")
ARTEFACTS_PATH.mkdir(parents=True, exist_ok=True)

# Shared across every experiment cell below.
collector = ResultCollector(ARTEFACTS_PATH)

# Process Data
print("==== Loading Data ====")
# The data archive and the extracted tree live under the working directory.
cwd = os.getcwd()
# ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), DEFAULT_PARAMS)
# print(f"Class Weights: {class_weights}")

# print("==== Task A: Baseline Model ====")

# model = create_convnext_base(DEFAULT_PARAMS)
# df_train, df_test = run_task("A_base", model, ds_train, ds_valid, ds_test, DEFAULT_PARAMS)
# collector.add_task_results(df_train, df_test)

# print("==== Task B: Baseline + Data Augmentation ====")
# {
#     """
#     Per task A, but with data augmentation.
#     """
#     ds_train_aug = augment_dataset(ds_train, 2)
#     model = create_convnext_base(DEFAULT_PARAMS)
#     df_train, df_test = run_task("B_base_aug", model, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS)
#     collector.add_task_results(df_train, df_test)    
# }

# print("==== Task C: Baseline + Data Augmentation + Class Weights ====")
# model = create_convnext_base(DEFAULT_PARAMS)
# {
#     """
#     Per task B but, given the large class imbalance, class weight supplied.
#     """
#     ds_train_aug = augment_dataset(ds_train, 2)
#     df_train, df_test = run_task("C_base_aug_wgts", model, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS, class_weights)
#     collector.add_task_results(df_train, df_test)    
# }

# print("==== Task D: Baseline + Data Augmentation + Class Weights + Fine Tune ====")
# {
#     """
#     Per task C but, given the large class imbalance, class weight supplied.
#     """
#     fine_tune_params = Params(50, 196, 1, 1e-5, True, 5, False)
#     print(fine_tune_params)
#     model.base_model.trainable = True
#     ds_train_aug = augment_dataset(ds_train, 2)
#     df_train, df_test = run_task("D_base_aug_wgts_ft", model, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS, class_weights)
#     collector.add_task_results(df_train, df_test)    
# }


Params(image_size=255, batch_size=196, epochs=50, epsilon=0.005, early_stopping=True, early_stopping_patience=5, adjust_learning_rate=False)
==== Loading Data ====


In [11]:
i = 1

In [12]:
assert 1==2, "stop here"

AssertionError: stop here

In [None]:
i = 2

In [None]:
# Experiment A: batch-size sweep with a frozen ConvNeXtTiny backbone.
# The datasets are rebuilt per iteration because the batch size is fixed
# when they are created; a fresh model is built each time.
for bs in [64, 128, 192, 256]:
    print(f"Batch Size: {bs}")
    print(f"Start: {datetime.datetime.now()}")
    batch_size_params = Params(255, bs, 50, 0.005, True, 5, False)
    print(batch_size_params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), batch_size_params)
    model = create_model(tf.keras.applications.ConvNeXtTiny, "base", batch_size_params)
    df_train, df_test = run_task(f"A_bs_{bs}", model, ds_train, ds_valid, ds_test, batch_size_params)
    collector.add_task_results(df_train, df_test)
    # run_task has already evaluated on ds_test; this is a second evaluation for display.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
# Experiment A (continued): the same sweep for larger batch sizes, recorded
# under the same "A_bs_<n>" task-id scheme.
for bs in [512, 768, 1024]:
    print(f"Batch Size: {bs}")
    print(f"Start: {datetime.datetime.now()}")
    batch_size_params = Params(255, bs, 50, 0.005, True, 5, False)
    print(batch_size_params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), batch_size_params)
    model = create_model(tf.keras.applications.ConvNeXtTiny, "base", batch_size_params)
    df_train, df_test = run_task(f"A_bs_{bs}", model, ds_train, ds_valid, ds_test, batch_size_params)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
collector.get_test_scores()

In [None]:
import pandas as pd
df_res = pd.read_csv("/kaggle/working/artefacts/train_details.csv")

In [None]:
plot_experiments_comp2(collector.get_train_details(), collector.get_train_details().task_id.unique())

In [None]:
plot_experiments_comp2(df_res, df_res.task_id.unique())

In [None]:
# Experiment B: sweep Adam's epsilon (the 4th Params field) at batch size 256.
for eps in [0.0025, 0.0050, 0.0075, 0.01]:
    print(f"Epsilon: {eps}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, eps, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(tf.keras.applications.ConvNeXtTiny, "base", params)
    df_train, df_test = run_task(f"B_eps_{eps}", model, ds_train, ds_valid, ds_test, params)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
collector.get_test_scores()

In [None]:
# Experiment B (continued): finer epsilon values between 0.005 and 0.01.
for eps in [0.006, 0.007, 0.008]:
    print(f"Epsilon: {eps}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, eps, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(tf.keras.applications.ConvNeXtTiny, "base", params)
    df_train, df_test = run_task(f"B_eps_{eps}", model, ds_train, ds_valid, ds_test, params)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
collector.get_test_scores()

In [None]:
plot_experiments_comp2(collector.get_train_details(), collector.get_train_details().task_id.unique())

In [None]:
# Experiment C: with and without the ReduceLROnPlateau callback. `lr` is the
# boolean `adjust_learning_rate` flag, not a learning-rate value; the runs
# are recorded as "C_True" and "C_False".
for lr in [True, False]:
    print(f"LR: {lr}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, 0.0075, True, 5, lr)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(tf.keras.applications.ConvNeXtTiny, "base", params)
    df_train, df_test = run_task(f"C_{lr}", model, ds_train, ds_valid, ds_test, params)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
# Compare ConvNeXt backbone sizes; runs are recorded as C_0 (Tiny),
# C_1 (Small) and C_2 (Base) in list order.
i = 0
for m in [tf.keras.applications.ConvNeXtTiny, tf.keras.applications.ConvNeXtSmall, tf.keras.applications.ConvNeXtBase]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, 0.0075, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"C_{str(i)}", model, ds_train, ds_valid, ds_test, params)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["C_0", "C_1", "C_2"])

In [None]:
collector.get_test_scores()

In [None]:
# Task "D_wgts": ConvNeXtBase with class weights supplied to run_task and
# early-stopping patience raised from 5 to 7.
print(f"Start: {datetime.datetime.now()}")
params = Params(255, 256, 50, 0.0075, True, 7, False)
print(params)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
model = create_model(tf.keras.applications.ConvNeXtBase, "base", params)
df_train, df_test = run_task("D_wgts", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
collector.get_test_scores()

In [None]:
# Experiment E: compare backbone families with class weights supplied;
# E_0 is ConvNeXtTiny and E_1 is EfficientNetB0.
i = 0
for m in [tf.keras.applications.ConvNeXtTiny, tf.keras.applications.EfficientNetB0]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, 0.0075, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"E_{str(i)}", model, ds_train, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
# Experiment E (continued): EfficientNetV2B0, numbered from 2 so it is
# recorded as "E_2" alongside E_0 and E_1.
i = 2
for m in [tf.keras.applications.EfficientNetV2B0]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, 0.0075, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"E_{str(i)}", model, ds_train, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["E_0", "E_1", "E_2"])

In [None]:
m1 = create_model_ablations(tf.keras.applications.ConvNeXtTiny, "base", params, 2, 1024)
m1.model.summary()

In [None]:
# Four-backbone comparison with class weights supplied and patience 7.
# The datasets are built once, outside the loop, since params do not change.
# NOTE(review): the task ids C_0..C_3 reuse the "C_<i>" scheme of the earlier
# ConvNeXt size comparison; if both cells run in one session their rows share
# task ids in the collector -- confirm that is intended.
i = 0
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
for m in [tf.keras.applications.ConvNeXtTiny, tf.keras.applications.ConvNeXtBase,
          tf.keras.applications.EfficientNetV2B0, tf.keras.applications.EfficientNetV2M]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    print(params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"C_{str(i)}", model, ds_train, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
collector.get_test_scores()

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["C_0", "C_1", "C_2", "C_3"])

In [None]:
# Experiment D: train on the augmented training set (source repeated twice);
# validation and test sets are left un-augmented. D_0 is ConvNeXtTiny and
# D_1 is EfficientNetV2B0.
i = 0
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
ds_train_aug = augment_dataset(ds_train, 2)
for m in [tf.keras.applications.ConvNeXtTiny, tf.keras.applications.EfficientNetV2B0]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    print(params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"D_{str(i)}", model, ds_train_aug, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
# out of memory ^
# Re-run of the augmentation experiment for EfficientNetV2B0 only, numbered
# from 1 so the run is recorded as "D_1".
i = 1
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
ds_train_aug = augment_dataset(ds_train, 2)
for m in [tf.keras.applications.EfficientNetV2B0]:
    print(f"Model: {m}")
    print(f"Start: {datetime.datetime.now()}")
    print(params)
    model = create_model(m, "base", params)
    df_train, df_test = run_task(f"D_{str(i)}", model, ds_train_aug, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    # Second evaluation on ds_test, for display only.
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
# Head ablation: EfficientNetV2B0 with a single 1024-unit fully-connected
# layer (fc_layers=1, fc_neurons=1024), recorded as "E_1_1024".
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"#FC: 1 - 1024")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 1, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"E_1_1024", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Ablation "F_no_wgts": the two-layer 1024-unit head on EfficientNetV2B0,
# with no class weights passed to run_task.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"No Class Weights")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"F_no_wgts", model, ds_train, ds_valid, ds_test, params)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

### Task G - Custom Model

In [None]:
# Task G: train the from-scratch CNN on pixel values rescaled by
# convert_dataset, without class weights.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"Custom Model")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_simple_model(params)
# summary() prints the table itself; wrapping it in print() only adds "None".
model.model.summary()
# Convert each split once and reuse it, so that training, the recorded test
# score and the final displayed evaluation all see identically scaled inputs.
ds_train_scaled = convert_dataset(ds_train)
ds_valid_scaled = convert_dataset(ds_valid)
ds_test_scaled = convert_dataset(ds_test)
df_train, df_test = run_task(f"G_cust_model", model, ds_train_scaled, ds_valid_scaled, ds_test_scaled, params)
collector.add_task_results(df_train, df_test)
# Previously this evaluated on the unscaled ds_test, which the model was
# never trained on.
print(model.model.evaluate(ds_test_scaled))
print(f"End: {datetime.datetime.now()}")

### Task H: Fine-Tune

In [None]:
# Task H, stage 1 ("H_std"): train only the head on top of the frozen
# EfficientNetV2B0 backbone. `model` is kept for the fine-tuning stage.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"Fine Tune")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"H_std", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
model.base_model.trainable = True

In [None]:
# Task H, stage 2 ("H_tune"): continue training the same `model` object
# with new params; `train` recompiles it before fitting.
# NOTE(review): the 4th Params field is `epsilon`, which `train` passes to
# Adam(epsilon=...). No learning rate is set anywhere, so 1e-5 here changes
# Adam's epsilon rather than the step size -- confirm this is the intended
# fine-tuning configuration.
params = Params(255, 256, 50, 1e-5, True, 7, False)
print(f"Fine Tune")
print(f"Start: {datetime.datetime.now()}")
print(params)
df_train, df_test = run_task(f"H_tune", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Task "I" ("Crop").
# NOTE(review): the model and run_task arguments are the same as in the
# "H_std" cell; whatever "Crop" varies is not visible here -- presumably the
# crop_to_aspect_ratio setting in create_dataset was toggled by hand; confirm.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"Crop")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"I", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Task "J": the same EfficientNetV2B0 / two-layer head as task I, but with
# batch normalisation inserted after the global average pooling.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"Batch Norm")
print(f"Start: {datetime.datetime.now()}")
print(params)
# bn=True must be passed explicitly: the default is False, which would make
# this run identical to task I.
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024, bn=True)
print(model.model.summary())
df_train, df_test = run_task(f"J", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
# Second evaluation on ds_test, for display only.
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Over-sampling attempt 1: load only the images under train/0 with an
# explicit all-zero label list, then augment them.
# NOTE(review): 663 is hard-coded and presumably has to equal the number of
# image files in that directory -- confirm.
# NOTE(review): with a single distinct label and label_mode='categorical',
# the one-hot width may differ from the 5-class datasets -- verify before
# combining this dataset with ds_train.
ds = image_dataset_from_directory(
        "/kaggle/working/data/train/0/",
        labels=list(np.repeat(0, 663)),
        label_mode='categorical',
        image_size=[255, 255],
        batch_size=256,
        seed=12345,
        shuffle=True,
        crop_to_aspect_ratio=True
    )
ds_aug = augment_dataset(ds, 2)

In [None]:
# Append the class-0-only dataset `ds` to the training set. Note that the
# un-augmented `ds` is concatenated here, not `ds_aug`.
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
ds_train_over = tf.data.Dataset.concatenate(ds_train, ds)
# Both inputs are already batched, so this shuffles whole batches.
ds_train_over = ds_train_over.shuffle(20000)

In [None]:
def augment_dataset2(ds: Dataset, num_repeats: int) -> Dataset:
    """
    Repeat a dataset and apply random image augmentations to every element.

    Each (image, label) pair gets a random left/right flip, a random up/down
    flip and a random brightness shift (max delta 0.2); labels pass through
    unchanged. All three random ops are given the same fixed seed, 12345.

    :param ds: dataset yielding (image, label) pairs.
    :param num_repeats: count passed to ``Dataset.repeat`` before mapping.
    :return: the repeated, augmented dataset.
    """
    fixed_seed = 12345

    def _randomise(img, lbl):
        mirrored = tf.image.random_flip_left_right(img, fixed_seed)
        flipped = tf.image.random_flip_up_down(mirrored, fixed_seed)
        return tf.image.random_brightness(flipped, 0.2, fixed_seed), lbl

    repeated = ds.repeat(num_repeats)
    return repeated.map(_randomise)

# Over-sampling with augmentation: pull the class-0 examples out of the
# training set, augment them, and append them to the original examples.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)

# Unbatch so the filter sees single examples; keep those whose label argmax is 0.
ds_over = ds_train.unbatch().filter(lambda x, label: tf.equal(tf.argmax(label, axis=0), 0))
ds_aug = augment_dataset2(ds_over, 2)

# Combine, shuffle, and re-batch (256 equals the batch size given in params).
ds_train_over = tf.data.Dataset.concatenate(ds_train.unbatch(), ds_aug)
ds_train_over = ds_train_over.shuffle(100000)
ds_train_over = ds_train_over.batch(256)

In [None]:
def augment_dataset2(ds: Dataset, num_repeats: int) -> Dataset:
    """
    Repeat a dataset and apply random image augmentations to every element.

    Each (image, label) pair gets a random left/right flip, a random up/down
    flip and a random brightness shift (max delta 0.2); labels pass through
    unchanged. All three random ops are given the same fixed seed, 12345.

    :param ds: dataset yielding (image, label) pairs.
    :param num_repeats: count passed to ``Dataset.repeat`` before mapping.
    :return: the repeated, augmented dataset.
    """
    fixed_seed = 12345

    def _randomise(img, lbl):
        mirrored = tf.image.random_flip_left_right(img, fixed_seed)
        flipped = tf.image.random_flip_up_down(mirrored, fixed_seed)
        return tf.image.random_brightness(flipped, 0.2, fixed_seed), lbl

    repeated = ds.repeat(num_repeats)
    return repeated.map(_randomise)

# Over-sampling with augmentation (this cell repeats the previous one verbatim):
# pull the class-0 examples out of the training set, augment them, and append
# them to the original examples.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)

# Unbatch so the filter sees single examples; keep those whose label argmax is 0.
ds_over = ds_train.unbatch().filter(lambda x, label: tf.equal(tf.argmax(label, axis=0), 0))
ds_aug = augment_dataset2(ds_over, 2)

# Combine, shuffle, and re-batch (256 equals the batch size given in params).
ds_train_over = tf.data.Dataset.concatenate(ds_train.unbatch(), ds_aug)
ds_train_over = ds_train_over.shuffle(100000)
ds_train_over = ds_train_over.batch(256)

In [None]:
# Experiment "K_1": train on the over-sampled training set `ds_train_over`
# built in an earlier cell; class weights are passed as well.
print(f"Over-sample")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"K_1", model, ds_train_over, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
def augment_dataset2(ds: Dataset, num_repeats: int) -> Dataset:
    """
    Repeat a dataset and apply random image augmentations to every element.

    Each (image, label) pair gets a random left/right flip, a random up/down
    flip and a random brightness shift (max delta 0.2); labels pass through
    unchanged. All three random ops are given the same fixed seed, 12345.

    :param ds: dataset yielding (image, label) pairs.
    :param num_repeats: count passed to ``Dataset.repeat`` before mapping.
    :return: the repeated, augmented dataset.
    """
    fixed_seed = 12345

    def _randomise(img, lbl):
        mirrored = tf.image.random_flip_left_right(img, fixed_seed)
        flipped = tf.image.random_flip_up_down(mirrored, fixed_seed)
        return tf.image.random_brightness(flipped, 0.2, fixed_seed), lbl

    repeated = ds.repeat(num_repeats)
    return repeated.map(_randomise)

# Over-sampling without augmentation: the class-0 examples are appended to the
# training set as plain duplicates (the augmentation call is commented out).
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)

# Unbatch so the filter sees single examples; keep those whose label argmax is 0.
ds_over = ds_train.unbatch().filter(lambda x, label: tf.equal(tf.argmax(label, axis=0), 0))
#ds_aug = augment_dataset2(ds_over, 2)

# Combine, shuffle, and re-batch (256 equals the batch size given in params).
ds_train_over = tf.data.Dataset.concatenate(ds_train.unbatch(), ds_over)
ds_train_over = ds_train_over.shuffle(100000)
ds_train_over = ds_train_over.batch(256)

In [None]:
# Experiment "K_2": train on whichever `ds_train_over` was built most recently
# (see the preceding over-sampling cell); class weights are passed as well.
print(f"Over-sample")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"K_2", model, ds_train_over, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Experiment "K_3": trains on the plain `ds_train` returned by
# data_preprocessing, not on an over-sampled set.
# NOTE(review): the printed label still says "Over-sample"; this looks like
# the comparison baseline for K_1/K_2 -- confirm.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"Over-sample")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"K_3", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["K_1", "K_2", "K_3"])

In [None]:
def train(task_id: str, model: Model,
             ds_train_: Dataset, ds_valid_: Dataset,
             params: Params, weights = None) -> pd.DataFrame:
    """
    Compile `model` with AdamW and fit it, returning the per-epoch history.

    :param task_id: identifier written to the `task_id` column of the result.
    :param model: Keras model to compile and fit (modified in place).
    :param ds_train_: training dataset.
    :param ds_valid_: validation dataset.
    :param params: supplies `epsilon` for the optimizer, `epochs`, and the
        early-stopping / learning-rate-reduction switches.
    :param weights: passed to `fit` as `class_weight`.
    :return: DataFrame of `history.history` plus `task_id` and `epoch`
        (the row index) columns.
    """
    print("using adamw")
    optimiser = tf.keras.optimizers.AdamW(epsilon=params.epsilon)
    model.compile(
        optimizer=optimiser,
        loss="categorical_crossentropy",
        metrics=['accuracy']
    )

    # Only the callbacks switched on in params are attached to the fit.
    active_callbacks = []
    if params.early_stopping:
        print("Using EarlyStopping")
        active_callbacks.append(
            callbacks.EarlyStopping(
                min_delta=0.0001,
                patience=params.early_stopping_patience,
                restore_best_weights=True,
                verbose=1,
            )
        )
    if params.adjust_learning_rate:
        print("Using ReduceLROnPlateau")
        active_callbacks.append(
            callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_delta=0.0005,
                mode='min',
                verbose=1,
            )
        )

    fit_log = model.fit(
        ds_train_,
        validation_data=ds_valid_,
        epochs=params.epochs,
        verbose=1,
        callbacks=active_callbacks,
        class_weight=weights
    )

    per_epoch = pd.DataFrame(fit_log.history)
    per_epoch["task_id"] = task_id
    per_epoch["epoch"] = per_epoch.index
    return per_epoch

In [None]:
# Experiment "L" (printed label "adamw"): same setup as the other
# EfficientNetV2B0 runs.
# NOTE(review): presumably meant to pick up the AdamW `train` defined in the
# cell above -- confirm run_task resolves to that definition.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"adamw")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"L", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["L", "K_3"])

In [None]:
# Experiment "M" (printed label "reduced class 3 baseline").
# NOTE(review): nothing in this cell reduces a class; presumably the data on
# disk or data_preprocessing was changed before running -- confirm.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
print(f"reduced class 3 baseline")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"M", model, ds_train, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Over-sampling without augmentation, with the epochs field raised to 75:
# class-0 examples are appended to the training set as plain duplicates.
params = Params(255, 256, 75, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)

# Unbatch so the filter sees single examples; keep those whose label argmax is 0.
ds_over = ds_train.unbatch().filter(lambda x, label: tf.equal(tf.argmax(label, axis=0), 0))
#ds_aug = augment_dataset2(ds_over, 2)

# Combine, shuffle, and re-batch (256 equals the batch size given in params).
ds_train_over = tf.data.Dataset.concatenate(ds_train.unbatch(), ds_over)
ds_train_over = ds_train_over.shuffle(100000)
ds_train_over = ds_train_over.batch(256)

In [None]:
# Experiment "M_1": train on the over-sampled set. Unlike the K_* runs, no
# class weights are passed to run_task here.
print(f"Over-sample")
print(f"Start: {datetime.datetime.now()}")
print(params)
model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, 1024)
print(model.model.summary())
df_train, df_test = run_task(f"M_1", model, ds_train_over, ds_valid, ds_test, params)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
# Sweep "E_2_<n>": one run per value of n, which is passed as the last
# argument of create_model_ablations (the 4th argument is fixed at 2).
# NOTE(review): per the printed "FC #n" label, n is presumably the width of
# the fully connected head -- confirm in create_model_ablations.
params = Params(255, 256, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
for n in [256, 512, 2048]:
    print(f"FC #n: {n}")
    print(f"Start: {datetime.datetime.now()}")
    print(params)
    model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 2, n)
    print(model.model.summary())
    df_train, df_test = run_task(f"E_2_{str(n)}", model, ds_train, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["E_2_256", "E_2_512", "E_2_2048"])

In [None]:
collector.get_test_scores()

In [None]:
# Sweep "E_1_<n>": as the E_2 sweep, but with batch size 128, the 4th argument
# of create_model_ablations set to 1, and n = 1024 included.
# NOTE(review): the 4th argument is presumably the number of fully connected
# layers -- confirm in create_model_ablations.
params = Params(255, 128, 50, 0.0075, True, 7, False)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
for n in [256, 512, 1024, 2048]:
    print(f"FC #n: {n}")
    print(f"Start: {datetime.datetime.now()}")
    print(params)
    model = create_model_ablations(tf.keras.applications.EfficientNetV2B0, "base", params, 1, n)
    print(model.model.summary())
    df_train, df_test = run_task(f"E_1_{str(n)}", model, ds_train, ds_valid, ds_test, params, class_weights)
    collector.add_task_results(df_train, df_test)
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")

In [None]:
plot_experiments_comp2(collector.get_train_details(), ["E_1_256", "E_1_512"])

In [None]:
# Experiment "F": EfficientNetB0 model from create_model, trained on the
# training set passed through augment_dataset with 2 repeats, with class weights.
print(f"Start: {datetime.datetime.now()}")
params = Params(255, 256, 50, 0.0075, True, 7, False)
print(params)
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
model = create_model(tf.keras.applications.EfficientNetB0, "base", params)
ds_train_aug = augment_dataset(ds_train, 2)
df_train, df_test = run_task("F", model, ds_train_aug, ds_valid, ds_test, params, class_weights)
collector.add_task_results(df_train, df_test)
print(model.model.evaluate(ds_test))
print(f"End: {datetime.datetime.now()}")

In [None]:
collector.get_test_scores()

In [None]:
tf.keras.applications.EfficientNetB0

In [None]:
# Two ConvNeXtBase runs, recorded as tasks "C_0" and "C_1"; `i` is a manual
# counter used only to build the task id.
# NOTE(review): `aug` is printed as "Repeats" but never reaches the data
# pipeline -- both iterations train on the same un-augmented ds_train.
# Presumably augment_dataset(ds_train, aug) was intended; confirm.
i = 0
for aug in [1, 2]:
    print(f"Repeats: {aug}")
    print(f"Start: {datetime.datetime.now()}")
    params = Params(255, 256, 50, 0.0075, True, 5, False)
    print(params)
    ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), params)
    model = create_model(tf.keras.applications.ConvNeXtBase, "base", params)
    df_train, df_test = run_task(f"C_{str(i)}", model, ds_train, ds_valid, ds_test, params)
    collector.add_task_results(df_train, df_test)
    print(model.model.evaluate(ds_test))
    print(f"End: {datetime.datetime.now()}")
    i = i + 1

In [None]:
# Task "Z_base": model from create_convnext_base on the un-augmented training
# set, no class weights; timestamps are printed before and after.
print(datetime.datetime.now())
model_z = create_convnext_base(DEFAULT_PARAMS)
df_train, df_test = run_task("Z_base", model_z, ds_train, ds_valid, ds_test, DEFAULT_PARAMS)
collector.add_task_results(df_train, df_test)
print(datetime.datetime.now())

In [None]:
# Task "Y_base": as "Z_base" but trained on augment_dataset(ds_train, 3).
print(datetime.datetime.now())
ds_train_aug = augment_dataset(ds_train, 3)
model_y = create_convnext_base(DEFAULT_PARAMS)
df_train, df_test = run_task("Y_base", model_y, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS)
collector.add_task_results(df_train, df_test)
print(datetime.datetime.now())

In [None]:
# Task "B_base_aug": create_convnext_base model trained on
# augment_dataset(ds_train, 2), no class weights.
print(datetime.datetime.now())
ds_train_aug = augment_dataset(ds_train, 2)
model_b = create_convnext_base(DEFAULT_PARAMS)
df_train, df_test = run_task("B_base_aug", model_b, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS)
collector.add_task_results(df_train, df_test)
print(datetime.datetime.now())

In [None]:
model_b.model.save("/kaggle/working/artefacts/model_b.keras")

In [None]:
del model_b
del model

In [None]:
# Task "C_base_aug_wgts": as "B_base_aug" but class weights are passed to run_task.
print(datetime.datetime.now())
ds_train_aug = augment_dataset(ds_train, 2)
model_c = create_convnext_base(DEFAULT_PARAMS)
df_train, df_test = run_task("C_base_aug_wgts", model_c, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS, class_weights)
collector.add_task_results(df_train, df_test)  
print(datetime.datetime.now())

In [None]:
model_c.model.save("/kaggle/working/artefacts/model_c.keras")

In [None]:
model_c.model.evaluate(ds_test)

In [None]:
pd.read_csv("/kaggle/working/artefacts/test_scores.csv")

In [None]:
collector.get_test_scores()

In [None]:
del model_c

In [None]:
# Task "D_base_aug_wgts_ft": unfreeze the base of model_c and train again with
# the dedicated fine-tuning parameters (epsilon 1e-5).
# Fix: fine_tune_params was built and printed but DEFAULT_PARAMS was passed to
# run_task, so the fine-tuning settings never took effect.
# NOTE(review): model_c must still exist -- a preceding cell runs `del model_c`.
# NOTE(review): the first three positional values (50, 196, 1) map to
# image_size, batch_size, epochs -- confirm that is the intended order.
print(datetime.datetime.now())
fine_tune_params = Params(50, 196, 1, 1e-5, True, 5, False)
print(fine_tune_params)
model_c.base_model.trainable = True
df_train, df_test = run_task("D_base_aug_wgts_ft", model_c, ds_train, ds_valid, ds_test, fine_tune_params, class_weights)
collector.add_task_results(df_train, df_test)
print(datetime.datetime.now())

In [None]:
# ConvNeXtTiny model from create_model, trained on augment_dataset(ds_train, 2).
# NOTE(review): the task id "B_base_aug" is also used by the ConvNeXtBase run
# in an earlier cell, so both runs land under the same id in the collector --
# consider a distinct id.
print(datetime.datetime.now())
ds_train_aug = augment_dataset(ds_train, 2)
model_d = create_model(tf.keras.applications.ConvNeXtTiny, "baseline", DEFAULT_PARAMS)
df_train, df_test = run_task("B_base_aug", model_d, ds_train_aug, ds_valid, ds_test, DEFAULT_PARAMS)
collector.add_task_results(df_train, df_test)
print(datetime.datetime.now())

In [None]:
df_train

In [None]:
df_test

In [None]:
ARTEFACTS_PATH = Path("artefacts")
ARTEFACTS_PATH.mkdir(parents=True, exist_ok=True)

In [None]:
ARTEFACTS_PATH

In [None]:
# Manual check of ResultCollector: create one on the artefacts path and add
# two sets of task results.
# NOTE(review): df_train2 / df_test2 are not defined in any visible cell.
collector = ResultCollector(ARTEFACTS_PATH)
collector.add_task_results(df_train, df_test)
collector.add_task_results(df_train2, df_test2)

In [None]:
collector.add_train_details(df_train)
collector.add_test_scores(df_test)

In [None]:
collector.get_train_details()

In [None]:
collector.get_test_scores()

In [None]:
pd.read_csv(ARTEFACTS_PATH / "train_details.csv")

In [None]:
pd.read_csv(ARTEFACTS_PATH / "test_scores.csv")

`older_code`

In [None]:
#!rm -rf /kaggle/working/data
#!rm /kaggle/working/data.zip

In [None]:
# import keras

# import pandas as pd
# import random
# import shutil
# import tensorflow as tf
# import os
# import zipfile

# from collections import Counter

# from pathlib import Path
# from sklearn.model_selection import train_test_split

# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# from tensorflow.keras import layers, callbacks
# from tensorflow.keras.models import Model


# from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D

# from tensorflow.data import Dataset

# from typing import NamedTuple, Tuple

# import matplotlib.pyplot as plt
# import seaborn as sns

In [None]:
# def data_cleanup() -> None:
#     # clean-up
#     try:
#         os.rmdir((data_path / "train_images"))
#         os.remove(file)
#     except Exception:
#          pass

In [None]:
df = create_test_record("test_task", [1.0, 2.0])

In [None]:
pd.concat([df, create_test_record("test_task", [1.0, 2.0])])

In [None]:




# Large VGG-like model
def fatVGG(cifarInput, num_classes, name="vgg"):
    """
    Stack a VGG-style network on `cifarInput` and return its output tensor.

    Three convolutional stages (2x32, 2x64 and 3x128 filters, 3x3 kernels,
    'same' padding, ReLU) are each followed by 2x2 max pooling and 25%
    dropout; the classifier is Flatten -> Dense(512) -> Dropout(0.5) ->
    Dense(num_classes, softmax). The 17 layers are named `name` + "0" to
    `name` + "16" in the order they are applied.

    :param cifarInput: input tensor the first convolution is applied to.
    :param num_classes: number of units in the final softmax layer.
    :param name: prefix for the layer names.
    :return: output tensor of the final softmax layer.
    """
    labels = iter([name + str(idx) for idx in range(17)])

    x = cifarInput

    # convolution and max pooling layers: (filters, number of conv layers)
    for filters, conv_count in ((32, 2), (64, 2), (128, 3)):
        for _ in range(conv_count):
            x = Conv2D(filters, (3, 3), padding='same', activation='relu', name=next(labels))(x)
        x = MaxPooling2D(pool_size=(2,2), name=next(labels))(x)
        x = Dropout(0.25, name=next(labels))(x)

    # classification layers
    x = Flatten(name=next(labels))(x)
    x = Dense(512, activation='relu', name=next(labels))(x)
    x = Dropout(0.5, name=next(labels))(x)
    return Dense(num_classes, activation='softmax', name=next(labels))(x)


def model_convnext_tiny(params: Params) -> Model:
    """
    Build a ConvNeXtTiny transfer-learning classifier.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable, then followed by global average pooling and a head of two
    Dense(1024, relu) + Dropout(0.5) pairs ending in a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) Sequential model.
    """
    backbone = tf.keras.applications.ConvNeXtTiny(weights='imagenet', include_top=False)
    backbone.trainable = False

    side = params.image_size

    stack = [
        tf.keras.Input(shape=(side, side, 3)),
        backbone,
        layers.GlobalAveragePooling2D(),
    ]

    # Classifier Head
    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(1024, activation='relu'))
        stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(units=5, activation="softmax"))

    return keras.Sequential(stack)


def model_convnext_base(params: Params) -> Model:
    """
    Build a ConvNeXtBase transfer-learning classifier.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable, then followed by global average pooling and a head of two
    Dense(1024, relu) + Dropout(0.5) pairs ending in a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) Sequential model.
    """
    backbone = tf.keras.applications.ConvNeXtBase(weights='imagenet', include_top=False)
    backbone.trainable = False

    side = params.image_size

    stack = [
        tf.keras.Input(shape=(side, side, 3)),
        backbone,
        layers.GlobalAveragePooling2D(),
    ]

    # Classifier Head
    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(1024, activation='relu'))
        stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(units=5, activation="softmax"))

    return keras.Sequential(stack)


def model_convnext_tiny_1fc(params: Params) -> Model:
    """
    Build a ConvNeXtTiny classifier with a single fully connected layer.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable, then followed by global average pooling and a head of one
    Dense(1024, relu) + Dropout(0.5) pair ending in a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) Sequential model.
    """
    backbone = tf.keras.applications.ConvNeXtTiny(weights='imagenet', include_top=False)
    backbone.trainable = False

    side = params.image_size

    stack = [
        tf.keras.Input(shape=(side, side, 3)),
        backbone,
        layers.GlobalAveragePooling2D(),
    ]

    # Classifier Head
    stack += [
        layers.Flatten(),
        layers.Dense(1024, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(units=5, activation="softmax"),
    ]

    return keras.Sequential(stack)


def model_effnet(params: Params) -> Model:
    """
    Build an EfficientNetB0 transfer-learning classifier.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable, then followed by global average pooling and a head of two
    Dense(1024, relu) + Dropout(0.5) pairs ending in a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) Sequential model.
    """
    backbone = tf.keras.applications.EfficientNetB0(weights='imagenet', include_top=False)
    backbone.trainable = False

    side = params.image_size

    stack = [
        tf.keras.Input(shape=(side, side, 3)),
        backbone,
        layers.GlobalAveragePooling2D(),
    ]

    # Classifier Head
    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(1024, activation='relu'))
        stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(units=5, activation="softmax"))

    return keras.Sequential(stack)


def model_effnetv2(params: Params) -> Model:
    """
    Build an EfficientNetV2B0 transfer-learning classifier.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable, then followed by global average pooling and a head of two
    Dense(1024, relu) + Dropout(0.5) pairs ending in a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) Sequential model.
    """
    backbone = tf.keras.applications.EfficientNetV2B0(weights='imagenet', include_top=False)
    backbone.trainable = False

    side = params.image_size

    stack = [
        tf.keras.Input(shape=(side, side, 3)),
        backbone,
        layers.GlobalAveragePooling2D(),
    ]

    # Classifier Head
    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(1024, activation='relu'))
        stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(units=5, activation="softmax"))

    return keras.Sequential(stack)


def effnet2(params: Params) -> Model:
    """
    Build the EfficientNetV2B0 classifier with the Keras functional API.

    The ImageNet-weighted backbone (loaded without its top) is marked
    non-trainable and called with `training=False`. Its output goes through
    global average pooling, Flatten, two Dense(1024, relu) + Dropout(0.5)
    pairs and a 5-unit softmax.

    :param params: `image_size` gives the side of the square, 3-channel input.
    :return: the (uncompiled) functional model.
    """
    backbone = tf.keras.applications.EfficientNetV2B0(weights='imagenet', include_top=False)
    backbone.trainable = False

    image_in = keras.Input(shape=(params.image_size, params.image_size, 3))
    features = backbone(image_in, training=False)
    features = keras.layers.GlobalAveragePooling2D()(features)
    features = keras.layers.Flatten()(features)
    for _ in range(2):
        features = keras.layers.Dense(1024, activation="relu")(features)
        features = keras.layers.Dropout(0.5)(features)
    class_probs = keras.layers.Dense(5, activation="softmax")(features)

    return keras.Model(image_in, class_probs)
    
    
#keras.utils.plot_model(mod.model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


def train(task_id: str, model: Model,
             ds_train_: Dataset, ds_valid_: Dataset,
             params: Params, weights = None) -> pd.DataFrame:
    """
    Compile `model` with Adam (epsilon fixed at 0.005) and fit it.

    :param task_id: identifier written to the `task_id` column of the result.
    :param model: Keras model to compile and fit (modified in place).
    :param ds_train_: training dataset.
    :param ds_valid_: validation dataset.
    :param params: supplies `epochs` and the early-stopping /
        learning-rate-reduction switches.
    :param weights: passed to `fit` as `class_weight`.
    :return: DataFrame of `history.history` after `reset_index()` (so it
        carries an `index` column), plus `task_id` and `epoch` columns.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adam(epsilon=0.005),
        loss="categorical_crossentropy",
        metrics=['accuracy']
    )

    # Only the callbacks switched on in params are attached to the fit.
    active_callbacks = []
    if params.early_stopping:
        print("Using EarlyStopping")
        active_callbacks.append(
            callbacks.EarlyStopping(
                min_delta=0.0001,
                patience=params.early_stopping_patience,
                restore_best_weights=True,
                verbose=1,
            )
        )
    if params.adjust_learning_rate:
        print("Using ReduceLROnPlateau")
        active_callbacks.append(
            callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_delta=0.001,
                mode='min',
                verbose=1,
            )
        )

    fit_log = model.fit(
        ds_train_,
        validation_data=ds_valid_,
        epochs=params.epochs,
        verbose=1,
        callbacks=active_callbacks,
        class_weight=weights
    )

    per_epoch = pd.DataFrame(fit_log.history).reset_index()
    per_epoch["task_id"] = task_id
    per_epoch["epoch"] = per_epoch.index
    return per_epoch


def run_experiment(exp_id: str, sub_exp_id: int, model_fn: str,
                      ds_train_: Dataset, ds_valid_: Dataset, params: Params, weights = None) -> Tuple[Model, pd.DataFrame]:
    """
    Build a model with `model_fn`, train it, and return it with its history.

    :param exp_id: identifier written to the `exp_id` column of the history.
    :param sub_exp_id: value written to the `sub_exp_id` column.
    :param model_fn: called as `model_fn(params)` to create the model, so it
        must be a callable even though the annotation says `str`.
    :param ds_train_: training dataset.
    :param ds_valid_: validation dataset.
    :param params: supplies `epochs` and the early-stopping /
        learning-rate-reduction switches.
    :param weights: passed to `fit` as `class_weight`.
    :return: `(model, history frame)`; the frame holds `history.history` after
        `reset_index()`, plus `exp_id`, `sub_exp_id` and `epoch` columns.
    """
    model = model_fn(params)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(epsilon=0.005),
        loss="categorical_crossentropy",
        metrics=['accuracy']
    )

    # Only the callbacks switched on in params are attached to the fit.
    active_callbacks = []
    if params.early_stopping:
        print("Using EarlyStopping")
        active_callbacks.append(
            callbacks.EarlyStopping(
                min_delta=0.0001,
                patience=params.early_stopping_patience,
                restore_best_weights=True,
                verbose=1,
            )
        )
    if params.adjust_learning_rate:
        print("Using ReduceLROnPlateau")
        active_callbacks.append(
            callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_delta=0.001,
                mode='min',
                verbose=1,
            )
        )

    fit_log = model.fit(
        ds_train_,
        validation_data=ds_valid_,
        epochs=params.epochs,
        verbose=1,
        callbacks=active_callbacks,
        class_weight=weights
    )

    per_epoch = pd.DataFrame(fit_log.history).reset_index()
    per_epoch["exp_id"] = exp_id
    per_epoch["sub_exp_id"] = sub_exp_id
    per_epoch["epoch"] = per_epoch.index

    return model, per_epoch

In [None]:
def add_results(df_all_results, df_new_results):
    """
    Append `df_new_results` to the accumulated results.

    When nothing has been accumulated yet (`df_all_results` is empty) the new
    frame itself is returned; otherwise the two frames are concatenated with
    their original index values kept.
    """
    if not df_all_results.empty:
        return pd.concat([df_all_results, df_new_results])
    return df_new_results
    

def add_test_results(df_all_results: pd.DataFrame, exp_id: str, res: list[float]):
    """
    Append one test-result row to the accumulated test results.

    :param df_all_results: results collected so far (may be empty).
    :param exp_id: experiment identifier for the new row.
    :param res: `res[0]` is stored as `test_loss`, `res[1]` as `test_accuracy`.
    :return: the new single-row frame when `df_all_results` is empty,
        otherwise the concatenation of both (original index values kept).
    """
    new_row = pd.DataFrame(
        {
            "exp_id": [exp_id],
            "test_loss" : [res[0]],
            "test_accuracy": [res[1]],
        }
    )
    return new_row if df_all_results.empty else pd.concat([df_all_results, new_row])


def recover_results() -> pd.DataFrame:
    """
    Load previously saved training results from `results.csv`.

    The path is relative to the current working directory. An empty
    DataFrame is returned when the file does not exist.
    """
    saved = "results.csv"
    return pd.read_csv(saved) if Path(saved).exists() else pd.DataFrame()


def recover_test_results() -> pd.DataFrame:
    """
    Load previously saved test results from `test_results.csv`.

    The path is relative to the current working directory. An empty
    DataFrame is returned when the file does not exist.
    """
    saved = "test_results.csv"
    return pd.read_csv(saved) if Path(saved).exists() else pd.DataFrame()

In [None]:
def plot_experiment_results(df_history: pd.DataFrame, exp_id: str, epoch_limit = 50) -> None:
    """
    Plot per-epoch means for one experiment as three separate figures.

    Rows of `df_history` with the given `exp_id` and `epoch <= epoch_limit`
    are averaged per epoch and plotted as: loss vs. val_loss, the loss gap
    (val_loss - loss), and accuracy vs. val_accuracy.
    """
    keep = (df_history.exp_id == exp_id) & (df_history.epoch <= epoch_limit)
    selected = df_history[keep].copy()
    selected["loss_gap"] = selected.val_loss - selected.loss

    column_groups = (
        ["loss", "val_loss"],
        ["loss_gap"],
        ["accuracy", "val_accuracy"],
    )
    # Aggregate everything first, then draw, so no figure appears if a column is missing.
    per_epoch = [
        selected[["epoch", *cols]].groupby(["epoch"]).mean()
        for cols in column_groups
    ]
    for frame in per_epoch:
        frame.plot()
    
def plot_experiments_comp(df_history: pd.DataFrame, exp_id: list, y_dim: str, epoch_limit = 50) -> None:
    """
    Draw one line per experiment of the per-epoch mean of `y_dim`.

    Only rows whose `exp_id` is in the given list and whose `epoch` is at
    most `epoch_limit` are used. `loss_gap` (val_loss - loss) is computed
    first, so it is a valid choice for `y_dim`.
    """
    in_scope = df_history.exp_id.isin(exp_id) & (df_history.epoch <= epoch_limit)
    subset = df_history[in_scope].copy()
    subset["loss_gap"] = subset.val_loss - subset.loss
    averaged = subset[["epoch","exp_id", y_dim]].groupby(["epoch", "exp_id"]).mean()
    sns.lineplot(data=averaged, x="epoch", y=y_dim, hue="exp_id")

In [None]:
def plot_experiments_comp2(df_history: pd.DataFrame, exp_id: list, epoch_limit = 50) -> None:
    """
    Compare experiments side by side on three metrics.

    Rows whose `exp_id` is in the given list and whose `epoch` is at most
    `epoch_limit` are averaged per (epoch, exp_id). One figure with three
    columns is drawn: val_accuracy, val_loss and loss_gap (val_loss - loss),
    each with one line per experiment.
    """
    in_scope = df_history.exp_id.isin(exp_id) & (df_history.epoch <= epoch_limit)
    subset = df_history[in_scope].copy()
    subset["loss_gap"] = subset.val_loss - subset.loss

    metrics = ("val_accuracy", "val_loss", "loss_gap")
    averaged = subset[["epoch","exp_id", *metrics]].groupby(["epoch", "exp_id"]).mean()

    fig, axes = plt.subplots(ncols=3, figsize=(16, 8))
    for metric, axis in zip(metrics, axes):
        sns.lineplot(data=averaged, x="epoch", y=metric, hue="exp_id",  ax=axis)

`main.py`

In [None]:
# Entry-point setup: build the train/validation/test datasets and the class
# weights from the current working directory using DEFAULT_PARAMS.
cwd = os.getcwd()
ds_train, ds_valid, ds_test, class_weights = data_preprocessing(Path(cwd), DEFAULT_PARAMS)
print(f"Class Weights: {class_weights}")

# Reloading earlier results from disk is currently switched off.
#df_results = recover_results()
#df_test_results = recover_test_results()

In [None]:
# augmented datasets
#ds_train_aug = augment_dataset(ds_train, 1)
#ds_train_aug_lg = augment_dataset(ds_train, 2)
#ds_train_aug_xlg = augment_dataset(ds_train, 4)

In [None]:
df_results.head()

In [None]:
df_test_results.head()

`experiments/tasks`

In [None]:
# Default job parameters for the older experiments.
# Fix: Params has seven fields, but only six positional values were given
# (epsilon was missing), which raises TypeError. Keywords are used so the
# values cannot silently shift to the wrong field.
# NOTE(review): epsilon=0.005 mirrors the value hard-coded in the older
# `train`/`run_experiment`; later cells use 0.0075 -- confirm which is wanted.
DEFAULT_PARAMS = Params(
    image_size=255,
    batch_size=196,
    epochs=2,
    epsilon=0.005,
    early_stopping=True,
    early_stopping_patience=5,
    adjust_learning_rate=False,
)

In [None]:
# Directory for result artefacts, created if it does not exist yet.
# Fix: Path.mkdir() returns None, so chaining it onto the constructor bound
# ARTIFACTS_PATH to None instead of the path.
ARTIFACTS_PATH = Path("artefacts")
ARTIFACTS_PATH.mkdir(parents=True, exist_ok=True)


In [None]:
def run_task(task_id: str, model_wrapper: ModelWrapper,
             ds_train_: Dataset, ds_valid_: Dataset, ds_test_: Dataset,
             params: Params, weights = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Train and test one model, returning the training history and test record.

    Fix: the body used the notebook globals `ds_train`, `ds_valid`, `ds_test`
    and `DEFAULT_PARAMS` and dropped `weights`, so the datasets, parameters
    and class weights supplied by the caller were ignored.

    :param task_id: identifier recorded with both result frames and passed to
        the confusion-matrix export.
    :param model_wrapper: wrapper whose `.model` attribute is the Keras model.
    :param ds_train_: training dataset.
    :param ds_valid_: validation dataset.
    :param ds_test_: dataset used for the final evaluation and confusion matrix.
    :param params: job parameters forwarded to `train`.
    :param weights: optional class weights forwarded to `train`.
    :return: `(df_train, df_test)` -- the frame returned by `train` and the
        record built by `create_test_record` from `model.evaluate`.
    """
    model = model_wrapper.model
    # train
    df_train = train(task_id, model, ds_train_, ds_valid_, params, weights)
    # test
    test_result = model.evaluate(ds_test_)
    df_test = create_test_record(task_id, test_result)
    # save CM too
    save_confusion_matrix(ds_test_, model, task_id)
    return df_train, df_test

In [None]:
def create_test_record(task_id: str, result: list[float]):
    """
    Build a single-row DataFrame describing one test evaluation.

    :param task_id: identifier stored in the `task_id` column.
    :param result: `result[0]` becomes `test_loss`, `result[1]` `test_accuracy`.
    :return: one-row DataFrame with columns task_id, test_loss, test_accuracy.
    """
    record = {
        "task_id": [task_id],
        "test_loss" : [result[0]],
        "test_accuracy": [result[1]],
    }
    return pd.DataFrame(record)

In [None]:
# Baseline: ConvNeXtBase model from create_model, trained and tested as task
# "convnextbase" with DEFAULT_PARAMS and no class weights.
baseline_model = create_model(tf.keras.applications.ConvNeXtBase, "baseline_model", DEFAULT_PARAMS)
df_train, df_test = run_task("convnextbase", baseline_model, ds_train, ds_valid, ds_test, DEFAULT_PARAMS)

In [None]:
df_train

In [None]:
df_test

In [None]:
    # NOTE(review): stale cell. The leading indentation makes this an
    # "unexpected indent" error when run as a top-level statement, and the call
    # passes five arguments (no test dataset) and the raw model rather than the
    # wrapper, which does not match run_task as defined in this notebook.
    df_train = run_task("model_convnext_tiny", baseline_model.model,
                   ds_train, ds_valid, DEFAULT_PARAMS)

In [None]:
df_results = add_results(df_results, df_hist)

In [None]:
df_results

In [None]:
baseline_model.model.evaluate(ds_valid)

In [None]:
tr = baseline_model.model.evaluate(ds_test)
df_test_results = add_test_results(df_test_results, "convnext_tiny", tr)
tr

In [None]:
# Unfreeze the base model and train again as task "convnext_tiny_ft".
# NOTE(review): this call passes five arguments and the raw model, matching the
# signature of `train` rather than run_task as defined in this notebook.
baseline_model.base_model.trainable = True
df_hist2 = run_task("convnext_tiny_ft", baseline_model.model,
                   ds_train, ds_valid, DEFAULT_PARAMS)

In [None]:
tr2 = baseline_model.model.evaluate(ds_test)
df_test_results = add_test_results(df_test_results, "convnext_tiny_ft", tr2)
tr2

In [None]:
df_test_results

In [None]:
plot_confusion_matrix(ds_test, baseline_model.model)

In [None]:
# NOTE(review): this call passes five arguments and the raw model, matching the
# signature of `train` rather than run_task as defined in this notebook.
df_hist = run_task("model_convnext_tiny", baseline_model.model,
                   ds_train, ds_valid, DEFAULT_PARAMS)

In [None]:
# Experiment "model_convnext_tiny": train on ds_train_aug_lg, append the
# history to df_results, persist it, then evaluate on the test set.
(m, df_hist) = run_experiment("model_convnext_tiny", 1, model_convnext_tiny, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)
    
# to_csv is called with its defaults here, so the frame's index is written as a column too.
df_results.to_csv("results.csv")

m.evaluate(ds_test)

In [None]:
# Experiment "model_convnext_base_orig_img": ConvNeXtBase builder trained on
# augment_dataset(ds_train, 4); history appended and saved, model evaluated.
(m1, df_hist) = run_experiment("model_convnext_base_orig_img", 1, model_convnext_base, augment_dataset(ds_train, 4), ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

m1.evaluate(ds_test)

In [None]:
# Experiment "model_convnext_base_orig_img_all": ConvNeXtBase builder trained
# on augment_dataset(ds_train, 1) with class weights; history appended and saved.
(m1a, df_hist) = run_experiment("model_convnext_base_orig_img_all", 1, model_convnext_base, augment_dataset(ds_train, 1), ds_valid, DEFAULT_PARAMS, class_weights)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

# Fix: evaluate the model trained in this cell (m1a); the original evaluated
# m1, the model from the previous experiment.
m1a.evaluate(ds_test)

In [None]:
# Experiment "model_convnext_base_orig_img_all_no_wgt": same as the weighted
# run but without class weights; history appended and saved, model evaluated.
(m1b, df_hist) = run_experiment("model_convnext_base_orig_img_all_no_wgt", 1, model_convnext_base, augment_dataset(ds_train, 1), ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

m1b.evaluate(ds_test)

In [None]:
# Experiment "model_convnext_tiny_aug_sm": ConvNeXtTiny builder trained on
# ds_train_aug; history appended and saved, model evaluated.
(m2, df_hist) = run_experiment("model_convnext_tiny_aug_sm", 1, model_convnext_tiny, ds_train_aug, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)

df_results.to_csv("results.csv")

m2.evaluate(ds_test)

In [None]:
plot_experiments_comp(df_results,["model_convnext_tiny", "model_convnext_tiny_aug", "model_convnext_tiny_aug4_sm"], "val_accuracy")

In [None]:
plot_experiments_comp(df_results,["model_convnext_tiny", "model_convnext_tiny_aug"], "val_loss")

In [None]:
# Experiment "model_convnext_tiny_aug4_sm": ConvNeXtTiny builder trained on
# ds_train_aug_lg; history appended and saved, model evaluated.
(m3, df_hist) = run_experiment("model_convnext_tiny_aug4_sm", 1, model_convnext_tiny, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)

df_results.to_csv("results.csv")

m3.evaluate(ds_test)

In [None]:
df_results.head()

In [None]:
plot_experiments_comp(df_results,["model_convnext_tiny_aug", "model_convnext_tiny_sm", "model_convnext_tiny_aug_sm", "model_convnext_tiny_aug4_sm"], "val_accuracy")

In [None]:
# Experiment "model_convnext_tiny_1fc": single-FC-layer ConvNeXtTiny builder
# trained on ds_train_aug_lg; history appended and saved, model evaluated.
(m4, df_hist) = run_experiment("model_convnext_tiny_1fc", 1, model_convnext_tiny_1fc, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)

df_results.to_csv("results.csv")

m4.evaluate(ds_test)

In [None]:
# Fix: plot_experiments_comp2 takes (df_history, exp_id, epoch_limit) and always
# draws val_accuracy, val_loss and loss_gap. The extra "val_loss" argument was
# being bound to epoch_limit, making the `epoch <= epoch_limit` filter compare
# numbers with a string.
plot_experiments_comp2(df_results,["model_convnext_tiny_1fc", "model_convnext_tiny_aug", "model_convnext_tiny_sm", "model_convnext_tiny_aug_sm", "model_convnext_tiny_aug4_sm"])

In [None]:
# Experiment "model_effnetv2": EfficientNetV2B0 builder trained on
# ds_train_aug_lg; history appended and saved, model evaluated.
# Note: this rebinds `m4`, which the "model_convnext_tiny_1fc" cell also assigns.
(m4, df_hist) = run_experiment("model_effnetv2", 1, model_effnetv2, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

m4.evaluate(ds_test)

In [None]:
# Experiment "model_effnet": EfficientNetB0 builder trained on ds_train_aug_lg;
# history appended and saved, and the test evaluation is recorded in
# df_test_results under the same experiment id.
(m5, df_hist) = run_experiment("model_effnet", 1, model_effnet, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS)
df_results = add_results(df_results, df_hist)

df_results.to_csv("results.csv")

r5 = m5.evaluate(ds_test)
df_test_results = add_test_results(df_test_results, "model_effnet", r5)
print(r5)

In [None]:
# Experiment "model_effnet_wgts": EfficientNetB0 builder trained on
# ds_train_aug_lg with class weights; history appended and saved.
(m6, df_hist) = run_experiment("model_effnet_wgts", 1, model_effnet, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS, class_weights)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

r6 = m6.evaluate(ds_test)
# Fix: record the test result under this experiment's own id; the original
# reused "model_effnet", making the weighted and unweighted rows
# indistinguishable in df_test_results.
df_test_results = add_test_results(df_test_results, "model_effnet_wgts", r6)
print(r6)

In [None]:
print(r6)

In [None]:
# Experiment "effnet2": functional-API EfficientNetV2B0 builder trained on
# ds_train_aug_lg with class weights; history appended and saved.
(m7, df_hist) = run_experiment("effnet2", 1, effnet2, ds_train_aug_lg, ds_valid, DEFAULT_PARAMS, class_weights)
df_results = add_results(df_results, df_hist)
    
df_results.to_csv("results.csv")

r7 = m7.evaluate(ds_test)
# Fix: store this model's own evaluation (r7); the original stored r6, the
# result of the previous experiment.
df_test_results = add_test_results(df_test_results, "effnet2", r7)
print(r7)

In [None]:
plot_experiments_comp2(df_results,["model_effnet", "model_effnet_wgts"])

In [None]:
plot_experiments_comp2(df_results,["model_convnext_tiny_1fc", "model_convnext_tiny_aug", "model_effnetv2", "model_effnet"])

In [None]:
# Inline (non-function) version of the EfficientNetV2B0 experiment: build the
# model with the functional API, compile it, and fit it on ds_train_aug_lg.
params = DEFAULT_PARAMS
# `weights` is assigned but not used below: both fit calls pass class_weight=None.
weights = class_weights
    
# ImageNet-weighted backbone without its top, marked non-trainable.
base_model = tf.keras.applications.EfficientNetV2B0(weights='imagenet', include_top=False)
base_model.trainable = False
inputs = keras.Input(shape=(params.image_size, params.image_size, 3))
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Flatten()(x)
# One of the two Dense/Dropout pairs is commented out, leaving a single-FC head.
#x = keras.layers.Dense(1024, activation="relu")(x)
#x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(1024, activation="relu")(x)
x = keras.layers.Dropout(0.5)(x)
outputs = keras.layers.Dense(5, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(epsilon=0.005),
    loss="categorical_crossentropy",
    metrics=['accuracy']
)

early_stopping = callbacks.EarlyStopping(
    min_delta=0.0001,
    patience=params.early_stopping_patience,
    restore_best_weights=True,
    verbose = 1
)

reduce_lr = callbacks.ReduceLROnPlateau(
    monitor = 'val_loss', factor = 0.3, 
    patience = 3, min_delta = 0.001, 
    mode = 'min', verbose = 1)

# Attach only the callbacks switched on in params.
cbs = []
if params.early_stopping:
    print("Using EarlyStopping")
    cbs += [early_stopping]
if params.adjust_learning_rate:
    print("Using ReduceLROnPlateau")
    cbs += [reduce_lr]

history = model.fit(
    ds_train_aug_lg,
    validation_data=ds_valid,
    epochs=params.epochs,
    verbose=1,
    callbacks=cbs,
    class_weight = None
)


In [None]:
# Evaluate the current `model` on ds_test; as the cell's last expression the
# returned metrics are displayed.
model.evaluate(ds_test)

In [None]:
# Stage 2 (fine-tuning): unfreeze the backbone and train the whole model again.
base_model.trainable = True

# Re-compile after changing `trainable`.
# NOTE(review): fine-tuning normally uses a much lower learning rate; here only
# Adam's `epsilon` is changed and the learning rate stays at its default --
# confirm whether learning_rate=1e-5 was intended.
model.compile(
    optimizer=tf.keras.optimizers.Adam(epsilon=1e-5),
    loss="categorical_crossentropy",
    metrics=['accuracy']
)

# Reuses the callback list `cbs` built when the model was first trained.
history2 = model.fit(
    ds_train_aug_lg,
    validation_data=ds_valid,
    epochs=params.epochs,
    verbose=1,
    callbacks=cbs,
    class_weight=None
)

In [None]:
# Evaluate the current `model` on ds_test.
model.evaluate(ds_test)
# Output recorded from an earlier run, as [loss, accuracy]:
# [0.4968397915363312, 0.8268691301345825]

In [None]:
# Keep a second reference to the current model under a distinct name, so it
# stays reachable after `model` is rebound by a later cell.
model_eff = model

In [None]:
# Transfer-learning experiment, stage 1: a frozen ImageNet-pretrained
# ConvNeXtTiny backbone with a two-layer dense head, trained with class weights.
params = DEFAULT_PARAMS
weights = class_weights
    
base_model = tf.keras.applications.ConvNeXtTiny(weights='imagenet', include_top=False)
# Freeze the backbone so only the new head is trained in this stage.
base_model.trainable = False
inputs = keras.Input(shape=(params.image_size, params.image_size, 3))
# training=False calls the backbone in inference mode.
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
# NOTE(review): global average pooling already collapses the spatial axes, so
# this Flatten looks redundant -- confirm before removing.
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(1024, activation="relu")(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(1024, activation="relu")(x)
x = keras.layers.Dropout(0.5)(x)
# Five-way softmax output.
outputs = keras.layers.Dense(5, activation="softmax")(x)
model = keras.Model(inputs, outputs)

# NOTE(review): `epsilon` is Adam's numerical-stability constant, not the
# learning rate (which is left at its default here) -- confirm this is intended.
model.compile(
    optimizer=tf.keras.optimizers.Adam(epsilon=0.005),
    loss="categorical_crossentropy",
    metrics=['accuracy']
)

# No `monitor` is given, so the Keras default metric is watched.
early_stopping = callbacks.EarlyStopping(
    min_delta=0.0001,
    patience=params.early_stopping_patience,
    restore_best_weights=True,
    verbose = 1
)

# Multiply the learning rate by 0.3 after 3 epochs without a val_loss
# improvement of at least 0.001.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor = 'val_loss', factor = 0.3, 
    patience = 3, min_delta = 0.001, 
    mode = 'min', verbose = 1)

# Enable each callback only when the corresponding Params flag is set.
cbs = []
if params.early_stopping:
    print("Using EarlyStopping")
    cbs += [early_stopping]
if params.adjust_learning_rate:
    print("Using ReduceLROnPlateau")
    cbs += [reduce_lr]

# Unlike the EfficientNet cell, class weights are applied during training here.
history = model.fit(
    ds_train_aug_lg,
    validation_data=ds_valid,
    epochs=params.epochs,
    verbose=1,
    callbacks=cbs,
    class_weight = weights
)

In [None]:
# Evaluate the current `model` on ds_test; the returned metrics are displayed.
model.evaluate(ds_test)

In [None]:
# Stage 2 (fine-tuning): unfreeze the backbone and train the whole model again.
base_model.trainable = True

# Re-compile after changing `trainable`.
# NOTE(review): fine-tuning normally uses a much lower learning rate; here only
# Adam's `epsilon` is changed and the learning rate stays at its default --
# confirm whether learning_rate=1e-5 was intended.
model.compile(
    optimizer=tf.keras.optimizers.Adam(epsilon=1e-5),
    loss="categorical_crossentropy",
    metrics=['accuracy']
)

# Reuses the callback list `cbs` and the class weights from the first stage.
history2 = model.fit(
    ds_train_aug_lg,
    validation_data=ds_valid,
    epochs=params.epochs,
    verbose=1,
    callbacks=cbs,
    class_weight = weights
)

In [None]:
# Evaluate the fine-tuned model on ds_test. `model` is bound directly to a
# keras.Model by the cells above (fit/evaluate are called on it), and
# keras.Model has no `.model` attribute, so evaluate() is called on it directly.
model.evaluate(ds_test)

In [None]:
# Remove the notebook's `ds_train` name (presumably to release memory held by
# the dataset -- it is not used by the cells below).
del ds_train

In [None]:
# Model outputs for every batch in ds_test (softmax probabilities, given the
# models built above end in a softmax layer).
preds = model.predict(ds_test)

In [None]:
# Evaluate m1 on ds_test; the returned metrics are displayed.
m1.evaluate(ds_test)

In [None]:
# Evaluate m1 on ds_valid, for comparison with its ds_test score.
m1.evaluate(ds_valid)

In [None]:
# Predict on the materialised inputs `xs` and take the arg-max class per sample.
# NOTE(review): `xs` is defined in a later cell, and `model.model` assumes
# `model` is a wrapper exposing the Keras model as `.model`; other cells bind
# `model` to a keras.Model directly -- confirm which object is intended.
probabilities = model.model.predict(xs)
predictions = np.argmax(probabilities, axis=1)
#Counter(predictions)

In [None]:
# Materialise every input batch of ds_test into one array (labels are dropped).
xs = np.concatenate([x for x, y in ds_test], axis=0)

In [None]:
# Distribution of m1's predicted classes over ds_test2.
# NOTE(review): `Counter` is presumably collections.Counter imported elsewhere
# in the notebook -- confirm.
probabilities = m1.predict(ds_test2)
predictions = np.argmax(probabilities, axis=1)
Counter(predictions)

In [None]:
# Distribution of m1a's predicted classes over ds_test2.
# NOTE(review): `Counter` is presumably collections.Counter imported elsewhere
# in the notebook -- confirm.
probabilities = m1a.predict(ds_test2)
predictions = np.argmax(probabilities, axis=1)
Counter(predictions)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Collect the label batches of ds_test and convert each label vector
# (presumably one-hot) to its integer class index.
# NOTE(review): this re-iterates ds_test, so it assumes the dataset yields
# samples in the same order as when `predictions` was computed -- confirm
# ds_test is not shuffled.
y = np.concatenate([y for x, y in ds_test], axis=0)
ys = [np.argmax(x) for x in y]

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Build the confusion matrix, normalised over the predicted class (columns).
result = confusion_matrix(ys, predictions, labels=[0,1,2,3,4], normalize='pred')
print(result)

In [None]:
# Render the previously computed confusion matrix `result` as a plot.
disp = ConfusionMatrixDisplay(result, display_labels=[0,1,2,3,4])
disp.plot()
plt.show()

In [None]:
# Build a second test dataset with shuffle=False, so repeated iterations yield
# samples in the same order (needed when predictions and labels are gathered
# in separate passes).
# NOTE(review): image_size is 255x255 -- confirm this matches the size the
# models were trained with.
ds_test2 = image_dataset_from_directory(
        Path("/kaggle/working/data/test/"),
        labels='inferred',
        label_mode='categorical',
        image_size=[255, 255],
        batch_size=196,
        shuffle=False
    )

In [None]:
# Evaluate on the unshuffled test dataset.
# NOTE(review): `model.model` assumes `model` is a wrapper exposing the Keras
# model as `.model`; other cells bind `model` to a keras.Model directly --
# confirm which object is intended.
model.model.evaluate(ds_test2)

In [None]:
# Predict on the unshuffled test dataset and plot the confusion matrix.
# NOTE(review): `model.model` assumes `model` is a wrapper exposing the Keras
# model as `.model` -- confirm which object is intended.
probabilities = model.model.predict(ds_test2)
predictions = np.argmax(probabilities, axis=1)
#Counter(predictions)

# Second pass over ds_test2 to gather the labels; ds_test2 is built with
# shuffle=False, so the order matches the predictions above.
y = np.concatenate([y for x, y in ds_test2], axis=0)
ys = [np.argmax(x) for x in y]

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Build the confusion matrix, normalised over the predicted class (columns).
result = confusion_matrix(ys, predictions, labels=[0,1,2,3,4], normalize='pred')
disp = ConfusionMatrixDisplay(result, display_labels=[0,1,2,3,4])
disp.plot()
plt.show()

In [None]:


def plot_confusion_matrix(ds: Dataset, model: Model, class_labels=(0, 1, 2, 3, 4)) -> None:
    """
    Plot a confusion matrix of `model`'s predictions over the dataset `ds`.

    The matrix is normalised over the predicted class (columns), so each
    column sums to 1 wherever that class was predicted at least once.

    Args:
        ds: dataset yielding (inputs, labels) batches; each label row is
            reduced to a class index with arg-max (i.e. one-hot style labels).
        model: model whose per-class outputs are arg-maxed into predictions.
        class_labels: class indices to include, in display order. Defaults to
            the five classes 0-4.
    """
    batch_predictions = []
    batch_labels = []

    # Predictions and labels are taken from the same batch in a single pass,
    # so they stay aligned even if `ds` reshuffles on every iteration.
    for inputs, one_hot_labels in ds:
        probabilities = model.predict_on_batch(inputs)
        batch_predictions.append(np.argmax(probabilities, axis=1))
        batch_labels.append(np.argmax(one_hot_labels, axis=1))

    predictions = np.concatenate(batch_predictions, axis=0)
    labels = np.concatenate(batch_labels, axis=0)

    class_labels = list(class_labels)
    result = confusion_matrix(labels, predictions, labels=class_labels, normalize='pred')
    disp = ConfusionMatrixDisplay(result, display_labels=class_labels)
    disp.plot()
    plt.show()

In [None]:
# Evaluate m1b on ds_valid; the returned metrics are displayed.
m1b.evaluate(ds_valid)

In [None]:
# Evaluate m1b on ds_test, for comparison with its ds_valid score.
m1b.evaluate(ds_test)

In [None]:
# Confusion matrix for m1b on the unshuffled test dataset ds_test2.
plot_confusion_matrix(ds_test2, m1b)