# Mount Google Drive and append the project path to PYTHONPATH


In [None]:
import os
import sys

from google.colab import drive

# Mount the user's Google Drive inside the Colab VM at /content/drive.
drive.mount("/content/drive")
# Make the project's helper modules stored on Drive importable from this
# notebook (presumably colab_functions, colab_utils, prepare_data and
# train_NN live in this folder -- confirm).
sys.path.append("/content/drive/MyDrive/DeepLCMS/train_google_colab")

# Import and install libraries

In [None]:
%%capture
# Install the third-party packages imported further down in this notebook.
# The %%capture cell magic above keeps pip's output out of the notebook.
!pip install lightning
!pip install timm
!pip install torchinfo

In [None]:
import gc
from typing import Optional, Tuple

import colab_functions
import colab_utils
import pandas as pd
import prepare_data
import pytorch_lightning as pl
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import train_NN
from google.colab import drive
from lightning.pytorch.loggers import CSVLogger
from pytorch_lightning import LightningModule
from pytorch_lightning.callbacks import Callback, EarlyStopping
from pytorch_lightning.trainer.trainer import Trainer
from timm import create_model
from torchmetrics.classification import (
    BinaryAUROC,
    BinaryF1Score,
    BinaryPrecision,
    BinaryRecall,
)
import seaborn as sns

In [None]:
# Restrict CUDA-aware libraries to the GPU with index 0 by exporting
# CUDA_VISIBLE_DEVICES into this process's environment.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Unzip data

In [None]:
# Extract experiment.zip into the current working directory; -q silences
# unzip's per-file listing.
!unzip -q experiment.zip

# Check if GPU is used

In [None]:
# Ask the project helper which compute device to use. What exactly it returns
# (and whether it prints anything) is defined in colab_functions, which is not
# shown here -- presumably a torch device; confirm.
device = colab_functions.get_device()

# Finding the best version of ResNet


In [None]:
%%script echo skipping

# NOTE: this cell starts with the `%%script echo skipping` magic, so none of
# the code below runs until that line is removed.

# Pretrained ResNet variants known to timm, starting at position 90 of the
# listing. The results are saved as "part2" below, so this presumably
# continues an earlier run over the first 90 entries -- confirm.
resnet_models = timm.list_models("resnet*", pretrained=True)[90:]


def _fit_pretrained_model(model_name: str) -> None:
    """Train one pretrained timm model and log its metrics as CSV.

    Each experiment runs inside this function so that the model, transforms,
    dataloaders and trainer are plain locals: they go out of scope when the
    function returns or raises, which lets the caller reclaim their memory.

    Args:
        model_name: timm model identifier, as returned by ``timm.list_models``.

    Raises:
        RuntimeError, ValueError: propagated from model construction, data
            preparation or training; the sweep loop handles them.
    """
    model = train_NN.PretrainedModelEvaluator(model_name)

    preprocess_train, preprocess_val, preprocess_test = (
        prepare_data.get_timm_transforms(model)
    )

    # The test dataloader is returned as well but is not used by this sweep.
    train_dataloader, val_dataloader, _ = prepare_data.get_dataloaders(
        preprocess_train=preprocess_train,
        preprocess_val=preprocess_val,
        preprocess_test=preprocess_test,
    )

    trainer = Trainer(
        max_epochs=50,
        log_every_n_steps=1,
        # One CSV log directory per model, under "logs".
        logger=CSVLogger("logs", name=str(model_name)),
        # Stop as soon as the validation loss stops improving.
        callbacks=[EarlyStopping(monitor="val_loss", mode="min")],
    )

    trainer.fit(
        model=model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )


for pretrained_model in resnet_models:
    try:
        _fit_pretrained_model(pretrained_model)
    except (RuntimeError, ValueError) as err:
        # Best-effort sweep: a model that cannot be built or trained is
        # reported and skipped. The exception types must be given as a tuple;
        # `RuntimeError or ValueError` evaluates to just `RuntimeError`.
        print(f"Skipping {pretrained_model}: {type(err).__name__}: {err}")
    finally:
        # The experiment's objects were locals of the helper, so they are
        # unreachable by now; collect them (and release cached GPU memory)
        # before the next model is loaded, whether or not training succeeded.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Gather the logged metrics of all experiments, persist them and plot them.
results_df = colab_functions.get_experiment_results()
results_df.to_csv("resnet_models_results_part2.csv", index=True)
colab_functions.plot_experiment_results(results_df)

In [None]:
# Read the previously saved experiment results back from disk.
results_df = pd.read_csv("resnet_models_results.csv")

# Keep only the rows whose metric name contains "val".
is_validation_row = results_df["variable"].str.contains("val")
filtered_df = results_df[is_validation_row]

# Best (highest) value per experiment for every metric except the loss,
# reshaped to one row per experiment and one column per metric.
result_df_max = (
    filtered_df.groupby(["variable", "experiment"])["value"]
    .max()
    .reset_index()
    .loc[lambda frame: frame["variable"] != "val_loss"]
    .pivot(index="experiment", columns="variable", values="value")
    .round(3)
)

# Best (lowest) validation loss per experiment, in the same layout.
result_df_min = (
    filtered_df.groupby(["variable", "experiment"])["value"]
    .min()
    .reset_index()
    .loc[lambda frame: frame["variable"] == "val_loss"]
    .pivot(index="experiment", columns="variable", values="value")
    .round(3)
)

# Concatenate the two tables side by side and rank by validation loss.
score_board = pd.concat([result_df_max, result_df_min], axis=1).sort_values(
    "val_loss"
)
score_board

In [None]:
# ImageNet results table published in the pytorch-image-models repository;
# it is used in this notebook for its "model" and "param_count" columns.
TIMM_IMAGENET_RESULTS_URL = "https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/results-imagenet.csv"

timm_model_db = pd.read_csv(TIMM_IMAGENET_RESULTS_URL)

In [None]:
# Parameter counts for just the models that appear on the score board.
is_scored_model = timm_model_db["model"].isin(score_board.index)
param_counts = timm_model_db.loc[is_scored_model, ["model", "param_count"]]

# Human-readable names for the metric identifiers.
metric_labels = {
    "val_acc": "Validation accuracy",
    "val_loss": "Validation loss",
    "val_precision": "Validation precision",
    "val_f1": "Validation F1",
}

# Attach the parameter count to every experiment, drop recall, and reshape to
# long format: one row per (model, metric) pair with its value.
score_board_w_params = (
    score_board.merge(param_counts, left_index=True, right_on="model")
    .sort_values(by="val_loss")
    .astype({"param_count": float})
    .drop(columns="val_recall")
    .melt(id_vars=["model", "param_count"])
    .replace(metric_labels)
)

In [None]:
# One regression panel per metric: metric value against parameter count.
# Each panel gets its own axis ranges; the figure is saved as a PNG.
with sns.plotting_context("talk", font_scale=0.8):
    grid = sns.lmplot(
        x="param_count",
        y="value",
        data=score_board_w_params,
        col="variable",
        col_wrap=2,
        scatter_kws=dict(s=100, alpha=0.5),
        facet_kws={"sharex": False, "sharey": False},
    )
    # Title each panel with just the metric name.
    grid.set_titles(
        col_template="{col_name}",
        row_template="{row_name}",
        size=16,
        fontweight="bold",
    )
    grid.savefig("resnet_results_w_parameters.png")

In [None]:
# Load the TensorBoard notebook extension (reloading it if already loaded).
%reload_ext tensorboard
# NOTE(review): the training cell in this notebook logs with CSVLogger under
# "logs"; confirm that /content/lightning_logs actually contains TensorBoard
# event files before relying on this view.
%tensorboard --logdir='/content/lightning_logs'