# Mount drive and append path to PYTHONPATH


In [None]:
import os
import sys

from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount("/content/drive")
# Add the Drive folder to the import search path so modules stored there can
# be imported by name (presumably colab_functions, colab_utils, prepare_data
# and train_NN live in it — confirm).
sys.path.append("/content/drive/MyDrive/DeepLCMS/gpu_modules")

# Import and install libraries

In [None]:
%%capture
# The %%capture cell magic above must remain the first line of the cell; it
# captures the cell's output so the pip logs do not flood the notebook.
!pip install lightning
!pip install timm
!pip install torchinfo
!pip install scikit-posthocs
!pip install optuna
!pip install torchcam

In [None]:
import colab_functions
import colab_utils
import pandas as pd
import prepare_data
import timm
import train_NN
from lightning.pytorch import loggers, callbacks, tuner, trainer
from pathlib import Path
from tqdm import tqdm
from IPython.display import clear_output
import seaborn as sns
import matplotlib.pyplot as plt
import scikit_posthocs as sp

In [None]:
# Expose only GPU index 0 to CUDA-based libraries in this process by setting
# the CUDA_VISIBLE_DEVICES environment variable.
# NOTE(review): the variable is only honoured if it is set before CUDA is
# initialised — confirm nothing executed earlier has already touched the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Unzip data

In [None]:
# Extract experiment.zip into the current working directory (-q: quiet).
# NOTE(review): presumably this archive provides the
# ST001618_Opium_study_LC_MS_500 folder read by the cells below — confirm.
!unzip -q experiment.zip

# Check if GPU is used

In [None]:
# Query the project helper for the compute device.
# NOTE(review): `device` is not referenced again in the visible cells;
# presumably get_device() also reports whether a GPU is available — confirm.
device = colab_functions.get_device()

# Getting the candidate models based on Experiment #1


In [None]:
# Read the shortlist of candidate models and keep the column of model names;
# the training loop iterates over it and the results cell uses the table to
# look up parameter counts.
candidates_df = pd.read_csv("exp_2_candidates.csv")
pretrained_models = candidates_df["model"]

# Inspect a model and its dataloader


In [None]:
# Single example model used to sanity-check the architecture and the data
# pipeline before the full training loop is launched.
PRETRAINED_MODEL = "convnextv2_nano.fcmae_ft_in22k_in1k_384"

model = train_NN.PretrainedModel(
    pretrained_model_name=PRETRAINED_MODEL, learning_rate=0.001
)
# The data module is constructed from the model itself plus the dataset folder.
# NOTE(review): presumably it reads the model's expected input configuration
# (e.g. image size / normalisation) — confirm in prepare_data.
datamodule = prepare_data.LCMSDataModule(
    model,
    data_dir=Path("/content/ST001618_Opium_study_LC_MS_500"),
)
# Display the network architecture via the project helper method.
model.show_architecture()

In [None]:
# Inspect the "train" dataloader of the data module built in the previous cell.
datamodule.inspect_dataloader("train")

# Training loop

In [None]:
# Train every candidate model once with identical settings and log the
# metrics to CSV (one log folder per model name under ./logs).
#
# Failures are collected in `failed_models` because the per-iteration error
# message is wiped by `clear_output` as soon as a later model finishes
# successfully; they are printed again once the loop is done.
failed_models = {}

for model_name in tqdm(pretrained_models):
    try:
        model = train_NN.PretrainedModel(
            pretrained_model_name=model_name, learning_rate=0.001
        )
        datamodule = prepare_data.LCMSDataModule(
            model,
            data_dir=Path("/content/ST001618_Opium_study_LC_MS_500"),
        )

        logger = loggers.CSVLogger("logs", name=str(model_name))

        trainer_ = trainer.Trainer(
            max_epochs=50,
            log_every_n_steps=1,
            logger=logger,
            precision="16-mixed",
            # Stop once val_loss has not improved for 10 consecutive checks.
            callbacks=[
                callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=10)
            ],
        )
        trainer_.fit(model=model, datamodule=datamodule)

    except RuntimeError as e:
        # Best effort: skip this model and carry on with the remaining ones.
        failed_models[model_name] = str(e)
        print(f"{model_name} could not run because {e}")

    else:
        # Drop the references before the next model is built, then clear the
        # (long) training output of the model that just finished.
        del model, datamodule, trainer_
        clear_output(wait=True)

# Repeat the failure report so it survives the clear_output calls above.
for failed_name, reason in failed_models.items():
    print(f"{failed_name} could not run because {reason}")

# Gather the logged metrics of all runs and persist them for the analysis.
results_df = colab_functions.get_experiment_results()
results_df.to_csv("pretrained_model_results.csv", index=False)

## Results

In [None]:
# The experiment was run in two parts, so there are two result CSV files to
# read in; every file in the working directory whose name starts with
# "pretrained_model_results" is loaded and the tables are stacked row-wise.
result_files = Path.cwd().glob("pretrained_model_results*")

# Lookup table: model name -> parameter count, taken from the candidates table.
param_count_by_model = candidates_df.set_index("model")["param_count"].to_dict()

results_df = pd.concat(
    [pd.read_csv(csv_file) for csv_file in result_files],
    axis="index",
).reset_index(drop=True)

# Attach the parameter count of the model behind each experiment row.
results_df = results_df.assign(
    param_count=results_df["experiment"].map(param_count_by_model)
)

results_df

In [None]:
# Next, find for every model the epoch at which it reached its lowest val_loss.

# Long -> wide: one row per (epoch, experiment, param_count) and one column
# per logged variable.
metrics_wide = (
    results_df.pivot(
        index=["epoch", "experiment", "param_count"],
        columns="variable",
        values="value",
    )
    .reset_index()
    .sort_values(by=["experiment", "epoch"])
)

# Row label of the minimum-val_loss epoch of each experiment.
best_epoch_labels = metrics_wide.groupby("experiment")["val_loss"].idxmin()

# Rank those best epochs: lowest loss first, ties broken by the higher F1.
best_models = metrics_wide.loc[best_epoch_labels].sort_values(
    ["val_loss", "val_f1"], ascending=[True, False]
)

best_models.head(10)

In [None]:
# Derive a coarse "family" label from each model name and reshape the best
# epochs into long format (one row per model and validation metric) for
# plotting.
best_models_melted = (
    best_models.assign(
        # Family = text before the first "_" and before the first ".", with
        # all digits removed (e.g. "convnextv2_nano.xyz" -> "convnextv").
        # The pattern is a raw string and regex=True is passed explicitly:
        # since pandas 2.0 Series.str.replace treats the pattern literally by
        # default, which would leave the digits in place and make the
        # family-name mapping below a silent no-op.
        family=lambda df: df.experiment.str.split("_", expand=True)[0]
        .str.split(".", expand=True)[0]
        .str.replace(r"\d+", "", regex=True)
        # Merge naming variants into their base family.
        .replace({"convnextv": "convnext", "densenetblurd": "densenet"})
    )
    .loc[
        :,
        [
            "experiment",
            "param_count",
            "val_accuracy",
            "val_f1",
            "val_loss",
            "val_precision",
            "val_recall",
            "family",
        ],
    ]
    .melt(
        id_vars=["experiment", "family", "param_count"],
        value_vars=[
            "val_accuracy",
            "val_f1",
            "val_loss",
            "val_precision",
            "val_recall",
        ],
    )
)
best_models_melted

In [None]:
# One regression panel per model family: metric value against parameter
# count, coloured by metric. The x axis is independent per panel, the y axis
# is shared.
facet_options = {"sharex": False, "sharey": True}

with sns.plotting_context("talk", font_scale=0.8):
    grid = sns.lmplot(
        data=best_models_melted,
        x="param_count",
        y="value",
        hue="variable",
        col="family",
        height=3,
        facet_kws=facet_options,
    )
    # Use the bare family name as the panel title.
    grid.set_titles(
        row_template="{row_name}",
        col_template="{col_name}",
        fontweight="bold",
        size=16,
    )
    grid.savefig("summary.png")

# Evaluating variability of top models from the three families

In [None]:
# Top model of each of the three families, retrained several times to measure
# run-to-run variability.
# NOTE(review): this cell uses /kaggle/... paths while the other cells use
# /content/...; presumably it was executed on Kaggle — confirm before
# re-running on Colab.
candidate_models = [
    "convnext_large_mlp.clip_laion2b_augreg_ft_in1k_384",
    "mobileone_s3.apple_in1k",
    "mobilevitv2_200.cvnets_in22k_ft_in1k_384",
]

# Number of independent training runs per model.
N_REPLICATES = 5

for model_name in tqdm(candidate_models):
    # `round_nr` (not `round`) so the builtin round() is not shadowed.
    for round_nr in range(1, N_REPLICATES + 1):
        print(f"Round {round_nr}, working on: {model_name}")
        # The classes are reached through their modules: this notebook only
        # does `import train_NN` / `import prepare_data`, so the bare names
        # PretrainedModel / LCMSDataModule are not defined.
        model = train_NN.PretrainedModel(
            pretrained_model_name=model_name, learning_rate=0.001
        )
        datamodule = prepare_data.LCMSDataModule(
            model,
            data_dir=Path("/kaggle/input/ST001618_Opium_study_LC_MS_500"),
        )

        # All replicates of a model log under the same name.
        # NOTE(review): presumably CSVLogger puts each run in its own
        # version_N sub-folder — confirm.
        logger = loggers.CSVLogger("logs", name=str(model_name))

        trainer_ = trainer.Trainer(
            max_epochs=50,
            log_every_n_steps=1,
            logger=logger,
            precision="16-mixed",
            # Stop once val_loss has not improved for 10 consecutive checks.
            callbacks=[
                callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=10)
            ],
        )

        trainer_.fit(model=model, datamodule=datamodule)

        # Drop the references before the next run and clear the cell output.
        del model, datamodule, trainer_
        clear_output(wait=True)

# NOTE(review): the keyword is spelled `direcory`; it is kept unchanged because
# it has to match the parameter name declared in
# colab_functions.get_experiment_results — confirm the spelling there.
results_df = colab_functions.get_experiment_results(direcory=r"/kaggle/working/logs")
results_df.to_csv("top_pretrained_model_results_replicates.csv", index=False)
colab_functions.plot_experiment_results(results_df)

## Results

In [None]:
# Load the replicate results from CSV.
# NOTE(review): the file name ("... -v2.csv") differs from the one written by
# the training cell above; presumably it is a re-uploaded copy of a later run
# — confirm.
results_df = pd.read_csv("/content/top_pretrained_model_results_replicates -v2.csv")

In [None]:
# Plot the replicate results with the project helper; bbox_to_anchor is
# forwarded to it (presumably to position the legend — confirm in
# colab_functions).
colab_functions.plot_experiment_results(results_df, bbox_to_anchor=(1.25, 0.97))

In [None]:
# For every (exp_nr, experiment, variable) group keep the best value reached:
# the minimum for validation-loss variables and the maximum for every other
# validation variable.
run_keys = ["exp_nr", "experiment", "variable"]
display_order = ["experiment", "exp_nr", "variable"]
values_per_run = results_df.groupby(run_keys)

# Lowest value per group, restricted to validation loss variables.
min_values = (
    values_per_run.agg({"value": "min"})
    .reset_index()
    .sort_values(by=display_order)
    .query("variable.str.contains('val') and variable.str.contains('_loss')")
)

# Highest value per group, restricted to the non-loss validation variables.
max_values = (
    values_per_run.agg({"value": "max"})
    .reset_index()
    .sort_values(by=display_order)
    .query("variable.str.contains('val') and ~variable.str.contains('_loss')")
)

# Stack both selections into a single summary table.
concat_summary = pd.concat([min_values, max_values], axis=0)
concat_summary

In [None]:
# One bar-chart panel per metric, comparing the models across replicates.
with sns.plotting_context("talk", font_scale=0.8):
    grid = sns.FacetGrid(concat_summary, col="variable", col_wrap=5, sharex=False)
    grid.map_dataframe(sns.barplot, x="value", y="experiment", capsize=0.15)

    grid.set_titles(
        row_template="{row_name}",
        col_template="{col_name}",
        fontweight="bold",
        size=16,
    )
    grid.set_axis_labels("", "")

    # Annotate every bar with its value, formatted to two decimals.
    for ax in grid.axes.flat:
        for bars in ax.containers:
            value_labels = [f"{x:.2f}" for x in bars.datavalues]
            ax.bar_label(bars, labels=value_labels, fontsize=10, padding=17)

    plt.tight_layout()

    grid.savefig("experiment_result.png")

# Testing statistical significance with Dunn’s test

In [None]:
# Run Dunn's post-hoc test separately for every metric, comparing the
# experiments pairwise; the resulting p-value tables are stored per metric.
results_dict = {}

for metric in concat_summary["variable"].unique():
    metric_rows = concat_summary.loc[concat_summary["variable"] == metric]
    # p_adjust="fdr_bh" requests the Benjamini-Hochberg FDR correction.
    results_dict[metric] = sp.posthoc_dunn(
        a=metric_rows, val_col="value", group_col="experiment", p_adjust="fdr_bh"
    )

In [None]:
# Stack the per-metric p-value tables and keep only the rows that contain at
# least one p-value below 0.05.
all_pvalues = pd.concat(results_dict)
has_significant_pair = (all_pvalues < 0.05).any(axis=1)

(
    all_pvalues.loc[has_significant_pair, :]
    # Rows with identical row sums are treated as duplicates and dropped.
    # NOTE(review): presumably meant to collapse redundant rows of the
    # symmetric p-value tables — confirm.
    .assign(sum_value=lambda df: df.sum(axis=1))
    .drop_duplicates(subset="sum_value")
    .drop(columns="sum_value")
)

According to the replication study, convnext_large emerged as the most effective model, surpassing both mobileone and mobilevitv2. Although convnext_large and mobilevitv2 showed similar patterns, convnext_large had a statistically significant advantage over mobilevitv2 in validation loss (p = 0.01) and achieved significantly better validation recall (p < 0.05). Among the three models tested, mobileone consistently underperformed its counterparts on all performance metrics except validation recall, where it narrowly outperformed mobilevitv2 (p < 0.05).