# Mount drive and append path to PYTHONPATH


In [None]:
from google.colab import drive
import os
import sys

# Mount Google Drive inside the Colab VM at /content/drive.
drive.mount("/content/drive")
# Add the project folder on Drive to the import search path; the project
# modules imported further down (colab_functions, colab_utils, prepare_data,
# train_NN) presumably live there — confirm the folder layout.
sys.path.append("/content/drive/MyDrive/DeepLCMS/train_google_colab")

# Import and install libraries

In [None]:
%%capture
# Install the third-party packages that are not part of the default Colab
# image; %%capture hides pip's console output for the whole cell.
# lightning and timm are imported directly by this notebook; the remaining
# packages are presumably needed by the project modules — confirm.
!pip install lightning
!pip install timm
!pip install torchinfo
!pip install scikit-posthocs
!pip install optuna
!pip install torchcam

In [None]:
import colab_functions
import colab_utils
import pandas as pd
import prepare_data
import timm
import train_NN
from lightning.pytorch import loggers, callbacks, tuner, trainer
from pathlib import Path

In [None]:
# Restrict CUDA-aware libraries in this process to the GPU with index 0.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Unzip data

In [None]:
# Extract every zip archive found in the current working directory. -q keeps
# unzip quiet, and quoting the pattern lets unzip expand the wildcard itself
# instead of the shell.
!unzip -q "*.zip"

# Check if GPU is used

In [None]:
# Ask the project helper which device to use.
# NOTE(review): presumably returns the CUDA device when a GPU is available and
# falls back to CPU otherwise — confirm in colab_functions.get_device.
device = colab_functions.get_device()

# Taking a look at the list of Timm pretrained models

In [None]:
# Download timm's published ImageNet results table.
timm_model_db = pd.read_csv(
    "https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/results-imagenet.csv"
)

# param_count is read as text because it contains thousands separators;
# drop the commas and convert it to float so it can be sorted numerically.
timm_model_db["param_count"] = (
    timm_model_db["param_count"].str.replace(",", "").astype(float)
)

In [None]:
# Display names of the architecture families to look up in the timm results
# table. The order is significant: later cells iterate this list in order.
# NOTE(review): presumably copied from the model list in the timm README —
# confirm when updating.
# fmt: off
timm_model_list = [
    "Aggregating Nested Transformers", "BiT", "Bottleneck Transformers", "CaiT",
    "CoaT", "CoAtNet", "ConvNeXt", "ConvNeXt-V2",
    "ConViT", "CspNet", "DeiT", "DeiT-III",
    "DenseNet", "DLA", "DPN", "EdgeNeXt",
    "EfficientFormer", "EfficientNet", "FBNet", "MixNet",
    "MNASNet", "MobileNet", "Single-Path NAS", "TinyNet",
    "EfficientViT", "EVA", "FastViT", "FlexiViT",
    "FocalNet", "GCViT", "GhostNet", "GhostNet-V2",
    "gMLP", "HRNet", "InceptionNeXt", "Inception-V3",
    "Inception-ResNet-V2", "Lambda", "LeViT", "MaxViT",
    "MobileNet-V3", "LCNet", "MobileOne", "MobileViT",
    "MobileViT-V2", "MViT-V2", "NASNet-A", "NesT",
    "NFNet-F", "NF-RegNet", "PNasNet", "RegNet",
    "RegNetZ", "RepVGG", "RepGhostNet", "RepViT",
    "ResMLP", "ResNeXt", "SEResNet", "ResNet-RS",
    "Res2Net", "ResNeSt", "ReXNet", "Swin",
    "TResNet", "Twins", "Xception", "XCiT",
]
# fmt: on

In [None]:
# Normalise the display names towards timm's identifier style: lower-case,
# with neither spaces nor hyphens. timm model ids contain no hyphens, so a
# name such as "convnext-v2" would otherwise never match any model and the
# family would silently drop out of the comparison below.
# regex=False keeps both replacements literal on every pandas version.
cleaned_timm_model_list = (
    pd.Series(timm_model_list)
    .str.replace(" ", "", regex=False)
    .str.replace("-", "", regex=False)
    .str.lower()
)

In [None]:
# For each architecture family, keep the model with the smallest param_count
# among the timm models whose id contains the cleaned family name.
minimal_param_models_by_family = []

for family_pattern in cleaned_timm_model_list:
    family_rows = timm_model_db[timm_model_db["model"].str.contains(family_pattern)]

    # Families without any matching timm id are left out of the summary.
    if family_rows.empty:
        continue

    lightest = family_rows.sort_values(by="param_count").head(1)
    minimal_param_models_by_family.append(
        {
            # Despite the key name, this holds the id of the matched model.
            "model_family": lightest["model"].values[0],
            "minimal_param_model_count": lightest["param_count"].values[0],
        }
    )

minimal_param_models_by_family_df = pd.DataFrame(minimal_param_models_by_family)

# Bare expression so the notebook renders the table as the cell output.
minimal_param_models_by_family_df

# Finding the best architecture family based on the models with the fewest parameters


In [None]:
# timm checkpoint identifier of the backbone evaluated in this run; it is also
# reused as the logger's experiment name further down.
PRETRAINED_MODEL = "convnextv2_nano.fcmae_ft_in22k_in1k_384"

# Build the project's model wrapper around that checkpoint with a fixed
# learning rate of 0.001.
model = train_NN.PretrainedModel(
    pretrained_model_name=PRETRAINED_MODEL, learning_rate=0.001
)
# The data module receives the model as its first argument.
# NOTE(review): presumably to derive model-specific preprocessing, and the
# data directory is presumably created by the unzip step — confirm both.
datamodule = prepare_data.LCMSDataModule(
    model,
    data_dir=Path("/content/ST001618_Opium_study_LC_MS_500"),
)
# Project helper; presumably prints a layer/parameter summary — confirm.
train_NN.show_architecture(model)

In [None]:
# Hand the training dataloader to the project's inspection helper.
# NOTE(review): presumably displays a sample batch as a sanity check — confirm
# in prepare_data.inspect_dataloader.
prepare_data.inspect_dataloader(datamodule.train_dataloader())

In [None]:
# Log metrics as CSV under "logs", in an experiment named after the checkpoint.
logger = loggers.CSVLogger("logs", name=f"{PRETRAINED_MODEL}")

# Stop once val_loss has not improved for 10 consecutive validation checks.
# NOTE(review): with max_epochs=1 this callback presumably never triggers —
# confirm whether the single epoch is only meant as a smoke test.
early_stopping = callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=10,
)

trainer_ = trainer.Trainer(
    logger=logger,
    callbacks=[early_stopping],
    precision="16-mixed",
    max_epochs=1,
    log_every_n_steps=1,
)

# Optional, currently disabled: a learning-rate search could be run before
# fitting by wrapping the trainer in tuner.Tuner(trainer_) and calling
# lr_find(model, early_stop_threshold=None) on it.

trainer_.fit(model=model, datamodule=datamodule)

In [None]:
# Collect the experiment results through the project helper.
# NOTE(review): presumably parsed from the CSV logger output under logs/ —
# confirm in colab_functions.get_experiment_results.
results_df = colab_functions.get_experiment_results()
# Save the results to a CSV file without the index column.
results_df.to_csv("pretrained_model_results.csv", index=False)
# Plot the collected results with the project helper.
colab_functions.plot_experiment_results(results_df)

In [None]:
# Summarise the validation metrics of every experiment in a single table.
# NOTE(review): assumes `results_df` is in long format with "experiment",
# "variable" and "value" columns — confirm against
# colab_functions.get_experiment_results().

# Keep only the validation metrics (rows whose variable name contains "val").
# The results live in `results_df`; this notebook never defines a `df`, so
# referring to it raised a NameError.
filtered_df = results_df[results_df["variable"].str.contains("val")]

# Both summaries aggregate the same (metric, experiment) groups, so the
# grouping is built once and reused.
grouped_values = filtered_df.groupby(["variable", "experiment"])["value"]

# Every metric except the loss: keep the maximum reached per experiment.
result_df_max = (
    grouped_values.max()
    .reset_index()
    .query('variable != "val_loss"')
    .pivot(index="experiment", columns="variable", values="value")
    .round(3)
)

# The loss: keep the minimum reached per experiment.
result_df_min = (
    grouped_values.min()
    .reset_index()
    .query('variable == "val_loss"')
    .pivot(index="experiment", columns="variable", values="value")
    .round(3)
)

# Concatenate the two tables side by side, lowest validation loss first.
# Bare expression so the notebook renders it as the cell output.
pd.concat([result_df_max, result_df_min], axis="columns").sort_values(by="val_loss")