# Mount drive and append path to PYTHONPATH


In [None]:
import os
import sys

from google.colab import drive, files, runtime

# Mount Google Drive so the files stored under MyDrive are reachable from the runtime.
drive.mount("/content/drive")

# Put the project folder on the module search path (presumably where the helper
# modules imported later in the notebook live — confirm). The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
project_dir = "/content/drive/MyDrive/DeepLCMS/train_google_colab"
if project_dir not in sys.path:
    sys.path.append(project_dir)

# Import and install libraries

In [None]:
%%capture
!pip install lightning
!pip install timm
!pip install torchinfo
!pip install scikit-posthocs
!pip install optuna
!pip install torchcam

In [None]:
import colab_functions
import colab_utils
import pandas as pd
import prepare_data
import timm
import train_NN
from lightning.pytorch import loggers, callbacks, tuner, trainer, LightningModule

from pathlib import Path

In [None]:
# Expose only GPU index 0 to CUDA-aware libraries via the environment.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Unzip data

In [None]:
!unzip -q "/content/drive/MyDrive/DeepLCMS/ST001618_Opium_study_LC_MS_500_augmented.zip"

In [None]:
# Location of the image dataset, wrapped in a Path for later use by the data module.
dataset_root = "/content/ST001618_Opium_study_LC_MS_500_augmented"
img_folder = Path(dataset_root)

# Check if GPU is used

In [None]:
# Ask the project helper for the compute device; the result is passed to
# colab_functions.plot_activation further down.
# NOTE(review): presumably resolves to a CUDA device when a GPU runtime is attached —
# confirm in colab_functions.get_device.
device = colab_functions.get_device()

# Final training with optimized settings

In [None]:
# Instantiate the network with its default settings.
model = train_NN.PretrainedModel()

# Augmentation settings forwarded to the data module.
augmentation_kwargs = dict(re_prob=0.2, color_jitter=0.2)

# The data module receives the model itself plus the image folder.
datamodule = prepare_data.LCMSDataModule(
    model,
    data_dir=img_folder,
    **augmentation_kwargs,
)

# Display the model architecture summary.
model.show_architecture()

In [None]:
# CSV logger rooted at "logs" with the experiment name "final_training".
logger = loggers.CSVLogger("logs", name="final_training")

# Keep only the single best checkpoint, ranked by the lowest validation loss.
checkpoint_callback = callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

# Early stopping on validation loss with a patience of 10.
early_stopping = callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=10,
)

trainer_ = trainer.Trainer(
    logger=logger,
    callbacks=[early_stopping, checkpoint_callback],
    enable_checkpointing=True,
    precision="16-mixed",
    max_epochs=50,
    log_every_n_steps=1,
)

# Optional learning-rate search, currently disabled. Note that `best_lr` is not
# consumed anywhere below even when this is re-enabled.
# tuner_ = tuner.Tuner(trainer_)
# lr_finder = tuner_.lr_find(model, datamodule=datamodule, early_stop_threshold=None)
# best_lr = lr_finder.suggestion()

# Run the training loop.
trainer_.fit(model=model, datamodule=datamodule)

Optimal learning rate found: 0.006918309709189364.




In [None]:
# Collect the logged experiment metrics via the project helper.
results_df = colab_functions.get_experiment_results()

# Save a copy to disk (without the index column) before plotting.
results_csv = "optimized_model.csv"
results_df.to_csv(results_csv, index=False)

# Plot the collected metrics.
colab_functions.plot_experiment_results(results_df)

The best epoch, in terms of validation loss, was #28 (0.123633).

In [None]:
# Restrict to rows whose variable name contains "val".
val_rows = results_df.query("variable.str.contains('val')")

# For every variable keep the row with the smallest value.
lowest_per_variable = val_rows.sort_values(by="value").groupby("variable").head(1)

# Show only the validation-loss entry.
lowest_per_variable.query("variable.str.contains('val_loss')")

In [None]:
# Look up the epoch with the lowest validation loss instead of hard-coding it, so the
# displayed metrics always belong to the best epoch, even after a re-training run.
best_epoch = (
    results_df.query("variable.str.contains('val_loss')")
    .sort_values(by="value")["epoch"]
    .iloc[0]
)

# All validation metrics logged for that epoch.
results_df.query("epoch == @best_epoch and variable.str.contains('val')")

The corresponding metrics:

* val_loss : 0.123633
* val_f1 : 0.982043
* val_precision : 1.0
* val_accuracy : 0.981481
* val_recall : 0.965142

In [None]:
# Report the best checkpoint's path and its monitored score, one per line.
print(
    checkpoint_callback.best_model_path,
    checkpoint_callback.best_model_score,
    sep="\n",
)

In [None]:
# Path of the best checkpoint recorded by the checkpoint callback.
best_ckpt_path = checkpoint_callback.best_model_path
print(best_ckpt_path)
# Score of the monitored metric for that checkpoint.
print(checkpoint_callback.best_model_score)

# Restore the model from the best checkpoint.
best_model = train_NN.PretrainedModel.load_from_checkpoint(best_ckpt_path)

# Evaluate the test set


In [None]:
# Run the restored model over the test dataloader and keep the raw outputs.
test_loader = datamodule.test_dataloader()
predictions = trainer_.predict(best_model, test_loader)

As you can see, our model performs exceptionally well:
Accuracy: 0.90 | F1: 0.93 | Precision: 0.86 | Recall: 1.00


In [None]:
# Score the predictions against the test dataloader using the project helper.
evaluation_loader = datamodule.test_dataloader()
colab_functions.evaluate_predictions(
    logits=predictions,
    test_dataloader=evaluation_loader,
)

In [None]:
# Inspect the individual predictions for the test dataloader using the project helper.
inspection_loader = datamodule.test_dataloader()
colab_functions.inspect_predictions(
    logits=predictions,
    test_dataloader=inspection_loader,
)

# Visualize spatial importance of features via TorchCAM


In [None]:
# Reload the best checkpoint with the same class that was trained and checkpointed in
# this notebook (train_NN.PretrainedModel). Loading it through a different class
# would not match the saved model.
best_model = train_NN.PretrainedModel.load_from_checkpoint(
    checkpoint_callback.best_model_path
)

In [None]:
# Names of the model layers handed to the activation-plot helper.
cam_layers = ["model.norm_pre", "model.stages", "model.head"]

# Plot activations for the test dataloader with save=True.
colab_functions.plot_activation(
    datamodule.test_dataloader(),
    model=best_model,
    device=device,
    layers=cam_layers,
    save=True,
)