# Mount drive and append path to PYTHONPATH


In [None]:
import os
import sys

from google.colab import drive, files, runtime

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount("/content/drive")

# Extend the module search path with the project folder on Drive
# (presumably where the helper modules imported further down live - confirm).
sys.path += ["/content/drive/MyDrive/DeepLCMS/train_google_colab"]

# Import and install libraries

In [None]:
%%capture
!pip install lightning
!pip install timm
!pip install torchinfo
!pip install scikit-posthocs
!pip install optuna
!pip install torchcam

In [None]:
import colab_functions
import colab_utils
import pandas as pd
import prepare_data
import timm
import train_NN
from lightning.pytorch import loggers, callbacks, tuner, trainer, LightningModule

from pathlib import Path

In [None]:
# Restrict CUDA-based libraries in this process to the GPU with index 0.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

# Unzip data

In [None]:
!unzip -q "/content/drive/MyDrive/DeepLCMS/ST001618_Opium_study_LC_MS_500_augmented.zip"

In [None]:
# Image folder that is later handed to the data module as `data_dir`.
img_folder = Path("/content") / "ST001618_Opium_study_LC_MS_500_augmented"

# Check if GPU is used

In [None]:
# Ask the project helper for the compute device and keep it in `device`
# (presumably reports whether a GPU is available - confirm in colab_functions).
device = colab_functions.get_device()

# Final training with optimized settings

In [None]:
# Instantiate the network first: the data module takes the model as its first
# argument, together with the folder holding the images.
model = train_NN.PretrainedModel()
datamodule = prepare_data.LCMSDataModule(model, data_dir=img_folder)

# Call the model's own architecture-summary helper.
model.show_architecture()

In [None]:
# Metrics are written as CSV under logs/final_training.
logger = loggers.CSVLogger(save_dir="logs", name="final_training")

# Keep only the single checkpoint with the lowest validation loss.
checkpoint_callback = callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

trainer_ = trainer.Trainer(
    logger=logger,
    max_epochs=50,
    precision="16-mixed",
    log_every_n_steps=1,
    enable_checkpointing=True,
    callbacks=[
        # Stop early once val_loss has failed to improve for 10 checks.
        callbacks.EarlyStopping(patience=10, monitor="val_loss", mode="min"),
        checkpoint_callback,
    ],
)

# Learning-rate search, left disabled for the final run:
# tuner_ = tuner.Tuner(trainer_)
# lr_finder = tuner_.lr_find(model, datamodule=datamodule, early_stop_threshold=None)
# best_lr = lr_finder.suggestion()

trainer_.fit(model, datamodule=datamodule)

Optimal learning rate found: 0.006918309709189364




In [None]:
# Collect the logged experiment results through the project helper.
results_df = colab_functions.get_experiment_results()
# Persist them next to the notebook (without the index column).
results_df.to_csv("optimized_model.csv", index=False)
# Plot the collected results with the project's plotting helper.
colab_functions.plot_experiment_results(results_df)

The best epoch, in terms of validation loss, was #28 (0.123633).

In [None]:
# For every validation metric take the row with the smallest value, then keep
# only the entry for the validation loss.
(
    results_df.query("variable.str.contains('val')")
    .sort_values(by="value")
    .groupby("variable")
    .head(1)
    .query("variable.str.contains('val_loss')")
)

In [None]:
# Look up the epoch with the lowest validation loss instead of hard-coding its
# number, so this cell stays correct whenever training is re-run.
best_epoch = (
    results_df.query("variable.str.contains('val_loss')")
    .sort_values(by="value")["epoch"]
    .iloc[0]
)

# All validation metrics recorded for that epoch.
results_df.query("epoch == @best_epoch and variable.str.contains('val')")

The corresponding metrics:

* val_loss : 0.123633
* val_f1 : 0.982043
* val_precision : 1.0
* val_accuracy : 0.981481
* val_recall : 0.965142

In [None]:
print(checkpoint_callback.best_model_path)  # prints path to the best model's checkpoint
print(checkpoint_callback.best_model_score)  # and prints its score (the monitored val_loss)

In [None]:
# Report where the best checkpoint lives and the score it achieved.
print(
    checkpoint_callback.best_model_path,
    checkpoint_callback.best_model_score,
    sep="\n",
)

# Restore the best checkpoint into a PretrainedModel instance.
best_model = train_NN.PretrainedModel.load_from_checkpoint(
    checkpoint_path=checkpoint_callback.best_model_path
)

# Evaluate the test set


In [None]:
# Run inference on the test dataloader with the restored best model.
predictions = trainer_.predict(
    model=best_model,
    dataloaders=datamodule.test_dataloader(),
)

As you can see, our model performs exceptionally well:
Accuracy: 0.90 | F1: 0.93 | Precision: 0.86 | Recall: 1.00


In [None]:
# Hand the predicted logits and the test dataloader to the project's
# evaluation helper.
colab_functions.evaluate_predictions(
    test_dataloader=datamodule.test_dataloader(),
    logits=predictions,
)

# Random predictions

As demonstrated below, scoring uniformly random predictions against labels that match the class distribution of our test set (8 negatives, 22 positives) results in:

* Accuracy of 0.50
* F1 score of 0.59
* Precision of 0.73
* Recall of 0.50

indicating that our model significantly outperforms random guessing.


In [None]:
from sklearn import metrics
import numpy as np

RANGE = 1000  # number of random trials to average over
SEED = 42  # fixed seed so the baseline is identical on every run
N_NEGATIVE, N_POSITIVE = 8, 22  # class counts of the simulated test labels

rng = np.random.default_rng(SEED)

# The ground truth is the same in every trial, so build it once outside the loop.
true = np.concatenate([np.zeros(N_NEGATIVE), np.ones(N_POSITIVE)])

results = {"Accuracy": [], "Precision": [], "Recall": [], "F1": []}

for _ in range(RANGE):
    # Uniformly random 0/1 guess for every sample.
    predicted = rng.integers(0, 2, size=true.size)

    # zero_division=0 keeps the (very unlikely) trial without any positive
    # prediction from emitting an undefined-metric warning; the score is 0.
    scores = {
        "Accuracy": metrics.accuracy_score(true, predicted),
        "Precision": metrics.precision_score(true, predicted, zero_division=0),
        "Recall": metrics.recall_score(true, predicted, zero_division=0),
        "F1": metrics.f1_score(true, predicted, zero_division=0),
    }

    for key, value in scores.items():
        results[key].append(value)

# Average every metric over all trials.
mean_results = {key: np.mean(values) for key, values in results.items()}

print(
    f"Accuracy: {mean_results['Accuracy']:.2f}, "
    f"F1: {mean_results['F1']:.2f}, "
    f"Precision: {mean_results['Precision']:.2f}, "
    f"Recall: {mean_results['Recall']:.2f}"
)

In [None]:
# Hand the predicted logits and the test dataloader to the project's
# inspection helper.
colab_functions.inspect_predictions(
    test_dataloader=datamodule.test_dataloader(),
    logits=predictions,
)

# Visualize spatial importance of features via TorchCam


In [None]:
# Reload the best checkpoint with the same class that was trained and
# checkpointed earlier in this notebook (train_NN.PretrainedModel).
best_model = train_NN.PretrainedModel.load_from_checkpoint(
    checkpoint_callback.best_model_path
)

In [None]:
# Visualise the activations of the best checkpoint (`best_model`) rather than
# the last-epoch weights still held by `model`.
colab_functions.plot_activation(
    datamodule.test_dataloader(), device="cuda", model=best_model, save=False
)

In [None]:
import colab_utils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import torchmetrics
import torchvision
from PIL import Image
from torchcam.methods import LayerCAM
from torchcam.utils import overlay_mask
from torchvision.io.image import read_image
from torchvision.transforms.functional import to_pil_image


def plot_activation(
    dataloader,
    device: str,
    model,
    save: bool = True,
):
    """
    Generate a 3x3 grid of images with Class Activation Maps (CAM) and optionally save the plot.

    One batch is drawn from ``dataloader``; nine images are sampled from that
    batch (with replacement), passed through ``model`` one at a time, and the
    LayerCAM activation map for the predicted class is overlaid on each image.

    Parameters:
    - dataloader (Any): The DataLoader containing the images and labels. It must
      yield ``(images, labels)`` batches.
    - device (Any): The device on which the model should run (e.g., 'cuda' or 'cpu').
      Both the model and the batch are moved to this device.
    - model (Any): The neural network model.
    - save (bool, optional): Whether to save the plot as 'plot_activation.png'. Default is True.

    Side effects:
    - ``model`` is moved to ``device`` and ``requires_grad`` is switched on for
      all of its parameters (LayerCAM needs gradients).
    - The model is put into eval mode while plotting; its previous
      training/eval mode is restored afterwards.

    Example:
    ```python
    from torchvision import models, transforms
    from torch.utils.data import DataLoader
    from your_dataset_module import YourDataset  # Replace 'your_dataset_module' with the actual module name

    # Assuming you have a DataLoader named 'your_dataloader' and a device 'cuda'
    your_dataloader = DataLoader(YourDataset(...), batch_size=32, shuffle=True)
    your_model = models.resnet50(pretrained=True)

    plot_activation(your_dataloader, 'cuda', your_model, save=True)
    ```
    """
    rows, cols = 3, 3

    # The batch is moved to `device` below, so the model has to live there too.
    model = model.to(device)
    was_training = model.training
    # Inference mode: no dropout, and batch-norm uses its running statistics
    # (single-image batches are fed to the model below).
    model.eval()

    for param in model.parameters():
        param.requires_grad = True

    images, labels = next(iter(dataloader))
    images, labels = images.to(device), labels.to(device)

    # Create a 3x3 grid for displaying images
    fig = plt.figure(figsize=(12, 12))

    # Register the CAM hooks once; they are always removed again in `finally`.
    cam_extractor = LayerCAM(model)
    try:
        for i in range(1, rows * cols + 1):
            ax = fig.add_subplot(rows, cols, i)

            # Choose a random index *within the loaded batch*. The dataset can be
            # larger than one batch, so its length must not be used here.
            random_index = np.random.randint(0, len(images))
            image = images[random_index]
            label = labels[random_index]

            # Feed a single-image batch to the model
            out = model(image.unsqueeze(0))
            # Retrieve the CAMs by passing the predicted class index and the model output
            cams = cam_extractor(out.squeeze(0).argmax().item(), out)

            # The extractor returns a list of maps (one per hooked layer), each with
            # a leading batch dimension: take the first map and drop that dimension.
            cam = cams[0].squeeze(0).detach().cpu()

            result = overlay_mask(
                to_pil_image(image.detach().cpu()),
                to_pil_image(cam, mode="F"),
                alpha=0.5,
            )
            ax.imshow(result)
            # Show a plain value instead of a tensor repr in the title.
            label_text = label.item() if label.numel() == 1 else label.tolist()
            ax.set_title(f"Class: {label_text}")
            ax.axis("off")
    finally:
        cam_extractor.remove_hooks()
        model.train(was_training)

    if save:
        fig.savefig("plot_activation.png", bbox_inches="tight", dpi=300)

    plt.show()  # Display the plot

In [None]:
# Visualise the activations of the best checkpoint (`best_model`) rather than
# the last-epoch weights still held by `model`.
plot_activation(
    datamodule.test_dataloader(), device="cuda", model=best_model, save=False
)

In [None]:
# Bare expression: the notebook renders the model object's repr as cell output.
model