### **Example Notebook for pretraining: FASTSIAM**

In [1]:
import lightning as L
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchmetrics import JaccardIndex
from matplotlib import pyplot as plt
from pathlib import Path
import os
from lightning.pytorch.callbacks import ModelCheckpoint

from minerva.data.datasets.supervised_dataset import SupervisedReconstructionDataset
from minerva.data.readers.png_reader import PNGReader
from minerva.data.readers.tiff_reader import TiffReader
from minerva.models.ssl.fastsiam import FastSiam  # Import FastSiam from Minerva
from minerva.transforms.transform import _Transform
from minerva.pipelines.lightning_pipeline import SimpleLightningPipeline
from minerva.data.data_modules.parihaka import ParihakaDataModule
from minerva.transforms.transform import Padding
from minerva.models.nets.image.deeplabv3 import DeepLabV3Backbone



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Resolve the dataset location. The root directory can be overridden through
# the MINERVA_SEAM_AI_ROOT environment variable so the notebook is not tied to
# one machine's directory layout; when the variable is unset, the defaults
# below reproduce the original hard-coded paths exactly.
seam_ai_root = os.environ.get(
    "MINERVA_SEAM_AI_ROOT",
    "/workspaces/Minerva-Dev/shared_data/seam_ai_datasets/seam_ai",
).rstrip("/")  # drop a trailing slash so the joins below never double it

# Directories holding the training images and their annotations
# (the trailing slash is kept, matching the original values).
train_path = f"{seam_ai_root}/images/"
annotation_path = f"{seam_ai_root}/annotations/"

In [3]:
# Define a padding transformation to ensure consistent image sizes
# (constructed as Padding(1006, 590)).
# NOTE(review): `transform` is not referenced by any later cell visible in this
# notebook -- the data module is created with train_transforms=None -- so this
# padding is presumably never applied. Confirm whether it should be passed as
# `train_transforms` or whether this cell can be removed.
transform = Padding(1006, 590)

In [4]:
# Gather the Parihaka data-module settings in one place, then build the module
# that handles data loading and splitting. No training transforms are supplied
# (train_transforms=None) and each batch holds 2 samples.
parihaka_settings = dict(
    root_data_dir=train_path,
    root_annotation_dir=annotation_path,
    train_transforms=None,
    batch_size=2,
)
data_module = ParihakaDataModule(**parihaka_settings)

In [5]:
# DeepLabV3 backbone (default arguments) used for feature extraction
backbone = DeepLabV3Backbone()

# Hyper-parameters of the FastSiam self-supervised model, kept apart from the
# constructor call so they are easy to find and tweak.
fastsiam_hparams = {
    "in_dim": 2048,     # Dimensionality of the input of the projection head
    "hid_dim": 2048,    # Dimensionality of the hidden layer in the MLP
    "out_dim": 2048,    # Dimensionality of the output of the prediction head
    "lr": 1e-3,         # Learning rate for the optimizer
    "num_classes": 6,   # Number of classes
}

# Build the FastSiam model on top of the DeepLabV3 backbone
fastsiam_model = FastSiam(backbone=backbone, **fastsiam_hparams)

In [6]:
# Checkpoint destination: target directory and base file name
ckpt_save_path = "/workspaces/Minerva-Dev/checkpoints/"
model_name = "fastsiam"

# Checkpoint callback that tracks "val_loss" and keeps only the single
# checkpoint with the lowest value.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",          # Metric used to rank checkpoints
    mode="min",                  # Lower monitored value is better
    save_top_k=1,                # Keep only the best checkpoint
    dirpath=ckpt_save_path,      # Directory the checkpoint is written to
    filename=model_name,         # Base file name of the checkpoint
    save_weights_only=False,     # Save the entire model, not just weights
)

In [7]:
# Lightning Trainer: two epochs on a single GPU, with checkpointing enabled
trainer = L.Trainer(
    callbacks=[checkpoint_callback],  # Checkpoint callback defined above
    accelerator="gpu",                # Train on GPU
    devices=1,                        # Use one GPU
    max_epochs=2,                     # Stop after two epochs
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [8]:
# Wrap the FastSiam model and the trainer in a pipeline used for both the
# training and the testing runs; save_run_status=True stores the run status.
pipeline = SimpleLightningPipeline(
    trainer=trainer,
    model=fastsiam_model,
    save_run_status=True,
)

/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:42: No seed found, seed set to 0
Seed set to 0


Log directory set to: /workspaces/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/lightning_logs/version_289


In [9]:
# Launch the training ("fit") task on the Parihaka data module
pipeline.run(
    task="fit",
    data=data_module,
)

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /workspaces/Minerva-Dev/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Pipeline info saved at: /workspaces/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/lightning_logs/version_289/run_2024-12-21-17-29-49bfc5004ea2164c468dc01df68124fcb4.yaml



  | Name                        | Type              | Params | Mode 
--------------------------------------------------------------------------
0 | backbone                    | DeepLabV3Backbone | 25.6 M | train
1 | prediction_branch_projector | SimSiamMLPHead    | 2.1 M  | train
2 | prediction_branch_predictor | SimSiamMLPHead    | 8.4 M  | train
3 | target_branch_backbone      | DeepLabV3Backbone | 25.6 M | train
4 | target_branch_projector     | SimSiamMLPHead    | 8.4 M  | train
5 | global_avg_pool             | AdaptiveAvgPool2d | 0      | train
--------------------------------------------------------------------------
70.0 M    Trainable params
0         Non-trainable params
70.0 M    Total params
280.034   Total estimated model params size (MB)
320       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


                                                                           

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Epoch 1: 100%|██████████| 560/560 [10:42<00:00,  0.87it/s, v_num=289, train_loss_step=-.948, val_loss_step=-.547, val_loss_epoch=-.745, train_loss_epoch=-.884]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 560/560 [10:42<00:00,  0.87it/s, v_num=289, train_loss_step=-.948, val_loss_step=-.547, val_loss_epoch=-.745, train_loss_epoch=-.884]
Pipeline info saved at: /workspaces/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/lightning_logs/version_289/run_2024-12-21-17-29-49bfc5004ea2164c468dc01df68124fcb4.yaml


In [10]:
# Load and test from the best checkpoint recorded by the checkpoint callback.
# The file name is not hard-coded: when "fastsiam.ckpt" already exists in the
# checkpoint directory, ModelCheckpoint writes the new file with a version
# suffix (e.g. "fastsiam-v1.ckpt"), so a fixed path would silently load a stale
# checkpoint from an earlier run. If no checkpoint was saved in this session
# (best_model_path is an empty string), fall back to the conventional location;
# os.path.join also avoids the doubled slash after the directory's trailing "/".
best_ckpt_path = checkpoint_callback.best_model_path or os.path.join(
    ckpt_save_path, f"{model_name}.ckpt"
)
pipeline.run(data=data_module, task="test", ckpt_path=best_ckpt_path)

Restoring states from the checkpoint path at /workspaces/Minerva-Dev/checkpoints//fastsiam.ckpt


Pipeline info saved at: /workspaces/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/lightning_logs/version_289/run_2024-12-21-17-29-49bfc5004ea2164c468dc01df68124fcb4.yaml


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Loaded model weights from the checkpoint at /workspaces/Minerva-Dev/checkpoints//fastsiam.ckpt
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 100/100 [01:07<00:00,  1.47it/s]


Pipeline info saved at: /workspaces/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/lightning_logs/version_289/run_2024-12-21-17-29-49bfc5004ea2164c468dc01df68124fcb4.yaml


[{'test_loss_epoch': -0.8782209753990173}]