# Fine-tuning DinoV2-PUP on Parihaka Dataset

## Imports

In [1]:
from common import get_data_module, get_trainer_pipeline
import torch
from minerva.models.ssl.byol import (
    BYOL
)
from functools import partial
import os

## Variables

In [2]:
# Dataset locations, given relative to the notebook's working directory.
# Absolute-path alternatives, kept for reference:
# root_data_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/images"
# root_annotation_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/annotations"

root_data_dir = "../../../../../../../shared_data/seam_ai_datasets/seam_ai/images"
root_annotation_dir = "../../../../../../../shared_data/seam_ai_datasets/seam_ai/annotations"

# Quick check that both directories are reachable (each line prints True or False).
print(os.path.exists(root_data_dir))
print(os.path.exists(root_annotation_dir))



img_size = (1008, 784)          # Image size handed to the data module; change it to match the images in the dataset
model_name = "byol"             # Model name (identifier only)
dataset_name = "seam_ai"        # Dataset name (identifier only)
single_channel = False          # If True, train with single-channel images instead of 3 channels

log_dir = "./logs"              # Directory where logs are saved
batch_size = 8                  # Batch size
seed = 42                       # Seed for reproducibility
num_epochs = 75                 # Number of epochs to train
is_debug = True                 # If True, only 3 batches are processed for 3 epochs
accelerator = "gpu"             # Accelerator type: CPU or GPU
devices = 1                     # Number of GPUs

True
True


## Data Module

In [3]:
# Build the data module with the helper imported from `common`, forwarding the
# dataset paths and loader settings defined in the configuration cell.
data_module = get_data_module(
    root_data_dir=root_data_dir,
    root_annotation_dir=root_annotation_dir,
    img_size=img_size,
    batch_size=batch_size,
    seed=seed,
    single_channel=single_channel
)

# Bare expression: the notebook displays the data module's representation.
data_module

DataModule
    Data: ../../../../../../../shared_data/seam_ai_datasets/seam_ai/images
    Annotations: ../../../../../../../shared_data/seam_ai_datasets/seam_ai/annotations
    Batch size: 8

In [4]:
# Sanity check that the data module works: set it up for the "fit" stage,
# pull the first batch from the training dataloader, and display the shapes
# of the input and target tensors.
data_module.setup("fit")
train_batch_x, train_batch_y = next(iter(data_module.train_dataloader()))
train_batch_x.shape, train_batch_y.shape

(torch.Size([8, 3, 1008, 784]), torch.Size([8, 1, 1008, 784]))

## **** Create and Load model HERE ****

In [5]:
import torchvision.models.segmentation as models
from minerva.models.nets.image.deeplabv3 import DeepLabV3Backbone
import os 
from torch import nn

# Checkpoint holding the state dict for the backbone.
# Absolute-path alternative, kept for reference:
# wheights_path = "/workspaces/HIAAC-KR-Dev-Container/shared_data/notebooks_e_pesos/V1/V1_E300_B32_S256_both_N.pth"
wheights_path = "../../../../../../../shared_data/notebooks_e_pesos/V1/V1_E300_B32_S256_seam_ai.pth"
print(os.path.exists(wheights_path))

# weights_backbone=None: torchvision would otherwise fetch ImageNet ResNet-50
# weights for the backbone, which are fully overwritten by the checkpoint
# loaded below. Skipping them avoids a needless network download.
backbone = models.deeplabv3_resnet50(weights_backbone=None).backbone
# backbone = DeepLabV3Backbone()

# map_location="cpu": the checkpoint loads even on a machine without the device
# it was saved from; the trainer moves the model to the accelerator later.
# weights_only=True: restrict unpickling to tensors/containers so that loading
# the file cannot execute arbitrary code.
backbone.load_state_dict(
    torch.load(wheights_path, map_location="cpu", weights_only=True)
)

  from .autonotebook import tqdm as notebook_tqdm


True


<All keys matched successfully>

In [6]:
@torch.no_grad()
def deactivate_requires_grad(model: nn.Module):
    """Freeze ``model`` by switching off gradient tracking on each of its parameters.

    The parameters are modified in place; nothing is returned.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)

In [7]:
import lightning as L
from collections import OrderedDict
from torchmetrics import JaccardIndex, F1Score
from torch.nn import functional as F

class SegmentationModel(L.LightningModule):
    """Segmentation model for the downstream task.

    Combines a backbone with a prediction head and bilinearly resizes the
    head's output back to the spatial size of the input.

    Args:
        num_classes: Number of classes used to configure the IoU and F1 metrics.
        backbone: Module that extracts features from the input. It may return
            a tensor or an ``OrderedDict``; in the latter case the ``"out"``
            entry is used.
        pred_head: Module applied to the backbone features to produce the
            per-class scores.
        loss_fn: Callable invoked as ``loss_fn(logits, target)``. Defaults to
            ``torch.nn.CrossEntropyLoss()`` when ``None``.
        learning_rate: Learning rate passed to the Adam optimizer.
        freeze_backbone: If True, ``requires_grad`` is switched off for every
            backbone parameter.
    """

    def __init__(
        self,
        num_classes=6,
        backbone=None,
        pred_head=None,
        loss_fn=None,
        learning_rate=0.001,
        freeze_backbone=False,
    ):
        super().__init__()
        self.backbone = backbone
        self.pred_head = pred_head
        # Explicit None check: module truthiness can be misleading (container
        # modules define __len__, so an empty one would evaluate as False).
        self.loss_fn = loss_fn if loss_fn is not None else torch.nn.CrossEntropyLoss()
        self.lr = learning_rate

        self.IoU = JaccardIndex(num_classes=num_classes, task='multiclass')
        self.F1 = F1Score(num_classes=num_classes, task='multiclass')

        self.freeze_backbone = freeze_backbone
        if self.freeze_backbone:
            deactivate_requires_grad(self.backbone)

    def forward(self, x):
        """Return per-class scores resized to the spatial size of ``x``."""
        input_shape = x.shape[-2:]  # Spatial size to restore after the head
        features = self.backbone(x)
        if isinstance(features, OrderedDict):
            # Backbones that return several named feature maps: use "out".
            features = features['out']
        x = self.pred_head(features)
        return F.interpolate(x, size=input_shape, mode="bilinear", align_corners=False)

    def _shared_step(self, batch, stage):
        """Compute and log loss, IoU and F1 for one batch; return the loss.

        ``stage`` is the prefix of the logged keys ("train", "val" or "test").
        """
        X, y = batch
        logits = self(X)

        # Drop dim 1 of the target and cast to integer class indices.
        target = y.squeeze(1).to(torch.long)
        loss = self.loss_fn(logits, target)

        # Give both metrics the same hard class predictions in every stage.
        preds = logits.argmax(dim=1)
        iou = self.IoU(preds, target)
        f1 = self.F1(preds, target)

        self.log(f"{stage}_loss", loss, sync_dist=True)
        self.log(f"{stage}_IoU", iou, sync_dist=True)
        self.log(f"{stage}_F1", f1, sync_dist=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Log ``train_loss``, ``train_IoU`` and ``train_F1``; return the loss."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss``, ``val_IoU`` and ``val_F1``; return the loss."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss``, ``test_IoU`` and ``test_F1``; return the loss."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with weight decay 0.0005."""
        optimizer = torch.optim.Adam(params=self.parameters(), lr=self.lr, weight_decay=0.0005)
        return optimizer

In [8]:
from minerva.models.nets.image.deeplabv3 import DeepLabV3, DeepLabV3PredictionHead

# Single source of truth for the class count: the prediction head and the
# model's metrics must be configured with the same number of classes.
num_classes = 6

pred_head = DeepLabV3PredictionHead(num_classes=num_classes)

# Combine the backbone loaded earlier with the freshly created prediction head.
model = SegmentationModel(
    backbone = backbone,
    pred_head = pred_head,
    loss_fn = torch.nn.CrossEntropyLoss(),
    learning_rate = 0.001,
    num_classes = num_classes,
)

# Bare expression: the notebook displays the module structure.
model

SegmentationModel(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsam

In [9]:
# ckpt_file = "/workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/f3/dinov2_vits14_pretrain.pth"
# ckpt = torch.load(ckpt_file, map_location="cpu")


# Only the backbone is used, together with a randomly initialized prediction head, so there is no need for this step.

## Pipeline

In [10]:
# Build the training pipeline with the helper imported from `common`,
# forwarding the model and the run settings from the configuration cell.
pipeline = get_trainer_pipeline(
    model=model,
    model_name=model_name,
    dataset_name=dataset_name,
    log_dir=log_dir,
    num_epochs=num_epochs,
    accelerator=accelerator,
    devices=devices,
    is_debug=is_debug,
    seed=seed,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


Log directory set to: /workspaces/workspace/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/byol/seam_ai


In [None]:
# Run the pipeline's "fit" task on the data module.
pipeline.run(data_module, task="fit")

/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:269: Experiment logs directory ./logs/byol/seam_ai exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory ./logs/byol/seam_ai/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Pipeline info saved at: /workspaces/workspace/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/byol/seam_ai/run_2024-12-12-21-00-139a6e588eea444d3aa3fbce3ec9b55962.yaml



  | Name      | Type                    | Params | Mode 
--------------------------------------------------------------
0 | backbone  | IntermediateLayerGetter | 23.5 M | train
1 | pred_head | DeepLabV3PredictionHead | 16.1 M | train
2 | loss_fn   | CrossEntropyLoss        | 0      | train
3 | IoU       | MulticlassJaccardIndex  | 0      | train
4 | F1        | MulticlassF1Score       | 0      | train
--------------------------------------------------------------
39.6 M    Trainable params
0         Non-trainable params
39.6 M    Total params
158.540   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


                                                                           

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 2: 100%|██████████| 3/3 [00:04<00:00,  0.63it/s, v_num=m_ai]

In [None]:
# Report the path held in the checkpoint callback's `last_model_path`.
print(f"Checkpoint saved at {pipeline.trainer.checkpoint_callback.last_model_path}")

Checkpoint saved at ./logs/dinov2_pup/seam_ai/checkpoints/last-v2.ckpt


: 