# Fine-tuning SAM on the Parihaka Dataset

## Imports

In [1]:
from common import get_data_module, get_trainer_pipeline
import torch
from torchmetrics import JaccardIndex
from minerva.models.nets.image.sam import Sam
from functools import partial

  from .autonotebook import tqdm as notebook_tqdm


## Variables

In [2]:
# --- Paths ---------------------------------------------------------------
# Single root for all shared artifacts: relocating the data or the weights
# only requires editing `shared_data_dir`.
shared_data_dir = "/workspaces/Minerva-Discovery/shared_data"
seam_ai_dir = f"{shared_data_dir}/seam_ai_datasets/seam_ai"

root_data_dir = f"{seam_ai_dir}/images"
root_annotation_dir = f"{seam_ai_dir}/annotations"

# Checkpoint file handed to the model constructor.
checkpoint = f"{shared_data_dir}/weights_sam/checkpoints_sam/sam_vit_b_01ec64.pth"
vit_model = "vit-b"

# --- Data / identifiers --------------------------------------------------
img_size = (1008, 784)          # Change this to the size of the images in the dataset
model_name = "sam_vit_b"        # Model name (identifier only; used in the log path)
dataset_name = "seam_ai"        # Dataset name (identifier only; used in the log path)
single_channel = True           # If True, train with single-channel images (instead of 3 channels)

# --- Training ------------------------------------------------------------
log_dir = "./logs"              # Directory to save logs
batch_size = 1                  # Batch size
seed = 42                       # Seed for reproducibility
num_epochs = 100                # Number of epochs to train (not reached while is_debug is True)
is_debug = True                 # If True, only 3 batches are processed for 3 epochs
accelerator = "gpu"             # "cpu" or "gpu"
devices = 1                     # Number of devices (GPUs) to use

# --- Segmentation head ---------------------------------------------------
num_classes = 6                 # Number of classes
multimask_output = True         # If True, return num_classes masks. Else, return only the best mask.

## Data Module

In [3]:
# Gather the data-module settings (all taken from the configuration cell)
# in one place, then build the module from them.
data_module_kwargs = dict(
    root_data_dir=root_data_dir,
    root_annotation_dir=root_annotation_dir,
    img_size=img_size,
    batch_size=batch_size,
    seed=seed,
    single_channel=single_channel,
)
data_module = get_data_module(**data_module_kwargs)

# Display the module's summary as the cell output.
data_module

DataModule
    Data: /workspaces/Minerva-Discovery/shared_data/seam_ai_datasets/seam_ai/images
    Annotations: /workspaces/Minerva-Discovery/shared_data/seam_ai_datasets/seam_ai/annotations
    Batch size: 1

In [4]:
# Smoke test: prepare the "fit" stage, pull a single training batch and
# show the shapes of its input and target tensors.
data_module.setup("fit")
train_loader = data_module.train_dataloader()
first_batch = next(iter(train_loader))
train_batch_x, train_batch_y = first_batch
train_batch_x.shape, train_batch_y.shape

(torch.Size([1, 1, 1008, 784]), torch.Size([1, 1, 1008, 784]))

## **** Create and Load model HERE ****

In [5]:
# One separate multiclass JaccardIndex instance per stage (train/val/test),
# each logged under the "mIoU" key.
miou_by_stage = {
    stage: {"mIoU": JaccardIndex(task="multiclass", num_classes=num_classes)}
    for stage in ("train", "val", "test")
}

model = Sam(
    # Backbone variant and the checkpoint to initialise it from
    vit_type=vit_model,
    checkpoint=checkpoint,
    # Mask decoder configuration
    multimask_output=multimask_output,
    num_multimask_outputs=num_classes,
    # NOTE(review): in upstream SAM, `iou_head_depth` is the depth of the
    # IoU-prediction MLP rather than a class count -- confirm that tying it
    # to `num_classes` is intended for this wrapper.
    iou_head_depth=num_classes,
    # Per-stage metrics
    train_metrics=miou_by_stage["train"],
    val_metrics=miou_by_stage["val"],
    test_metrics=miou_by_stage["test"],
    # Optional arguments, left disabled in this example:
    # apply_freeze=apply_freeze, # if you need to freeze some layer
    # apply_adapter=apply_adapter # if you need to apply an adapter to a layer
)

# Display the module tree as the cell output.
model

  state_dict = torch.load(f)


Error when load original weights. Applying now remaping.
Prompt Encoder freeze!


Sam(
  (loss_fn): CrossEntropyLoss()
  (model): _SAM(
    (image_encoder): ImageEncoderViT(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (blocks): ModuleList(
        (0-11): 12 x Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): MLPBlock(
            (lin1): Linear(in_features=768, out_features=3072, bias=True)
            (lin2): Linear(in_features=3072, out_features=768, bias=True)
            (act): GELU(approximate='none')
          )
        )
      )
      (neck): Sequential(
        (0): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): LayerNorm2d()
        (2): Conv2d(256, 256,

## Pipeline

In [6]:
# Build the training pipeline; every value comes from the configuration cell.
pipeline = get_trainer_pipeline(
    # What is trained, and the identifiers that name the run's log directory
    model=model,
    model_name=model_name,
    dataset_name=dataset_name,
    # Hardware
    accelerator=accelerator,
    devices=devices,
    # Schedule and reproducibility
    num_epochs=num_epochs,
    seed=seed,
    is_debug=is_debug,
    # Where logs are written
    log_dir=log_dir,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


Log directory set to: /workspaces/Minerva-Discovery/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sam_vit_b/seam_ai


In [7]:
# Run the pipeline's "fit" task on the data module.
# NOTE: with is_debug = True the run recorded in this notebook stopped after
# 3 epochs of 3 batches each, not after num_epochs.
pipeline.run(data_module, task="fit")

/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:268: Experiment logs directory ./logs/sam_vit_b/seam_ai exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Pipeline info saved at: /workspaces/Minerva-Discovery/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sam_vit_b/seam_ai/run_2024-12-20-20-27-53159c070878bd4f4f9e59f9091f322718.yaml



  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | loss_fn | CrossEntropyLoss | 0      | train
1 | model   | _SAM             | 94.4 M | train
-----------------------------------------------------
94.3 M    Trainable params
6.2 K     Non-trainable params
94.4 M    Total params
377.415   Total estimated model params size (MB)
268       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 2: 100%|██████████| 3/3 [00:00<00:00,  3.78it/s, v_num=m_ai, train_loss_step=1.520, train_mIoU_step=0.0698, val_loss_step=1.450, val_mIoU_step=0.0768, val_loss_epoch=1.450, val_mIoU_epoch=0.0768, train_loss_epoch=1.550, train_mIoU_epoch=0.0667]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 3/3 [00:09<00:00,  0.32it/s, v_num=m_ai, train_loss_step=1.520, train_mIoU_step=0.0698, val_loss_step=1.450, val_mIoU_step=0.0768, val_loss_epoch=1.450, val_mIoU_epoch=0.0768, train_loss_epoch=1.550, train_mIoU_epoch=0.0667]
Pipeline info saved at: /workspaces/Minerva-Discovery/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sam_vit_b/seam_ai/run_2024-12-20-20-27-53159c070878bd4f4f9e59f9091f322718.yaml


In [8]:
# Report the path of the most recent ("last") checkpoint recorded by the
# trainer's checkpoint callback.
print(f"Checkpoint saved at {pipeline.trainer.checkpoint_callback.last_model_path}")

Checkpoint saved at ./logs/sam_vit_b/seam_ai/checkpoints/last.ckpt
