In [1]:
from functools import partial

import torch
from minerva.models.loaders import FromPretrained
from minerva.models.ssl.dinov2 import (
    DinoVisionTransformer,
    SETR_MLA,
    NestedTensorBlock,
    MemEffAttention,
    DinoV2,
)

from common import get_data_module
from common import get_evaluation_pipeline

  from .autonotebook import tqdm as notebook_tqdm


Not using xFormers lib!


In [2]:
# ---- Identifiers (labels only) ----
model_name = "dinov2_mla"
dataset_name = "seam_ai"

# ---- Dataset location and geometry ----
root_data_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/images"
root_annotation_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/annotations"
# Set this to the size of the images in the dataset.
img_size = (1008, 784)
# When True, single-channel images are used instead of 3-channel ones.
single_channel = False

# ---- Run settings ----
seed = 42          # Fixed seed for reproducibility
batch_size = 1     # Samples per batch
num_epochs = 100   # Number of epochs to train
# Debug switch: when True, only 3 batches are processed for 3 epochs.
is_debug = False
log_dir = "./logs"  # Directory where logs are written

# ---- Hardware ----
accelerator = "gpu"  # "cpu" or "gpu"
devices = 1          # Number of GPUs

In [3]:
# Assemble the data module from the values chosen in the configuration cell.
data_module = get_data_module(
    img_size=img_size,
    batch_size=batch_size,
    single_channel=single_channel,
    seed=seed,
    root_data_dir=root_data_dir,
    root_annotation_dir=root_annotation_dir,
)

# Bare expression so the notebook displays the data module's summary.
data_module

DataModule
    Data: /workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/images
    Annotations: /workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/annotations
    Batch size: 1

In [4]:
# Smoke test: fetch a single batch to confirm the data module works.
# Note: despite the `train_` prefix, this batch comes from the *predict*
# dataloader.
data_module.setup("predict")
predict_loader = data_module.predict_dataloader()
train_batch_x, train_batch_y = next(iter(predict_loader))

# Display the shapes of the inputs and of the labels.
(train_batch_x.shape, train_batch_y.shape)

(torch.Size([1, 3, 1008, 784]), torch.Size([1, 1008, 784]))

In [5]:
# Factory for the transformer blocks: a NestedTensorBlock whose attention
# layer is MemEffAttention.
attention_block = partial(NestedTensorBlock, attn_class=MemEffAttention)

# Backbone: 12 blocks, 6 attention heads, 384-dim embeddings, 14-pixel patches.
backbone = DinoVisionTransformer(
    embed_dim=384,
    depth=12,
    num_heads=6,
    patch_size=14,
    mlp_ratio=4,
    init_values=1e-5,
    block_chunks=0,
    block_fn=attention_block,  # type: ignore
)

# Segmentation head; its embedding size must agree with the backbone's embed_dim.
head = SETR_MLA(num_classes=6, embedding_dim=384)

In [6]:
# Combine backbone and head into the full model, using cross-entropy as loss.
model = DinoV2(
    backbone=backbone,
    head=head,
    loss_fn=torch.nn.CrossEntropyLoss(),
    # Take the output shape from the configuration cell instead of repeating
    # the literal (1008, 784), so the model follows `img_size` if it changes.
    output_shape=img_size,
    # NOTE(review): meaning of `middle` is not visible here — confirm against
    # the DinoV2 class documentation.
    middle=True,
)

# Bare expression so the notebook displays the module tree.
model

DinoV2(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
     

In [7]:
# Checkpoint whose weights are loaded into the model before evaluation.
# NOTE(review): this absolute path repeats the log_dir / model_name /
# dataset_name layout from the configuration cell; keep them in sync.
ckpt_file = "/workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/checkpoints/last.ckpt"

# `FromPretrained` is imported in the notebook's top import cell.
# NOTE(review): strict=False combined with error_on_missing_keys=True
# presumably tolerates unexpected keys while still failing on missing ones —
# confirm against the FromPretrained documentation.
model = FromPretrained(
    model,
    ckpt_path=ckpt_file,
    strict=False,
    error_on_missing_keys=True,
)

# Bare expression so the notebook displays the loaded model.
model

Model loaded from /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/checkpoints/last.ckpt


DinoV2(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
     

In [8]:
# Build the evaluation pipeline around the loaded model. Every setting comes
# from the configuration cell; in particular `is_debug` is forwarded rather
# than hard-coded, so flipping the flag there actually takes effect here.
evaluation_pipeline = get_evaluation_pipeline(
    model=model,
    model_name=model_name,
    dataset_name=dataset_name,
    log_dir=log_dir,
    accelerator=accelerator,
    devices=devices,
    is_debug=is_debug,
    seed=seed,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


Log directory set to: /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/evaluation


In [9]:
# Execute the pipeline in "evaluate" mode on the data module.
result = evaluation_pipeline.run(data_module, task="evaluate")

# Print a separator line followed by the returned result.
print("----", result, sep="\n")

/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:268: Experiment logs directory ./logs/dinov2_mla/seam_ai exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Pipeline info saved at: /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/evaluation/run_2024-12-10-13-40-49cef01c8dde814c23bd13a73fbeb922c2.yaml


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 200/200 [00:53<00:00,  3.75it/s]
Inference took: 56.354 seconds!
Predictions saved to /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/evaluation/predictions.npy. Shape: torch.Size([200, 6, 1008, 784])
Running classification metrics...
Metrics saved to /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/evaluation/metrics_2024-12-10-13-40-49cef01c8dde814c23bd13a73fbeb922c2.yaml
----
{'predictions': {'file': '/workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/dinov2_mla/seam_ai/evaluation/predictions.npy', 'shape': [200, 6, 1008, 784], 'time': 56.35387182235718}, 'classification': {'mIoU': [0.6264116764068604], 'acc': [0.8873153328895569]}}
