In [1]:
from common import get_data_module, get_trainer_pipeline
import torch
from minerva.models.nets.image.vit import SFM_BasePatch16_Downstream
from functools import partial
from minerva.models.loaders import FromPretrained

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
root_data_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/images"
root_annotation_dir = "/workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/annotations"
img_size = (512, 512)          # Change this to the size of the images in the dataset
model_name = "sfm-base-patch16"       # Model name (just identifier)
dataset_name = "seam_ai"        # Dataset name (just identifier)
single_channel = True          # If True, the model will be trained with single channel images (instead of 3 channels)

log_dir = "./logs"              # Directory to save logs
batch_size = 1                  # Batch size    
seed = 42                       # Seed for reproducibility
num_epochs = 100                # Number of epochs to train
is_debug = False                 # If True, only 3 batch will be processed for 3 epochs
accelerator = "gpu"             # CPU or GPU
devices = 1                     # Num GPUs

In [3]:
data_module = get_data_module(
    root_data_dir=root_data_dir,
    root_annotation_dir=root_annotation_dir,
    img_size=img_size,
    batch_size=batch_size,
    seed=seed,
    single_channel=single_channel, 
)

data_module

DataModule
    Data: /workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/images
    Annotations: /workspaces/HIAAC-KR-Dev-Container/shared_data/seam_ai_datasets/seam_ai/annotations
    Batch size: 1

In [4]:
# Just to check if the data module is working
data_module.setup("predict")
train_batch_x, train_batch_y = next(iter(data_module.predict_dataloader()))
train_batch_x.shape, train_batch_y.shape

(torch.Size([1, 1006, 590]), torch.Size([1, 1006, 590]))

In [5]:
model = SFM_BasePatch16_Downstream(
    img_size=(512, 512),
    num_classes=6,
    in_chans=1
)

model

SFM_BasePatch16_Downstream(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(ap

In [6]:
model = FromPretrained(
    model=model,
    ckpt_path="/workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sfm-base-patch16/seam_ai/checkpoints/last.ckpt",
    strict=False,
    ckpt_load_weights_only=False,
    error_on_missing_keys=True,
)

Model loaded from /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sfm-base-patch16/seam_ai/checkpoints/last.ckpt


In [None]:
from minerva.engines.patch_inferencer_engine import VotingPatchInferencer

model = VotingPatchInferencer(
    model=model,
    num_classes=6,
    input_shape=(1, 512, 512),
    output_shape=(1006, 590),
)

In [8]:
from common import get_trainer

trainer = get_trainer(
    model_name=model_name,
    dataset_name=dataset_name,
    log_dir=log_dir,
    accelerator=accelerator,
    devices=devices,
    is_debug=False,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [9]:
from minerva.pipelines.lightning_pipeline import SimpleLightningPipeline
from pathlib import Path
from torchmetrics import JaccardIndex, Accuracy

evaluation_pipeline = SimpleLightningPipeline(
    model=model,
    trainer=trainer,
    save_run_status=False,
    seed=42,
    log_dir=Path(log_dir) / model_name / dataset_name / "evaluation",
    classification_metrics={
        "mIoU": JaccardIndex(num_classes=6, average="macro", task="multiclass"),
        "acc": Accuracy(num_classes=6, task="multiclass"),
    }
)

Seed set to 42


Log directory set to: /workspaces/HIAAC-KR-Dev-Container/Minerva-Dev/docs/notebooks/examples/seismic/facies_classification/parihaka/logs/sfm-base-patch16/seam_ai/evaluation


In [10]:
trainer.predict(model, data_module)

/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:268: Experiment logs directory ./logs/sfm-base-patch16/seam_ai exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=47` in the `DataLoader` to improve performance.


Predicting DataLoader 0:   0%|          | 0/200 [00:00<?, ?it/s][forward] x.shape: torch.Size([1, 1006, 590]), self.input_shape: (1, 512, 512)
Forward features: torch.Size([1, 1, 512, 512])


ValueError: operands could not be broadcast together with shapes (2,) (3,) 