In [1]:
# Fetch the fine-tuning fork into /kaggle/working and make it the working directory.
# The clone is skipped when the checkout already exists and always targets the
# absolute path, so re-running this cell (when the working directory is already the
# repository) neither fails nor creates a nested sam2-fine-tune/sam2-fine-tune copy.
![ -d /kaggle/working/sam2-fine-tune ] || git clone https://github.com/ckapelonis02/sam2-fine-tune.git /kaggle/working/sam2-fine-tune
%cd /kaggle/working/sam2-fine-tune

Cloning into 'sam2-fine-tune'...
remote: Enumerating objects: 366, done.[K
remote: Counting objects: 100% (82/82), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 366 (delta 46), reused 57 (delta 23), pack-reused 284 (from 1)[K
Receiving objects: 100% (366/366), 83.00 MiB | 21.69 MiB/s, done.
Resolving deltas: 100% (52/52), done.
/kaggle/working/sam2-fine-tune


In [None]:
# Install the package found in the current working directory in editable mode
# (the previous cell changes into the cloned repository).
%pip install -e .

Obtaining file:///kaggle/working/sam2-fine-tune
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting hydra-core>=1.3.2 (from SAM-2==1.0)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting iopath>=0.1.10 (from SAM-2==1.0)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.10->SAM-2==1.0)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.5/154.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading 

In [None]:
import kagglehub
# Download the SAM 2 "sam2-hiera-tiny" PyTorch model files through kagglehub.
# NOTE(review): `path` is not referenced again in the visible cells; later cells
# hard-code /kaggle/input/segment-anything-2/pytorch/sam2-hiera-tiny/1/ instead.
# Confirm that both locations refer to the same files.
path = kagglehub.model_download("metaresearch/segment-anything-2/pyTorch/sam2-hiera-tiny")

In [None]:
import sys
# Directory of the SAM 2 hiera-tiny model files (the same directory the later
# cells load sam2_hiera_tiny.pt from).
sam2_checkpoint_dir = "/kaggle/input/segment-anything-2/pytorch/sam2-hiera-tiny/1/"

# Only append when missing so that re-running this cell does not pile up
# duplicate entries in sys.path.
if sam2_checkpoint_dir not in sys.path:
    sys.path.append(sam2_checkpoint_dir)

In [None]:
import torch
import numpy as np
import random
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from tqdm import tqdm
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.train_helper import *

# Fine-tune SAM 2 (hiera-tiny) with mixed precision and gradient accumulation,
# validating after every epoch and stopping early when validation IoU stalls.

# NOTE(review): cleanup() is imported from sam2.train_helper; presumably it frees
# memory left over from earlier runs — confirm.
cleanup()

# Training Parameters
max_masks = 150
epochs = 10
best_val_iou = 0.0
gradient_accumulation_steps = 4
patience = 3  # Number of epochs to wait before early stopping
no_improvement_count = 0  # Counter for no improvement in validation IoU

# Dataset Configuration: files are numbered 1..data_size; first 80% train, rest validation.
data_size = 2000
file_names = list(range(1, data_size + 1))
train_size = int(0.8 * data_size)
train_files, val_files = file_names[:train_size], file_names[train_size:]

train_data = read_dataset("/kaggle/input/2k-hd-cropped/i", "/kaggle/input/2k-hd-cropped/m", train_files)
val_data = read_dataset("/kaggle/input/2k-hd-cropped/i", "/kaggle/input/2k-hd-cropped/m", val_files)

# Model Initialization
sam2_model = build_sam2(
    config_file="../sam2_configs/sam2_hiera_t.yaml",
    ckpt_path="/kaggle/input/segment-anything-2/pytorch/sam2-hiera-tiny/1/sam2_hiera_tiny.pt",
    device="cuda",
    apply_postprocessing=False
)
predictor = SAM2ImagePredictor(sam2_model)
predictor.model.sam_mask_decoder.train(True)
predictor.model.sam_prompt_encoder.train(True)

# Optimizer & Scheduler
optimizer = optim.AdamW(predictor.model.parameters(), lr=1e-5, weight_decay=4e-5)
# The scheduler advances once per optimizer update (see _apply_accumulated_gradients),
# so T_max is the total number of updates in the run. The previous fixed T_max=10000
# was shorter than the 1600 * 10 = 16000 per-iteration scheduler steps.
optimizer_steps_per_epoch = -(-train_size // gradient_accumulation_steps)  # ceil division
scheduler = lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=epochs * optimizer_steps_per_epoch, eta_min=1e-7
)
scaler = torch.cuda.amp.GradScaler()


def _apply_accumulated_gradients():
    """Apply the accumulated gradients, reset them and advance the LR schedule."""
    scaler.step(optimizer)
    scaler.update()
    predictor.model.zero_grad()
    scheduler.step()


# Training Loop
for epoch in range(epochs):
    total_iou = 0.0
    total_loss = 0.0
    processed_batches = 0  # batches that actually produced a prediction
    pending_batches = 0  # batches whose gradients are accumulated but not yet applied

    # Draw a fresh visiting order every epoch. Shuffling train_files has no effect
    # here because train_data was built beforehand and is indexed by position.
    # NOTE(review): assumes read_batch uses its second argument as an index into
    # train_data — confirm against sam2.train_helper.
    sample_order = random.sample(range(train_size), train_size)

    print(f"\nEpoch {epoch+1}/{epochs}")

    for itr in tqdm(range(train_size), desc="Training Progress"):
        # Only the forward pass and loss computation run under autocast.
        with torch.cuda.amp.autocast():
            image, masks, input_point, input_label = read_batch(train_data, sample_order[itr], max_masks)
            prd_mask, prd_scores, gt_mask = process_batch(predictor, image, masks, input_point, input_label)

            if prd_mask is None:
                # Nothing to learn from this sample; it counts neither towards the
                # accumulation window nor towards the epoch averages.
                continue

            iou, loss = compute_iou_loss(prd_mask, prd_scores, gt_mask)

        # Backward runs outside autocast; the loss is divided so that the summed
        # gradients of one accumulation window correspond to the mean loss.
        scaler.scale(loss / gradient_accumulation_steps).backward()
        pending_batches += 1

        # Count real backward passes instead of using itr, so a skipped batch can
        # never swallow an optimizer update.
        if pending_batches == gradient_accumulation_steps:
            _apply_accumulated_gradients()
            pending_batches = 0

        total_iou += iou.mean().item()
        total_loss += loss.item()  # unscaled loss, so the reported value is the true mean
        processed_batches += 1

    # Apply gradients of an incomplete accumulation window so they do not leak
    # into the next epoch.
    if pending_batches:
        _apply_accumulated_gradients()

    # Average over batches that were really processed (guard against an empty epoch).
    mean_iou = total_iou / max(processed_batches, 1)
    mean_loss = total_loss / max(processed_batches, 1)

    val_iou = evaluate(predictor, val_data, val_files, max_masks)
    print(f"Epoch {epoch+1}: Train IoU = {mean_iou:.4f}, Train Loss = {mean_loss:.4f}, Val IoU = {val_iou:.4f}")

    if val_iou > best_val_iou:
        best_val_iou = val_iou
        torch.save(predictor.model.state_dict(), "best_model.torch")
        print(f"New best model saved, Val IoU = {best_val_iou:.4f}")
        no_improvement_count = 0  # Reset counter when improvement is seen
    else:
        no_improvement_count += 1
        print(f"No improvement in validation IoU for {no_improvement_count} epochs.")

    # Early stopping check
    if no_improvement_count >= patience:
        print(f"Early stopping triggered. No improvement in validation IoU for {patience} consecutive epochs.")
        break


In [None]:
# Create the output directory for generated masks; -p keeps the cell re-runnable
# when the directory already exists.
!mkdir -p /kaggle/working/sam2-fine-tune/results

In [None]:
import numpy as np
import torch
import cv2
import hydra
import matplotlib.pyplot as plt
import os
import time
from PIL import Image
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.test_helper import test_generator
from sam2.train_helper import cleanup

# Configurations
# Both allocator options go into ONE value: assigning the variable twice made the
# second assignment silently discard max_split_size_mb:256.
# NOTE(review): PyTorch reads this variable when its CUDA allocator is first used,
# so it is set before any other call in this cell and presumably has no effect in
# a kernel where CUDA is already initialised (e.g. after the training cell) — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256,expandable_segments:True"

# NOTE(review): cleanup() is imported from sam2.train_helper; presumably it frees
# memory left over from earlier cells — confirm.
cleanup()

# Reset any Hydra state left by a previous cell before re-initialising it for the
# 'sam2' config module; initialising twice without clearing would raise.
hydra.core.global_hydra.GlobalHydra.instance().clear()
hydra.initialize_config_module('sam2', version_base='1.2')

# Base hiera-tiny model built from the published checkpoint.
sam2_model = build_sam2(
    config_file="../sam2_configs/sam2_hiera_t.yaml",
    ckpt_path="/kaggle/input/segment-anything-2/pytorch/sam2-hiera-tiny/1/sam2_hiera_tiny.pt",
    device="cuda",
    apply_postprocessing=False
)

# Automatic mask generator on top of the model.
# NOTE(review): `load_model` points at a best_model.torch file; presumably the
# generator loads these fine-tuned weights into `sam2_model` — confirm in
# sam2.automatic_mask_generator.
mask_generator = SAM2AutomaticMaskGenerator(
    model=sam2_model,
    points_per_side=32,
    points_per_batch=4,
    pred_iou_thresh=0.75,
    stability_score_thresh=0.92,
    stability_score_offset=0.91,
    mask_threshold=0.4,
    box_nms_thresh=0.7,
    crop_n_layers=2,
    crop_nms_thresh=0.7,
    crop_overlap_ratio=0.3,
    crop_n_points_downscale_factor=2,
    point_grids=None,
    min_mask_region_area=25.0,
    output_mode="binary_mask",
    use_m2m=False,
    multimask_output=True,
    load_model="/kaggle/input/trained-with-epochs/pytorch/default/1/best_model.torch"
)

import pandas as pd

# Make sure the output directory exists so this cell does not depend on a
# separate mkdir cell having been run first.
results_dir = "/kaggle/working/sam2-fine-tune/results"
os.makedirs(results_dir, exist_ok=True)

# One CSV row per evaluation image: the image stem plus the `rows` / `cols`
# values that are forwarded to test_generator.
df = pd.read_csv("/kaggle/input/temp-csv/crops.csv")

file_names = df["file_name"]
rows = df["rows"]
cols = df["cols"]

# Generate and save the mask for every listed image, reporting the time per image.
for file_name, row, col in zip(file_names, rows, cols):
    print(f"File: {file_name}, Rows: {row}, Cols: {col}")
    start_time = time.perf_counter()  # monotonic clock, suited to measuring durations
    test_generator(
        mask_generator=mask_generator,
        img_path=f"/kaggle/input/evaluation-dataset/evaluation_dataset/images_set/{file_name}.jpg",
        output_path=f"{results_dir}/{file_name}.png",
        rows=row,
        cols=col,
        max_mask_crop_region=0.1,
        show_masks=False
    )
    print(f"Time taken: {time.perf_counter() - start_time}")


In [None]:
import optuna
import numpy as np
import time
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.test_helper import test_generator
from evaluate import *

def objective(trial):
    """Optuna objective: IoU of the automatic mask generator on the butterfly image.

    Four generator thresholds are sampled from `trial`; every other generator
    setting is fixed. The generated mask is written to the results directory,
    read back together with the ground-truth mask and scored.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        The 'IoU' entry of the metrics returned by evaluate_pred.
    """
    import os  # local import: this cell's import block does not bring in os

    # Hyperparameters explored by the study.
    pred_iou_thresh = trial.suggest_float('pred_iou_thresh', 0.5, 0.9)
    stability_score_thresh = trial.suggest_float('stability_score_thresh', 0.7, 0.95)
    stability_score_offset = trial.suggest_float('stability_score_offset', 0.7, 1.2)
    mask_threshold = trial.suggest_float('mask_threshold', 0.0, 0.6)

    # NOTE(review): unlike the inference cell, no `load_model` is passed below, so
    # the study appears to tune the base checkpoint rather than the fine-tuned
    # weights — confirm this is intended.
    sam2_model = build_sam2(
        config_file="../sam2_configs/sam2_hiera_t.yaml",
        ckpt_path="/kaggle/input/segment-anything-2/pytorch/sam2-hiera-tiny/1/sam2_hiera_tiny.pt",
        device="cuda",
        apply_postprocessing=False
    )

    mask_generator = SAM2AutomaticMaskGenerator(
        model=sam2_model,
        # Fixed settings (not part of the search space).
        points_per_side=128,
        points_per_batch=32,
        # Tuned settings.
        pred_iou_thresh=pred_iou_thresh,
        stability_score_thresh=stability_score_thresh,
        stability_score_offset=stability_score_offset,
        mask_threshold=mask_threshold,
        # Fixed settings (not part of the search space).
        box_nms_thresh=0.7,
        crop_n_layers=2,
        crop_nms_thresh=0.7,
        crop_overlap_ratio=0.3,
        crop_n_points_downscale_factor=2,
        min_mask_region_area=25.0,
        use_m2m=False
    )

    # NOTE(review): the inference cell reads images from
    # /kaggle/input/evaluation-dataset/evaluation_dataset/images_set/ while this
    # path has no `evaluation_dataset/` component — confirm which layout is correct.
    img_path = "/kaggle/input/evaluation-dataset/images_set/butterfly.jpg"
    output_path = "/kaggle/working/sam2-fine-tune/results/butterfly.png"

    # The prediction is written to disk and read back below, so make sure the
    # target directory exists instead of relying on an earlier mkdir cell.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    start_time = time.perf_counter()  # monotonic clock, suited to measuring durations
    test_generator(
        mask_generator=mask_generator,
        img_path=img_path,
        output_path=output_path,
        rows=1,
        cols=1,
        max_mask_crop_region=0.1,
        show_masks=False
    )
    print(f"Test run took {time.perf_counter() - start_time} seconds")

    gt, pred = read_masks("/kaggle/input/evaluation-dataset/masks_set/butterfly.png", output_path)
    metrics = evaluate_pred(gt, pred)
    return metrics['IoU']

# Run 20 trials of `objective`, keeping the parameter set with the highest IoU.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

# Report the winning trial's sampled parameters and the score it achieved.
best_trial = study.best_trial
print("Best Hyperparameters:", best_trial.params)
print("Best IoU Score:", best_trial.value)