## Pre-setup: Rebuilding the workspace cache after a runtime reset

In [None]:
# Imported in this cell so the Pre-setup section runs on a fresh kernel; the
# notebook's other `from pathlib import Path` only appears in the Setup section.
from pathlib import Path

# Directory that will contain the cloned repository.
WORKSPACE = Path("/content/workspace")
WORKSPACE.mkdir(parents=True, exist_ok=True)
# Git remote that gets cloned into WORKSPACE.
DRIVE_REPO = "https://github.com/dsubedi753/TORTOISE"

In [None]:
# Work from the workspace directory so the clone lands inside it.
%cd $WORKSPACE
# Clone the repository; git creates a subdirectory named after the repo
# (TORTOISE), which the following cells use as the project root.
!git clone $DRIVE_REPO

In [None]:
# Imported in this cell so the Pre-setup section works right after a runtime
# reset; the notebook's other imports of these names come later, in Setup.
import os
from pathlib import Path

# Publish the checkout location, then derive the data layout from it.
os.environ["PROJECT_ROOT"] = "/content/workspace/TORTOISE"
PROJECT_FOLDER = Path(os.environ["PROJECT_ROOT"])
DATA_FOLDER = PROJECT_FOLDER / "data"
tiles_dir = DATA_FOLDER / "tiles"
imageset_dir = DATA_FOLDER / "imageset"

In [None]:
# Place the tiles and the image set in their respective folders (data/tiles and data/imageset)

In [None]:
import subprocess  # no longer used in this cell; kept in case other cells rely on it
from pathlib import Path


def _count_files(folder):
    """Return how many regular files exist anywhere below *folder*.

    Intended to match ``find <folder> -type f | wc -l`` without spawning a
    shell: symbolic links are not counted, and a missing folder yields 0.
    """
    root = Path(folder)
    if not root.is_dir():
        return 0
    return sum(
        1
        for entry in root.rglob("*")
        if entry.is_file() and not entry.is_symlink()
    )


print("\nCounting tiles...")
count = _count_files(tiles_dir)
print(f"✔ Found {count} files in {tiles_dir}")

count = _count_files(imageset_dir)
print(f"✔ Found {count} files in {imageset_dir}")

print("\nSetup complete. Ready to train!")

## Setup: Once the folder and tiles are staged, set up the environment variables and source scripts

In [None]:
import os
from pathlib import Path

# Record the checkout location in the environment; it is read back through
# os.environ when the project paths are derived.
os.environ.update(PROJECT_ROOT="/content/workspace/TORTOISE")

In [None]:
# Derive the project layout from the PROJECT_ROOT environment variable.
PROJECT_FOLDER = Path(os.environ["PROJECT_ROOT"])
DATA_FOLDER = PROJECT_FOLDER.joinpath("data")
tiles_dir = DATA_FOLDER.joinpath("tiles")

# Echo the resolved locations as a quick sanity check.
print(f"Project folder: {PROJECT_FOLDER}")
print(f"Data folder: {DATA_FOLDER}")
print(f"Tiles folder: {tiles_dir}")

In [None]:
import sys
from pathlib import Path

# Setup paths: folder inside the checkout that is added to the import path.
src_path = PROJECT_FOLDER / "src"

print("Project folder:", PROJECT_FOLDER)
print("Data folder:", DATA_FOLDER)
print("Tiles folder:", tiles_dir)
print("Source folder:", src_path)

# Prepend the source folder only if it is missing, so re-running this cell
# does not pile up duplicate sys.path entries.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))


## Refresh: git pull and reload packages

In [None]:
# Switch into the repository checkout before pulling.
%cd $PROJECT_FOLDER
# Bring in the latest commits from the `origin` remote.
# NOTE(review): `-f` is forwarded to the fetch step (forced ref update); it
# does not discard local modifications — confirm that is the intent here.
!git pull -f origin

In [None]:
import importlib

import tortoise

# Submodules to refresh after pulling new code, listed in the order they are
# reloaded.
# NOTE(review): a module reloaded before one of its dependencies keeps the
# dependency's old objects — confirm this order matches the package's imports.
_TORTOISE_SUBMODULES = (
    "dataset",
    "model",
    "train",
    "dataloader",
    "augmentations",
    "hparams",
    "checkpoints",
    "utils",
)

# Import each submodule (a cached lookup when it is already loaded), then
# re-execute it so edits on disk take effect without restarting the kernel.
for _submodule in _TORTOISE_SUBMODULES:
    importlib.reload(importlib.import_module(f"tortoise.{_submodule}"))

## Step 1: Import the training modules and load the hyperparameters

In [None]:
import torch
import torch.optim as optim
from tortoise.train import *
from tortoise.dataloader import *
from tortoise.hparams import *
from tortoise.checkpoints import *
from tortoise.utils import *

In [None]:
# Load the hyperparameter configuration that the following steps index into
# (e.g. hparams["train"], hparams["dataset"]) and echo it for the record.
hparams = load_hparams()
print(hparams)

## Step 2: Build dataloaders (with pin_memory for GPU)

In [None]:
# Build the train / validation / test dataloaders from the hyperparameters.
# NOTE(review): the original note says pin_memory is enabled automatically
# when CUDA is available; that happens inside build_dataloaders (not shown).
dataset_cfg = hparams["dataset"]
train_cfg = hparams["train"]

train_loader, val_loader, test_loader, set_maps = build_dataloaders(
    tiles_dir=tiles_dir,
    csv_file=DATA_FOLDER.joinpath("tile_index.csv"),
    batch_size=train_cfg["batch_size"],
    seed=dataset_cfg["seed"],
    train_ratio=dataset_cfg["train_ratio"],
    val_ratio=dataset_cfg["val_ratio"],
    use_ms=dataset_cfg["use_ms"],
    use_rgb=dataset_cfg["use_rgb"],
    num_workers=train_cfg["num_workers"],
)

# Report how many batches each loader yields per pass.
print(
    f"Train batches: {len(train_loader)}",
    f"Val batches:   {len(val_loader)}",
    f"Test batches:  {len(test_loader)}",
    sep="\n",
)

In [None]:
# Report the size of the dataset behind each loader; labels are padded to the
# same width so the counts line up.
print(f"Train dataset: {len(train_loader.dataset)}")
print(f"Val dataset:   {len(val_loader.dataset)}")
print(f"Test dataset:  {len(test_loader.dataset)}")

## Step 3: Set up the device and model, then train

In [None]:
# Re-read the hyperparameters so this cell can be run on its own.
hparams = load_hparams()
print(hparams)

# Pick the compute device and print its details.
device = get_device()
print_device_info()

# Build the network from the hyperparameters, then move it to the device.
model = build_model(hparams)
model = model.to(device)

print(f"Model moved to {device}")
print(f"Model parameters: {sum(param.numel() for param in model.parameters()):,}")

In [None]:

# Optimizer, learning-rate scheduler and mixed-precision setup
use_amp = True
optimizer = build_optimizer(model, hparams)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
# NOTE(review): T_max is fixed at 20 regardless of hparams['train']['epochs'];
# confirm that is intended when training for a different number of epochs.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
# A gradient scaler is only created when mixed precision is switched on.
scaler = torch.amp.GradScaler() if use_amp else None


num_epochs = hparams['train']['epochs']
# Presumably the positive-class weight used by the loss inside train_model.
pos_weight = torch.tensor([2.75], device=device)

checkpoint_path = PROJECT_FOLDER / "checkpoints" / "best_model.pth"

model, train_losses, val_losses, train_ious, val_ious = train_model(
    model,
    train_loader,
    val_loader,
    optimizer,
    scheduler,
    device,
    pos_weight,
    num_epochs=num_epochs,
    checkpoint_path=checkpoint_path,
    # Pass the flag itself so the AMP setting and the scaler cannot disagree.
    use_amp=use_amp,
    scaler=scaler,
    alpha=0.8,
    threshold=0.6,
    early_stopping_patience=None,
)




In [None]:


def save_training_history(
    path,
    train_losses,
    val_losses,
    train_ious,
    val_ious
):
    """Pickle the training metric histories to *path*.

    The file holds a single dict with the keys ``"train_losses"``,
    ``"val_losses"``, ``"train_ious"`` and ``"val_ious"``, each mapped to the
    corresponding argument unchanged.

    Args:
        path: Destination file (anything ``open`` accepts); it is opened in
            ``"wb"`` mode, so an existing file is overwritten.
        train_losses: Training loss history.
        val_losses: Validation loss history.
        train_ious: Training IoU history.
        val_ious: Validation IoU history.
    """
    # Imported locally: no visible cell imports pickle, so relying on a
    # notebook-level name would fail with NameError.
    import pickle

    data = {
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_ious": train_ious,
        "val_ious": val_ious,
    }
    with open(path, "wb") as f:
        pickle.dump(data, f)


In [None]:
# Persist the metric histories returned by training. The relative file name
# resolves against the current working directory.
save_training_history(
    path="training_history.pkl",
    train_losses=train_losses,
    val_losses=val_losses,
    train_ious=train_ious,
    val_ious=val_ious,
)

In [None]:
# Save the final epoch
save_checkpoint(
    model=model,
    optimizer=optimizer,
    epoch=num_epochs,
    scaler = scaler,
    hparams=hparams,
)

In [None]:
# Score the trained model with the same 0.6 threshold that was passed to
# train_model, requesting the listed metrics and outputs.
# NOTE(review): `test_images` is not defined in the visible cells — confirm
# where it comes from before running this cell.
metric_images = evaluate_images(
    model,
    test_images,
    metrics=["brier", "iou", "dice", "recall", "precision", "fpr", "fnr", "boundary_iou"],
    threshold=0.6,
    output_list=["pred", "image", "logits", "labels", "mask"],
)