In [1]:
import zipfile
import os

zip_path = "/teamspace/studios/this_studio/hand_segmentation_splitted-20251112T075029Z-1-001.zip"
extract_dir = "/teamspace/studios/this_studio/unzipped"  # or any folder you want

os.makedirs(extract_dir, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print("âœ… Unzipped to:", extract_dir)


âœ… Unzipped to: /teamspace/studios/this_studio/unzipped


In [1]:
import os
import sys
import time
import shutil
from glob import glob
from collections import OrderedDict
from pathlib import Path

import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset

In [24]:
HAND_SPLIT_BASE = "/teamspace/studios/this_studio/unzipped/hand_segmentation_splitted"
PRETRAINED_WEIGHTS = "/teamspace/studios/this_studio/weights_hands_epoch_38.pth.tar"
OUTPUT_DIR = "/teamspace/studios/this_studio/models_checkpoints"

assert Path(HAND_SPLIT_BASE).exists(), f"Dataset not found: {HAND_SPLIT_BASE}"
assert Path(PRETRAINED_WEIGHTS).exists(), f"Weights not found: {PRETRAINED_WEIGHTS}"
os.makedirs(OUTPUT_DIR, exist_ok=True)

BASE_DATASETS = ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face']
DATASETS = BASE_DATASETS + ['combined4']

BATCH_SIZE = 4
EPOCHS = 15
LR = 1e-4
NUM_CLASSES = 2
IMG_SIZE = (512, 512)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

IMG_MEAN = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
IMG_STD = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))

print(f"--- Configuration ---")
print(f"Device: {DEVICE}")
print(f"Dataset Base: {HAND_SPLIT_BASE}")
print(f"Weights: {PRETRAINED_WEIGHTS}")
print(f"Checkpoints: {OUTPUT_DIR}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Epochs: {EPOCHS}")
print(f"---------------------")

--- Configuration ---
Device: cuda
Dataset Base: /teamspace/studios/this_studio/unzipped/hand_segmentation_splitted
Weights: /teamspace/studios/this_studio/weights_hands_epoch_38.pth.tar
Checkpoints: /teamspace/studios/this_studio/models_checkpoints
Batch Size: 4
Epochs: 15
---------------------


In [3]:
# Cell: inspect checkpoint keys & basic info
import os, torch
p = PRETRAINED_WEIGHTS
print("exists:", os.path.exists(p), "size (MB):", os.path.getsize(p)/(1024*1024))
ck = torch.load(p, map_location="cpu")
print("Loaded type:", type(ck))
if isinstance(ck, dict):
    print("Keys:", list(ck.keys()))
    print("Saved epoch:", ck.get('epoch'))
    print("Saved loss:", ck.get('loss'))
else:
    print("Not a dict â€” likely a raw state_dict.")


exists: True size (MB): 1351.9839839935303


Loaded type: <class 'dict'>
Keys: ['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss']
Saved epoch: 38
Saved loss: 0.06732065253596328


In [4]:
def batchnorm(in_planes):
    return nn.BatchNorm2d(in_planes, affine=True, eps=1e-5, momentum=0.1)

def conv3x3(in_planes, out_planes, stride=1, bias=False):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=bias)

def conv1x1(in_planes, out_planes, stride=1, bias=False):
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                     padding=0, bias=bias)

def convbnrelu(in_planes, out_planes, kernel_size, stride=1, groups=1, act=True):
    if act:
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride,
                      padding=int(kernel_size / 2.), groups=groups, bias=False),
            batchnorm(out_planes),
            nn.ReLU6(inplace=True))
    else:
        return nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride,
                      padding=int(kernel_size / 2.), groups=groups, bias=False),
            batchnorm(out_planes))

class CRPBlock(nn.Module):
    def __init__(self, in_planes, out_planes, n_stages):
        super(CRPBlock, self).__init__()
        for i in range(n_stages):
            setattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'),
                    conv3x3(in_planes if (i == 0) else out_planes,
                            out_planes, stride=1, bias=False))
        self.n_stages = n_stages
        self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)

    def forward(self, x):
        top = x
        for i in range(self.n_stages):
            top = self.maxpool(top)
            top = getattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'))(top)
            x = top + x
        return x

stages_suffixes = {0 : '_conv', 1 : '_conv_relu_varout_dimred'}

class RCUBlock(nn.Module):
    def __init__(self, in_planes, out_planes, n_blocks, n_stages):
        super(RCUBlock, self).__init__()
        for i in range(n_blocks):
            for j in range(n_stages):
                setattr(self, '{}{}'.format(i + 1, stages_suffixes[j]),
                        conv3x3(in_planes if (i == 0) and (j == 0) else out_planes,
                                out_planes, stride=1, bias=(j == 0)))
        self.n_blocks = n_blocks
        self.n_stages = n_stages

    def forward(self, x):
        for i in range(self.n_blocks):
            residual = x
            for j in range(self.n_stages):
                x = F.relu(x)
                x = getattr(self, '{}{}'.format(i + 1, stages_suffixes[j]))(x)
            x += residual
        return x

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        residual = x
        out = self.conv1(x); out = self.bn1(out); out = self.relu(out)
        out = self.conv2(out); out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual; out = self.relu(out)
        return out

class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        residual = x
        out = self.conv1(x); out = self.bn1(out); out = self.relu(out)
        out = self.conv2(out); out = self.bn2(out); out = self.relu(out)
        out = self.conv3(out); out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual; out = self.relu(out)
        return out

class RefineNet(nn.Module):
    def __init__(self, block, layers, num_classes=21):
        self.inplanes = 64
        super(RefineNet, self).__init__()
        self.do = nn.Dropout(p=0.5)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.p_ims1d2_outl1_dimred = conv3x3(2048, 512, bias=False)
        self.adapt_stage1_b = self._make_rcu(512, 512, 2, 2)
        self.mflow_conv_g1_pool = self._make_crp(512, 512, 4)
        self.mflow_conv_g1_b = self._make_rcu(512, 512, 3, 2)
        self.mflow_conv_g1_b3_joint_varout_dimred = conv3x3(512, 256, bias=False)

        self.p_ims1d2_outl2_dimred = conv3x3(1024, 256, bias=False)
        self.adapt_stage2_b = self._make_rcu(256, 256, 2, 2)
        self.adapt_stage2_b2_joint_varout_dimred = conv3x3(256, 256, bias=False)
        self.mflow_conv_g2_pool = self._make_crp(256, 256, 4)
        self.mflow_conv_g2_b = self._make_rcu(256, 256, 3, 2)
        self.mflow_conv_g2_b3_joint_varout_dimred = conv3x3(256, 256, bias=False)

        self.p_ims1d2_outl3_dimred = conv3x3(512, 256, bias=False)
        self.adapt_stage3_b = self._make_rcu(256, 256, 2, 2)
        self.adapt_stage3_b2_joint_varout_dimred = conv3x3(256, 256, bias=False)
        self.mflow_conv_g3_pool = self._make_crp(256, 256, 4)
        self.mflow_conv_g3_b = self._make_rcu(256, 256, 3, 2)
        self.mflow_conv_g3_b3_joint_varout_dimred = conv3x3(256, 256, bias=False)

        self.p_ims1d2_outl4_dimred = conv3x3(256, 256, bias=False)
        self.adapt_stage4_b = self._make_rcu(256, 256, 2, 2)
        self.adapt_stage4_b2_joint_varout_dimred = conv3x3(256, 256, bias=False)
        self.mflow_conv_g4_pool = self._make_crp(256, 256, 4)
        self.mflow_conv_g4_b = self._make_rcu(256, 256, 3, 2)

        self.clf_conv = nn.Conv2d(256, num_classes, kernel_size=3, stride=1, padding=1, bias=True)

    def _make_crp(self, in_planes, out_planes, stages):
        layers = [CRPBlock(in_planes, out_planes, stages)]
        return nn.Sequential(*layers)

    def _make_rcu(self, in_planes, out_planes, blocks, stages):
        layers = [RCUBlock(in_planes, out_planes, blocks, stages)]
        return nn.Sequential(*layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        inp_size = x.size()[2:]
        x = self.conv1(x); x = self.bn1(x); x = self.relu(x); x = self.maxpool(x)
        l1 = self.layer1(x); l2 = self.layer2(l1); l3 = self.layer3(l2); l4 = self.layer4(l3)

        l4 = self.do(l4); l3 = self.do(l3)

        x4 = self.p_ims1d2_outl1_dimred(l4)
        x4 = self.adapt_stage1_b(x4); x4 = self.relu(x4)
        x4 = self.mflow_conv_g1_pool(x4); x4 = self.mflow_conv_g1_b(x4)
        x4 = self.mflow_conv_g1_b3_joint_varout_dimred(x4)
        x4 = nn.Upsample(size=l3.size()[2:], mode='bilinear', align_corners=True)(x4)

        x3 = self.p_ims1d2_outl2_dimred(l3)
        x3 = self.adapt_stage2_b(x3); x3 = self.adapt_stage2_b2_joint_varout_dimred(x3)
        x3 = x3 + x4; x3 = F.relu(x3)
        x3 = self.mflow_conv_g2_pool(x3); x3 = self.mflow_conv_g2_b(x3)
        x3 = self.mflow_conv_g2_b3_joint_varout_dimred(x3)
        x3 = nn.Upsample(size=l2.size()[2:], mode='bilinear', align_corners=True)(x3)

        x2 = self.p_ims1d2_outl3_dimred(l2)
        x2 = self.adapt_stage3_b(x2); x2 = self.adapt_stage3_b2_joint_varout_dimred(x2)
        x2 = x2 + x3; x2 = F.relu(x2)
        x2 = self.mflow_conv_g3_pool(x2); x2 = self.mflow_conv_g3_b(x2)
        x2 = self.mflow_conv_g3_b3_joint_varout_dimred(x2)
        x2 = nn.Upsample(size=l1.size()[2:], mode='bilinear', align_corners=True)(x2)

        x1 = self.p_ims1d2_outl4_dimred(l1)
        x1 = self.adapt_stage4_b(x1); x1 = self.adapt_stage4_b2_joint_varout_dimred(x1)
        x1 = x1 + x2; x1 = F.relu(x1)
        x1 = self.mflow_conv_g4_pool(x1); x1 = self.mflow_conv_g4_b(x1)
        x1 = self.do(x1)

        out = self.clf_conv(x1)
        out = F.interpolate(out, size=inp_size, mode='bilinear', align_corners=True)
        return out

def rf101(num_classes=21, imagenet=False, pretrained=False, **kwargs):
    model = RefineNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, **kwargs)
    return model

In [14]:
class HandSegmentationDataset(Dataset):
    def __init__(self, img_root, mask_root, split='train'):
        self.img_dir = os.path.join(img_root, split)
        self.mask_dir = os.path.join(mask_root, split)
        self.files = [f for f in os.listdir(self.img_dir) if f.lower().endswith(('jpg', 'png', 'jpeg'))]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.files[idx])
        mask_path = os.path.join(self.mask_dir, self.files[idx].rsplit('.',1)[0] + '.jpg')
        image = np.array(Image.open(img_path).convert('RGB').resize(IMG_SIZE, Image.BILINEAR)).astype(np.float32)/255.
        mask = np.array(Image.open(mask_path).convert('L').resize(IMG_SIZE, Image.NEAREST))
        mask = (mask>0).astype(np.int64)
        norm = (image - IMG_MEAN)/IMG_STD
        img_tensor = torch.tensor(norm.transpose(2,0,1)).float()
        mask_tensor = torch.tensor(mask).long()
        return {'image': img_tensor, 'mask': mask_tensor}

def build_split_dataset(name, split):
    if name != 'combined4':
        img_dir  = os.path.join(HAND_SPLIT_BASE, name, 'images_splitted')
        mask_dir = os.path.join(HAND_SPLIT_BASE, name, 'masks_splitted')
        return HandSegmentationDataset(img_dir, mask_dir, split)
    else:
        parts = []
        for d in BASE_DATASETS:
            img_dir  = os.path.join(HAND_SPLIT_BASE, d, 'images_splitted')
            mask_dir = os.path.join(HAND_SPLIT_BASE, d, 'masks_splitted')
            parts.append(HandSegmentationDataset(img_dir, mask_dir, split))
        return ConcatDataset(parts)

def collate_skip_none(batch):
    batch = [b for b in batch if b is not None]
    if not batch:
        return None
    return torch.utils.data.dataloader.default_collate(batch)

def intersection_and_union(pred, target, num_classes):
    inter = np.zeros(num_classes); union = np.zeros(num_classes)
    for c in range(num_classes):
        p, t = (pred==c), (target==c)
        inter[c] = np.logical_and(p,t).sum()
        union[c] = np.logical_or(p,t).sum()
    return inter, union

def evaluate_miou(model, dataloader, num_classes, device):
    model.eval(); inters, unions = np.zeros(num_classes), np.zeros(num_classes)
    with torch.no_grad():
        for batch in tqdm(dataloader, leave=False):
            if batch is None: continue
            imgs, masks = batch['image'].to(device), batch['mask'].to(device)
            preds = torch.argmax(model(imgs), dim=1).cpu().numpy().ravel()
            gts = masks.cpu().numpy().ravel()
            inter, union = intersection_and_union(preds, gts, num_classes)
            inters += inter; unions += union
    ious = np.divide(inters, unions, out=np.zeros_like(inters), where=unions>0)
    return float(np.nanmean(ious))

@torch.no_grad()
def evaluate_accuracy(model, dataloader, device):
    model.eval()
    correct, total = 0, 0
    for batch in tqdm(dataloader, leave=False):
        if batch is None:
            continue
        imgs  = batch['image'].to(device)
        masks = batch['mask'].to(device).long()
        logits = model(imgs)
        preds = torch.argmax(logits, dim=1)
        correct += (preds == masks).sum().item()
        total   += masks.numel()
    return correct, total

def safe_div(n, d):
    return (n / d) if d > 0 else 0.0

def load_pretrained(model, path):
    ckpt = torch.load(path, map_location='cpu')
    if 'model_state_dict' in ckpt: ckpt = ckpt['model_state_dict']
    state = OrderedDict()
    for k,v in ckpt.items():
        k2 = k.replace('module.','')
        if 'clf_conv' not in k2: state[k2] = v
    model.load_state_dict(state, strict=False)
    return model


In [6]:
def train_on_dataset(name):
    print(f"\nðŸŸ¢ Training on {name} dataset...")
    train_ds = build_split_dataset(name, 'train')
    val_ds   = build_split_dataset(name, 'val')
    train_loader = DataLoader(train_ds, BATCH_SIZE, True, collate_fn=collate_skip_none)
    val_loader   = DataLoader(val_ds, BATCH_SIZE, False, collate_fn=collate_skip_none)

    model = rf101(num_classes=NUM_CLASSES)
    model = load_pretrained(model, PRETRAINED_WEIGHTS)
    model.clf_conv = nn.Conv2d(256, NUM_CLASSES, 3, 1, 1)
    model = model.to(DEVICE)

    opt = optim.Adam(model.parameters(), lr=LR)
    criterion = nn.CrossEntropyLoss()

    best_miou = -1
    save_path_best = os.path.join(OUTPUT_DIR, f'{name}_best.pth.tar')
    save_path_last = os.path.join(OUTPUT_DIR, f'{name}_last.pth.tar')

    for epoch in range(EPOCHS):
        model.train(); total_loss = 0
        for batch in tqdm(train_loader, desc=f"{name} | Epoch {epoch+1}/{EPOCHS}", leave=False):
            if batch is None: continue
            imgs, masks = batch['image'].to(DEVICE), batch['mask'].to(DEVICE)
            opt.zero_grad(); out = model(imgs)
            loss = criterion(out, masks)
            loss.backward(); opt.step()
            total_loss += loss.item()

        val_miou = evaluate_miou(model, val_loader, NUM_CLASSES, DEVICE)
        print(f"{name} | Epoch {epoch+1}: Loss={total_loss/len(train_loader):.4f}, Val mIoU={val_miou:.4f}")

        checkpoint_data = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'val_miou': val_miou,
            'loss': total_loss/len(train_loader)
        }

        torch.save(checkpoint_data, save_path_last)

        if val_miou > best_miou:
            best_miou = val_miou
            torch.save(checkpoint_data, save_path_best)
            print(f"âœ… Saved best model for {name} with mIoU={val_miou:.4f}")

    print(f"ðŸŸ© Finished training {name}. Best Val mIoU={best_miou:.4f}")
    print(f"   Best model saved to: {save_path_best}")
    print(f"   Last model saved to: {save_path_last}")
    return save_path_best


In [7]:
def run_cross_evaluation():
    print("\n========================================================")
    print("ðŸ”· Starting Cross-Dataset Evaluation...")
    print("========================================================")

    results = np.zeros((len(DATASETS), len(DATASETS)))
    acc_counts = {train_ds: {test_ds: (0, 0) for test_ds in DATASETS} for train_ds in DATASETS}

    for i, train_ds in enumerate(DATASETS):
        print(f"\nðŸ”· Evaluating model trained on {train_ds}")
        model_path = os.path.join(OUTPUT_DIR, f'{train_ds}_best.pth.tar')
        if not os.path.exists(model_path):
            print(f"   [!] WARNING: Best model not found at {model_path}. Skipping.")
            continue

        model = rf101(NUM_CLASSES)
        ck = torch.load(model_path, map_location=DEVICE)
        model.load_state_dict(ck['model_state_dict'])
        model = model.to(DEVICE)
        model.eval()

        for j, test_ds in enumerate(DATASETS):
            print(f"   Testing on {test_ds}...")
            test_data   = build_split_dataset(test_ds, 'test')
            test_loader = DataLoader(test_data, BATCH_SIZE, False, collate_fn=collate_skip_none)

            miou = evaluate_miou(model, test_loader, NUM_CLASSES, DEVICE)
            results[i, j] = miou
            print(f"   {train_ds} â†’ {test_ds} : mIoU = {miou:.4f}")
            
            correct, total = evaluate_accuracy(model, test_loader, DEVICE)
            acc_counts[train_ds][test_ds] = (correct, total)
            print(f"      (acc = {safe_div(correct, total):.4f})")

    print("\n====================  FINAL  mIoU  MATRIX  ====================")
    print("Rows = trained on, Columns = tested on\n")
    print("        " + "  ".join([f"{d[:10]:>10}" for d in DATASETS]))
    for i, train_ds in enumerate(DATASETS):
        row = "  ".join([f"{results[i,j]:10.4f}" for j in range(len(DATASETS))])
        print(f"{train_ds[:10]:>10}  {row}")

    np.save(os.path.join(OUTPUT_DIR, "cross_dataset_mIoU_5x5.npy"), results)
    print(f"\nMatrix saved to {OUTPUT_DIR}/cross_dataset_mIoU_5x5.npy")

    print("\n====================  PER-MODEL ACCURACIES  ====================")
    print("Format: own_test | other_tests_combined | all_tests_combined\n")

    for train_ds in DATASETS:
        own_correct, own_total = acc_counts[train_ds][train_ds]

        other_correct, other_total = 0, 0
        for test_ds in DATASETS:
            if test_ds == train_ds:
                continue
            c, t = acc_counts[train_ds][test_ds]
            other_correct += c
            other_total   += t

        all_correct = own_correct + other_correct
        all_total   = own_total   + other_total

        own_acc   = safe_div(own_correct,   own_total)
        other_acc = safe_div(other_correct, other_total)
        all_acc   = safe_div(all_correct,   all_total)

        print(f"{train_ds:>12}: {own_acc:.4f} | {other_acc:.4f} | {all_acc:.4f}")

    print("\nâœ… Cross-evaluation complete.")

In [11]:
train_on_dataset('egohands')


ðŸŸ¢ Training on egohands dataset...


                                                                        

egohands | Epoch 1: Loss=0.0538, Val mIoU=0.9013
âœ… Saved best model for egohands with mIoU=0.9013


                                                                        

egohands | Epoch 2: Loss=0.0334, Val mIoU=0.9114
âœ… Saved best model for egohands with mIoU=0.9114


                                                                        

egohands | Epoch 3: Loss=0.0295, Val mIoU=0.9154
âœ… Saved best model for egohands with mIoU=0.9154


                                                                        

egohands | Epoch 4: Loss=0.0275, Val mIoU=0.9163
âœ… Saved best model for egohands with mIoU=0.9163


                                                                        

egohands | Epoch 5: Loss=0.0267, Val mIoU=0.9183
âœ… Saved best model for egohands with mIoU=0.9183


                                                                        

egohands | Epoch 6: Loss=0.0249, Val mIoU=0.9180


                                                                        

egohands | Epoch 7: Loss=0.0240, Val mIoU=0.9175


                                                                        

egohands | Epoch 8: Loss=0.0228, Val mIoU=0.9192
âœ… Saved best model for egohands with mIoU=0.9192


                                                                        

egohands | Epoch 9: Loss=0.0220, Val mIoU=0.9205
âœ… Saved best model for egohands with mIoU=0.9205


                                                                         

egohands | Epoch 10: Loss=0.0211, Val mIoU=0.9221
âœ… Saved best model for egohands with mIoU=0.9221


                                                                         

egohands | Epoch 11: Loss=0.0204, Val mIoU=0.9216


                                                                         

egohands | Epoch 12: Loss=0.0203, Val mIoU=0.9198


                                                                         

egohands | Epoch 13: Loss=0.0198, Val mIoU=0.9216


                                                                         

egohands | Epoch 14: Loss=0.0188, Val mIoU=0.9186


                                                                         

egohands | Epoch 15: Loss=0.0193, Val mIoU=0.9202
ðŸŸ© Finished training egohands. Best Val mIoU=0.9221
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/egohands_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/egohands_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/egohands_best.pth.tar'

In [10]:
#batch size 16

train_on_dataset('ego_youtube_hands')



ðŸŸ¢ Training on ego_youtube_hands dataset...


                                                                               

ego_youtube_hands | Epoch 1: Loss=0.1159, Val mIoU=0.4966
âœ… Saved best model for ego_youtube_hands with mIoU=0.4966


                                                                               

ego_youtube_hands | Epoch 2: Loss=0.0504, Val mIoU=0.7413
âœ… Saved best model for ego_youtube_hands with mIoU=0.7413


                                                                               

ego_youtube_hands | Epoch 3: Loss=0.0337, Val mIoU=0.7712
âœ… Saved best model for ego_youtube_hands with mIoU=0.7712


                                                                               

ego_youtube_hands | Epoch 4: Loss=0.0250, Val mIoU=0.7838
âœ… Saved best model for ego_youtube_hands with mIoU=0.7838


                                                                               

ego_youtube_hands | Epoch 5: Loss=0.0225, Val mIoU=0.7993
âœ… Saved best model for ego_youtube_hands with mIoU=0.7993


                                                                               

ego_youtube_hands | Epoch 6: Loss=0.0188, Val mIoU=0.7993


                                                                               

ego_youtube_hands | Epoch 7: Loss=0.0170, Val mIoU=0.8115
âœ… Saved best model for ego_youtube_hands with mIoU=0.8115


                                                                               

ego_youtube_hands | Epoch 8: Loss=0.0152, Val mIoU=0.8077


                                                                               

ego_youtube_hands | Epoch 9: Loss=0.0144, Val mIoU=0.8113


                                                                                

ego_youtube_hands | Epoch 10: Loss=0.0143, Val mIoU=0.8206
âœ… Saved best model for ego_youtube_hands with mIoU=0.8206


                                                                                

ego_youtube_hands | Epoch 11: Loss=0.0142, Val mIoU=0.8038


                                                                                

ego_youtube_hands | Epoch 12: Loss=0.0133, Val mIoU=0.8111


                                                                                

ego_youtube_hands | Epoch 13: Loss=0.0129, Val mIoU=0.8139


                                                                                

ego_youtube_hands | Epoch 14: Loss=0.0116, Val mIoU=0.8110


                                                                                

ego_youtube_hands | Epoch 15: Loss=0.0109, Val mIoU=0.8180
ðŸŸ© Finished training ego_youtube_hands. Best Val mIoU=0.8206
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_best.pth.tar'

In [26]:
#batch size = 4
train_on_dataset('ego_youtube_hands')



ðŸŸ¢ Training on ego_youtube_hands dataset...


                                                                               

ego_youtube_hands | Epoch 1: Loss=0.0870, Val mIoU=0.7132
âœ… Saved best model for ego_youtube_hands with mIoU=0.7132


                                                                               

ego_youtube_hands | Epoch 2: Loss=0.0383, Val mIoU=0.7717
âœ… Saved best model for ego_youtube_hands with mIoU=0.7717


                                                                               

ego_youtube_hands | Epoch 3: Loss=0.0274, Val mIoU=0.8119
âœ… Saved best model for ego_youtube_hands with mIoU=0.8119


                                                                               

ego_youtube_hands | Epoch 4: Loss=0.0230, Val mIoU=0.8233
âœ… Saved best model for ego_youtube_hands with mIoU=0.8233


                                                                               

ego_youtube_hands | Epoch 5: Loss=0.0203, Val mIoU=0.8376
âœ… Saved best model for ego_youtube_hands with mIoU=0.8376


                                                                               

ego_youtube_hands | Epoch 6: Loss=0.0160, Val mIoU=0.8378
âœ… Saved best model for ego_youtube_hands with mIoU=0.8378


                                                                               

ego_youtube_hands | Epoch 7: Loss=0.0146, Val mIoU=0.8282


                                                                               

ego_youtube_hands | Epoch 8: Loss=0.0139, Val mIoU=0.8405
âœ… Saved best model for ego_youtube_hands with mIoU=0.8405


                                                                               

ego_youtube_hands | Epoch 9: Loss=0.0128, Val mIoU=0.8438
âœ… Saved best model for ego_youtube_hands with mIoU=0.8438


                                                                                

ego_youtube_hands | Epoch 10: Loss=0.0135, Val mIoU=0.8173


                                                                                

ego_youtube_hands | Epoch 11: Loss=0.0133, Val mIoU=0.8295


                                                                                

ego_youtube_hands | Epoch 12: Loss=0.0111, Val mIoU=0.8394


                                                                                

ego_youtube_hands | Epoch 13: Loss=0.0112, Val mIoU=0.8412


                                                                                

ego_youtube_hands | Epoch 14: Loss=0.0152, Val mIoU=0.8087


                                                                                

ego_youtube_hands | Epoch 15: Loss=0.0191, Val mIoU=0.8028
ðŸŸ© Finished training ego_youtube_hands. Best Val mIoU=0.8438
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/ego_youtube_hands_best.pth.tar'

In [27]:
#batch size = 4
train_on_dataset('gtea')


ðŸŸ¢ Training on gtea dataset...


                                                                  

gtea | Epoch 1: Loss=0.0832, Val mIoU=0.8571
âœ… Saved best model for gtea with mIoU=0.8571


                                                                  

gtea | Epoch 2: Loss=0.0370, Val mIoU=0.8797
âœ… Saved best model for gtea with mIoU=0.8797


                                                                  

gtea | Epoch 3: Loss=0.0301, Val mIoU=0.8765


                                                                  

gtea | Epoch 4: Loss=0.0297, Val mIoU=0.8593


                                                                  

gtea | Epoch 5: Loss=0.0358, Val mIoU=0.8838
âœ… Saved best model for gtea with mIoU=0.8838


                                                                  

gtea | Epoch 6: Loss=0.0271, Val mIoU=0.8779


                                                                  

gtea | Epoch 7: Loss=0.0246, Val mIoU=0.8728


                                                                  

gtea | Epoch 8: Loss=0.0273, Val mIoU=0.8785


                                                                  

gtea | Epoch 9: Loss=0.0366, Val mIoU=0.8774


                                                                   

gtea | Epoch 10: Loss=0.0271, Val mIoU=0.8898
âœ… Saved best model for gtea with mIoU=0.8898


                                                                   

gtea | Epoch 11: Loss=0.1238, Val mIoU=0.8791


                                                                   

gtea | Epoch 12: Loss=0.0316, Val mIoU=0.8817


                                                                   

gtea | Epoch 13: Loss=0.0272, Val mIoU=0.8881


                                                                   

gtea | Epoch 14: Loss=0.0246, Val mIoU=0.8868


                                                                   

gtea | Epoch 15: Loss=0.0210, Val mIoU=0.8902
âœ… Saved best model for gtea with mIoU=0.8902
ðŸŸ© Finished training gtea. Best Val mIoU=0.8902
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/gtea_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/gtea_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/gtea_best.pth.tar'

In [15]:
#batch size = 16
train_on_dataset('gtea')


ðŸŸ¢ Training on gtea dataset...


                                                                  

gtea | Epoch 1: Loss=0.1308, Val mIoU=0.8408
âœ… Saved best model for gtea with mIoU=0.8408


                                                                  

gtea | Epoch 2: Loss=0.0434, Val mIoU=0.8699
âœ… Saved best model for gtea with mIoU=0.8699


                                                                  

gtea | Epoch 3: Loss=0.0344, Val mIoU=0.8661


                                                                  

gtea | Epoch 4: Loss=0.0289, Val mIoU=0.8832
âœ… Saved best model for gtea with mIoU=0.8832


                                                                  

gtea | Epoch 5: Loss=0.0282, Val mIoU=0.8855
âœ… Saved best model for gtea with mIoU=0.8855


                                                                  

gtea | Epoch 6: Loss=0.0241, Val mIoU=0.8844


                                                                  

gtea | Epoch 7: Loss=0.0227, Val mIoU=0.8859
âœ… Saved best model for gtea with mIoU=0.8859


                                                                  

gtea | Epoch 8: Loss=0.0220, Val mIoU=0.8861
âœ… Saved best model for gtea with mIoU=0.8861


                                                                  

gtea | Epoch 9: Loss=0.0202, Val mIoU=0.8875
âœ… Saved best model for gtea with mIoU=0.8875


                                                                   

gtea | Epoch 10: Loss=0.0208, Val mIoU=0.8879
âœ… Saved best model for gtea with mIoU=0.8879


                                                                   

gtea | Epoch 11: Loss=0.0202, Val mIoU=0.8869


                                                                   

gtea | Epoch 12: Loss=0.0195, Val mIoU=0.8855


                                                                   

gtea | Epoch 13: Loss=0.0186, Val mIoU=0.8879


                                                                   

gtea | Epoch 14: Loss=0.0181, Val mIoU=0.8901
âœ… Saved best model for gtea with mIoU=0.8901


                                                                   

gtea | Epoch 15: Loss=0.0181, Val mIoU=0.8855
ðŸŸ© Finished training gtea. Best Val mIoU=0.8901
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/gtea_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/gtea_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/gtea_best.pth.tar'

In [None]:
#batch size 16
train_on_dataset('hand_over_face')


# ðŸŸ¢ Training on hand_over_face dataset...
                                                                            
# hand_over_face | Epoch 1: Loss=0.2744, Val mIoU=0.4835
# âœ… Saved best model for hand_over_face with mIoU=0.4835
                                                                            
# hand_over_face | Epoch 2: Loss=0.1648, Val mIoU=0.6855
# âœ… Saved best model for hand_over_face with mIoU=0.6855
                                                                            
# hand_over_face | Epoch 3: Loss=0.1087, Val mIoU=0.7124
# âœ… Saved best model for hand_over_face with mIoU=0.7124
                                                                            
# hand_over_face | Epoch 4: Loss=0.0861, Val mIoU=0.7709
# âœ… Saved best model for hand_over_face with mIoU=0.7709
                                                                            
# hand_over_face | Epoch 5: Loss=0.0568, Val mIoU=0.7996
# âœ… Saved best model for hand_over_face with mIoU=0.7996
                                                                            
# hand_over_face | Epoch 6: Loss=0.0433, Val mIoU=0.8270
# âœ… Saved best model for hand_over_face with mIoU=0.8270
                                                                            
# hand_over_face | Epoch 7: Loss=0.0349, Val mIoU=0.8150
                                                                            
# hand_over_face | Epoch 8: Loss=0.0314, Val mIoU=0.8110
                                                                            
# hand_over_face | Epoch 9: Loss=0.0282, Val mIoU=0.8185
                                                                             
# hand_over_face | Epoch 10: Loss=0.0254, Val mIoU=0.8129
                                                                             
# hand_over_face | Epoch 11: Loss=0.0233, Val mIoU=0.8184
                                                                             
# hand_over_face | Epoch 12: Loss=0.0218, Val mIoU=0.8085
                                                                             
# hand_over_face | Epoch 13: Loss=0.0200, Val mIoU=0.8211
                                                                             
# hand_over_face | Epoch 14: Loss=0.0197, Val mIoU=0.8089
                                                                             
# hand_over_face | Epoch 15: Loss=0.0194, Val mIoU=0.8129
# ðŸŸ© Finished training hand_over_face. Best Val mIoU=0.8270
#    Best model saved to: /teamspace/studios/this_studio/models_checkpoints/hand_over_face_best.pth.tar
#    Last model saved to: /teamspace/studios/this_studio/models_checkpoints/hand_over_face_last.pth.tar


In [28]:
#batch size 4

train_on_dataset('hand_over_face')



ðŸŸ¢ Training on hand_over_face dataset...


                                                                            

hand_over_face | Epoch 1: Loss=0.2267, Val mIoU=0.6070
âœ… Saved best model for hand_over_face with mIoU=0.6070


                                                                            

hand_over_face | Epoch 2: Loss=0.1189, Val mIoU=0.6745
âœ… Saved best model for hand_over_face with mIoU=0.6745


                                                                            

hand_over_face | Epoch 3: Loss=0.0749, Val mIoU=0.7869
âœ… Saved best model for hand_over_face with mIoU=0.7869


                                                                            

hand_over_face | Epoch 4: Loss=0.0577, Val mIoU=0.7791


                                                                            

hand_over_face | Epoch 5: Loss=0.0493, Val mIoU=0.7603


                                                                            

hand_over_face | Epoch 6: Loss=0.0385, Val mIoU=0.8099
âœ… Saved best model for hand_over_face with mIoU=0.8099


                                                                            

hand_over_face | Epoch 7: Loss=0.0346, Val mIoU=0.8266
âœ… Saved best model for hand_over_face with mIoU=0.8266


                                                                            

hand_over_face | Epoch 8: Loss=0.0286, Val mIoU=0.8230


                                                                            

hand_over_face | Epoch 9: Loss=0.0244, Val mIoU=0.7879


                                                                             

hand_over_face | Epoch 10: Loss=0.0273, Val mIoU=0.7962


                                                                             

hand_over_face | Epoch 11: Loss=0.0353, Val mIoU=0.7851


                                                                             

hand_over_face | Epoch 12: Loss=0.0267, Val mIoU=0.8006


                                                                             

hand_over_face | Epoch 13: Loss=0.0234, Val mIoU=0.7853


                                                                             

hand_over_face | Epoch 14: Loss=0.0245, Val mIoU=0.7393


                                                                             

hand_over_face | Epoch 15: Loss=0.0307, Val mIoU=0.7480
ðŸŸ© Finished training hand_over_face. Best Val mIoU=0.8266
   Best model saved to: /teamspace/studios/this_studio/models_checkpoints/hand_over_face_best.pth.tar
   Last model saved to: /teamspace/studios/this_studio/models_checkpoints/hand_over_face_last.pth.tar


'/teamspace/studios/this_studio/models_checkpoints/hand_over_face_best.pth.tar'

In [17]:
# Modified intersection_and_union function (Code Cell 15.1)
def intersection_and_union_all_metrics(pred, target, num_classes):
    inter = np.zeros(num_classes)
    union = np.zeros(num_classes)
    tp = np.zeros(num_classes) # True Positives
    fp = np.zeros(num_classes) # False Positives
    fn = np.zeros(num_classes) # False Negatives

    for c in range(num_classes):
        p, t = (pred == c), (target == c)
        
        # Intersection and Union for IoU
        inter[c] = np.logical_and(p, t).sum()
        union[c] = np.logical_or(p, t).sum()
        
        # TP, FP, FN for Precision and Recall
        tp[c] = np.logical_and(p, t).sum() # Correctly predicted as class C
        fp[c] = np.logical_and(p, np.logical_not(t)).sum() # Predicted as C, but is not C
        fn[c] = np.logical_and(np.logical_not(p), t).sum() # Not predicted as C, but is C

    return inter, union, tp, fp, fn

# Modified evaluate_miou (Code Cell 15.2)
# Renaming and updating to handle all metrics
def evaluate_metrics(model, dataloader, num_classes, device):
    model.eval()
    inters, unions, tps, fps, fns = (np.zeros(num_classes) for _ in range(5))
    
    with torch.no_grad():
        for batch in tqdm(dataloader, leave=False):
            if batch is None: continue
            imgs, masks = batch['image'].to(device), batch['mask'].to(device)
            # Flatten predictions and ground truth
            preds = torch.argmax(model(imgs), dim=1).cpu().numpy().ravel()
            gts = masks.cpu().numpy().ravel()
            
            inter, union, tp, fp, fn = intersection_and_union_all_metrics(preds, gts, num_classes)
            inters += inter
            unions += union
            tps += tp
            fps += fp
            fns += fn
    
    # Calculate per-class metrics
    ious = np.divide(inters, unions, out=np.zeros_like(inters), where=unions > 0)
    precisions = np.divide(tps, tps + fps, out=np.zeros_like(tps), where=(tps + fps) > 0)
    recalls = np.divide(tps, tps + fns, out=np.zeros_like(tps), where=(tps + fns) > 0)
    
    # Calculate mean metrics (skipping background class c=0 if num_classes > 1)
    # Background class c=0, Hand class c=1
    
    if num_classes > 1:
        # Calculate mean for the foreground class(es), typically 1 to num_classes-1
        miou = float(np.nanmean(ious[1:]))
        mprecision = float(np.nanmean(precisions[1:]))
        mrecall = float(np.nanmean(recalls[1:]))
    else:
        # For single class segmentation, just use the only class (class 0)
        miou = float(np.nanmean(ious))
        mprecision = float(np.nanmean(precisions))
        mrecall = float(np.nanmean(recalls))

    return miou, mprecision, mrecall, tps, fns, fps, unions

# Helper function to find mask extension for the dataset (Code Cell 15.3)
def get_mask_ext(dataset_name):
    # Based on your note: 'gtea masks are jpg, others are png'
    return '.jpg' if dataset_name == 'gtea' else '.png'


# Updated HandSegmentationDataset for correct mask extension (Code Cell 15.4)
class HandSegmentationDataset(Dataset):
    def __init__(self, img_root, mask_root, split='train', mask_ext='.jpg'):
        self.img_dir = os.path.join(img_root, split)
        self.mask_dir = os.path.join(mask_root, split)
        self.files = [f for f in os.listdir(self.img_dir) if f.lower().endswith(('jpg', 'png', 'jpeg'))]
        self.mask_ext = mask_ext

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_file_base = self.files[idx].rsplit('.',1)[0]
        
        img_path = os.path.join(self.img_dir, self.files[idx])
        mask_path = os.path.join(self.mask_dir, img_file_base + self.mask_ext)
        
        try:
            image = np.array(Image.open(img_path).convert('RGB').resize(IMG_SIZE, Image.BILINEAR)).astype(np.float32)/255.
            mask = np.array(Image.open(mask_path).convert('L').resize(IMG_SIZE, Image.NEAREST))
            mask = (mask>0).astype(np.int64)
            norm = (image - IMG_MEAN)/IMG_STD
            img_tensor = torch.tensor(norm.transpose(2,0,1)).float()
            mask_tensor = torch.tensor(mask).long()
            return {'image': img_tensor, 'mask': mask_tensor}
        except FileNotFoundError:
            # Handle cases where a corresponding mask file might be missing
            # Should not happen in a clean dataset, but good for robustness
            print(f"File not found: {mask_path} (Img: {img_path}). Skipping.")
            return None


# Updated build_split_dataset (Code Cell 15.5)
def build_split_dataset(name, split):
    if name != 'combined4':
        img_dir  = os.path.join(HAND_SPLIT_BASE, name, 'images_splitted')
        mask_dir = os.path.join(HAND_SPLIT_BASE, name, 'masks_splitted')
        mask_ext = get_mask_ext(name) # Get correct extension
        return HandSegmentationDataset(img_dir, mask_dir, split, mask_ext=mask_ext)
    else:
        parts = []
        for d in BASE_DATASETS:
            img_dir  = os.path.join(HAND_SPLIT_BASE, d, 'images_splitted')
            mask_dir = os.path.join(HAND_SPLIT_BASE, d, 'masks_splitted')
            mask_ext = get_mask_ext(d) # Get correct extension
            parts.append(HandSegmentationDataset(img_dir, mask_dir, split, mask_ext=mask_ext))
        return ConcatDataset(parts)

In [18]:
# Cell: New Cross-Evaluation Function
import pandas as pd

# Only consider models trained on individual datasets
train_datasets = BASE_DATASETS  # ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face']
test_datasets = DATASETS       # ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face', 'combined4']

def run_new_cross_evaluation():
    print("\n=========================================================================")
    print("ðŸ”· Starting Cross-Dataset Evaluation (mIoU, mRecall, mPrecision)...")
    print("=========================================================================")

    all_results = []
    
    for i, train_ds in enumerate(train_datasets):
        print(f"\nðŸ”· Evaluating model trained on {train_ds}")
        model_path = os.path.join(OUTPUT_DIR, f'{train_ds}_best.pth.tar')
        
        if not os.path.exists(model_path):
            print(f"   [!] WARNING: Best model not found at {model_path}. Skipping.")
            continue

        # Load model and weights
        model = rf101(NUM_CLASSES)
        ck = torch.load(model_path, map_location=DEVICE)
        model.load_state_dict(ck['model_state_dict'])
        model = model.to(DEVICE)
        model.eval()

        # Initialize per-model combined test set totals
        model_all_test_tps, model_all_test_fns, model_all_test_fps = np.zeros(NUM_CLASSES), np.zeros(NUM_CLASSES), np.zeros(NUM_CLASSES)
        model_all_test_unions = np.zeros(NUM_CLASSES)

        for j, test_ds in enumerate(BASE_DATASETS): # Iterate over individual test sets for aggregation
            print(f"   Testing on {test_ds}...")
            test_data   = build_split_dataset(test_ds, 'test')
            test_loader = DataLoader(test_data, BATCH_SIZE, False, collate_fn=collate_skip_none)

            miou, mprecision, mrecall, tps, fns, fps, unions = evaluate_metrics(model, test_loader, NUM_CLASSES, DEVICE)
            
            # Aggregate for the final "combined4" result
            model_all_test_tps += tps
            model_all_test_fns += fns
            model_all_test_fps += fps
            model_all_test_unions += unions
            
            print(f"   {train_ds} â†’ {test_ds} : mIoU={miou:.4f}, mRecall={mrecall:.4f}, mPrecision={mprecision:.4f}")
            
            # Store results for individual test sets
            all_results.append({
                'Trained On': train_ds,
                'Tested On': test_ds,
                'mIoU': miou,
                'mRecall': mrecall,
                'mPrecision': mprecision
            })

        # --- Calculate Combined Testset Results ---
        print(f"   Testing on combined testset (All 4)...")
        
        # Calculate combined metrics from aggregated totals (c=1, the hand class)
        tps_hand = model_all_test_tps[1]
        fns_hand = model_all_test_fns[1]
        fps_hand = model_all_test_fps[1]
        unions_hand = model_all_test_unions[1]
        inters_hand = tps_hand
        
        # Calculate metrics (only for hand class, as per original logic's miou[1:])
        combined_miou = safe_div(inters_hand, unions_hand)
        combined_mprecision = safe_div(tps_hand, tps_hand + fps_hand)
        combined_mrecall = safe_div(tps_hand, tps_hand + fns_hand)
        
        print(f"   {train_ds} â†’ combined4 : mIoU={combined_miou:.4f}, mRecall={combined_mrecall:.4f}, mPrecision={combined_mprecision:.4f}")
        
        # Store results for 'combined4' test set
        all_results.append({
            'Trained On': train_ds,
            'Tested On': 'combined4',
            'mIoU': combined_miou,
            'mRecall': combined_mrecall,
            'mPrecision': combined_mprecision
        })


    # --- Save and Print Final Matrix ---
    df = pd.DataFrame(all_results)
    
    # Pivot the table to create the cross-evaluation matrix for printing and select order
    miou_matrix = df.pivot(index='Trained On', columns='Tested On', values='mIoU')[test_datasets]
    recall_matrix = df.pivot(index='Trained On', columns='Tested On', values='mRecall')[test_datasets]
    precision_matrix = df.pivot(index='Trained On', columns='Tested On', values='mPrecision')[test_datasets]
    
    # Save to CSV
    csv_path = os.path.join(OUTPUT_DIR, "cross_dataset_metrics_4x5.csv")
    df.to_csv(csv_path, index=False, float_format='%.4f')
    print(f"\nâœ… All results saved to: {csv_path}")

    print("\n========================= FINAL mIoU MATRIX =========================")
    print("Rows = Trained on, Columns = Tested on\n")
    print(miou_matrix.to_string(float_format='%.4f'))
    
    print("\n========================= FINAL mRecall MATRIX =========================")
    print("Rows = Trained on, Columns = Tested on\n")
    print(recall_matrix.to_string(float_format='%.4f'))
    
    print("\n========================= FINAL mPrecision MATRIX =========================")
    print("Rows = Trained on, Columns = Tested on\n")
    print(precision_matrix.to_string(float_format='%.4f'))
    
    print("\nâœ… Cross-evaluation complete.")

In [29]:
# Cell: Run New Cross Evaluation
run_new_cross_evaluation()


ðŸ”· Starting Cross-Dataset Evaluation (mIoU, mRecall, mPrecision)...

ðŸ”· Evaluating model trained on egohands


   Testing on egohands...


                                                 

   egohands â†’ egohands : mIoU=0.8603, mRecall=0.9156, mPrecision=0.9343
   Testing on ego_youtube_hands...


                                               

   egohands â†’ ego_youtube_hands : mIoU=0.2190, mRecall=0.4496, mPrecision=0.2993
   Testing on gtea...


                                               

   egohands â†’ gtea : mIoU=0.7342, mRecall=0.8350, mPrecision=0.8589
   Testing on hand_over_face...


                                               

   egohands â†’ hand_over_face : mIoU=0.4149, mRecall=0.7272, mPrecision=0.4914
   Testing on combined testset (All 4)...
   egohands â†’ combined4 : mIoU=0.7580, mRecall=0.8733, mPrecision=0.8517

ðŸ”· Evaluating model trained on ego_youtube_hands
   Testing on egohands...


                                                 

   ego_youtube_hands â†’ egohands : mIoU=0.2471, mRecall=0.2634, mPrecision=0.7999
   Testing on ego_youtube_hands...


                                               

   ego_youtube_hands â†’ ego_youtube_hands : mIoU=0.6356, mRecall=0.6974, mPrecision=0.8776
   Testing on gtea...


                                               

   ego_youtube_hands â†’ gtea : mIoU=0.1292, mRecall=0.1313, mPrecision=0.8906
   Testing on hand_over_face...


                                               

   ego_youtube_hands â†’ hand_over_face : mIoU=0.1217, mRecall=0.1322, mPrecision=0.6047
   Testing on combined testset (All 4)...
   ego_youtube_hands â†’ combined4 : mIoU=0.2346, mRecall=0.2484, mPrecision=0.8076

ðŸ”· Evaluating model trained on gtea
   Testing on egohands...


                                                 

   gtea â†’ egohands : mIoU=0.3104, mRecall=0.4315, mPrecision=0.5251
   Testing on ego_youtube_hands...


                                               

   gtea â†’ ego_youtube_hands : mIoU=0.0377, mRecall=0.0665, mPrecision=0.0800
   Testing on gtea...


                                               

   gtea â†’ gtea : mIoU=0.7824, mRecall=0.8339, mPrecision=0.9269
   Testing on hand_over_face...


                                               

   gtea â†’ hand_over_face : mIoU=0.0680, mRecall=0.1523, mPrecision=0.1094
   Testing on combined testset (All 4)...
   gtea â†’ combined4 : mIoU=0.3416, mRecall=0.4755, mPrecision=0.5482

ðŸ”· Evaluating model trained on hand_over_face
   Testing on egohands...


                                                 

   hand_over_face â†’ egohands : mIoU=0.4383, mRecall=0.6357, mPrecision=0.5853
   Testing on ego_youtube_hands...


                                               

   hand_over_face â†’ ego_youtube_hands : mIoU=0.2618, mRecall=0.3765, mPrecision=0.4620
   Testing on gtea...


                                               

   hand_over_face â†’ gtea : mIoU=0.4100, mRecall=0.4387, mPrecision=0.8623
   Testing on hand_over_face...


                                               

   hand_over_face â†’ hand_over_face : mIoU=0.7216, mRecall=0.8410, mPrecision=0.8357
   Testing on combined testset (All 4)...
   hand_over_face â†’ combined4 : mIoU=0.4406, mRecall=0.6016, mPrecision=0.6221

âœ… All results saved to: /teamspace/studios/this_studio/models_checkpoints/cross_dataset_metrics_4x5.csv

Rows = Trained on, Columns = Tested on

Tested On           gtea
Trained On              
ego_youtube_hands 0.1292
egohands          0.7342
gtea              0.7824
hand_over_face    0.4100

Rows = Trained on, Columns = Tested on

Tested On           gtea
Trained On              
ego_youtube_hands 0.1313
egohands          0.8350
gtea              0.8339
hand_over_face    0.4387

Rows = Trained on, Columns = Tested on

Tested On           gtea
Trained On              
ego_youtube_hands 0.8906
egohands          0.8589
gtea              0.9269
hand_over_face    0.8623

âœ… Cross-evaluation complete.




In [23]:
import pandas as pd
import numpy as np

def safe_format(value):
    """Formats a float to 4 decimal places or returns '-' if NaN."""
    if pd.isna(value) or value is None:
        return '-'
    return f"{value:.4f}"

def generate_combined_table(csv_path="/teamspace/studios/this_studio/models_checkpoints/cross_dataset_metrics_4x5.csv"):
    """
    Reads the cross-evaluation CSV, pivots the data for each metric, 
    and combines mIoU, mRecall, and mPrecision into a single formatted table,
    with explicit metric headings in the column names.
    """
    print(f"Reading data from: {csv_path}")
    
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_path}. Please ensure the file exists.")
        return

    # 1. Define the desired order for rows and columns
    train_datasets_order = ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face']
    test_datasets_order = ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face', 'combined4']
    
    # 2. Pivot the DataFrame for each metric
    miou_pivot = df.pivot(index='Trained On', columns='Tested On', values='mIoU').fillna(np.nan)
    mrecall_pivot = df.pivot(index='Trained On', columns='Tested On', values='mRecall').fillna(np.nan)
    mprecision_pivot = df.pivot(index='Trained On', columns='Tested On', values='mPrecision').fillna(np.nan)

    # 3. Reindex the pivoted tables to enforce the desired order
    try:
        miou_pivot = miou_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
        mrecall_pivot = mrecall_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
        mprecision_pivot = mprecision_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
    except KeyError as e:
        print(f"Warning: Dataset names in CSV do not match expected names. Missing key: {e}. Using existing order.")
        pass
        
    # 4. Create the final combined DataFrame with formatted column headers
    
    # New column headers: Dataset Name \n (mIoU/mRec/mPre)
    new_columns = [f"{ds}\n(mIoU/mRec/mPre)" for ds in test_datasets_order]
    
    combined_table = pd.DataFrame(index=train_datasets_order, columns=new_columns)

    # Combine the metrics (mIoU / mRecall / mPrecision) into a single string for each cell
    for i, train_ds in enumerate(train_datasets_order):
        for j, test_ds in enumerate(test_datasets_order):
            miou = miou_pivot.loc[train_ds, test_ds]
            mrecall = mrecall_pivot.loc[train_ds, test_ds]
            mprecision = mprecision_pivot.loc[train_ds, test_ds]
            
            # Format the output string: mIoU / mRecall / mPrecision
            cell_content = (
                f"{safe_format(miou)} / "
                f"{safe_format(mrecall)} / "
                f"{safe_format(mprecision)}"
            )
            # Use iloc to set values now that column names are complex strings
            combined_table.iloc[i, j] = cell_content

    print("\n=========================================================================================")
    print("Combined Cross-Evaluation Metrics Table")
    print("Rows = Trained on, Columns = Tested on")
    print("=========================================================================================")
    # Using line_width=None helps pandas format the multi-line headers cleanly
    print(combined_table.to_string(line_width=None))
    print("=========================================================================================")

# Execute the function
generate_combined_table()

Reading data from: /teamspace/studios/this_studio/models_checkpoints/cross_dataset_metrics_4x5.csv

Combined Cross-Evaluation Metrics Table
Rows = Trained on, Columns = Tested on
                  egohands\n(mIoU/mRec/mPre) ego_youtube_hands\n(mIoU/mRec/mPre)    gtea\n(mIoU/mRec/mPre) hand_over_face\n(mIoU/mRec/mPre) combined4\n(mIoU/mRec/mPre)
egohands            0.8603 / 0.9156 / 0.9343            0.2190 / 0.4496 / 0.2993  0.7342 / 0.8350 / 0.8589         0.4149 / 0.7272 / 0.4914    0.7580 / 0.8733 / 0.8517
ego_youtube_hands   0.3976 / 0.4510 / 0.7704            0.6033 / 0.7057 / 0.8061  0.2970 / 0.3047 / 0.9208         0.2322 / 0.2729 / 0.6091    0.3794 / 0.4243 / 0.7819
gtea                0.4139 / 0.5678 / 0.6043            0.1632 / 0.1973 / 0.4861  0.7909 / 0.8279 / 0.9466         0.1182 / 0.1437 / 0.3991    0.4451 / 0.5779 / 0.6595
hand_over_face      0.4484 / 0.5757 / 0.6698            0.2616 / 0.3293 / 0.5600  0.5386 / 0.5771 / 0.8898         0.6960 / 0.8068 / 0.8351    0.4682

In [30]:
import pandas as pd
import numpy as np

def safe_format(value):
    """Formats a float to 4 decimal places or returns '-' if NaN."""
    if pd.isna(value) or value is None:
        return '-'
    return f"{value:.4f}"

def generate_combined_table(csv_path="/teamspace/studios/this_studio/models_checkpoints/cross_dataset_metrics_4x5.csv"):
    """
    Reads the cross-evaluation CSV, pivots the data for each metric, 
    and combines mIoU, mRecall, and mPrecision into a single formatted table,
    with explicit metric headings in the column names.
    """
    print(f"Reading data from: {csv_path}")
    
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_path}. Please ensure the file exists.")
        return

    # 1. Define the desired order for rows and columns
    train_datasets_order = ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face']
    test_datasets_order = ['egohands', 'ego_youtube_hands', 'gtea', 'hand_over_face', 'combined4']
    
    # 2. Pivot the DataFrame for each metric
    miou_pivot = df.pivot(index='Trained On', columns='Tested On', values='mIoU').fillna(np.nan)
    mrecall_pivot = df.pivot(index='Trained On', columns='Tested On', values='mRecall').fillna(np.nan)
    mprecision_pivot = df.pivot(index='Trained On', columns='Tested On', values='mPrecision').fillna(np.nan)

    # 3. Reindex the pivoted tables to enforce the desired order
    try:
        miou_pivot = miou_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
        mrecall_pivot = mrecall_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
        mprecision_pivot = mprecision_pivot.reindex(index=train_datasets_order, columns=test_datasets_order)
    except KeyError as e:
        print(f"Warning: Dataset names in CSV do not match expected names. Missing key: {e}. Using existing order.")
        pass
        
    # 4. Create the final combined DataFrame with formatted column headers
    
    # New column headers: Dataset Name \n (mIoU/mRec/mPre)
    new_columns = [f"{ds}\n(mIoU/mRec/mPre)" for ds in test_datasets_order]
    
    combined_table = pd.DataFrame(index=train_datasets_order, columns=new_columns)

    # Combine the metrics (mIoU / mRecall / mPrecision) into a single string for each cell
    for i, train_ds in enumerate(train_datasets_order):
        for j, test_ds in enumerate(test_datasets_order):
            miou = miou_pivot.loc[train_ds, test_ds]
            mrecall = mrecall_pivot.loc[train_ds, test_ds]
            mprecision = mprecision_pivot.loc[train_ds, test_ds]
            
            # Format the output string: mIoU / mRecall / mPrecision
            cell_content = (
                f"{safe_format(miou)} / "
                f"{safe_format(mrecall)} / "
                f"{safe_format(mprecision)}"
            )
            # Use iloc to set values now that column names are complex strings
            combined_table.iloc[i, j] = cell_content

    print("\n=========================================================================================")
    print("Combined Cross-Evaluation Metrics Table")
    print("Rows = Trained on, Columns = Tested on")
    print("=========================================================================================")
    # Using line_width=None helps pandas format the multi-line headers cleanly
    print(combined_table.to_string(line_width=None))
    print("=========================================================================================")

# Execute the function
generate_combined_table()

Reading data from: /teamspace/studios/this_studio/models_checkpoints/cross_dataset_metrics_4x5.csv

Combined Cross-Evaluation Metrics Table
Rows = Trained on, Columns = Tested on
                  egohands\n(mIoU/mRec/mPre) ego_youtube_hands\n(mIoU/mRec/mPre)    gtea\n(mIoU/mRec/mPre) hand_over_face\n(mIoU/mRec/mPre) combined4\n(mIoU/mRec/mPre)
egohands            0.8603 / 0.9156 / 0.9343            0.2190 / 0.4496 / 0.2993  0.7342 / 0.8350 / 0.8589         0.4149 / 0.7272 / 0.4914    0.7580 / 0.8733 / 0.8517
ego_youtube_hands   0.2471 / 0.2634 / 0.7999            0.6356 / 0.6974 / 0.8776  0.1292 / 0.1313 / 0.8906         0.1217 / 0.1322 / 0.6047    0.2346 / 0.2484 / 0.8076
gtea                0.3104 / 0.4315 / 0.5251            0.0377 / 0.0665 / 0.0800  0.7824 / 0.8339 / 0.9269         0.0680 / 0.1523 / 0.1094    0.3416 / 0.4755 / 0.5482
hand_over_face      0.4383 / 0.6357 / 0.5853            0.2618 / 0.3765 / 0.4620  0.4100 / 0.4387 / 0.8623         0.7216 / 0.8410 / 0.8357    0.4406