# Mask R-CNN Training Notebook
This notebook prepares the dataset, trains Mask R-CNN, and shows inference examples.
Ensure that all requirements have been installed before running the cells below.


In [15]:
# Paths and Import Setup
import os
from dataset import MaskRCNNDataset
from torch.utils.data import DataLoader # type: ignore
import torch # type: ignore
import torchvision # type: ignore
from utils import simple_map_evaluation
# Dataset locations, assembled from path components so the separator is right
# on every OS. Backslash-separated literals must not be used here: in a normal
# (non-raw) string "\a", "\t" and "\v" are decoded as control characters, which
# silently corrupts the path.
images_dir = os.path.join("data", "archive", "car", "train", "images")
inst_dir = os.path.join("data", "archive", "car", "train", "labels")
fold = 0
fold_dir = os.path.join("data", "archive", f"fold_{fold}")
# NOTE(review): these two paths are opened as text files below (one image file
# name per line), yet their components look like dataset directories nested
# under the fold directory. Confirm the real list-file names for this project.
train_list = os.path.join(fold_dir, "data", "archive", "car", "train")
val_list = os.path.join(fold_dir, "data", "archive", "car", "valid")

# Read the training split: one image file name per non-blank line.
with open(train_list) as f:
    train_names = [line.strip() for line in f if line.strip()]
train_img_paths = [os.path.join(images_dir, n) for n in train_names]
# Each instance-label file shares its image's stem and uses a ".png" extension.
train_inst_paths = [os.path.join(inst_dir, os.path.splitext(n)[0]+".png") for n in train_names]

# Read the validation split the same way.
# NOTE(review): validation paths are resolved against the *train* images and
# labels directories, exactly as before — confirm this is intended.
with open(val_list) as f:
    val_names = [line.strip() for line in f if line.strip()]
val_img_paths = [os.path.join(images_dir, n) for n in val_names]
val_inst_paths = [os.path.join(inst_dir, os.path.splitext(n)[0]+".png") for n in val_names]


SyntaxError: invalid syntax (715019419.py, line 4)

In [None]:
# build model
def get_model_instance_segmentation(number_of_classes=2):
    """Build a Mask R-CNN (ResNet-50 FPN) with heads resized to `number_of_classes`.

    The model is created with `pretrained=True`, then its box classifier and
    mask predictor are replaced by freshly initialised heads that output
    `number_of_classes` classes.

    Args:
        number_of_classes: Number of output classes for both replacement heads.
            Defaults to 2 (presumably background + one object class — confirm
            against the dataset's label convention).

    Returns:
        The torchvision Mask R-CNN model with both heads replaced.
    """
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor # type: ignore
    from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor # type: ignore

    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # The existing classifier layer is an nn.Linear; its input width is exposed
    # as `in_features` (there is no `input_features` attribute).
    input_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(input_features, number_of_classes)

    # Reuse the existing mask head's input channel count; 256 is passed as the
    # second argument (the hidden-layer width) of the replacement predictor.
    input_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(input_features_mask, 256, number_of_classes)
    return model

# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the model with its default class count and move it onto that device.
model = get_model_instance_segmentation()
model.to(device)


In [9]:
# DataLoader is a class inside torch.utils.data, not a submodule, so it has to
# be imported with the `from ... import ...` form.
from torch.utils.data import DataLoader # type: ignore

train_ds = MaskRCNNDataset(train_img_paths, train_inst_paths)
# The keyword is `collate_fn`. Each sample is an (image, target) pair, so the
# batch is transposed into a tuple of images and a tuple of targets instead of
# being stacked into a single tensor.
train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
# Optimise only the parameters that are still trainable.
optimizer = torch.optim.SGD([p for p in model.parameters() if p.requires_grad], lr=0.005, momentum=0.9, weight_decay=0.0005)

for epoch in range(3):
    model.train()
    epoch_loss = 0.0
    for images, targets in train_loader:
        # Move every image and every tensor of every target dict to the device.
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]
        # Called with targets, the model returns a dict of losses; the training
        # objective is their sum.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        epoch_loss += losses.item()
    # Report the mean per-batch loss for this epoch.
    print("Epoch", epoch, "loss", epoch_loss / len(train_loader))


NameError: name 'torch' is not defined

In [None]:
# Evaluate on the validation split: one sample per batch, in list order.
from utils import simple_map_evaluation

val_ds = MaskRCNNDataset(val_img_paths, val_inst_paths)
val_loader = DataLoader(
    val_ds,
    batch_size=1,
    shuffle=False,
    # Transpose the batch into (images, targets) tuples rather than stacking.
    collate_fn=lambda batch: tuple(zip(*batch)),
)

f1 = simple_map_evaluation(model, val_loader, device)
print("Quick F1-like score:", f1)


In [None]:
import matplotlib.pyplot as plt # type: ignore
import torchvision.transforms as T # type: ignore
from PIL import Image # type: ignore
# Run the model on the first training image and show it beside the first
# returned mask.
img = Image.open(train_img_paths[0]).convert("RGB")
x = T.ToTensor()(img).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    # list(x) splits the leading batch dimension into a one-image list.
    out = model(list(x))[0]

masks = out.get("masks")
if masks is None or masks.shape[0] == 0:
    print("No masks predicted")
else:
    m = masks[0,0].cpu().numpy()
    plt.figure(figsize=(10,5))
    # Left panel: the input image; right panel: the mask in grayscale.
    for position, (picture, colormap) in enumerate([(img, None), (m, 'gray')], start=1):
        plt.subplot(1,2,position)
        plt.imshow(picture, cmap=colormap)
        plt.axis('off')
