# Baseline Model

In [1]:
from recognizer.utils. constants import DATA_DIR

IMAGES_DIR = DATA_DIR / "preprocessed-10p" / "images"
MASKS_DIR = DATA_DIR / "preprocessed-10p" / "masks"

In [2]:
import os

image_to_mask_map = dict()

for img_file in os.listdir(IMAGES_DIR):
    image_to_mask_map[img_file] = img_file.replace("image", "mask")


In [3]:
image_filenames = list(image_to_mask_map.keys())
mask_filenames = [image_to_mask_map[image] for image in image_filenames]

In [4]:
from sklearn.model_selection import train_test_split

images_train, images_test, masks_train, masks_test = train_test_split(image_filenames, mask_filenames, test_size=0.1, random_state=180)
images_train, images_val, masks_train, masks_val = train_test_split(images_train, masks_train, test_size=0.1127, random_state=180) # 0.1 * 0.9 = 0.1127

In [5]:
len(images_train), len(images_val), len(images_test)

(408, 52, 52)

In [6]:
import torch

from torchvision.transforms import v2

image_transform = v2.Compose([
    v2.ToTensor()
    # v2.ToImage(),
    # v2.ToDtype(torch.float32, scale=True),
])

mask_transform = v2.Compose([
    v2.ToTensor(),
    v2.ToDtype(torch.float32)
    # v2.ToImage(),
    # v2.ToDtype(torch.float32, scale=True),
])



In [7]:
from recognizer.datasets import SegmentationDataset

train_dataset = SegmentationDataset(
    images_dir=str(IMAGES_DIR),
    masks_dir=str(MASKS_DIR),
    image_files=images_train,
    mask_files=masks_train,
    transform=image_transform,
    target_transform=mask_transform
)

val_dataset = SegmentationDataset(
    images_dir=str(IMAGES_DIR),
    masks_dir=str(MASKS_DIR),
    image_files=images_val,
    mask_files=masks_val,
    transform=image_transform,
    target_transform=mask_transform
)

test_dataset = SegmentationDataset(
    images_dir=str(IMAGES_DIR),
    masks_dir=str(MASKS_DIR),
    image_files=images_test,
    mask_files=masks_test,
    transform=image_transform,
    target_transform=mask_transform
)

In [8]:
from torch.utils.data import DataLoader

BATCH_SIZE = 8

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=4, pin_memory=True)

In [22]:
from recognizer.models.unet import UNet

unet_model = UNet(n_channels=3, n_classes=1)

In [15]:
def dice_coefficient(pred: torch.Tensor, target: torch.Tensor) -> float:
    """
    Compute the Dice coefficient.
    
    Parameters:
    pred (torch.Tensor): Predicted mask, a tensor of shape (N, 1, H, W)
    target (torch.Tensor): Ground truth mask, a tensor of shape (N, 1, H, W)

    Returns:
    float: Dice coefficient
    """
    pred = pred.contiguous()
    target = target.contiguous()

    intersection = (pred * target).sum(dim=2).sum(dim=2)
    pred_sum = pred.sum(dim=2).sum(dim=2)
    target_sum = target.sum(dim=2).sum(dim=2)

    dice = (2. * intersection + 1e-7) / (pred_sum + target_sum + 1e-7)
    
    return dice.mean().item()


def iou(pred: torch.Tensor, target: torch.Tensor) -> float:
    """
    Compute the Intersection over Union (IoU).
    
    Parameters:
    pred (torch.Tensor): Predicted mask, a tensor of shape (N, 1, H, W)
    target (torch.Tensor): Ground truth mask, a tensor of shape (N, 1, H, W)
    smooth (float): A smoothing constant to avoid division by zero

    Returns:
    float: Intersection over Union (IoU)
    """
    pred = pred.contiguous()
    target = target.contiguous()
    
    intersection = (pred * target).sum(dim=2).sum(dim=2)
    union = pred.sum(dim=2).sum(dim=2) + target.sum(dim=2).sum(dim=2) - intersection
    
    iou = (intersection + 1e-7) / (union + 1e-7)
    
    return iou.mean().item()

In [23]:
import lightning as L
from torch import nn

example_input_array = torch.Tensor(512, 3, 216, 384)


class LitSegmenter(L.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.loss_fn = nn.BCEWithLogitsLoss()

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        images, masks = batch
        
        outputs = self.model(images)

        loss = self.loss_fn(outputs, masks)
        return loss

    def validation_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        images, masks = batch

        outputs = self.model(images)

        loss = self.loss_fn(outputs, masks)

        self.log("val_loss", loss)

    def test_step(self, batch, batch_idx):
        images, masks = batch

        outputs = self.model(images)

        loss = self.loss_fn(outputs, masks)
        dice = dice_coefficient(outputs, masks)
        iou_val = iou(outputs, masks)
        self.log_dict({"test_loss": loss, "dice": dice, "iou": iou_val}, prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return optimizer


segmenter = LitSegmenter(model=unet_model)

In [24]:
trainer = L.Trainer(max_epochs=10, check_val_every_n_epoch=2)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [25]:
trainer.fit(model=segmenter, train_dataloaders=train_loader, val_dataloaders=val_loader)


  | Name    | Type              | Params
----------------------------------------------
0 | model   | UNet              | 17.3 M
1 | loss_fn | BCEWithLogitsLoss | 0     
----------------------------------------------
17.3 M    Trainable params
0         Non-trainable params
17.3 M    Total params
69.070    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/facundopalavecino/Library/Caches/pypoetry/virtualenvs/football-players-recognizer-8BLejKao-py3.9/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
/Users/facundopalavecino/Library/Caches/pypoetry/virtualenvs/football-players-recognizer-8BLejKao-py3.9/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [26]:
trainer.test(model=segmenter, dataloaders=test_loader)

/Users/facundopalavecino/Library/Caches/pypoetry/virtualenvs/football-players-recognizer-8BLejKao-py3.9/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          dice             -0.03506287559866905
           iou             -0.017193950712680817
        test_loss           0.11957840621471405
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.11957840621471405,
  'dice': -0.03506287559866905,
  'iou': -0.017193950712680817}]