In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split

import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from PIL import Image, ImageDraw
from torchvision import transforms

from dataloader.dataloader import DTSegmentationDataset
from model.model import DTSegmentationNetwork

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reload imported modules automatically before each cell is executed, so that
# edits to the local `dataloader` / `model` packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

### Load the dataset

In [3]:
dataset = DTSegmentationDataset()

# Split the dataset into train and validation subsets (80% / 20%).
# The split is driven by a seeded generator so the partition is identical on
# every run. Without it, restarting the kernel and jumping to the
# "Prediction test" section would evaluate the saved model on a `val` subset
# that may contain samples it was trained on.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # number of held-out (validation) samples
train, val = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(f"Train size: {len(train)}, val size: {len(val)}")

Train size: 100, val size: 25


### Overfit Test

In [None]:
# Hyperparameters handed to the network for the overfit sanity check.
overfit_hparams = dict(
    # Model
    num_classes=5,
    # Optimization
    learning_rate=0.0625,
    weight_decay=6.25e-7,
    lr_decay=0.25,
    # Dataloader (hardware-specific); note that the loaders built further down
    # in this cell use batch_size=1 regardless of these values.
    batch_size=12,
    num_workers=2,
)

model = DTSegmentationNetwork(overfit_hparams)

# Early stopping on the 'loss' metric; its patience matches max_epochs below.
early_stop_overfit_callback = EarlyStopping(
    monitor='loss',
    mode='min',
    min_delta=0.0005,
    patience=150,
    verbose=True,
)

# overfit_batches=1 asks Lightning to train on a single batch for the whole run.
trainer = pl.Trainer(
    max_epochs=150,
    overfit_batches=1,
    log_every_n_steps=1,
    callbacks=[early_stop_overfit_callback],
)

# One sample per batch, fixed order.
overfit_train_loader = DataLoader(train, shuffle=False, batch_size=1)
overfit_val_loader = DataLoader(val, shuffle=False, batch_size=1)

trainer.fit(model, overfit_train_loader, overfit_val_loader)

In [23]:
# visualize the overfitted results
img, target = train[0]
print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
pil_transform(DTSegmentationDataset.label_img_to_rgb(target)).show(title="ground truth")
pil_transform(DTSegmentationDataset.label_img_to_rgb(result[0])).show(title="prediction")

Target shape: torch.Size([480, 640]), value range: 0-3
Result shape: torch.Size([1, 480, 640]), value range: 0-3


### Training the model

In [None]:
hparams = {
    # --- Model ---
    # | Model hyperparameters
    'num_classes': 5,
    # | Optimization hyperparameters
    "learning_rate": 0.0625,
    "weight_decay": 0.000000625,
    "lr_decay": 0.25,
    
    # --- Dataloader (Hardware-specific) ---
    "batch_size": 12,
    "num_workers": 2,
}

model = DTSegmentationNetwork(hparams)

# Training procedure: stop once 'val_loss' has not improved by at least
# min_delta for `patience` consecutive checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=7,
    min_delta=0.01,
    verbose=True,
    mode='min'
)

trainer = pl.Trainer(
    max_epochs=100,
    log_every_n_steps=1,
    # For new MacBooks
    # accelerator="mps",
    # devices=1,
    callbacks=[early_stop_callback]
)

# Shuffle the training samples every epoch; without it each epoch would see
# exactly the same batches in exactly the same order. Validation keeps a fixed
# order so its metric is comparable between epochs.
train_dataloader = DataLoader(
    train,
    batch_size=hparams['batch_size'],
    num_workers=hparams['num_workers'],
    shuffle=True,
)
val_dataloader = DataLoader(val, batch_size=hparams['batch_size'], num_workers=hparams['num_workers'])

trainer.fit(model, train_dataloader, val_dataloader)

# Persist the trained network; the "Prediction test" section reloads this file.
model.save("model.pt")

### Prediction test

In [4]:
# Reload the network written by the training section.
# map_location="cpu" keeps the load working when the file was produced on an
# accelerator: the cells below feed dataset samples to the model without any
# device transfer, so the weights have to live on the CPU as well.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints you produced yourself. Recent PyTorch releases
# may also require weights_only=False to unpickle a whole module; confirm
# against what `model.save` actually writes.
model = torch.load("model.pt", map_location="cpu")

In [5]:
# visualize the model results
img, target = val[0]
print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
pil_transform(DTSegmentationDataset.label_img_to_rgb(target)).show(title="ground truth")
rgb_prediction = DTSegmentationDataset.label_img_to_rgb(result[0])
pil_prediction = pil_transform(rgb_prediction)
pil_prediction.show(title="prediction")

Target shape: torch.Size([480, 640]), value range: 0-4


[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.


Result shape: torch.Size([1, 480, 640]), value range: 0-4


### Draw benchmarks

In [6]:
# Find the average coordinate of the road (pixels predicted as 'Ego Lane').
# NOTE(review): assumes `rgb_prediction` is an (H, W, 3) array-like and that
# 'rgb_value' is a 3-element colour - confirm against DTSegmentationDataset.
ego_lane_rgb = np.asarray(DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value'])
lane_matches = np.asarray(rgb_prediction) == ego_lane_rgb
if lane_matches.ndim == 3:
    # The comparison above is element-wise per channel. A pixel is part of the
    # lane only when *all* of its channels match; otherwise pixels of other
    # classes that merely share one channel value would skew the average.
    lane_matches = lane_matches.all(axis=-1)
road_mask = np.argwhere(lane_matches)  # one (row, column) pair per lane pixel
if road_mask.size == 0:
    # The mean of an empty selection is NaN, which cannot be drawn.
    raise ValueError("No 'Ego Lane' pixels found in the prediction; cannot compute the road center.")
road_center_y, road_center_x = np.ceil(np.mean(road_mask, axis=0)[:2])

# The overlays are drawn directly onto `pil_prediction`.
drawer = ImageDraw.Draw(pil_prediction)
image_center_x = pil_prediction.width / 2

# Draw two circles (bigger and smaller) centered on road center coordinate
RADIUS = 20
INNER_RADIUS = 5
drawer.ellipse(
    (road_center_x - RADIUS, road_center_y - RADIUS, road_center_x + RADIUS, road_center_y + RADIUS),
    fill='green',
)
drawer.ellipse(
    (
        road_center_x - INNER_RADIUS,
        road_center_y - INNER_RADIUS,
        road_center_x + INNER_RADIUS,
        road_center_y + INNER_RADIUS,
    ),
    fill='orange',
)
# Draw a vertical line on the center of the image
drawer.line((image_center_x, 0, image_center_x, pil_prediction.height), fill='red')
# Draw a horizontal line from the center of the image to the road center
drawer.line((image_center_x, road_center_y, road_center_x, road_center_y), fill='blue')
# Draw two lines going from the top corners of the image to the road center
drawer.line((0, 0, road_center_x, road_center_y), fill='yellow', width=2)
drawer.line((pil_prediction.width, 0, road_center_x, road_center_y), fill='yellow', width=2)

pil_prediction.show()