In [31]:
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split

import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from PIL import Image, ImageDraw
from torchvision import transforms

from dataloader.dataloader import DTSegmentationDataset
from model.model import DTSegmentationNetwork

In [32]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load the dataset

In [33]:
dataset = DTSegmentationDataset()

# Split the dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train, val = random_split(dataset, [train_size, test_size])
print(f"Train size: {len(train)}, val size: {len(val)}")

Train size: 194, val size: 49


### Overfit Test

In [None]:
overfit_hparams = {
    # --- Model ---
    # | Model hyperparameters
    'num_classes': 5,
    # | Optimization hyperparameters
    "learning_rate": 0.0625,
    "weight_decay": 0.000000625,
    "lr_decay": 0.25,
    
    # --- Dataloader (Hardware-specific) ---
    "batch_size": 12,
    "num_workers": 2,
}

model = DTSegmentationNetwork(overfit_hparams)

# Overfit for testing
early_stop_overfit_callback = EarlyStopping(
    monitor='loss',
    patience=150,
    min_delta=0.0005,
    verbose=True,
    mode='min'
)

trainer = pl.Trainer(
    log_every_n_steps=1,
    max_epochs=150,
    overfit_batches=1,
    callbacks=[early_stop_overfit_callback]
)

# image, target = train[0]
# # print(f'image: {image.shape}, target: {target.shape}')
# result = torch.argmax(target, dim=0)
# print(f'image: {image.shape}, result: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}')

trainer.fit(model, DataLoader(train, shuffle=False, batch_size=1), DataLoader(val, shuffle=False, batch_size=1))

In [23]:
# visualize the overfitted results
img, target = train[0]
print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
pil_transform(DTSegmentationDataset.label_img_to_rgb(target)).show(title="ground truth")
pil_transform(DTSegmentationDataset.label_img_to_rgb(result[0])).show(title="prediction")

Target shape: torch.Size([480, 640]), value range: 0-3
Result shape: torch.Size([1, 480, 640]), value range: 0-3


### Training the model

In [34]:
hparams = {
    # --- Model ---
    # | Model hyperparameters
    'num_classes': 5,
    # | Optimization hyperparameters
    "learning_rate": 0.0625,
    "weight_decay": 0.000000625,
    "lr_decay": 0.25,
    
    # --- Dataloader (Hardware-specific) ---
    "batch_size": 16,
    "num_workers": 4,
}

model = DTSegmentationNetwork(hparams)

# Training procedure
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=7,
    min_delta=0.01,
    verbose=True,
    mode='min'
)

trainer = pl.Trainer(
    max_epochs=100,
    log_every_n_steps=1,
    # For new MacBooks
    # accelerator="mps",
    # devices=1,
    callbacks=[early_stop_callback]
)

train_dataloader = DataLoader(train, batch_size=hparams['batch_size'], num_workers=hparams['num_workers'], shuffle=True)
val_dataloader = DataLoader(val, batch_size=hparams['batch_size'], num_workers=hparams['num_workers'])

trainer.fit(model, train_dataloader, val_dataloader)

model.save("model.pt")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type   | Params
---------------------------------
0 | model | LRASPP | 3.2 M 
---------------------------------
246 K     Trainable params
3.0 M     Non-trainable params
3.2 M     Total params
12.875    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 17/17 [00:57<00:00,  3.40s/it, loss=0.708, v_num=0]

Metric val_loss improved. New best score: 1.087


Epoch 1: 100%|██████████| 17/17 [01:18<00:00,  4.61s/it, loss=0.498, v_num=0]

Metric val_loss improved by 0.175 >= min_delta = 0.01. New best score: 0.912


Epoch 2: 100%|██████████| 17/17 [01:15<00:00,  4.44s/it, loss=0.394, v_num=0]

Metric val_loss improved by 0.227 >= min_delta = 0.01. New best score: 0.686


Epoch 3: 100%|██████████| 17/17 [01:15<00:00,  4.42s/it, loss=0.354, v_num=0]

Metric val_loss improved by 0.160 >= min_delta = 0.01. New best score: 0.525


Epoch 6: 100%|██████████| 17/17 [01:16<00:00,  4.49s/it, loss=0.331, v_num=0]

Metric val_loss improved by 0.093 >= min_delta = 0.01. New best score: 0.432


Epoch 8: 100%|██████████| 17/17 [01:15<00:00,  4.45s/it, loss=0.319, v_num=0]

Metric val_loss improved by 0.051 >= min_delta = 0.01. New best score: 0.381


Epoch 11: 100%|██████████| 17/17 [01:15<00:00,  4.45s/it, loss=0.282, v_num=0]

Metric val_loss improved by 0.046 >= min_delta = 0.01. New best score: 0.335


Epoch 12: 100%|██████████| 17/17 [01:16<00:00,  4.47s/it, loss=0.307, v_num=0]

Metric val_loss improved by 0.031 >= min_delta = 0.01. New best score: 0.304


Epoch 16: 100%|██████████| 17/17 [01:15<00:00,  4.43s/it, loss=0.262, v_num=0]Epoch 00017: reducing learning rate of group 0 to 1.5625e-02.
Epoch 17: 100%|██████████| 17/17 [01:17<00:00,  4.58s/it, loss=0.246, v_num=0]

Metric val_loss improved by 0.016 >= min_delta = 0.01. New best score: 0.288


Epoch 22: 100%|██████████| 17/17 [01:17<00:00,  4.56s/it, loss=0.201, v_num=0]

Metric val_loss improved by 0.022 >= min_delta = 0.01. New best score: 0.267


Epoch 28: 100%|██████████| 17/17 [01:16<00:00,  4.49s/it, loss=0.211, v_num=0]

Metric val_loss improved by 0.012 >= min_delta = 0.01. New best score: 0.255


Epoch 32: 100%|██████████| 17/17 [01:15<00:00,  4.43s/it, loss=0.197, v_num=0]Epoch 00033: reducing learning rate of group 0 to 3.9062e-03.
Epoch 35: 100%|██████████| 17/17 [01:15<00:00,  4.47s/it, loss=0.218, v_num=0]

Monitored metric val_loss did not improve in the last 7 records. Best score: 0.255. Signaling Trainer to stop.


Epoch 35: 100%|██████████| 17/17 [01:15<00:00,  4.47s/it, loss=0.218, v_num=0]
Saving model... model.pt


### Prediction test

In [21]:
model = torch.load("model_v3_0_302.pt")

In [52]:
# visualize the model results
img, target = val[13]
print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
pil_transform(DTSegmentationDataset.label_img_to_rgb(target)).show(title="ground truth")
rgb_prediction = DTSegmentationDataset.label_img_to_rgb(result[0])
pil_prediction = pil_transform(rgb_prediction)
pil_prediction.show(title="prediction")

Target shape: torch.Size([640, 480]), value range: 0-4
Result shape: torch.Size([1, 640, 480]), value range: 0-4


### Draw benchmarks

In [39]:
# Find the average coordinate of the road
road_mask = np.argwhere(rgb_prediction == DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value'])
road_center_y, road_center_x = np.ceil(np.mean(road_mask, axis=0)[:2])

drawer = ImageDraw.Draw(pil_prediction)
# Draw two circles (bigger and smaller) centered on road center coordinate
RADIUS = 20
drawer.ellipse((road_center_x - RADIUS, road_center_y - RADIUS, road_center_x + RADIUS, road_center_y + RADIUS), fill='green')
drawer.ellipse((road_center_x - 5, road_center_y - 5, road_center_x + 5, road_center_y + 5), fill='orange')
# Draw a vertical line on the center of the image
drawer.line((pil_prediction.width / 2, 0, pil_prediction.width / 2, pil_prediction.height), fill='red')
# Draw a horizontal line from the center of the image to the road center
drawer.line((pil_prediction.width / 2, road_center_y, road_center_x, road_center_y), fill='blue')
# Draw two lines going from the corners of the image to the road center
drawer.line((0, 0, road_center_x, road_center_y), fill='yellow', width=2)
drawer.line((pil_prediction.width, 0, road_center_x, road_center_y), fill='yellow', width=2)

pil_prediction.show()

### Test set

In [15]:
# Load and visualize the test image
img = transforms.ToTensor()(Image.open("offline learning/semantic segmentation/data/frames_test/track_test_6.png"))
print(f"Image shape: {img.size}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
rgb_prediction = DTSegmentationDataset.label_img_to_rgb(result[0])
pil_prediction = pil_transform(rgb_prediction)
pil_prediction.show(title="prediction")

Image shape: <built-in method size of Tensor object at 0x7f943245b330>
Result shape: torch.Size([1, 878, 1180]), value range: 0-2
