In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split

import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from PIL import Image, ImageDraw
from torchvision import transforms

from dataloader.dataloader import DTSegmentationDataset
from model.model import DTSegmentationNetwork

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%load_ext autoreload
%autoreload 2

### Load the dataset

In [3]:
dataset = DTSegmentationDataset()

# Split the dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train, val = random_split(dataset, [train_size, test_size])
print(f"Train size: {len(train)}, val size: {len(val)}")

Train size: 100, val size: 25


### Compute the distribution of the labels in the whole dataset for weighted loss

In [4]:
# These values will be used in the model to compute the weighted loss
labels = np.zeros(len(DTSegmentationDataset.SEGM_LABELS))
for img, target in dataset:
    for label, label_info in DTSegmentationDataset.SEGM_LABELS.items():
        labels[label_info['id']] += (target == label_info['id']).sum()
labels = labels / np.sum(labels)
print(f"Train labels distribution: {labels}")
weights = np.sum(labels) / (2 * labels)
print(f"Train labels weights: {weights}")

Train labels distribution: [0.19483307 0.03665143 0.68552852 0.08298698 0.         0.
 0.         0.        ]
Train labels weights: [ 2.56629941 13.64203167  0.72936426  6.02504158         inf         inf
         inf         inf]


  weights = np.sum(labels) / (2 * labels)


### Overfit Test

In [None]:
overfit_hparams = {
    # --- Model ---
    # | Model hyperparameters
    'num_classes': 2,
    # | Optimization hyperparameters
    "learning_rate": 0.0625,
    "weight_decay": 0.000000625,
    "lr_decay": 0.25,
    
    # --- Dataloader (Hardware-specific) ---
    "batch_size": 12,
    "num_workers": 2,
}

model = DTSegmentationNetwork(overfit_hparams)

# Overfit for testing
early_stop_overfit_callback = EarlyStopping(
    monitor='loss',
    patience=150,
    min_delta=0.0005,
    verbose=True,
    mode='min'
)

trainer = pl.Trainer(
    log_every_n_steps=1,
    max_epochs=150,
    overfit_batches=1,
    callbacks=[early_stop_overfit_callback]
)

# image, target = train[0]
# # print(f'image: {image.shape}, target: {target.shape}')
# result = torch.argmax(target, dim=0)
# print(f'image: {image.shape}, result: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}')

trainer.fit(model, DataLoader(train, shuffle=False, batch_size=1), DataLoader(val, shuffle=False, batch_size=1))

In [None]:
# visualize the overfitted results
img, target = train[0]
print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
pil_transform(DTSegmentationDataset.label_img_to_rgb(target)).show(title="ground truth")
pil_transform(DTSegmentationDataset.label_img_to_rgb(result[0])).show(title="prediction")

### Training the model

In [5]:
hparams = {
    # --- Model ---
    # | Model hyperparameters
    'num_classes': 4,
    # | Optimization hyperparameters
    "learning_rate": 0.0625,
    "weight_decay": 0.000000625,
    "lr_decay": 0.25,
    
    # --- Dataloader (Hardware-specific) ---
    "batch_size": 16,
    "num_workers": 4,
}
# Initialize a tensor with pre-computed weights for the weighted loss: 
# * no crop, 5 classes - (bg, mid, road, end, side): UNKNOWN, wrong values:[1.04134481, 20.79269895, 1.1516967, 50.6744693, 9.6533002]
# * 30% crop, 4 classes - (bg, mid, road, side): [2.56629941, 13.64203167,  0.72936426, 6.02504158]
class_weights = torch.tensor([2.56629941, 13.64203167,  0.72936426, 6.02504158], dtype=torch.float32)
hparams['class_weights'] = class_weights

model = DTSegmentationNetwork(hparams)

# Training procedure
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=7,
    min_delta=0.01,
    verbose=True,
    mode='min'
)

trainer = pl.Trainer(
    max_epochs=100,
    log_every_n_steps=1,
    # For new MacBooks
    # accelerator="mps",
    # devices=1,
    callbacks=[early_stop_callback]
)

train_dataloader = DataLoader(train, batch_size=hparams['batch_size'], num_workers=hparams['num_workers'], shuffle=True)
val_dataloader = DataLoader(val, batch_size=hparams['batch_size'], num_workers=hparams['num_workers'])

trainer.fit(model, train_dataloader, val_dataloader)

model.save("model.pt")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type   | Params
---------------------------------
0 | model | LRASPP | 3.2 M 
---------------------------------
246 K     Trainable params
3.0 M     Non-trainable params
3.2 M     Total params
12.875    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 9/9 [00:26<00:00,  2.95s/it, loss=0.883, v_num=3]

Metric val_loss improved. New best score: 0.699


Epoch 1: 100%|██████████| 9/9 [00:45<00:00,  5.09s/it, loss=0.646, v_num=3]

Metric val_loss improved by 0.198 >= min_delta = 0.01. New best score: 0.500


Epoch 2: 100%|██████████| 9/9 [00:45<00:00,  5.09s/it, loss=0.487, v_num=3]

Metric val_loss improved by 0.015 >= min_delta = 0.01. New best score: 0.485


Epoch 5: 100%|██████████| 9/9 [00:48<00:00,  5.44s/it, loss=0.277, v_num=3]

Metric val_loss improved by 0.035 >= min_delta = 0.01. New best score: 0.450


Epoch 7: 100%|██████████| 9/9 [00:46<00:00,  5.14s/it, loss=0.24, v_num=3] 

Metric val_loss improved by 0.053 >= min_delta = 0.01. New best score: 0.398


Epoch 9: 100%|██████████| 9/9 [00:46<00:00,  5.16s/it, loss=0.227, v_num=3]

Metric val_loss improved by 0.073 >= min_delta = 0.01. New best score: 0.325


Epoch 11: 100%|██████████| 9/9 [00:45<00:00,  5.08s/it, loss=0.203, v_num=3]

Metric val_loss improved by 0.058 >= min_delta = 0.01. New best score: 0.267


Epoch 15: 100%|██████████| 9/9 [00:45<00:00,  5.09s/it, loss=0.186, v_num=3]

Metric val_loss improved by 0.033 >= min_delta = 0.01. New best score: 0.233


Epoch 16: 100%|██████████| 9/9 [00:46<00:00,  5.14s/it, loss=0.184, v_num=3]

Metric val_loss improved by 0.011 >= min_delta = 0.01. New best score: 0.223


Epoch 18: 100%|██████████| 9/9 [00:45<00:00,  5.08s/it, loss=0.188, v_num=3]

Metric val_loss improved by 0.012 >= min_delta = 0.01. New best score: 0.210


Epoch 21: 100%|██████████| 9/9 [00:45<00:00,  5.08s/it, loss=0.187, v_num=3]

Metric val_loss improved by 0.013 >= min_delta = 0.01. New best score: 0.198


Epoch 25: 100%|██████████| 9/9 [00:45<00:00,  5.10s/it, loss=0.176, v_num=3]Epoch 00026: reducing learning rate of group 0 to 1.5625e-02.
Epoch 28: 100%|██████████| 9/9 [00:45<00:00,  5.07s/it, loss=0.161, v_num=3]

Monitored metric val_loss did not improve in the last 7 records. Best score: 0.198. Signaling Trainer to stop.


Epoch 28: 100%|██████████| 9/9 [00:45<00:00,  5.08s/it, loss=0.161, v_num=3]
Saving model... model.pt


### Prediction test

In [None]:
model = torch.load("model.pt")

In [None]:
# save the state dict of the model
torch.save(model.state_dict(), "model_v7_0_086_state_dict.pt")

In [12]:
# visualize the model validation results
IMAGE_WIDTH = 640
IMAGE_HEIGHT = 288
PICS_PER_ROW = 3
NUM_IMG_SIDE_BY_SIDE = 3

# Generate a grid image (with PICS_PER_ROW images per row) that has all validation targets and the predictions for corresponding images side-by-side
final_img = Image.new('RGB', (IMAGE_WIDTH * PICS_PER_ROW * NUM_IMG_SIDE_BY_SIDE, IMAGE_HEIGHT * (len(val) // PICS_PER_ROW + 1)))


for index, (img, target) in enumerate(val):
    # print(f"Target shape: {target.shape}, value range: {torch.min(target)}-{torch.max(target)}")
    result = torch.argmax(model(img.unsqueeze(0)), dim=1)
    # print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
    pil_transform = transforms.ToPILImage()
    img_pil = pil_transform(img)
    
    # Add the image to the final image
    width_offset = (IMAGE_WIDTH * NUM_IMG_SIDE_BY_SIDE) * (index % PICS_PER_ROW)
    height_offset = IMAGE_HEIGHT * (index // PICS_PER_ROW)
    final_img.paste(img_pil, (width_offset, height_offset))
    
    pil_target = pil_transform(DTSegmentationDataset.label_img_to_rgb(target))
    
    # Add the target to the final image
    width_offset = (IMAGE_WIDTH * NUM_IMG_SIDE_BY_SIDE) * (index % PICS_PER_ROW) + IMAGE_WIDTH
    height_offset = IMAGE_HEIGHT * (index // PICS_PER_ROW)
    final_img.paste(pil_target, (width_offset, height_offset))
    
    pil_prediction = pil_transform(DTSegmentationDataset.label_img_to_rgb(result[0]))
    
    # Identify two lines in the image: the middle lane and the side lane using Hough transform
    
    drawer = ImageDraw.Draw(pil_prediction)
    # Fit a line to the middle lane pixels and draw it on the prediction and the target
    middle_lane_pixels = torch.nonzero(target == DTSegmentationDataset.SEGM_LABELS['Middle Lane']['id'])
    if len(middle_lane_pixels) > 0:
        x = middle_lane_pixels[:, 1]
        y = middle_lane_pixels[:, 0]
        # Find the mass center for the middle lane pixels
        mass_center = (int(np.mean(x)), int(np.mean(y)))
        # Draw 
        

        # drawer.line((0, b, IMAGE_WIDTH, m * IMAGE_WIDTH + b), fill='red', width=10)
    # # Fit a line to the side lane pixels and draw it on the prediction and the target
    # side_lane_pixels = torch.nonzero(target == DTSegmentationDataset.SEGM_LABELS['Side Lane']['id'])
    # if len(side_lane_pixels) > 0:
    #     x = side_lane_pixels[:, 1]
    #     y = side_lane_pixels[:, 0]
    #     m, b = np.polyfit(x, y, 1)
    #     drawer.line((0, b, IMAGE_WIDTH, m * IMAGE_WIDTH + b), fill='black', width=10)
    

    # Add the prediction to the final image
    width_offset = (IMAGE_WIDTH * NUM_IMG_SIDE_BY_SIDE) * (index % PICS_PER_ROW) + (IMAGE_WIDTH * 2)
    height_offset = IMAGE_HEIGHT * (index // PICS_PER_ROW)
    final_img.paste(pil_prediction, (width_offset, height_offset))
    
final_img.show()

TypeError: expected 1D vector for x

### Draw benchmarks

In [None]:
# Crop the prediction to focus on the lower part of the image
rgb_prediction = rgb_prediction[rgb_prediction.shape[0] // 2:, :, :]
pil_prediction = pil_transform(rgb_prediction)

# Find the average coordinate of the road
road_mask = np.argwhere(rgb_prediction == DTSegmentationDataset.SEGM_LABELS['Ego Lane']['rgb_value'])
road_center_y, road_center_x = np.ceil(np.mean(road_mask, axis=0)[:2])

drawer = ImageDraw.Draw(pil_prediction)
# Draw two circles (bigger and smaller) centered on road center coordinate
RADIUS = 20
drawer.ellipse((road_center_x - RADIUS, road_center_y - RADIUS, road_center_x + RADIUS, road_center_y + RADIUS), fill='green')
drawer.ellipse((road_center_x - 5, road_center_y - 5, road_center_x + 5, road_center_y + 5), fill='orange')
# Draw a vertical line on the center of the image
drawer.line((pil_prediction.width / 2, 0, pil_prediction.width / 2, pil_prediction.height), fill='red')
# Draw a horizontal line from the center of the image to the road center
drawer.line((pil_prediction.width / 2, road_center_y, road_center_x, road_center_y), fill='blue')
# Draw two lines going from the corners of the image to the road center
drawer.line((0, 0, road_center_x, road_center_y), fill='yellow', width=2)
drawer.line((pil_prediction.width, 0, road_center_x, road_center_y), fill='yellow', width=2)

pil_prediction.show()

### Test set

In [None]:
# Load and visualize the test image
img = transforms.ToTensor()(Image.open("offline learning/semantic segmentation/data/frames_test/track_test_6.png"))
print(f"Image shape: {img.size}")
result = torch.argmax(model(img.unsqueeze(0)), dim=1)
print(f"Result shape: {result.shape}, value range: {torch.min(result)}-{torch.max(result)}")
pil_transform = transforms.ToPILImage()
pil_transform(img).show(title="image")
rgb_prediction = DTSegmentationDataset.label_img_to_rgb(result[0])
pil_prediction = pil_transform(rgb_prediction)
pil_prediction.show(title="prediction")

### Online Predictions

In [None]:
import time
import requests
import io

In [None]:
model.eval()

time_start = time.time()
r = requests.get('http://192.168.0.108:1318/image')
time_request = time.time() - time_start
pil_image = Image.open(io.BytesIO(r.content)).convert("RGB")
image = transforms.ToTensor()(pil_image)
time_convert = time.time() - (time_request + time_start)

pred = torch.argmax(model(image.unsqueeze(0)), dim=1)[0]
time_prediction = time.time() - (time_convert + time_start)

title = f"{time.time()}"
pil_image.save(f"testing/out/image_{title}.png")
pil_image.show(title="image")

pil_transform = transforms.ToPILImage()
rgb_prediction = DTSegmentationDataset.label_img_to_rgb(pred)
pil_prediction = pil_transform(rgb_prediction)
pil_prediction.save(f"testing/out/prediction_{title}.png")
pil_prediction.show(title="prediction")

print(f"""
Request time: {time_request:.3f}s
Convert time: {time_convert:.3f}s
Prediction time: {time_prediction:.3f}s
Total time: {(time_request + time_convert + time_prediction):.3f}s
    """)