In [2]:
import ast
import time
import os

import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transform
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

from src.dataset.datasets import CTrainingDataset, CValidationDataset
from src.reader.readers import CTrainingReader, CValidationReader
from src.utils.parameters import CRGBType
from src.utils.parameters import CImageSize

In [3]:
torch.cuda.empty_cache()

In [4]:
training_path = r"C:\Git\AUDI_A2D2_dataset\training"
validation_path = r"C:\Git\AUDI_A2D2_dataset\validation"

In [5]:
trainingReader = CTrainingReader(training_path)
validationReader = CValidationReader(validation_path)

In [6]:
image_transform = transform.Resize((CImageSize.WIDTH, CImageSize.HEIGHT), interpolation=transform.InterpolationMode.BILINEAR)
target_transform = transform.Resize((CImageSize.WIDTH, CImageSize.HEIGHT), interpolation=transform.InterpolationMode.NEAREST)

In [7]:
trainingDataset = CTrainingDataset(trainingReader, CRGBType, image_transform, target_transform)
validationDataset = CValidationDataset(validationReader, CRGBType, image_transform, target_transform)

In [8]:
batch_size = 16
num_epochs = 10
epoch_peak = 2
lr = 0.01
lr_warmup_ratio = 1.0
lr_decay_per_epoch = 1.0
iterations = 10e5
log_freq = 1
eval_size = 30
checkpoint_dir = r"C:\Git\lane-detection\src\checkpoints"

In [9]:
train_loader = DataLoader(trainingDataset, batch_size=batch_size, shuffle=True, num_workers=6, pin_memory=True, drop_last=True, prefetch_factor=True, persistent_workers=True)
val_loader = DataLoader(validationDataset, batch_size=batch_size, shuffle=False, num_workers=6, pin_memory=True, drop_last=True, prefetch_factor=True, persistent_workers=True)

In [10]:
model = torch.hub.load("pytorch/vision:v0.9.1", "deeplabv3_mobilenet_v3_large", weights=None, num_classes=55)
model.train()

Using cache found in C:\Users\david/.cache\torch\hub\pytorch_vision_v0.9.1


DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride

In [11]:
amp = ast.literal_eval("True")
ce = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.95)
scaler = GradScaler(enabled=amp)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride

In [12]:
for epoch in range(0, num_epochs):
    # custom LR schedule. Warmup with peak and exponential decay
    if epoch <= epoch_peak:
        start_lr = lr * lr_warmup_ratio
        lr = start_lr + (epoch / epoch_peak) * (lr - start_lr)
    else:
        lr = lr * (lr_decay_per_epoch) ** (epoch - epoch_peak)

    print("In epoch {} learning rate: {:.10f}".format(epoch, lr))
    for p in optimizer.param_groups:
        p["lr"] = lr

    bstart = time.time()
    correct = 0
    total = 0
    train_loader = tqdm(train_loader)
    for i, batch in enumerate(train_loader):

        # if want to train for less than 1 epoch
        if i > iterations:
            break

        model.train()
        inputs = batch[0].to(device)
        masks = batch[1].to(device)
        optimizer.zero_grad()

        with autocast(enabled=amp):
            outputs = model(inputs)
            loss = ce(outputs["out"], masks.long())
            _, predicted = torch.max(outputs["out"], 1)
            total += masks.nelement()
            correct += (predicted == masks.long()).sum().item()

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        if i > 0 and (i / float(log_freq)).is_integer():
            train_accuracy = 100 * correct / total

            bstop = time.time()
            val_losses = []
            correct = 0
            total = 0
            model.eval()
            val_loader = tqdm(val_loader)
            with torch.no_grad():
                # validation dataloader takes 30s to load first batch :(...
                for j, batch in enumerate(val_loader):
                    inputs = batch[0].to(device)
                    masks = batch[1].to(device)
                    outputs = model(inputs)
                    val_loss = ce(outputs["out"], masks)
                    _, predicted = torch.max(outputs["out"], 1)
                    total += masks.nelement()
                    correct += (predicted == masks).sum().item()
                    val_losses.append(val_loss)
                    if j * batch_size >= eval_size:  # evaluate on a subset of val set
                        break
            avg_val_loss = torch.mean(torch.stack(val_losses))
            val_accuracy = 100 * correct / total

            # print metrics
            throughput = float((i + 1) * batch_size) / (bstop - bstart)
            print("processed {} records in {}s".format(i * batch_size, bstop - bstart))
            print(
                "batch {}: Training_loss: {:.4f}, Val_loss: {:.4f}, Train_accuracy: {:.2f}%, Val_accuracy: {:.2f}%".format(
                    i, loss, avg_val_loss, train_accuracy, val_accuracy
                )
            )

            # save model twice ("latest" and versioned)
            checkpoint_name = "model-epoch{}-iter{}.pth".format(epoch, i)
            torch.save(model, os.path.join(checkpoint_dir, checkpoint_name))
            torch.save(model, os.path.join(checkpoint_dir, "latest_model.pth"))

# we save the final model in the checkpoint location, for consistency
torch.save(model, os.path.join(checkpoint_dir, "final_model.pth"))

In epoch 0 learning rate: 0.0010000000


  2%|▏         | 2/117 [00:42<40:16, 21.01s/it]/it]


processed 16 records in 39.57758331298828s
batch 1: Training_loss: 4.1288, Val_loss: 4.0365, Train_accuracy: 0.16%, Val_accuracy: 0.22%


  2%|▏         | 2/117 [00:23<22:13, 11.60s/it]/it]


processed 32 records in 83.09365153312683s
batch 2: Training_loss: 4.1068, Val_loss: 4.0362, Train_accuracy: 0.24%, Val_accuracy: 0.34%


  2%|▏         | 2/117 [00:23<22:04, 11.52s/it]/it]


processed 48 records in 107.586909532547s
batch 3: Training_loss: 4.0887, Val_loss: 4.0345, Train_accuracy: 0.35%, Val_accuracy: 0.46%


  0%|          | 0/117 [00:05<?, ?it/s], 30.19s/it]
  0%|          | 4/2462 [02:16<23:22:28, 34.23s/it]


KeyboardInterrupt: 