In [3]:
import torch
import torch.nn as nn
from torchsummary import summary
print(torch.__version__)
print(torch.cuda.is_available())
class DepthwiseSeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super(DepthwiseSeparableConv2d, self).__init__()
        self.depthwise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels)
        self.pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        
    def forward(self, x):
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        return x

class MobileNetEncoder(nn.Module):
    def __init__(self, in_channels=3):
        super(MobileNetEncoder, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 8, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            DepthwiseSeparableConv2d(8, 16),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=1, stride=2, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(
            DepthwiseSeparableConv2d(16, 16),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, kernel_size=1, stride=2, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )
        self.conv4 = nn.Sequential(
            DepthwiseSeparableConv2d(32, 32),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=1, stride=2, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.conv5 = nn.Sequential(
            DepthwiseSeparableConv2d(64, 64),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=1, stride=2, padding=0),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.conv6 = nn.Sequential(
            DepthwiseSeparableConv2d(128, 128),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=1, stride=2, padding=0),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        return x1, x2, x3, x4, x5, x6

class UNet(nn.Module):
    def __init__(self, n_channels=3, n_classes=2):
        super(UNet, self).__init__()
        
        # Encoder
        self.encoder = MobileNetEncoder(in_channels=n_channels)
        
        # Decoder

        self.upconv5 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.iconv5 = nn.Conv2d(256, 128, kernel_size=1)
        self.decoder_block5_1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(256, 128, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.decoder_block5_2 = nn.Sequential(
            nn.Conv2d(128*2, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.iconv4 = nn.Conv2d(128, 64, kernel_size=1)
        self.decoder_block4_1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(128, 64, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.decoder_block4_2 = nn.Sequential(
            nn.Conv2d(64*2, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )


        self.upconv3 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.iconv3 = nn.Conv2d(64, 32, kernel_size=1)
        self.decoder_block3_1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(64, 32, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.decoder_block3_2 = nn.Sequential(
            nn.Conv2d(32*2, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        )
        
        self.upconv2 = nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2)
        self.iconv2 = nn.Conv2d(32, 16, kernel_size=1)
        self.decoder_block2_1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(32, 16, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.decoder_block2_2 = nn.Sequential(
            nn.Conv2d(16*2, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        )
        
        self.upconv1 = nn.ConvTranspose2d(16, 8, kernel_size=2, stride=2)
        # print(self.upconv1.shape)
        self.iconv1 = nn.Conv2d(16, 8, kernel_size=1)
        self.decoder_block1_1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(16, 8, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.decoder_block1_2 = nn.Sequential(
            nn.Conv2d(8*2, 8, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
        )
        self.C2_layer = nn.Conv2d(256, 256, kernel_size=1)
        self.C3_layer = nn.Conv2d(8, 8, kernel_size=1)
        self.C3_layer = nn.Sequential(
            nn.Conv2d(8, 8, kernel_size=1),
            nn.BatchNorm2d(8))
        self.output_layer = nn.Conv2d(8, n_classes, kernel_size=1)
        
    def forward(self, x):
        # print("Input shape: ", x.shape)
        x1, x2, x3, x4, x5, x6 = self.encoder(x)
        # print("Encoder output shapes: ", x1.shape, x2.shape, x3.shape, x4.shape, x5.shape, x6.shape)
        y6 = self.C2_layer(x6)
        # print("C2 layer shape: ", y6.shape)
        # UpSample
        y5 = self.decoder_block5_1(y6)
        # Concatenation with skip connection
        y5 = torch.cat([x5, y5], dim=1)
        # Conv2
        y5 = self.decoder_block5_2(y5)
        # print("1 - Decoder layer shape: ", y5.shape)
        # UpSample
        y4 = self.decoder_block4_1(y5)
        # Concatenation with skip connection
        y4 = torch.cat([x4, y4], dim=1)
        # Conv2
        y4 = self.decoder_block4_2(y4)
        # print("2 - Decoder layer shape: ", y4.shape)
        # UpSample
        y3 = self.decoder_block3_1(y4)
        # Concatenation with skip connection
        y3 = torch.cat([x3, y3], dim=1)
        # Conv2
        y3 = self.decoder_block3_2(y3)
        # print("3 - Decoder layer shape: ", y3.shape)
        # UpSample
        y2 = self.decoder_block2_1(y3)
        # Concatenation with skip connection
        y2 = torch.cat([x2, y2], dim=1)
        # Conv2
        y2 = self.decoder_block2_2(y2)
        # print("4 - Decoder layer shape: ", y2.shape)

        # UpSample
        y1 = self.decoder_block1_1(y2)
        # Concatenation with skip connection
        y1 = torch.cat([x1, y1], dim=1)
        # Conv2
        y1 = self.decoder_block1_2(y1)
        # print("5 - Decoder layer shape: ", y1.shape)

        out = self.output_layer(y1)
        # print("6 - Output layer shape: ", out.shape)
        return out


1.12.0+cu116
True


In [4]:
import argparse
import json
import os
import shutil
import time
import numpy as np
import random
import cv2
import os
import json
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

# Set seed for randomize functions (Ez reproduction of results)
random.seed(100)

# Import TuSimple loader
import sys
sys.path.insert(0,'../resources/')
from tusimple import TuSimple

# ROOT DIRECTORIES
root_dir = os.path.dirname(os.getcwd())
annotated_dir = os.path.join(root_dir,'datasets/tusimple/train_set/annotations')
clips_dir = os.path.join(root_dir,'datasets/tusimple/train_set/')
annotated = os.listdir(annotated_dir)

# Get path directories for clips and annotations for the TUSimple dataset + ground truth dictionary
annotations = list()
for gt_file in annotated:
    path = os.path.join(annotated_dir,gt_file)
    json_gt = [json.loads(line) for line in open(path)]
    annotations.append(json_gt)
    
annotations = [a for f in annotations for a in f]

dataset = TuSimple(train_annotations = annotations, train_img_dir = clips_dir, resize_to = (640,640), subset_size = 0.15)

In [7]:
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
len(dataloader)

68

In [8]:
import torch.optim as optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instantiate the UNet model and move it to the device
model = UNet().to(device)
# print(model)
# Define the loss function and the optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# summary(model, input_size=(3, 640, 640), device='cpu')
# Loop through the training dataset and perform the training
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(dataloader):
        # Move the input images and the target masks to the same device as the model
        inputs, targets = inputs.to(device), targets["gt_tensor"].to(device)
        # Zero the gradients of the optimizer
        optimizer.zero_grad()
        # inputs = inputs.unsqueeze(0)
        # print(inputs.shape)
        # Forward pass the input images through the model to get the predictions
        outputs = model(inputs)

        outputs = torch.argmax(outputs, dim=1).unsqueeze(1).repeat(1,3,1,1).float()
        outputs.requires_grad = True
        # Compute the loss between the predictions and the target masks
        loss = criterion(outputs, targets)

        # Backward propagate the loss through the model to compute the gradients
        loss.backward()

        # Update the model parameters using the optimizer
        optimizer.step()
        
        # Update the running loss
        running_loss += loss.item() * inputs.size(0)
    
    #Print the average loss for the epoch
    epoch_loss = running_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{10} - Loss: {epoch_loss:.4f}")

Epoch 1/10 - Loss: 9.8206
Epoch 2/10 - Loss: 9.8362
Epoch 3/10 - Loss: 9.8252
Epoch 4/10 - Loss: 9.8276
Epoch 5/10 - Loss: 9.8259
Epoch 6/10 - Loss: 9.8212
Epoch 7/10 - Loss: 9.8313
Epoch 8/10 - Loss: 9.8310
Epoch 9/10 - Loss: 9.8247
Epoch 10/10 - Loss: 9.8235


In [108]:
model.eval()
img_tns, gt = dataset[0]
print(img_tns.shape)
img_tns_ = img_tns.unsqueeze(0)
print(img_tns_.shape)
img_tns_ = img_tns_.to('cuda')
pred_mask = model(img_tns_)

torch.Size([3, 640, 640])
torch.Size([1, 3, 640, 640])


In [109]:
pred_mask = torch.argmax(pred_mask, dim=1).unsqueeze(1).repeat(1,3,1,1).float()
# print(pred_mask[0].unique())
print(pred_mask[0])
print(gt["gt_tensor"])
# print(gt["gt_tensor"].unique())

tensor([[[0., 0., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.]],

        [[0., 0., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.]],

        [[0., 0., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.]]], device='cuda:0')
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0

In [110]:
print(pred_mask[0].shape)
pred_mask = pred_mask[0].cpu().detach().numpy()
dataset.plot_img_gt(gt["gt_tensor"], pred_mask)
# img_tns.shape

torch.Size([3, 640, 640])


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 640 and the array at index 1 has size 3