In [1]:
pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.2.0-py3-none-any.whl (805 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.10.0-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.10.0 torchmetrics-1.2.0


In [2]:
import numpy as np
from PIL import Image, ImageColor
from google.colab import drive
import skimage.io as io
import numpy as np
from scipy import ndimage
from torchmetrics.classification import Dice

In [3]:
drive.mount('/content/drive')

drive_path = '/content/drive/My Drive/CarData/'

Mounted at /content/drive


In [4]:
parts = {10: {'col': 'orange', 'name':'hood'},
         20: {'col':'darkgreen', 'name':'front door'},
         30: {'col':'yellow', 'name':'rear door'},
         40: {'col':'cyan', 'name':'frame'},
         50: {'col':'purple', 'name':'rear quarter panel'},
         60: {'col':'lightgreen', 'name':'trunk lid'},
         70: {'col':'blue', 'name':'fender'},
         80: {'col':'pink', 'name':'bumper'},
         90: {'col':'darkgray', 'name':'rest of car'},
         0 : {'col':'black', 'name':'background'}}

def display_car(data_arr):
    # Can take both full data and already split data
    if type(data_arr) == torch.Tensor: data_arr = np.moveaxis(data_arr.numpy().astype(np.uint8), 0, 2)
    elif data_arr.shape[0] == 3: data_arr = np.moveaxis(data_arr.astype(np.uint8), 0, 2)
    elif data_arr.shape[2] > 3: data_arr = data_arr[:,:,:3]
    img = Image.fromarray(data_arr)
    display(img) # img.show() for jupyter

def display_labels(data_arr):
    # Can take both full data and already split data
    if type(data_arr) == torch.Tensor: data_arr = data_arr.numpy()
    if data_arr.dtype != np.uint8: data_arr = data_arr.astype(np.uint8)*10
    if data_arr.ndim > 2: data_arr = data_arr[:,:,3]
    img = Image.fromarray(data_arr)
    pixels = list(img.getdata())
    pixels = [ImageColor.getrgb(parts.get(pixel)['col']) for pixel in pixels]
    image = Image.new("RGB", (256, 256), (0,0,0))
    image.putdata(pixels)
    display(image)

def numpy_to_tensor(arr):
    return np.moveaxis(arr, 2, 0).astype(np.float32)

def tensor_to_numpy(tens):
    arr = np.moveaxis(tens, 0, 2).astype(np.uint8)
    return arr

In [5]:
def add_background(car, labels, background):
    mask = (labels == 0)
    updated_car = np.where(mask[..., None], background, car)
    updated_car = updated_car.squeeze()

    return updated_car


In [6]:
def center_square(img):
    """Returns the cropped central square of an image (crops the largest dimension to match the smallest one)"""
    if img.size[0] == img.size[1]: return img
    smallest_dim = np.argmin(img.size)
    largest_dim = np.argmax(img.size)
    square_dim = img.size[smallest_dim]
    crop_dims = [0,0,0,0]
    crop_dims[largest_dim] = int(img.size[largest_dim]/2-square_dim/2)
    crop_dims[largest_dim+2] = int(img.size[largest_dim]/2+square_dim/2)
    crop_dims[smallest_dim] = 0
    crop_dims[smallest_dim+2] = img.size[smallest_dim]
    crop_img = img.crop(crop_dims)

    return crop_img

def set_background(car_arr, labels_arr, img):
    """Places all non-0 pixels of the car on the background img"""
    center_img = center_square(img)
    back_arr = np.array(center_img.resize(labels_arr.shape))
    # Use both car and labels just in case
    back_arr[labels_arr!=0] = car_arr[labels_arr!=0]

    # In the black car dataset, label pixel count should be similar to non-black pixel count
    if np.sum(car_arr!=0)/3 < np.sum(labels_arr!=0)*1.2:
        # In the black dataset, part of the car isn't correctly labeled, so also use car data for setting background
        back_arr[car_arr!=0] = car_arr[car_arr!=0]

    return back_arr

def move_full_car(arr, x, y, angle=0):
    """Moves the center of the car to (x, y). Takes the whole array (car AND labels)"""
    car_idxs = np.where(arr!=0)
    car_bbox = [max(0,np.min(car_idxs[1])-10), max(0,np.min(car_idxs[0])-10), min(255, np.max(car_idxs[1])+10), min(255,np.max(car_idxs[0])+10)]
    # Array with just the car
    car_arr = arr[car_bbox[1]:car_bbox[3],car_bbox[0]:car_bbox[2]]
    # Rotate the car
    car_arr = ndimage.rotate(car_arr, angle, reshape=True, order=0)
    # Edges of the car in the new array (without taking into account new image borders)
    edges = [y-np.ceil(car_arr.shape[0]/2),y+np.floor(car_arr.shape[0]/2),x-np.ceil(car_arr.shape[1]/2),x+np.floor(car_arr.shape[1]/2)]
    # Where to crop the car if it goes off bounds
    car_limits = [max(0,-1*int(edges[0])), 255-int(edges[1]) if 255-int(edges[1]) < 0 else car_arr.shape[0], max(0,-1*int(edges[2])), 255-int(edges[3]) if 255-int(edges[3]) < 0 else car_arr.shape[1]]
    edges = [max(0,int(edges[0])), min(255, int(edges[1])), max(0,int(edges[2])), min(255, int(edges[3]))]

    new_arr = np.zeros(arr.shape)
    new_arr[edges[0]:edges[1],edges[2]:edges[3]] = car_arr[car_limits[0]:car_limits[1],car_limits[2]:car_limits[3]]

    return new_arr.astype(np.uint8)

In [7]:
import os
import numpy as np
from torch.utils.data import Dataset
from skimage.transform import resize
from PIL import Image
import random

class CarDataset(Dataset):
    def __init__(self, root, file_list: list=None, backgrounds: list=None, background_root: str=None, move_car: bool=False, rotate_car: bool=False):

        self.root = root
        self.filenames = os.listdir(self.root) if file_list is None else file_list
        if backgrounds is not None:
          self.backgrounds = backgrounds
        else:
          self.backgrounds = None
        if background_root is not None:
            self.backgrounds = [os.path.join(background_root, filename) for filename in os.listdir(background_root)]
        self.move_car = move_car
        self.rotate_car = rotate_car


    def __len__(self):
        return len(self.filenames)

    # def __getitem__(self, index):
        # filename = self.filenames[index]
        # arr = np.load(os.path.join(self.root, filename))
        # car = arr[:,:,0:3]
        # labels = arr[:,:,3]

        # if self.backgrounds is not None:
        #   background = random.choices(self.backgrounds)
        #   car = add_background(car, labels, background)

        # car = car.astype(np.float32)
        # car = np.moveaxis(car, 2, 0)

        # return car, labels/10

    def __getitem__(self, index):
        filename = self.filenames[index]
        arr = np.load(os.path.join(self.root, filename))
        # Don't move car with 10% probability
        if self.move_car and random.randrange(0,10)>0:
            x = random.randrange(80,160)
            y = random.randrange(100,210)
            angle = random.randrange(-30,30) if self.rotate_car else 0
            arr = move_full_car(arr, x, y, angle)
        car = arr[:,:,0:3]
        labels = arr[:,:,3]
        if self.backgrounds is not None:
          background = random.choices(self.backgrounds)
          car = add_background(car, labels, background)
        # if self.background_root is not None and 'photo' not in filename and random.randrange(0,10)>0:
        #     rand_idx = random.randrange(0,len(self.backgrounds))
        #     img = Image.open(self.backgrounds[rand_idx]).convert('RGB')
        #     car = set_background(car, labels, img)
        # Resize function does not work on int dtypes, so we must convert to float before
        car = resize(car.astype(np.float32), (256, 256))
        # Convert to the torch tensor image convention (CxWxH)
        car = np.moveaxis(car, 2, 0)

        labels = resize(labels.astype(np.float32), (256, 256))

        return torch.tensor(car), torch.tensor(labels/10)


In [8]:
import os
import numpy as np
from PIL import Image

def load_images_from_folder(folder_path, resize_shape=(256, 256), limit=100):
    image_list = []
    count = 0

    for filename in os.listdir(folder_path):
        # Check if the file is an image file (you can add more extensions if needed)
        if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
            file_path = os.path.join(folder_path, filename)

            # Open the image using PIL
            img = Image.open(file_path)

            # Resize the image
            img = img.resize(resize_shape)

            # Convert the image to a numpy array and append to the list
            image_array = np.array(img)
            image_list.append(image_array)

            count += 1
            if count >= limit:
                break

    return image_list

# Example usage:
folder_path = f'{drive_path}images/landscapes'
backgrounds = load_images_from_folder(folder_path, limit=250)


In [9]:
black_car = []
orange_car = []
photos = []
for file in os.listdir(f'{drive_path}arrays'):
    if 'orange' in file: orange_car.append(file)
    elif 'black' in file: black_car.append(file)
    elif 'photo' in file: photos.append(file)

print(len(black_car), len(orange_car), len(photos))

834 2001 168


In [10]:
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

root = f'{drive_path}arrays'

# ds = CarDataset(data_root, move_car=True, rotate_car=True, background_root=backgrounds_root)

test_set = photos

photo_train_val = photos
photo_train, photo_val = train_test_split(photo_train_val, test_size=0.2, random_state=42)
black_train, black_val = train_test_split(black_car, test_size=0.2, random_state=42)
orange_train, orange_val = train_test_split(orange_car, test_size=0.2, random_state=42)

# photo_train, photo_test = train_test_split(photos, test_size=0.4, random_state=42)
# black_train, black_test = train_test_split(black_car, test_size=0.4, random_state=42)
# orange_train, orange_test = train_test_split(orange_car, test_size=0.4, random_state=42)

# photo_val, photo_test = train_test_split(photo_test, test_size=0.5, random_state=42)
# black_val, black_test = train_test_split(black_test, test_size=0.5, random_state=42)
# orange_val, orange_test = train_test_split(orange_test, test_size=0.5, random_state=42)


# joint_train_ds = CarDataset(root, photo_train*3+black_train+orange_train)
# joint_train_background_ds = CarDataset(root, black_train+orange_train, backgrounds)
# joint_val_ds = CarDataset(root, photo_val+black_val+orange_val)

# joint_test_ds = CarDataset(root, photo_test+black_test+orange_test)
# photo_test_ds = CarDataset(root, photo_test)
# black_test_ds = CarDataset(root, black_test)
# orange_test_ds = CarDataset(root, orange_test)


joint_train_ds = CarDataset(root, photo_train*3+black_train+orange_train)
joint_train_background_ds = CarDataset(root, photo_train+black_train+orange_train, backgrounds)
joint_val_ds = CarDataset(root, photo_val+black_val+orange_val)

# joint_test_ds = CarDataset(root, photo_test+black_test+orange_test)
# photo_test_ds = CarDataset(root, photo_test)
# black_test_ds = CarDataset(root, black_test)
# orange_test_ds = CarDataset(root, orange_test)

joint_test_ds = CarDataset(root, test_set)

train_loader = DataLoader(joint_train_ds+joint_train_background_ds, batch_size=16, shuffle=True)
val_loader = DataLoader(joint_val_ds, batch_size=16)
test_loader = DataLoader(joint_test_ds, batch_size=16)

# photo_test_loader =  DataLoader(photo_test_ds, batch_size=16)
# black_test_loader =  DataLoader(black_test_ds, batch_size=16)
# orange_test_loader =  DataLoader(orange_test_ds, batch_size=16)

In [11]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
#!pip install torchmetrics
from torchmetrics.classification import Dice

In [138]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, batch_norm=True, encoder=False, leaky_relu_slope=0.2):
        super().__init__()

        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=2),
            nn.BatchNorm2d(out_channels) if batch_norm else nn.Identity(),
            nn.LeakyReLU(leaky_relu_slope) if encoder else nn.ReLU(),
            nn.Dropout2d(0.3),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=2),
            nn.BatchNorm2d(out_channels) if batch_norm else nn.Identity(),
            nn.LeakyReLU(leaky_relu_slope) if encoder else nn.ReLU()
        ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)

class UNet(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(UNet, self).__init__()

        self.encoder0 = ConvBlock(in_channels, 64, batch_norm=False, encoder=True)
        self.encoder1 = ConvBlock(64, 128, encoder=True)
        self.encoder2 = ConvBlock(128, 256, encoder=True)
        self.encoder3 = ConvBlock(256, 512, encoder=True)
        self.encoder4 = ConvBlock(512, 512, encoder=True)
        self.encoder5 = ConvBlock(512, 512, encoder=True)
        self.encoder6 = ConvBlock(512, 512, encoder=True)
        self.encoder7 = ConvBlock(512, 512, encoder=True)

        self.bottleneck = ConvBlock(512, 512)
        self.bottleneck_upsample = nn.ConvTranspose2d(512, 512, kernel_size=1, stride=1)

        self.decoder7 = ConvBlock(1024, 512)
        self.decoder6 = ConvBlock(1024, 512)
        self.decoder5 = ConvBlock(1024, 512)
        self.decoder4 = ConvBlock(1024, 512)
        self.decoder3 = ConvBlock(1024, 256)
        self.decoder2 = ConvBlock(512, 128)
        self.decoder1 = ConvBlock(256, 64)
        self.decoder0 = ConvBlock(64, num_classes)

        self.final_conv = nn.Conv2d(num_classes, num_classes, kernel_size=1)

    def forward(self, x):
        x0 = self.encoder0(x)
        x1 = self.encoder1(x0)
        x2 = self.encoder2(x1)
        x3 = self.encoder3(x2)
        x4 = self.encoder4(x3)
        x5 = self.encoder5(x4)
        x6 = self.encoder6(x5)
        x7 = self.encoder7(x6)

        bottleneck = self.bottleneck(x7)
        bottleneck_upsampled = self.bottleneck_upsample(bottleneck)

        x7_d = self.decoder7(torch.cat([x7, bottleneck_upsampled], dim=1))
        x6_d = self.decoder6(torch.cat([x6, x7_d], dim=1))
        x5_d = self.decoder5(torch.cat([x5, x6_d], dim=1))
        x4_d = self.decoder4(torch.cat([x4, x5_d], dim=1))
        x3_d = self.decoder3(torch.cat([x3, x4_d], dim=1))
        x3_d_upsampled = nn.Upsample(scale_factor=4, mode='nearest')(x3_d)
        x2_d = self.decoder2(torch.cat([x2, x3_d_upsampled], dim=1))
        x2_d_upsampled = nn.Upsample(scale_factor=16, mode='nearest')(x2_d)
        x1_d = self.decoder1(torch.cat([x1, x2_d_upsampled], dim=1))
        x1_d_upsampled = nn.Upsample(scale_factor=16, mode='nearest')(x1_d)
        x0_d = self.decoder0(x1_d_upsampled)
        x0_d_upsampled = nn.Upsample(scale_factor=16, mode='nearest')(x0_d)

        output = self.final_conv(x0_d_upsampled)

        return output



In [12]:
# import torch.nn as nn

# class ConvBlock(nn.Module):
#     def __init__(self, in_channels, out_channels):
#         super().__init__()
#         self.block = nn.Sequential(
#             nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
#             nn.BatchNorm2d(out_channels),
#             nn.ReLU(),
#             nn.Dropout2d(0.3),
#             nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
#             nn.BatchNorm2d(out_channels),
#             nn.ReLU()
#         )

#     def forward(self, x):
#         return self.block(x)

# class UNet(nn.Module):
#     def __init__(self, in_channels, num_classes):
#         super(UNet, self).__init__()
#         self.encoder0 = nn.Sequential(ConvBlock(in_channels, 64))
#         self.encoder1 = nn.Sequential(nn.MaxPool2d(2,2), ConvBlock(64, 128))
#         self.encoder2 = nn.Sequential(nn.MaxPool2d(2,2), ConvBlock(128, 256))
#         self.bottleneck = nn.Sequential(nn.MaxPool2d(2,2), ConvBlock(256,512), nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2))
#         self.decoder0 = nn.Sequential(ConvBlock(512,256), nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2))
#         self.decoder1 = nn.Sequential(ConvBlock(256,128), nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2))
#         self.decoder2 = nn.Sequential(ConvBlock(128,64), nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=1))

#     def forward(self, x):
#         x0 = self.encoder0(x)
#         x1 = self.encoder1(x0)
#         x2 = self.encoder2(x1)
#         x3 = self.bottleneck(x2)
#         x3 = self.decoder0(torch.cat([x2,x3],dim=1))
#         x3 = self.decoder1(torch.cat([x1,x3],dim=1))
#         x3 = self.decoder2(torch.cat([x0,x3],dim=1))
#         # print("x3.shape: ", x3.shape)

#         return x3


In [139]:
import torch.nn as nn

class PatchGANDiscriminator(nn.Module):
    def __init__(self, in_channels=1):
        super(PatchGANDiscriminator, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(16, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2),
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0),  # Adjust kernel_size and stride
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.model(x)
        return x


In [140]:
device = "cuda"
generator = UNet(3, 10).to(device)
discriminator = PatchGANDiscriminator().to(device)


In [141]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
criterion_L1 = nn.L1Loss()
criterion_adv = nn.BCEWithLogitsLoss()

optimizer_gen = optim.Adam(generator.parameters(), lr=0.001, weight_decay=1e-5)
optimizer_dis = optim.Adam(discriminator.parameters(), lr=0.001, weight_decay=1e-5)

In [142]:
num_epochs = 25
dice = Dice(average='micro').to(device)
loaders = [train_loader, ]

for epoch in range(num_epochs):
    generator.train()
    discriminator.train()

    total_gen_loss = 0.0
    total_dis_loss = 0.0

    for batch in train_loader:
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        inputs = inputs.float()
        labels = labels.long()

        # Train the discriminator
        optimizer_dis.zero_grad()
        fake_targets = generator(inputs)
        loss_gen_L1 = criterion(fake_targets, labels)

        fake_targets = torch.argmax(fake_targets, dim=1, keepdim=True)

        labels = labels.unsqueeze(1)
        labels = labels.float()

        real_preds = discriminator(labels)
        fake_preds = discriminator(fake_targets.detach().float())
        real_loss = criterion_adv(real_preds, torch.ones_like(real_preds))
        fake_loss = criterion_adv(fake_preds, torch.zeros_like(fake_preds))
        loss_dis = 0.5 * (real_loss + fake_loss)
        loss_dis.backward()
        optimizer_dis.step()

        # Train the generator
        optimizer_gen.zero_grad()
        fake_t_pred = discriminator(fake_targets.float())
        loss_gen_adv = criterion_adv(fake_t_pred, torch.ones_like(fake_t_pred))

        # loss_gen = 0.4*loss_gen_adv + 0.8 * loss_gen_L1
        loss_gen = 0.5*loss_gen_adv + 0.9 * loss_gen_L1
        loss_gen.backward()
        optimizer_gen.step()

        total_dis_loss += loss_dis.item()
        total_gen_loss += loss_gen.item()

    avg_dis_loss = total_dis_loss / len(train_loader)
    avg_gen_loss = total_gen_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Generator Loss: {avg_gen_loss}, Discriminator Loss: {avg_dis_loss}")

    generator.eval()
    total_val_loss = 0.0
    total_dice = 0.0
    for batch in val_loader:
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        inputs = inputs.float()
        labels = labels.long()

        with torch.no_grad():
            val_outputs = generator(inputs)
            val_loss = criterion(val_outputs, labels)
            val_outputs = val_outputs.to(device)
            total_val_loss += val_loss.item()
            val_dice = dice(val_outputs, labels)
            total_dice += val_dice

    average_val_loss = total_val_loss / len(val_loader)
    average_dice = total_dice / len(val_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Generator Loss (Validation): {average_val_loss:.4f}, Dice: {average_dice:.4f}")



Epoch 1/25, Generator Loss: 1.4692795910293748, Discriminator Loss: 0.5536835171070761
Epoch 1/25, Generator Loss (Validation): 0.6284, Dice: 0.8286
Epoch 2/25, Generator Loss: 0.9041849989620293, Discriminator Loss: 0.5124955953859757
Epoch 2/25, Generator Loss (Validation): 0.5076, Dice: 0.8419
Epoch 3/25, Generator Loss: 0.8544845376104963, Discriminator Loss: 0.5043127059184415
Epoch 3/25, Generator Loss (Validation): 0.4814, Dice: 0.8446
Epoch 4/25, Generator Loss: 0.8295241966608571, Discriminator Loss: 0.5062632923622613
Epoch 4/25, Generator Loss (Validation): 0.4577, Dice: 0.8469
Epoch 5/25, Generator Loss: 0.8144570154722557, Discriminator Loss: 0.503340592519718
Epoch 5/25, Generator Loss (Validation): 0.4507, Dice: 0.8475
Epoch 6/25, Generator Loss: 0.7994700100519679, Discriminator Loss: 0.5032298653283706
Epoch 6/25, Generator Loss (Validation): 0.4346, Dice: 0.8513
Epoch 7/25, Generator Loss: 0.7894137544962886, Discriminator Loss: 0.5032190932460388
Epoch 7/25, Generato

In [None]:
# torch.save(model.state_dict(), f'{drive_path}model2.pth')

## Original

Photos: 0.8259544\
Black: 0.9816798\
Orange: 0.98628855\
Test: 0.9728253

# Added photos

Photos: 0.83027714\
Black: 0.9769864\
Orange: 0.98165745\
Test: 0.9720461

## Added photos and background

Photos: 0.8418195\
Black: 0.9842939\
Orange: 0.9854847\
Test: 0.97684497



In [None]:
# Test weighted Cross Entropy Loss/Cross Entropy Loss/Dice Loss/Jaccard Loss

In [None]:
# Test different values for learning rate, weight decay etc.