In [127]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
plt.ion() # (%matplotlib inline)
import os

# Patch Perfect

This notebook documents the process we followed to find a solution to the pothole problem.


### EDA

We start by looking at the data systematically to see where we will inevitably need to solve problems before we create a model:

In [117]:
# A notebook kernel has no real `__file__`, so emulate one with the directory
# the kernel was started in; the data folder is resolved relative to it.
__file__ = os.path.abspath('')
DATA_DIR = Path(__file__).resolve() / "data"
TRAIN_LABELS_PATH = DATA_DIR / "train_labels.csv"

# The CSV header carries a trailing space ('Bags used '); normalise it on load.
train_label_df = pd.read_csv(TRAIN_LABELS_PATH).rename(
    columns={'Bags used ': 'Bags used'}
)

In [118]:
# `values` maps every distinct bag amount to the number of rows that use it;
# the histogram shows how those per-amount row counts are distributed.
values = train_label_df['Bags used'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(values, bins=range(1, values.max() + 2), edgecolor='black')
ax.set_title('Histogram of Data Points per Bag Amount')
ax.set_xlabel('Number of Data Points for Each Bag Amount')
ax.set_ylabel('Frequency')
ax.grid(True)


findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif


Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x73253c27d200> (for post_execute):


ValueError: Failed to find font DejaVu Sans:style=normal:variant=normal:weight=normal:stretch=normal:size=13.2, and fallback to the default font was disabled

In [119]:
# Display the raw row count per bag amount (the data behind the histogram).
values

0.50     278
0.25     140
1.00     116
1.50      35
2.00      34
3.00      12
2.50       7
0.55       4
8.00       3
1.05       3
1.55       2
4.00       2
5.00       1
15.00      1
3.10       1
3.05       1
5.50       1
7.00       1
2.05       1
12.00      1
Name: Bags used, dtype: int64

We can see that there is a massive class imbalance. This could bias a model trained on this dataset towards the more common bag amounts. Most values lie between 0 and 1, with a few values that are much higher. There are many strategies we could use to address this, including but not limited to:
<ul>
<li>Some label abstraction technique where we might create labels based on a logarithmic scale</li>
<li>Data augmentation as a class imbalance mitigation: This process is called upsampling</li>
</ul>

We should also consider the following: The data makes this problem seem like a regression model is needed, but tuning the labels may enable us to change it to a much simpler classification task at the cost of some accuracy. Doing this would result in a much more robust model and enable us to use techniques like label smoothing to let the model generalize more to unseen data. 
<hr>
References:
<ul>
<li>Paperspace Blog. (2022). Data Augmentation: A Class Imbalance Mitigative Measure. [online] Available at: https://blog.paperspace.com/data-augmentation-a-class-imbalance-mitigative-measure/.</li>
<li>S. Wang and X. Yao, "Multiclass Imbalance Problems: Analysis and Potential Solutions," in IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), vol. 42, no. 4, pp. 1119-1130, Aug. 2012, doi: 10.1109/TSMCB.2012.2187280. keywords: {Training;Correlation;Training data;Pattern analysis;Genetic algorithms;IEEE Potentials;Cybernetics;Boosting;diversity;ensemble learning;multiclass imbalance problems;negative correlation learning}, </li>
</ul>
‌

## The model

In [120]:
import torch
from torch import nn
# Use the GPU when one is present, otherwise the CPU. Without `map_location`, a
# checkpoint that was saved from a GPU cannot be loaded on a CPU-only host.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): the checkpoint is a fully pickled model, so loading it can
# execute arbitrary code -- only load files that come from a trusted source.
model = torch.load("./pretrained_model.pt", map_location=device)

# Freeze every pretrained weight (BatchNorm parameters included); only the
# replacement classifier defined below stays trainable.
for param in model.parameters():
    param.requires_grad = False

# Change only the classifier of the model, i.e. the last few layers.
model.classifier = nn.Sequential(
    nn.Conv2d(2048, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
    nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    nn.ReLU(),
    nn.Dropout(p=0.1, inplace=False),
    nn.Conv2d(512, 3, kernel_size=(1, 1), stride=(1, 1)),  # 3 output classes instead of 21
)

# The new head is created on the CPU; move the whole model so that every layer
# lives on the same device as the loaded backbone.
model = model.to(device)


In [121]:
# One flag per module (containers and the root model included): True when the
# module owns at least one parameter that still requires gradients.
layer_is_trainable = [
    any(weight.requires_grad for weight in layer.parameters())
    for layer in model.modules()
]

trainable_layers = sum(layer_is_trainable)
non_trainable_layers = len(layer_is_trainable) - trainable_layers

print(f"Trainable layers: {trainable_layers}")
print(f"Non-trainable layers: {non_trainable_layers}")


Trainable layers: 5
Non-trainable layers: 157


In [122]:

# Total number of scalar weights, split by whether they will receive gradients.
trainable_params = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)
non_trainable_params = sum(
    weight.numel() for weight in model.parameters() if not weight.requires_grad
)

print(f"Trainable parameters: {trainable_params}")
print(f"Non-trainable parameters: {non_trainable_params}")


Trainable parameters: 9439747
Non-trainable parameters: 25873237


## Training Code

In [123]:
import torch.optim as optim
from torchvision.utils import save_image


# optimizer = optim.Adam(
#     filter(lambda p: p.requires_grad, model.parameters()), 
#     lr=0.001
# )



def train_model(model, train_loader, num_epochs=1):
    """Fine-tune ``model`` on ``train_loader`` using cross-entropy loss.

    Args:
        model: Module whose forward pass returns a mapping holding the logits
            under the ``'out'`` key.
        train_loader: Iterable of ``(images, labels)`` batches that also exposes
            ``.dataset``; the dataset length is used to average the loss.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The mean loss per sample is printed after every epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Move the model once, before the optimizer is created, instead of calling
    # ``model.cuda()`` again for every batch.
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Frozen parameters never receive gradients, so only the trainable ones are
    # handed to the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable_params, lr=3e-4)  # Karpathy's number

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images = images.to(device)
            # CrossEntropyLoss expects int64 class indices. The cast used to be
            # applied on the CUDA path only, leaving CPU batches uncast.
            labels = labels.to(device=device, dtype=torch.long)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)['out']
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so that the epoch
            # figure is a per-sample mean even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')

def evaluate_model(model, val_loader):
    """Compute the accuracy of ``model`` over every label element in ``val_loader``.

    Args:
        model: Module whose forward pass returns a mapping holding the logits
            under the ``'out'`` key, with the class scores along dimension 1.
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        The fraction of label elements whose arg-max prediction equals the
        label, or ``nan`` when the loader yields no labels. The value is also
        printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Inputs are moved to ``device`` below, so the model has to live there too;
    # otherwise a CPU model receives CUDA tensors.
    model.to(device)

    model.eval()
    total_correct = 0
    total_pixels = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)['out']
            preds = outputs.argmax(dim=1)

            total_pixels += labels.numel()
            total_correct += (preds == labels).sum().item()

    # An empty loader used to raise ZeroDivisionError here.
    accuracy = total_correct / total_pixels if total_pixels else float('nan')
    print(f'Validation Accuracy: {accuracy:.4f}')
    return accuracy


## Data Prep Code

In [124]:
# Custom dataset class for your data
import torchvision.transforms as transforms
import torch
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import numpy as np

# Mask colour -> class index. Colours that are not listed end up as class 0.
rgb_to_class = {
    (0, 0, 0): 0,
    (255, 255, 255): 1,
    (100, 100, 100): 2
}


class Potholes(Dataset):
    """Segmentation dataset pairing each image with its ``<stem>_mask.png`` mask.

    Each item is ``(image, mask)``: ``image`` is a normalised 3x256x256 float
    tensor and ``mask`` is a 256x256 ``uint8`` numpy array of class indices.
    """

    def __init__(self, image_dir, label_dir):
        """Index the files in ``image_dir``; masks are looked up in ``label_dir``."""
        self.image_dir = image_dir
        self.label_dir = label_dir
        # os.listdir returns entries in arbitrary order; sort them so that an
        # index always refers to the same sample.
        self.image_filenames = sorted(os.listdir(image_dir))
        # Per-channel normalisation statistics (the usual ImageNet values).
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def __len__(self):
        return len(self.image_filenames)

    @staticmethod
    def _mask_filename(img_name):
        """Return the mask file name for ``img_name``, whatever its extension length."""
        stem, _ = os.path.splitext(img_name)
        return stem + "_mask.png"

    @staticmethod
    def rgb_to_mask(mask):
        """Convert an RGB mask (PIL image or HxWx3 array) to an HxW class-index array.

        Pixels whose colour is not a key of ``rgb_to_class`` keep the initial
        value 0.
        """
        # Convert the mask to a numpy array
        mask = np.array(mask)

        # Initialize an array to hold the class indices
        class_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)

        # Apply the mapping from RGB values to class indices
        for rgb, class_index in rgb_to_class.items():
            matches = np.all(mask == rgb, axis=-1)
            class_mask[matches] = class_index

        return class_mask

    def __getitem__(self, idx):
        """Load, resize and convert the ``idx``-th image/mask pair."""
        img_name = self.image_filenames[idx]
        img_path = os.path.join(self.image_dir, img_name)
        label_path = os.path.join(self.label_dir, self._mask_filename(img_name))

        # Force three channels: the normalisation below uses three statistics,
        # so a grayscale or RGBA file would otherwise fail.
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(label_path).convert("RGB")

        image, mask = self.image_transforms(image, mask)

        return image, mask

    def image_transforms(self, image, label):
        """Resize both inputs to 256x256; normalise the image, index the mask."""
        transform_images = transforms.Compose([
            transforms.Resize((256, 256)),  # Resize to the desired input size
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std)
        ])
        transform_labels = transforms.Compose([
            # Nearest-neighbour keeps the exact mask colours. The default
            # bilinear filter blends colours at class borders, and blended
            # colours match no entry of ``rgb_to_class``.
            transforms.Resize(
                (256, 256),
                interpolation=transforms.InterpolationMode.NEAREST,
            )
        ])
        mask = self.rgb_to_mask(transform_labels(label))
        return transform_images(image), mask

# Dataset locations, given relative to the notebook's working directory.
# The mask for an image is looked up in `train_label_dir` by the image's file name.
train_image_dir = "./temp/subset/"  # input images
train_label_dir = "./temp/masks/"  # segmentation masks
  

In [125]:
# Wrap the image/mask folders in a dataset, batch it with shuffling, and run a
# single training epoch on the model.
train_dataset = Potholes(image_dir=train_image_dir, label_dir=train_label_dir)
train_loader = DataLoader(dataset=train_dataset, batch_size=15, shuffle=True)

train_model(model, train_loader, num_epochs=1)

Epoch 1/1, Loss: 1.2197


In [126]:
# Fetch the small YOLOv5 variant ('yolov5s') with pretrained weights via torch.hub.
# NOTE(review): no ref is pinned, so the code comes from the repo's default branch
# (the log shows the cached `master` checkout) and may change between runs.
# NOTE(review): `stick_model` is not used in the visible cells -- confirm its purpose.
stick_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Using cache found in /home/johan/.cache/torch/hub/ultralytics_yolov5_master
[31m[1mrequirements:[0m Ultralytics requirements ['gitpython>=3.1.30', 'pillow>=10.3.0'] not found, attempting AutoUpdate...

[31m[1mrequirements:[0m AutoUpdate success ✅ 4.5s, installed 2 packages: ['gitpython>=3.1.30', 'pillow>=10.3.0']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m

YOLOv5 🚀 2024-8-14 Python-3.7.13 torch-1.13.1+cu117 CUDA:0 (NVIDIA GeForce RTX 2060, 5918MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
