In [1]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from ultralytics import YOLO
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

import os

import cv2

import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.express as px

from ipywidgets import widgets, VBox, HBox

from IPython.display import display, clear_output

from PIL import Image, ImageDraw, ImageFont

import base64
from io import BytesIO

### Loading Data

In [2]:
train_df = pd.read_csv('data/train_df.csv')
train_labels = pd.read_csv('../../../data/Patch Perfect Data/train_labels.csv')

train_labels.columns = ['pothole_id', 'bags_used']
train_labels['pothole_id'] = 'p'+train_labels['pothole_id'].astype(str)

In [3]:
df = pd.merge(train_df, train_labels, on='pothole_id', how='inner')

In [4]:
print(train_df.shape)
print(train_labels.shape)
print(df.shape)

(387, 5)
(644, 2)
(387, 6)


In [5]:
df.head(3)

Unnamed: 0,pothole_id,red_point_1_x,red_point_1_y,red_point_2_x,red_point_2_y,bags_used
0,p101,21.0,233.0,286.0,214.0,0.5
1,p102,297.0,176.0,297.0,352.0,1.0
2,p106,341.0,89.0,228.0,312.0,0.5


## Adding mm to pixel ratio to dataset

In [6]:
def calculate_distance(point1, point2):
    return np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)

In [7]:
mm_to_pixel_ratios = []

for index, row in df.iterrows():
    pothole_id = row['pothole_id']
    point1 = (row['red_point_1_x'], row['red_point_1_y'])
    point2 = (row['red_point_2_x'], row['red_point_2_y'])

    # Calculate the distance in pixels between the red points
    pixel_distance = calculate_distance(point1, point2)
    
    if pixel_distance > 0:
        # Calculate the mm-to-pixel ratio (500mm is the known distance)
        mm_to_pixel_ratio = 500 / pixel_distance
    else:
        mm_to_pixel_ratio = None

    # Append the result to the list
    mm_to_pixel_ratios.append(mm_to_pixel_ratio)

# Add the mm-to-pixel ratios to the DataFrame
df['mm_to_pixel_ratio'] = mm_to_pixel_ratios

In [8]:
df.to_csv('data/train_df_final.csv', index=False)

In [9]:
df.head(3)

Unnamed: 0,pothole_id,red_point_1_x,red_point_1_y,red_point_2_x,red_point_2_y,bags_used,mm_to_pixel_ratio
0,p101,21.0,233.0,286.0,214.0,0.5,1.881961
1,p102,297.0,176.0,297.0,352.0,1.0,2.840909
2,p106,341.0,89.0,228.0,312.0,0.5,2.000032


## Fine tuning model

### ResNet

In [None]:
class PotholeDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = f"{self.image_dir}/{self.dataframe.iloc[idx]['pothole_id']}.jpg"
        image = Image.open(img_name).convert("RGB")
        pixel_ratio = self.dataframe.iloc[idx]['mm_to_pixel_ratio']
        bags_used = self.dataframe.iloc[idx]['bags_used']

        if self.transform:
            seed = np.random.randint(2147483647)  # make a seed with numpy generator 
            random.seed(seed)  # apply this seed to transform's random function
            torch.manual_seed(seed)  # needed for torchvision 0.7
            image = self.transform(image)

        return image, torch.tensor(pixel_ratio, dtype=torch.float32), torch.tensor(bags_used, dtype=torch.float32)

In [None]:
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224 for ResNet
    transforms.RandomHorizontalFlip(p=0.5),  # 50% chance of horizontal flip
    transforms.RandomRotation(degrees=15),  # Random rotation within 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change the brightness, contrast, saturation and hue
    transforms.ToTensor(),  # Convert PIL image to Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize using ImageNet mean and std
])

In [None]:
train_dataset = PotholeDataset(df, 'data/pothole_images', transform=train_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
class ResNetRegressionModel(nn.Module):
    def __init__(self):
        super(ResNetRegressionModel, self).__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the final classification layer
        
        # Combine resnet features with pixel-to-mm ratio
        self.fc1 = nn.Linear(512 + 1, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)  # Output layer for regression
    
    def forward(self, images, pixel_ratios):
        x = self.resnet(images)
        pixel_ratios = pixel_ratios.view(-1, 1)  # Ensure pixel_ratios is a 2D tensor
        x = torch.cat((x, pixel_ratios), dim=1)  # Concatenate pixel ratios
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
model = ResNetRegressionModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [None]:
for epoch in range(1):  # For simplicity, let's say we train for 10 epochs
    model.train()
    running_loss = 0.0
    
    for images, pixel_ratios, bags_used in train_loader:
        optimizer.zero_grad()
        outputs = model(images, pixel_ratios)
        loss = criterion(outputs.squeeze(), bags_used)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

In [None]:
torch.save(model.state_dict(), 'Prediction Model/resnet_regression_model.pth')

### Prediction

In [None]:
random_index = np.random.randint(0, len(train_dataset))
sample_image, sample_pixel_ratio, true_bags_used = train_dataset[random_index]

sample_image = sample_image.unsqueeze(0)  # Add batch dimension: [1, 3, H, W]

# The pixel ratio is already a scalar, so we convert it to a tensor and add a batch dimension.
sample_pixel_ratio = torch.tensor([sample_pixel_ratio]).unsqueeze(0)  # Shape: [1, 1]

# Make a prediction
with torch.no_grad():  # Disable gradient computation for inference
    predicted_bags_used = model(sample_image, sample_pixel_ratio)

print(f"Predicted number of bags used: {predicted_bags_used.item()}")
print(f"True number of bags used: {true_bags_used}")

# Transforming problem to classification

In [10]:
df = pd.read_csv('data/train_df_final.csv')

In [11]:
bins = [0, 0.25, 0.5, 1, 2, np.inf]
labels = [0, 1, 2, 3, 4]
df['class'] = pd.cut(df['bags_used'], bins=bins, labels=labels, include_lowest=True)

In [12]:
df.head()

Unnamed: 0,pothole_id,red_point_1_x,red_point_1_y,red_point_2_x,red_point_2_y,bags_used,mm_to_pixel_ratio,class
0,p101,21.0,233.0,286.0,214.0,0.5,1.881961,1
1,p102,297.0,176.0,297.0,352.0,1.0,2.840909,2
2,p106,341.0,89.0,228.0,312.0,0.5,2.000032,1
3,p107,311.0,99.0,212.0,258.0,0.5,2.669487,1
4,p109,238.0,237.0,247.0,359.0,0.5,4.087254,1


In [13]:
df.to_csv('data/train_df_final_w_classes.csv')

In [14]:
df = pd.read_csv('data/train_df_final_w_classes.csv')

In [15]:
df['class'].value_counts()

class
1    167
0    114
2     61
3     26
4     19
Name: count, dtype: int64

### CLASS MEANS

In [16]:
class_means = df.groupby('class')['bags_used'].mean()
print("Mean of bags used for each class:")
print(class_means)

Mean of bags used for each class:
class
0    0.250000
1    0.500000
2    0.977869
3    1.759615
4    4.063158
Name: bags_used, dtype: float64


In [17]:
def convert_image_to_base64(image):
    _, buffer = cv2.imencode('.jpg', image)
    image_base64 = base64.b64encode(buffer).decode('utf-8')
    return f'data:image/jpeg;base64,{image_base64}'

# Function to update the plot with a random image from the selected class
def update_plot(class_label):
    class_df = df[df['class'] == class_label]
    sample = class_df.sample(1).iloc[0]
    image_path = f"data/pothole_images/{sample['pothole_id']}.jpg"
    image = cv2.imread(image_path)
    
    image_base64 = convert_image_to_base64(image)
    
    fig.update_traces(go.Image(source=image_base64))
    fig.update_layout(title=f"Pothole ID: {sample['pothole_id']} | Class: {class_label} | Bags Used: {sample['bags_used']}")
    
    # Clear and re-display the button, selector, and plot
    clear_output(wait=True)
    display(class_selector, random_button)
    display(fig)

# Create a dropdown selector for classes
class_selector = widgets.Dropdown(
    options=[(f"Class {int(cls)}", cls) for cls in df['class'].unique()],
    description='Select Class:'
)

# Create a button to randomly sample an image from the selected class
random_button = widgets.Button(description="Sample Image")

# Initialize Plotly figure with a default image
initial_sample = df[df['class'] == df['class'].unique()[0]].iloc[0]
initial_image_path = f"data/pothole_images/{initial_sample['pothole_id']}.jpg"
initial_image = cv2.imread(initial_image_path)
initial_image_base64 = convert_image_to_base64(initial_image)

fig = go.Figure(go.Image(source=initial_image_base64))
fig.update_layout(title=f"Pothole ID: {initial_sample['pothole_id']} | Class: {initial_sample['class']} | Bags Used: {initial_sample['bags_used']}",
                  xaxis=dict(showticklabels=False), yaxis=dict(showticklabels=False))

# Set up the callback for the button
def on_button_click(b):
    update_plot(class_selector.value)

random_button.on_click(on_button_click)

# Display the dropdown and button
display(class_selector, random_button)

# Display the initial figure
display(fig)

Dropdown(description='Select Class:', index=1, options=(('Class 1', 1), ('Class 2', 2), ('Class 3', 3), ('Clas…

Button(description='Sample Image', style=ButtonStyle())

## Predicting Pothole Class Using ResNet

In [18]:
image_dir = 'data/train_images'

In [19]:
class ResNetWithRatio(nn.Module):
    def __init__(self, num_classes):
        super(ResNetWithRatio, self).__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the classification layer
        
        # Fully connected layers
        self.fc1 = nn.Linear(512 + 1, 256)  # 512 from ResNet + 1 from mm_to_pixel_ratio
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
    
    def forward(self, images, ratios):
        features = self.resnet(images)
        x = torch.cat((features, ratios), dim=1)
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

# Determine the number of classes
num_classes = df['class'].nunique()

In [20]:
# Instantiate the model
model = ResNetWithRatio(num_classes=num_classes)

# Load model
#model.load_state_dict(torch.load('Prediction Model/pothole_classification_resnet_model.pth'))
#model.train()


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.


Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.



In [21]:
class PotholeDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        # Get the image path
        pothole_id = self.dataframe.iloc[idx]['pothole_id']
        image_path = os.path.join(self.image_dir, f'{pothole_id}.jpg')
        
        # Load the image
        image = Image.open(image_path).convert('RGB')
        
        # Get the mm_to_pixel_ratio and class
        mm_to_pixel_ratio = self.dataframe.iloc[idx]['mm_to_pixel_ratio']
        class_label = self.dataframe.iloc[idx]['class']
        
        # Apply transformations if any
        if self.transform:
            image = self.transform(image)
        
        # Convert mm_to_pixel_ratio to tensor
        mm_to_pixel_ratio = torch.tensor([mm_to_pixel_ratio], dtype=torch.float32)
        
        return image, mm_to_pixel_ratio, class_label

# Define transformations for the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),      # Randomly rotate the image by up to 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the dataset
dataset = PotholeDataset(df, image_dir, transform=transform)

In [22]:
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [43]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(10):  # Number of epochs
    model.train()
    running_loss = 0.0
    for images, mm_to_pixel_ratios, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images, mm_to_pixel_ratios)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch + 1}/10], Loss: {running_loss/len(train_loader)}')

Epoch [1/10], Loss: 1.1190702181596022
Epoch [2/10], Loss: 1.139569034943214
Epoch [3/10], Loss: 1.1258767751547007


KeyboardInterrupt: 

In [24]:
torch.save(model.state_dict(), 'Prediction Model/pothole_classification_resnet_model.pth')

In [44]:
model.eval()

ResNetWithRatio(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tr

In [84]:
def sample_and_predict(model, dataset, df):
    # Randomly select an index
    random_index = np.random.randint(0, len(dataset))
    
    # Get the sample from the dataset
    sample_image, sample_pixel_ratio, true_class = dataset[random_index]
    
    # Add batch dimension
    sample_image = sample_image.unsqueeze(0)  # Shape: [1, 3, H, W]
    sample_pixel_ratio = sample_pixel_ratio.unsqueeze(0)  # Shape: [1, 1]
    
    # Make a prediction
    with torch.no_grad():
        outputs = model(sample_image, sample_pixel_ratio)
        _, predicted_class = torch.max(outputs, 1)
    
    # Convert tensors to CPU and detach
    predicted_class = predicted_class.item()
    
    # Print the predicted and true classes
    print(f"Predicted Class: {predicted_class}")
    print(f"True Class: {true_class}")

# Example usage:
sample_and_predict(model, dataset, df)

Predicted Class: 1
True Class: 1
