In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from ultralytics import YOLO
import albumentations as A
from albumentations.pytorch import ToTensorV2

import os

import cv2

import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.express as px

from ipywidgets import widgets, VBox, HBox

from IPython.display import display, clear_output

from PIL import Image, ImageDraw, ImageFont

import base64
from io import BytesIO

### Image Labels

In [None]:
# Read the train and test label tables from their CSV files.
train_labels, test_labels = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../../data/Patch Perfect Data/train_labels.csv',
        '../../../data/Patch Perfect Data/test_labels.csv',
    )
)

In [None]:
# (rows, columns) of the training labels table.
train_labels.shape

In [None]:
# (rows, columns) of the test labels table.
test_labels.shape

In [None]:
# Preview the first three rows of the training labels as read from the CSV.
train_labels.head(3)

In [None]:
# Frequency of each distinct value in the 'Bags used ' column.
# Note: the column name as read from the CSV ends with a trailing space.
train_labels['Bags used '].value_counts()

In [None]:
# Rename the columns positionally. This requires the frame to have exactly two
# columns (pandas raises a length-mismatch error otherwise).
# NOTE(review): presumably the first CSV column is the pothole id and the second
# is 'Bags used ' - confirm against the CSV header.
train_labels.columns = ['pothole_id', 'bags_used']

In [None]:
# Turn the ids into strings prefixed with 'p' (presumably to match the image and
# annotation file name stems - confirm against the data folder).
# The prefix is only added where it is missing, so re-running this cell does not
# produce 'pp<id>' values that would silently fail to match any image.
label_ids = train_labels['pothole_id'].astype('str')
train_labels['pothole_id'] = label_ids.where(
    label_ids.str.startswith('p'), 'p' + label_ids
)

In [None]:
# Preview the labels again after the column rename and the id prefixing.
train_labels.head(3)

### Image annotations - bounding boxes

In [None]:
def load_annotations(annotation_path):
    """Load bounding-box annotations from a folder of ``.txt`` files.

    Every file in ``annotation_path`` whose name ends with ``.txt`` is read
    line by line. Each non-blank line is split on whitespace and its first
    five fields are parsed as ``class x y width height`` (presumably the
    YOLO label format - confirm against the annotation tool used).
    The pothole id is the file name up to its first ``.``.

    Parameters
    ----------
    annotation_path : str
        Directory containing the annotation files.

    Returns
    -------
    pandas.DataFrame
        One row per annotation line with the columns ``pothole_id``,
        ``class``, ``x``, ``y``, ``width`` and ``height``. The columns are
        present even when no annotation was found, so downstream column
        access does not fail on an empty folder. Files are processed in
        sorted name order so the row order is reproducible.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than five fields, or a field cannot be
        parsed as a number.
    """
    columns = ['pothole_id', 'class', 'x', 'y', 'width', 'height']
    data = []
    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(annotation_path)):
        if not filename.endswith('.txt'):
            continue
        pothole_id = filename.split('.')[0]
        with open(os.path.join(annotation_path, filename), 'r') as file:
            for line_number, line in enumerate(file, start=1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue
                if len(parts) < 5:
                    raise ValueError(
                        f"{filename}:{line_number}: expected 5 fields "
                        f"(class x y width height), got {len(parts)}"
                    )
                data.append({
                    'pothole_id': pothole_id,
                    'class': int(parts[0]),
                    'x': float(parts[1]),
                    'y': float(parts[2]),
                    'width': float(parts[3]),
                    'height': float(parts[4])
                })
    return pd.DataFrame(data, columns=columns)

In [None]:
# Parse every .txt file in the training annotations folder into one DataFrame
# (one row per annotation line).
train_annotations = load_annotations('../../../data/Patch Perfect Data/train_annotations')

In [None]:
# Preview the first three annotation rows.
train_annotations.head(3)

In [None]:
# Number of annotation rows per class id.
train_annotations['class'].value_counts()

### Images

In [None]:
def load_images_from_folder(folder):
    """Read every ``.jpg`` file in ``folder`` into a DataFrame.

    Parameters
    ----------
    folder : str
        Directory containing the image files. Only names ending in the
        lowercase suffix ``.jpg`` are considered.

    Returns
    -------
    pandas.DataFrame
        One row per readable image with the columns ``pothole_id`` (file
        name up to its first ``.``) and ``image`` (the array returned by
        ``cv2.imread``). The columns are present even when no image was
        loaded, so ``frame['pothole_id']`` works on an empty folder. Files
        are processed in sorted name order so the row order is reproducible.
    """
    data = []
    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.jpg'):
            continue
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # cv2.imread signals an unreadable file with None; skip it.
            continue
        data.append({'pothole_id': filename.split('.')[0], 'image': img})
    return pd.DataFrame(data, columns=['pothole_id', 'image'])

In [None]:
# Load the train and test image folders into DataFrames of (pothole_id, image).
train_images, test_images = (
    load_images_from_folder(image_dir)
    for image_dir in (
        '../../../data/Patch Perfect Data/train_images',
        '../../../data/Patch Perfect Data/test_images',
    )
)

In [None]:
# Preview the first three loaded training images; each 'image' cell holds the
# array returned by cv2.imread.
train_images.head(3)

## Filtering out images with no labels

In [None]:
# Unique label ids for which an image was actually loaded.
valid_ids = train_labels.loc[
    train_labels['pothole_id'].isin(set(train_images['pothole_id'])),
    'pothole_id',
].unique()

In [None]:
# Keep only the images whose id is in valid_ids.
valid_images = train_images.loc[
    train_images['pothole_id'].isin(set(valid_ids))
]

In [None]:
# Keep only the label rows whose id is in valid_ids.
valid_labels = train_labels.loc[
    train_labels['pothole_id'].isin(set(valid_ids))
]

In [None]:
# Show the shapes of the filtered labels and images, one per line.
print(valid_labels.shape, valid_images.shape, sep='\n')

In [None]:
# Persist the valid ids. Create the output folder first: to_csv does not create
# missing parent directories and would fail on a fresh checkout.
os.makedirs('data', exist_ok=True)
pd.DataFrame(valid_ids, columns=['pothole_id']).to_csv('data/valid_ids.csv', index=False)

# Finding red points of L1 stick

In [None]:
# Seed the feature table with one row per valid pothole id.
train_df = pd.DataFrame(valid_ids, columns=['pothole_id'])

In [None]:
# Load the YOLO model from the local L1 checkpoint; it is used below via
# model.predict to detect the L1 bounding box in each image.
model = YOLO('YOLO/L1/best_model.pt')

In [None]:
def find_red_centroids_in_bbox(image, bbox):
    """Locate the two red markers inside a bounding box.

    The box is cropped from ``image``, thresholded for red in HSV space, and
    the external contours of the red mask are grouped by the half of the crop
    their centroid falls in: top/bottom when the crop is taller than wide,
    left/right otherwise. In each half the contour with the largest area is
    selected.

    Parameters
    ----------
    image : numpy.ndarray
        Image indexed as ``image[y, x]``. It is converted with
        ``cv2.COLOR_BGR2HSV``, so BGR channel order is expected.
    bbox : sequence of four numbers
        ``(x1, y1, x2, y2)`` pixel corners. Values are truncated to ``int``
        and clamped to the image bounds, so boxes that stick out of the
        image no longer slice from the wrong end or skew the fallbacks.

    Returns
    -------
    list
        ``[first, second]`` where ``first`` is the top (or left) point and
        ``second`` the bottom (or right) point, in full-image coordinates.
        A detected point is a 3-tuple ``(x, y, area)``. When a half holds no
        red contour the point is a 2-tuple ``(x, y)`` placed on the box
        centre line, 10% of the box length in from the corresponding edge.
        An empty list is returned when the clamped box has no area, which
        lets callers that check ``len(result) == 2`` treat it as "not found".
    """
    img_height, img_width = image.shape[:2]
    x1, y1, x2, y2 = (int(v) for v in bbox)

    # Clamp to the image: a negative start would otherwise be interpreted by
    # NumPy as "count from the end" and silently crop the wrong region.
    x1 = min(max(x1, 0), img_width)
    x2 = min(max(x2, 0), img_width)
    y1 = min(max(y1, 0), img_height)
    y2 = min(max(y2, 0), img_height)
    if x2 <= x1 or y2 <= y1:
        # Nothing to crop; cv2.cvtColor would fail on an empty array.
        return []

    cropped_image = image[y1:y2, x1:x2]
    hsv_cropped = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2HSV)

    # Red sits at both ends of OpenCV's hue axis, hence two hue ranges.
    mask_low = cv2.inRange(hsv_cropped, np.array([0, 100, 100]), np.array([10, 255, 255]))
    mask_high = cv2.inRange(hsv_cropped, np.array([160, 100, 100]), np.array([180, 255, 255]))
    mask = cv2.bitwise_or(mask_low, mask_high)

    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    height, width = cropped_image.shape[:2]
    is_vertical = height > width
    # Split along the longer axis: rows for a vertical box, columns otherwise.
    split_at = (height if is_vertical else width) // 2

    first_half_centroids = []   # top (vertical) or left (horizontal)
    second_half_centroids = []  # bottom (vertical) or right (horizontal)

    for contour in contours:
        M = cv2.moments(contour)
        if M['m00'] == 0:
            # Degenerate contour with zero area: no centroid can be computed.
            continue
        cX = int(M['m10'] / M['m00'])
        cY = int(M['m01'] / M['m00'])
        # Shift from crop coordinates back to full-image coordinates.
        candidate = (cX + x1, cY + y1, cv2.contourArea(contour))
        position = cY if is_vertical else cX
        if position < split_at:
            first_half_centroids.append(candidate)
        else:
            second_half_centroids.append(candidate)

    def select_largest_area(centroids):
        if not centroids:
            return None
        return max(centroids, key=lambda x: x[2])

    first_centroid = select_largest_area(first_half_centroids)
    second_centroid = select_largest_area(second_half_centroids)

    if is_vertical:
        if not first_centroid:
            first_centroid = (x1 + width // 2, y1 + int(height * 0.1))
        if not second_centroid:
            second_centroid = (x1 + width // 2, y2 - int(height * 0.1))
    else:
        if not first_centroid:
            first_centroid = (x1 + int(width * 0.1), y1 + height // 2)
        if not second_centroid:
            second_centroid = (x2 - int(width * 0.1), y1 + height // 2)

    return [first_centroid, second_centroid]

In [None]:
# Index the images by id once so the loop does a constant-time lookup instead
# of filtering the whole train_images frame for every pothole. setdefault
# keeps the first image seen for an id, matching the previous `.values[0]`.
images_by_id = {}
for image_id, image_array in zip(train_images['pothole_id'], train_images['image']):
    images_by_id.setdefault(image_id, image_array)

# Placeholder stored when no L1 box or no pair of red points is available.
NO_RED_POINTS = (None, None, None, None)

# One (x1, y1, x2, y2) tuple per entry of valid_ids, in the same order.
red_points = []
for pothole_id in valid_ids:
    image = images_by_id[pothole_id]

    # Step 1: detect the L1 bounding box using the model
    l1_results = model.predict(source=image, save=False, verbose=False)
    l1_detections = l1_results[0].boxes

    points = NO_RED_POINTS
    if len(l1_detections) > 0:
        # Keep only the bounding box with the highest confidence
        l1_boxes = l1_detections.xyxy.cpu().numpy()
        l1_confidences = l1_detections.conf.cpu().numpy()
        l1_bbox = l1_boxes[np.argmax(l1_confidences)].astype(int)

        # Step 2: find the red centroids in the L1 bounding box
        red_centroids = find_red_centroids_in_bbox(image, l1_bbox)
        if len(red_centroids) == 2:
            first_point, second_point = red_centroids
            # Only x and y are stored; any extra tuple element is ignored.
            points = (first_point[0], first_point[1], second_point[0], second_point[1])

    red_points.append(points)

red_point_1_x = [p[0] for p in red_points]
red_point_1_y = [p[1] for p in red_points]
red_point_2_x = [p[2] for p in red_points]
red_point_2_y = [p[3] for p in red_points]

# Rebuild train_df from valid_ids rather than assigning onto the existing
# frame: this keeps the cell re-runnable after rows have been dropped from
# train_df further down (assignment would fail with a length mismatch).
train_df = pd.DataFrame(valid_ids, columns=['pothole_id']).assign(
    red_point_1_x=red_point_1_x,
    red_point_1_y=red_point_1_y,
    red_point_2_x=red_point_2_x,
    red_point_2_y=red_point_2_y,
)

# Display the updated dataframe
train_df.head()

In [None]:
# Count missing values per column (rows for which no red points were stored hold None).
train_df.isna().sum()

In [None]:
# Discard every row that still has a missing value.
train_df = train_df.dropna()

In [None]:
# (rows, columns) remaining after the rows with missing values were dropped.
train_df.shape

In [None]:
# Persist the red-point table. Create the output folder first: to_csv does not
# create missing parent directories and would fail on a fresh checkout.
os.makedirs('data', exist_ok=True)
train_df.to_csv('data/train_df.csv', index=False)