In [None]:
import json
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os

Read the predicted annotations and format the data

In [None]:
# Index of the first frame in the chunk being evaluated
start = 45001
# Index of the last frame in the chunk (inclusive); each chunk spans 5000 frames
end = (start - 1) + 5000

In [None]:
# Path of the detection file that covers frames start..end
pred_fp = f"predictions/frames_zod_detections_{start}_{end}.json"

# Load the raw predictions; pred_data is iterated below as a mapping whose
# values are lists of detection dicts
with open(pred_fp, mode="r") as pred_file:
    pred_data = json.load(pred_file)

In [None]:
# Column layout of the prediction table (order matters for downstream positional access)
pred_columns = ["image_id", "x1", "y1", "x2", "y2", "class", "area", "confidence"]

# Flatten the nested prediction dict into one record per detected box
pred_records = []
for key, detections in pred_data.items():
    # The image id is the part of the key before the first underscore
    image_id = key.split("_")[0]
    for det in detections:
        x1, y1, x2, y2 = det["box"]
        pred_records.append(
            {
                "image_id": image_id,
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "class": "Vehicle",  # every prediction is labelled as a vehicle
                "area": det["area"],
                "confidence": det["confidence"],
            }
        )

# Wrap the records as a pandas dataframe, ordered by image id
pred_df = pd.DataFrame(pred_records, columns=pred_columns)
pred_df = pred_df.sort_values("image_id")

pred_df.head(10)

In [None]:
# Save the result to csv.
# to_csv does not create missing folders, so make sure the chunk's output
# directory exists first (otherwise a fresh run fails here).
out_dir = f"outputs/{start}_{end}"
os.makedirs(out_dir, exist_ok=True)
pred_df.to_csv(f"{out_dir}/predictions.csv", index=False)

Read the ground-truth annotations and format the data as a data frame

In [None]:
# Read ground truth data: one annotation file per image that has predictions
img_ids = pred_df["image_id"].unique()

true_data = {}
for img_id in img_ids:
    annot_path = f"single_frames/{img_id}/annotations/object_detection.json"
    with open(annot_path, mode="r") as annot_file:
        annotations = json.load(annot_file)
    true_data[img_id] = annotations

In [None]:
# Ground truth attributes needed: coordinates, class, area and occlusion ratio.
# NOTE: the column name "occolusion_ratio" is misspelled but is kept as-is because
# later cells and the saved CSV files refer to it by this exact name.
true_columns = ["image_id", "x1", "y1", "x2", "y2", "class", "area", "occolusion_ratio"]

# Ground-truth boxes whose area is not larger than this (in px^2) are dropped
min_box_area = 400

# Collect one record per annotated object
true_records = []
for img_id, img_annots in true_data.items():
    for obj in img_annots:
        # Axis-aligned bounding box enclosing all annotated points
        coords = obj["geometry"]["coordinates"]
        px = [p[0] for p in coords]
        py = [p[1] for p in coords]
        x1, y1, x2, y2 = int(min(px)), int(min(py)), int(max(px)), int(max(py))

        props = obj["properties"]
        true_records.append(
            {
                "image_id": img_id,
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "class": props["class"],
                "area": abs((x1 - x2) * (y1 - y2)),
                # Not every object carries an occlusion ratio; mark those explicitly
                "occolusion_ratio": props.get("occlusion_ratio", "Undefined"),
            }
        )

# Wrap the records as a pandas dataframe (built once, after all images are read;
# explicit columns keep the layout stable even when there are no annotations)
true_df_all = pd.DataFrame(true_records, columns=true_columns)

# Filter by area > 400
true_df = true_df_all[true_df_all["area"] > min_box_area]

true_df.head()

In [None]:
# Save the result to csv
# to_csv does not create missing folders, so make sure the output directory exists
out_dir = f"outputs/{start}_{end}"
os.makedirs(out_dir, exist_ok=True)
true_df.to_csv(f"{out_dir}/ground_truths.csv", index=False)

Calculate KPIs (1) - IoU

In [None]:
def calculate_iou(box1, box2):
    """
    Intersection-over-Union of two axis-aligned boxes.

    Args:
        box1: Iterable whose first four items are (x1, y1, x2, y2).
        box2: Iterable whose first four items are (x1, y1, x2, y2).

    Returns:
        The IoU rounded to 4 decimals, or 0 when the union area is zero.
    """
    a = list(box1)
    b = list(box2)

    # Width and height of the overlap, clamped at zero for disjoint boxes
    overlap_w = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    overlap_h = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    intersection = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - intersection

    if union == 0:
        # Two degenerate boxes: define the IoU as 0 instead of dividing by zero
        return 0
    return round(intersection / union, 4)

In [None]:
# For every predicted box, find the ground-truth box of the same image with the
# highest IoU and record that IoU together with the ground truth's occlusion ratio.

# Columns holding the box corners, in the order calculate_iou expects them
box_cols = ["x1", "y1", "x2", "y2"]

# Split the ground truths per image once, instead of re-filtering true_df for every image
truths_by_img = {img: grp for img, grp in true_df.groupby("image_id", sort=False)}
no_truths = true_df.iloc[0:0]  # empty frame used for images without ground truths

iou_results = {
    "image_id": [],
    "pred_obj": [],   # index label of the prediction in pred_df
    "true_obj": [],   # position of the best match among this image's ground truths
    "iou": [],
    "occolusion_ratio": []
}

# sort=False keeps the images in their order of first appearance in pred_df
for img_id, preds in pred_df.groupby("image_id", sort=False):
    truths = truths_by_img.get(img_id, no_truths)

    if len(truths) == 0:
        # Nothing to match against: every prediction gets IoU 0 and no partner
        for pred_idx in preds.index:
            iou_results["image_id"].append(img_id)
            iou_results["pred_obj"].append(pred_idx)
            iou_results["true_obj"].append(None)
            iou_results["iou"].append(0)
            iou_results["occolusion_ratio"].append(None)

        continue

    for pred_idx, pred_obj in preds.iterrows():
        # IoU of this prediction with all ground truth bounding boxes of the image
        iou_ls = [
            calculate_iou(pred_obj[box_cols], true_obj[box_cols])
            for _, true_obj in truths.iterrows()
        ]

        # Best match (first one wins on ties)
        true_idx = np.argmax(iou_ls)

        # Save the result
        iou_results["image_id"].append(img_id)
        iou_results["pred_obj"].append(pred_idx)
        iou_results["true_obj"].append(true_idx)
        iou_results["iou"].append(max(iou_ls))
        iou_results["occolusion_ratio"].append(truths["occolusion_ratio"].iloc[true_idx])

iou_results_df = pd.DataFrame(iou_results)
iou_results_df.head()

In [None]:
# Save the result to csv.
# to_csv does not create missing folders, so make sure the kpi directory exists first.
kpi_dir = f"outputs/{start}_{end}/kpi"
os.makedirs(kpi_dir, exist_ok=True)
iou_results_df.to_csv(f"{kpi_dir}/iou.csv", index=False)

Summary statistics for the IoU results

In [None]:
# Some summary statistics
print("Summary by thresholds: ")
# Build the thresholds as k / 10 rather than with np.arange(0, 1, 0.1): arange yields
# values such as 0.30000000000000004, which would exclude IoUs rounded to exactly 0.3.
for step in range(10):
    threshold = step / 10
    count = int((iou_results_df["iou"] >= threshold).sum())

    print(f"predicted obj with iou >= {threshold:.1f}: {count}")

print("\nSummary by occolusion level:")
# dropna=False keeps the predictions that have no ground-truth partner as their own
# group; an equality test against a missing value never matches, so they would
# otherwise be reported with a mean of nan.
mean_iou_by_oratio = (
    iou_results_df.groupby("occolusion_ratio", dropna=False, sort=False)["iou"].mean()
)
for oratio, mean in mean_iou_by_oratio.items():
    label = "<no ground truth>" if pd.isna(oratio) else oratio

    print(f"mean iou for occlusion ratio {label}: {mean:.4f}")

Check the results by visualization

In [None]:
def visualize_bboxes(image_path, pred_bboxes, gt_bboxes):
    """
    Visualize prediction and ground truth bounding boxes on an image.

    Args:
        image_path (str): Path to the image file.
        pred_bboxes (list of tuples): List of predicted bounding boxes [(x1, y1, x2, y2), ...].
        gt_bboxes (list of tuples): List of ground truth bounding boxes [(x1, y1, x2, y2), ...].

    Returns:
        The image in RGB channel order with the boxes drawn on it
        (predictions in blue, ground truths in green).

    Raises:
        FileNotFoundError: If the image cannot be read from image_path.
    """
    # Load the image; cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Convert first so the colour tuples below are interpreted as RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def _draw_boxes(bboxes, label, color):
        # Draw every box with its label just above the top-left corner
        for (x1, y1, x2, y2) in bboxes:
            # The drawing functions need integer pixel coordinates; boxes may hold floats
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Draw prediction boxes in blue
    _draw_boxes(pred_bboxes, 'Pred', (0, 0, 255))

    # Draw ground truth boxes in green
    _draw_boxes(gt_bboxes, 'GT', (0, 255, 0))

    return image

Calculate KPIs (2) - TP, FP, FN, precision and recall

In [None]:


# Define a function to calculate the per-image KPIs for the "Vehicle" class
# TP: prediction = car, truth = Vehicle
def cal_kpi(preds: pd.DataFrame, truths: pd.DataFrame, iou_thresholds=None, default_threshold=0.7):
    '''
    Compute TP / FP / FN / TN, precision, recall and accuracy for one image.

    This function shall be performed for each image.
    The input preds and truths contains the predicted and the ground-truth objects in an image.
    Every prediction is treated as a vehicle and is matched to the ground-truth box it
    overlaps most (highest IoU).

    Args:
        preds: Predicted objects of the image; must have columns x1, y1, x2, y2.
        truths: Ground-truth objects of the image; must have columns x1, y1, x2, y2,
            class and occolusion_ratio.
        iou_thresholds: Optional mapping from occlusion ratio to the minimum IoU for a
            match. Defaults to 0.6 for "Medium" and 0.5 for "Heavy", "VeryHeavy" and
            "Undefined".
        default_threshold: Minimum IoU for occlusion ratios not in iou_thresholds.

    Returns:
        [tp, fp, fn, tn, precision, recall, accuracy]. When truths is empty, every
        prediction counts as FP and the remaining entries are 0.
    '''
    if iou_thresholds is None:
        iou_thresholds = {"Medium": 0.6, "Heavy": 0.5, "VeryHeavy": 0.5, "Undefined": 0.5}

    if len(truths) == 0:
        return [0, len(preds), 0, 0, 0, 0, 0]

    # Access the box corners by name rather than by column position
    box_cols = ["x1", "y1", "x2", "y2"]

    tp = 0
    fp = 0
    if_true_detected = np.zeros(len(truths))   # Maintain a list tracking whether a ground-truth object is detected

    # Loop through all predicted objects
    for _, pred_obj in preds.iterrows():
        # IoU with all ground truth bounding boxes
        iou_ls = [
            calculate_iou(pred_obj[box_cols], true_obj[box_cols])
            for _, true_obj in truths.iterrows()
        ]

        # Find the best match
        idx = np.argmax(iou_ls) # Position of the IoU maximum, i.e. of the matched ground truth
        iou_max = iou_ls[idx]

        # Select threshold: more occluded objects are allowed a looser overlap
        oratio = truths["occolusion_ratio"].iloc[idx]
        threshold = iou_thresholds.get(oratio, default_threshold)

        # Compare with the threshold
        if iou_max >= threshold:
            # Correct detection
            if truths["class"].iloc[idx] == "Vehicle":
                # Correct classification => TP
                tp += 1
            else:
                # Incorrect classification => FP (for the vehicle class)
                fp += 1
            # Update the if_true_detected
            if_true_detected[idx] += 1
        else:
            # Incorrect detection of a vehicle => FP
            fp += 1

    # Ground-truth objects that no prediction matched
    obj_not_detected = truths[if_true_detected == 0]

    # Calculate FN: Vehicles in the ground truth not detected
    fn = int((obj_not_detected["class"] == "Vehicle").sum())

    # Calculate TN: Non-vehicle objects in the ground truths not detected
    tn = int((obj_not_detected["class"] != "Vehicle").sum())

    # Invariants of the counts above:
    #   tp + fp == len(preds)   (each prediction is counted exactly once)
    # NOTE(review): tp + fn can exceed the number of ground-truth vehicles, because
    # several predictions matched to the same ground truth are each counted as TP.
    # Confirm whether duplicate detections should be counted as FP instead.

    # Calculate precision, recall, and accuracy
    epsilon = 2E-5   # avoids division by zero when there are no predictions / no vehicles
    precision = round(tp / (tp + fp + epsilon), 3)
    recall = round(tp / (tp + fn + epsilon), 3)
    accuracy = round((tp + tn)/(tp + tn + fp + fn), 3)

    return [tp, fp, fn, tn, precision, recall, accuracy]

In [None]:
# Compute the KPIs of every image that has predictions
col_names = ["image_id", "tp", "fp", "fn", "tn", "precision", "recall", "accuracy"]

# Split the ground truths per image once, instead of re-filtering true_df for every image
truths_by_img = {img: grp for img, grp in true_df.groupby("image_id", sort=False)}
no_truths = true_df.iloc[0:0]  # empty frame used for images without ground truths

# Iterate through each img (sort=False keeps the order of first appearance in pred_df)
rows = [
    [img_id] + cal_kpi(preds_img, truths_by_img.get(img_id, no_truths))
    for img_id, preds_img in pred_df.groupby("image_id", sort=False)
]

kpi_df = pd.DataFrame(rows, columns=col_names)

kpi_df.head()

In [None]:
# Save to csv.
# to_csv does not create missing folders, so make sure the kpi directory exists first.
kpi_dir = f"outputs/{start}_{end}/kpi"
os.makedirs(kpi_dir, exist_ok=True)
kpi_df.to_csv(f"{kpi_dir}/kpi.csv", index=False)

In [None]:
# Distribution of the per-image metrics for this chunk.
# The column name is passed via a variable: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
for metric in ("precision", "recall", "accuracy"):
    print(f"{metric}\n{kpi_df[metric].describe()}\n")


### Overall Summary

In [None]:
# Gather the per-chunk KPI files written by the cells above (one run per chunk).
# Dedicated loop variables are used so the notebook-wide `start` / `end` settings
# are not overwritten when this cell runs.
chunk_size = 5000
n_chunks = 10

iou_chunks = []
kpi_chunks = []
for chunk_idx in range(n_chunks):
    chunk_start = 1 + chunk_size * chunk_idx
    chunk_end = chunk_start + chunk_size - 1
    chunk_kpi_dir = f"outputs/{chunk_start}_{chunk_end}/kpi"
    iou_chunks.append(pd.read_csv(f"{chunk_kpi_dir}/iou.csv"))
    kpi_chunks.append(pd.read_csv(f"{chunk_kpi_dir}/kpi.csv"))

# ignore_index gives the combined frames a unique row index across chunks.
# NOTE: from here on kpi_df holds the KPIs of all chunks, not only the current one.
iou_df = pd.concat(iou_chunks, axis=0, ignore_index=True)
kpi_df = pd.concat(kpi_chunks, axis=0, ignore_index=True)

In [236]:
# Single quotes inside the f-strings: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
print(f"IoU summary: \n{iou_df['iou'].describe()}")
print(f"Precision summary: \n{kpi_df['precision'].describe()}")
print(f"Recall summary: \n{kpi_df['recall'].describe()}\n")
print(f"Accuracy summary: \n{kpi_df['accuracy'].describe()}\n")

IoU summary: 
count    336282.000000
mean          0.685821
std           0.230113
min           0.000000
25%           0.557600
50%           0.737150
75%           0.876200
max           1.000000
Name: iou, dtype: float64
Precision summary: 
count    47152.000000
mean         0.680207
std          0.262376
min          0.000000
25%          0.500000
50%          0.714000
75%          0.875000
max          1.000000
Name: precision, dtype: float64
Recall summary: 
count    47152.000000
mean         0.300220
std          0.182336
min          0.000000
25%          0.176000
50%          0.281000
75%          0.400000
max          1.000000
Name: recall, dtype: float64

Accuracy summary: 
count    47152.000000
mean         0.730965
std          0.151093
min          0.000000
25%          0.638000
50%          0.750000
75%          0.844000
max          1.000000
Name: accuracy, dtype: float64

