In [None]:
import json
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os

Read the predicted annotations and format the data

In [None]:
# Frame-id range of the chunk handled in this run (both ends inclusive)
chunk_size = 5000
start = 45001
end = start + chunk_size - 1

In [None]:
# Load the detector output for the selected chunk; the result is iterated
# below as a mapping from frame key to a list of detections
pred_fp = f"predictions/frames_zod_detections_{start}_{end}.json"
with open(pred_fp, "r") as pred_file:
    pred_data = json.load(pred_file)

In [None]:
# Flatten the nested {frame key: [detections]} mapping into (image id, detection)
# pairs. The image id is the part of the frame key before the first underscore.
flat_preds = [
    (frame_key.split("_")[0], det)
    for frame_key, detections in pred_data.items()
    for det in detections
]

# One column per attribute; every prediction is labelled with the class "Vehicle"
new_pred_data = {
    "image_id": [frame_id for frame_id, _ in flat_preds],
    "x1": [det["box"][0] for _, det in flat_preds],
    "y1": [det["box"][1] for _, det in flat_preds],
    "x2": [det["box"][2] for _, det in flat_preds],
    "y2": [det["box"][3] for _, det in flat_preds],
    "class": ["Vehicle" for _ in flat_preds],
    "area": [det["area"] for _, det in flat_preds],
    "confidence": [det["confidence"] for _, det in flat_preds],
}

# Wrap the columns as a pandas dataframe ordered by image id
pred_df = pd.DataFrame(new_pred_data).sort_values("image_id")

pred_df.head(10)

In [None]:
# Save the result to csv
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory of this chunk exists before writing.
# Note: the row index is written too (default index=True), so a plain
# pd.read_csv of this file shows it as an extra "Unnamed: 0" column.
os.makedirs(f"outputs/{start}_{end}", exist_ok=True)
pred_df.to_csv(f"outputs/{start}_{end}/predictions.csv")

Read the ground-truth annotations and format the data as a DataFrame

In [None]:
# Read the ground-truth annotation file of every image that has predictions
img_ids = pred_df["image_id"].unique()
true_data = {}
for frame_id in img_ids:
    annot_path = f"single_frames/{frame_id}/annotations/object_detection.json"
    with open(annot_path, "r") as annot_file:
        true_data[frame_id] = json.load(annot_file)

In [None]:
# Ground-truth attributes needed: coordinates, class, area and occlusion ratio.
# One record is collected per annotated object and the frame is built once, after
# the loop, so the cell also works when `true_data` is empty.
truth_columns = ["image_id", "x1", "y1", "x2", "y2", "class", "area", "occolusion_ratio"]
truth_records = []

for frame_id, frame_annots in true_data.items():
    for obj in frame_annots:
        # Axis-aligned bounding box around the annotated points
        coords = obj["geometry"]["coordinates"]
        px = [p[0] for p in coords]
        py = [p[1] for p in coords]
        x1, y1, x2, y2 = int(min(px)), int(min(py)), int(max(px)), int(max(py))

        props = obj["properties"]
        truth_records.append({
            "image_id": frame_id,
            "x1": x1,
            "y1": y1,
            "x2": x2,
            "y2": y2,
            "class": props["class"],
            "area": abs((x1 - x2) * (y1 - y2)),
            # Objects without an occlusion annotation get the "Undefined" marker
            "occolusion_ratio": props.get("occlusion_ratio", "Undefined"),
        })

# Wrap the records as a pandas dataframe (the column order is fixed explicitly)
true_df = pd.DataFrame(truth_records, columns=truth_columns)

true_df[true_df["class"]=="Vehicle"].head()

In [None]:
# Save the result to csv
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory of this chunk exists before writing.
os.makedirs(f"outputs/{start}_{end}", exist_ok=True)
true_df.to_csv(f"outputs/{start}_{end}/ground_truths.csv")

Bounding Box Visualization

In [None]:
def _draw_labelled_boxes(image, bboxes, label, color):
    """Draw every box of `bboxes` on `image` in place, with `label` written above it."""
    for box in bboxes:
        # Convert to plain Python ints: rows coming from DataFrame.to_numpy() hold
        # numpy scalars (possibly floats), while OpenCV expects integer pixel coordinates.
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(image, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def visualize_bboxes(image_path, pred_bboxes, gt_bboxes):
    """
    Visualize prediction and ground truth bounding boxes on an image.

    Args:
        image_path (str): Path to the image file.
        pred_bboxes (list of tuples): List of predicted bounding boxes [(x1, y1, x2, y2), ...].
        gt_bboxes (list of tuples): List of ground truth bounding boxes [(x1, y1, x2, y2), ...].

    Returns:
        The image in RGB channel order with predictions drawn in blue ('Pred')
        and ground truths in green ('GT').

    Raises:
        FileNotFoundError: If the image cannot be read from `image_path`.
    """
    # Load the image; cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Convert before drawing, so the colour tuples below are interpreted as RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    _draw_labelled_boxes(image, pred_bboxes, 'Pred', (0, 0, 255))  # Blue for predictions
    _draw_labelled_boxes(image, gt_bboxes, 'GT', (0, 255, 0))      # Green for ground truth

    return image

Post Processing

In [None]:
# Check the range of predicted vehicle areas over all chunks, to decide whether
# the ground-truth objects need an area filter.
# The loop uses its own chunk_* names on purpose: the notebook-level `start`,
# `end` and `pred_df` describe the chunk selected at the top and are still read
# by the IoU / KPI cells further down, so they must not be overwritten here.
start_ls = [i*5000+1 for i in range(10)]
pred_area_chunks = []

for chunk_start in start_ls:
    chunk_end = chunk_start + 5000 - 1
    chunk_preds = pd.read_csv(f"outputs/{chunk_start}_{chunk_end}/predictions.csv")
    pred_area_chunks.append(chunk_preds["area"])

# One Series holding the predicted box areas of every chunk
all_areas_pred = pd.concat(pred_area_chunks)

In [None]:
# Summary statistics of the pooled area values held in `all_areas_pred`
print(all_areas_pred.describe())

In [None]:
# Histogram of log(1 + area): the log transform compresses the long right tail
fig, ax = plt.subplots()
ax.hist(np.log1p(all_areas_pred), bins=20, edgecolor="black")
ax.set_xlabel("log(1 + Area)")
ax.set_ylabel("Frequency")
ax.set_title("Histogram with Log Transformation")
plt.show()

In [None]:
# If an area of 400 is used as the filtering criterion, check how many ground-truth
# objects would be removed and how many of them are vehicles.
# (Counts noted from an earlier run: 1398989 objects, 124939 vehicles.)
# The loop uses its own chunk_* names so that the notebook-level `start`, `end`
# and `pred_df` (still needed by the IoU / KPI cells) are not overwritten.
start_ls = [i*5000+1 for i in range(10)]
threshold = 400
gt_area_chunks = []
gt_vehicle_area_chunks = []

for chunk_start in start_ls:
    chunk_end = chunk_start + 5000 - 1
    gt_chunk = pd.read_csv(f"outputs/{chunk_start}_{chunk_end}/ground_truths.csv")
    gt_area_chunks.append(gt_chunk["area"])
    gt_vehicle_area_chunks.append(gt_chunk.loc[gt_chunk["class"] == "Vehicle", "area"])

gt_areas = pd.concat(gt_area_chunks)
gt_vehicle_areas = pd.concat(gt_vehicle_area_chunks)

# The removal step keeps rows with area > threshold, so a row is removed exactly
# when that condition is false (this includes area == threshold).
removed_total = int((~(gt_areas > threshold)).sum())
count = int((~(gt_vehicle_areas > threshold)).sum())
print(f"ground-truth objects removed: {removed_total}")
print(f"of which vehicles: {count}")

In [180]:
# Do removal: keep only the ground-truth objects whose box area exceeds the threshold.
# The loop uses its own chunk_* / gt_* names so that the notebook-level `start`,
# `end` and `pred_df` (still needed by the IoU / KPI cells) are not overwritten.
AREA_THRESHOLD = 400
start_ls = [i*5000+1 for i in range(10)]

for chunk_start in start_ls:
    chunk_end = chunk_start + 5000 - 1
    gt_chunk = pd.read_csv(f"outputs/{chunk_start}_{chunk_end}/ground_truths.csv")
    gt_kept = gt_chunk[gt_chunk["area"] > AREA_THRESHOLD]
    # Note: the input's saved index column ("Unnamed: 0") is carried along and a
    # fresh index column is written next to it.
    gt_kept.to_csv(f"outputs/{chunk_start}_{chunk_end}/ground_truths_less.csv")

Calculate KPIs (1) - IoU

In [None]:
def calculate_iou(box1, box2):
    """
    Intersection over union of two axis-aligned boxes.

    Args:
        box1, box2: Iterables whose first four items are (x1, y1, x2, y2).

    Returns:
        The IoU rounded to 4 decimals, or 0 when the union area is zero.
    """
    a = list(box1)
    b = list(box2)

    # Corners of the overlap rectangle
    left, top = max(a[0], b[0]), max(a[1], b[1])
    right, bottom = min(a[2], b[2]), min(a[3], b[3])

    # Width / height are clamped at zero so disjoint boxes give an empty overlap
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - overlap

    # Guard against division by zero
    if union == 0:
        return 0
    return round(overlap / union, 4)

In [None]:
# Test cases
# The overlap is 1.5 x 1.5 = 2.25 and the union is 4 + 4 - 2.25 = 5.75, so this prints 0.3913
print(calculate_iou([0, 0, 2, 2], [-0.5, -0.5, 1.5, 1.5]))

Calculate IoU for the predictions of the selected 5000-frame chunk

In [None]:
# For every predicted box, find the ground-truth box (of any class) in the same
# image with the highest IoU and record that match.
# Box coordinates and the occlusion ratio are selected by column name, so the
# cell does not depend on the column order of the two frames.
box_cols = ["x1", "y1", "x2", "y2"]
iou_results = {
    "image_id": [],
    "pred_obj": [],
    "true_obj": [],
    "iou": [],
    "occolusion_ratio": [] 
}
images_without_truth = 0

for img_id in pred_df["image_id"].unique():
    preds = pred_df[pred_df["image_id"] == img_id]
    truths = true_df[true_df["image_id"] == img_id]

    # Hoisted out of the prediction loop: these do not change per prediction
    truth_boxes = truths[box_cols].to_numpy()
    truth_occlusion = truths["occolusion_ratio"].tolist()
    if len(truths) == 0:
        images_without_truth += 1

    for pred_idx, pred_obj in preds.iterrows():
        pred_box = pred_obj[box_cols]

        # IoU with all ground truth bounding boxes of this image
        iou_ls = [calculate_iou(pred_box, truth_box) for truth_box in truth_boxes]

        if iou_ls:
            true_idx = int(np.argmax(iou_ls))
            best_iou = iou_ls[true_idx]
            occlusion = truth_occlusion[true_idx]
        else:
            # No ground truth in this image: np.argmax would raise on an empty
            # list, so the prediction is recorded as unmatched instead
            true_idx, best_iou, occlusion = -1, 0.0, "Undefined"

        # Save the result.
        # pred_obj is the row label in pred_df; true_obj is the position of the
        # match within this image's ground-truth rows (-1 when there is none).
        iou_results["image_id"].append(img_id)
        iou_results["pred_obj"].append(pred_idx)
        iou_results["true_obj"].append(true_idx)
        iou_results["iou"].append(best_iou)
        iou_results["occolusion_ratio"].append(occlusion)

iou_results_df = pd.DataFrame(iou_results)

if images_without_truth:
    print(f"Warning: {images_without_truth} image(s) had no ground-truth objects; their predictions get IoU 0.")

iou_results_df.head()

In [None]:
# Save the result to csv
# DataFrame.to_csv does not create missing folders, so make sure the kpi
# directory of this chunk exists before writing.
os.makedirs(f"outputs/{start}_{end}/kpi", exist_ok=True)
iou_results_df.to_csv(f"outputs/{start}_{end}/kpi/iou.csv")

Summary statistics for the iou results

In [None]:
# Some summary statistics
print("Summary by thresholds: ")
for step in range(10):
    # Build each threshold as step / 10 instead of accumulating 0.1 steps:
    # np.arange(0, 1, 0.1) yields values such as 0.30000000000000004, and an IoU
    # of exactly 0.3 (IoUs are rounded to 4 decimals) would then fail ">= 0.3".
    threshold = step / 10
    count = int((iou_results_df["iou"] >= threshold).sum())

    print(f"predicted obj with iou >= {threshold:.1f}: {count}")

print("\nSummary by occolusion level:")
for oratio in iou_results_df["occolusion_ratio"].unique():
    selector = (iou_results_df["occolusion_ratio"] == oratio)
    mean = iou_results_df.loc[selector, "iou"].mean()

    print(f"mean iou for occlusion ratio {oratio}: {mean:.4f}")

Check the results by visualization

In [None]:
img_id = "012120"

# Search for the image path
directory = f"single_frames_img/{img_id}/camera_front_blur"
# os.listdir returns the entries in arbitrary order; sorting makes the picked
# file the same on every run, and an empty folder gets a clear error message
image_files = sorted(os.listdir(directory))
if not image_files:
    raise FileNotFoundError(f"No image file found in {directory}")
img_path = os.path.join(directory, image_files[0])

# Box coordinates are selected by name rather than by column position
box_cols = ["x1", "y1", "x2", "y2"]

selector1 = pred_df["image_id"]==img_id
pred_bboxes = pred_df.loc[selector1, box_cols].to_numpy()

selector2 = (true_df["image_id"]==img_id) & (true_df["class"]=="Vehicle")
gt_bboxes = true_df.loc[selector2, box_cols].to_numpy()

# An image id that is not in the loaded frames would otherwise show a bare picture
if len(pred_bboxes) == 0 and len(gt_bboxes) == 0:
    print(f"Warning: no predicted or ground-truth boxes found for image {img_id}; "
          "check that it belongs to the loaded chunk.")

bbox_image = visualize_bboxes(img_path, pred_bboxes, gt_bboxes)

plt.figure(figsize=(15, 15))
plt.imshow(bbox_image)
plt.axis('off')
plt.show()

Calculate KPIs (2) - TP, FP, FN, precision and recall

In [None]:


# Define a function to calculate the per-image detection KPIs
# TP: prediction = car, truth = Vehicle
def _box_values(frame: pd.DataFrame):
    """Return the (x1, y1, x2, y2) values of every row of `frame` as a list of lists."""
    box_cols = ["x1", "y1", "x2", "y2"]
    if all(col in frame.columns for col in box_cols):
        return frame[box_cols].to_numpy().tolist()
    # Frames without named box columns: fall back to the positional layout
    # (columns 1..4)
    return frame.iloc[:, 1:5].to_numpy().tolist()


def cal_kpi(preds: pd.DataFrame, truths: pd.DataFrame, threshold):
    ''' 
    This function shall be performed for each image.
    The input preds and truths contains the predicted and the ground-truth objects in an image.

    Every prediction is matched to the ground-truth box with the highest IoU:
      - IoU >= threshold and the match has class "Vehicle"  => TP
      - IoU >= threshold and the match has another class    => FP
      - IoU <  threshold, or the image has no ground truth  => FP
    FN counts the ground-truth vehicles that were not matched by any prediction.

    Returns:
        (tp, fp, fn, precision, recall), with precision and recall rounded to 3 decimals.

    Note:
        tp + fp always equals len(preds). Several predictions matched to the same
        vehicle are each counted as a TP, so tp + fn can exceed the number of
        ground-truth vehicles.
    '''
    tp = 0
    fp = 0

    truth_boxes = _box_values(truths)
    if len(truths):
        truth_is_vehicle = (truths["class"] == "Vehicle").to_numpy()
    else:
        truth_is_vehicle = np.zeros(0, dtype=bool)
    # Number of predictions matched to each ground-truth object
    match_counts = np.zeros(len(truth_boxes))

    # Loop through all predicted objects
    for pred_box in _box_values(preds):
        # IoU with all ground truth bounding boxes
        iou_ls = [calculate_iou(pred_box, truth_box) for truth_box in truth_boxes]

        if not iou_ls:
            # No ground truth at all in this image: np.argmax would raise on an
            # empty list, and the prediction cannot be correct => FP
            fp += 1
            continue

        # Find the best match; idx is the position of the matched ground truth
        idx = int(np.argmax(iou_ls))

        # Compare with the threshold
        if iou_ls[idx] >= threshold:
            if truth_is_vehicle[idx]:
                # Correct detection and classification => TP
                tp += 1
            else:
                # Incorrect classification => FP (for the vehicle class)
                fp += 1
            match_counts[idx] += 1
        else:
            # Incorrect detection of a vehicle => FP
            fp += 1

    # FN: vehicles in the ground truth that no prediction was matched to
    fn = int(((match_counts == 0) & truth_is_vehicle).sum())

    # Calculate precision and recall; epsilon avoids a division by zero
    epsilon = 2E-5
    precision = round(tp / (tp + fp + epsilon), 3)
    recall = round(tp / (tp + fn + epsilon), 3)

    return tp, fp, fn, precision, recall

In [None]:
# Evaluate every image that has predictions, using an IoU threshold of 0.75
col_names = ["image_id", "tp", "fp", "fn", "precision", "recall"]
img_ids = pred_df["image_id"].unique()

# One row per image: the image id followed by the values returned by cal_kpi
rows = []
for img_id in img_ids:
    preds_img = pred_df.loc[pred_df["image_id"] == img_id]
    truths_img = true_df.loc[true_df["image_id"] == img_id]
    kpi_values = cal_kpi(preds_img, truths_img, threshold=0.75)
    rows.append([img_id, *kpi_values])

kpi_df = pd.DataFrame(rows, columns=col_names)

kpi_df.head()

In [None]:
# Save to csv
# DataFrame.to_csv does not create missing folders, so make sure the kpi
# directory of this chunk exists before writing.
os.makedirs(f"outputs/{start}_{end}/kpi", exist_ok=True)
kpi_df.to_csv(f"outputs/{start}_{end}/kpi/kpi_threshold_0_75.csv")

In [None]:
# Number of images whose precision (first line) and recall (second line) reach 0.5,
# each printed against the total number of evaluated images
n_images = len(kpi_df)
for metric in ("precision", "recall"):
    n_reached = len(kpi_df[kpi_df[metric] >= 0.5])
    print(f"{n_reached} / {n_images}")

Note:
1. Add a summary of the KPIs (mean).
2. Add accuracy.
3. Check the training data to account for the low recall.
4. Give higher priority to closer objects (larger bounding box or shorter distance).