In [1]:
import pandas as pd 
from pandas import json_normalize

import matplotlib.pyplot as plt
import plotly.express as px
from datetime import datetime


In [2]:
from PIL import Image, ImageDraw, ImageFont

import os
from tqdm import tqdm  # for showing progress bar
import numpy as np

In [3]:
# Function to convert to Julian date
def to_julian(date):
    """
    Convert a date to its Julian date.

    Parameters:
        date: An object exposing ``to_julian_date()`` (for example a pandas
            ``Timestamp`` or ``DatetimeIndex``). Anything else (a
            ``datetime.datetime``, an ISO date string, ...) is first converted
            with ``pd.Timestamp``.

    Returns:
        Whatever ``to_julian_date()`` returns for the (possibly converted) input.
    """
    # The conversion is delegated entirely to pandas; no ordinal arithmetic is done here.
    if not hasattr(date, 'to_julian_date'):
        # Plain datetimes and strings have no to_julian_date(); wrap them first.
        date = pd.Timestamp(date)
    return date.to_julian_date()

In [4]:
def get_image_dimensions(image_path):
    """
    Open the image at ``image_path`` with Pillow and report its size.

    The file is opened in a ``with`` block, so it is closed again before
    the function returns.

    Returns:
        A ``(width, height)`` tuple taken from the image's ``size`` attribute.
    """
    with Image.open(image_path) as picture:
        dimensions = picture.size
    return dimensions[0], dimensions[1]

In [5]:
#NEW FUNCTIONS 

def calculate_iou(box1, box2):
    """
    Intersection over Union (IoU) of two axis-aligned boxes.

    Parameters:
        box1, box2: Four-element sequences unpacked as (x_min, y_min, x_max, y_max).

    Returns:
        intersection area / union area, or 0 when the union area is not positive.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Overlap extent along each axis; clamped to 0 when the boxes do not overlap.
    overlap_width = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_height = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection_area = overlap_width * overlap_height

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union_area = area_a + area_b - intersection_area

    if union_area > 0:
        return intersection_area / union_area
    return 0


def load_human_made_boxes(file_path):
    """
    Read a CSV of human-made bounding boxes.

    Parameters:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The DataFrame read from the file, or an empty DataFrame (no rows, no
        columns) when any of 'x_min', 'y_min', 'x_max', 'y_max' is absent.
    """
    annotations = pd.read_csv(file_path)
    print("Human boxes columns:", annotations.columns)  # Debug print

    # Collect whichever coordinate columns the file lacks.
    absent = [name for name in ('x_min', 'y_min', 'x_max', 'y_max') if name not in annotations.columns]
    if absent:
        print(f"Warning: Missing columns in human boxes file {file_path}. Columns found: {annotations.columns}")
        return pd.DataFrame()

    return annotations

def load_model_made_boxes(file_path, img_width, img_height, min_confidence=0.5):
    """
    Read a CSV of model-made bounding boxes and convert them to pixel corners.

    The file stores each box as a normalised centre and size. The columns
    'x_min', 'y_min', 'x_max' and 'y_max' are added by scaling with the image
    size and clipping to the image bounds.

    Parameters:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.
        img_width: Image width used to scale and clip the x coordinates.
        img_height: Image height used to scale and clip the y coordinates.
        min_confidence: Keep only rows whose 'confidence' is greater than or
            equal to this value. Pass ``None`` to keep every row.

    Returns:
        The DataFrame with the added corner columns, or an empty DataFrame
        (no rows, no columns) when a required column is missing.
    """
    boxes = pd.read_csv(file_path)
    print("Model boxes columns:", boxes.columns)  # Debug print

    # Check if the expected columns exist
    required_columns = ['x_center_normalised', 'y_center_normalised', 'width_normalised', 'height_normalised', 'confidence']
    if not all(col in boxes.columns for col in required_columns):
        print(f"Warning: Missing columns in model boxes file {file_path}. Columns found: {boxes.columns}")
        # Return here: indexing the missing columns below would raise KeyError
        # instead of handing the caller the empty DataFrame it checks for.
        return pd.DataFrame()

    # Convert normalized centre/size to absolute corner coordinates
    half_width = boxes['width_normalised'] / 2
    half_height = boxes['height_normalised'] / 2
    boxes['x_min'] = ((boxes['x_center_normalised'] - half_width) * img_width).clip(0, img_width)
    boxes['y_min'] = ((boxes['y_center_normalised'] - half_height) * img_height).clip(0, img_height)
    boxes['x_max'] = ((boxes['x_center_normalised'] + half_width) * img_width).clip(0, img_width)
    boxes['y_max'] = ((boxes['y_center_normalised'] + half_height) * img_height).clip(0, img_height)

    # Drop low-confidence detections (the comparison is inclusive: >= min_confidence)
    if min_confidence is not None:
        boxes = boxes[boxes['confidence'] >= min_confidence]

    return boxes

In [6]:
def evaluate_predictions(iou_threshold):
    """
    Evaluate predictions across all images in the folder.

    Reads the module-level names ``image_folder``, ``human_boxes_folder`` and
    ``model_boxes_folder``; they must be defined before this is called.

    Matching is one-to-one within each image. Model boxes are visited from
    highest to lowest confidence, and each one claims the not-yet-claimed
    human box with the highest IoU, provided that IoU reaches the threshold.
    A second model box on an already-claimed human box therefore counts as a
    false positive, not as another true positive.

    Images where one side has zero boxes (but a well-formed file) are still
    counted: their human boxes are misses and their model boxes are false
    positives. Images whose box file lacks the coordinate columns, or whose
    processing raises, are skipped with a printed message.

    Parameters:
        iou_threshold: IoU threshold to consider a prediction as correct.

    Returns:
        A dictionary containing overall precision, recall, and F1 score across all images.
    """
    box_columns = ['x_min', 'y_min', 'x_max', 'y_max']

    # List image files
    image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

    # Initialize metrics counters
    total_model_boxes = 0
    total_human_boxes = 0
    total_matched_pairs = 0  # true positives; identical for both sides under one-to-one matching

    print('Processing images...')

    # Process each image
    for image_file in tqdm(image_files, desc="Evaluating images"):
        try:
            # Get image dimensions
            image_path = os.path.join(image_folder, image_file)
            img_width, img_height = get_image_dimensions(image_path)

            # Load human-made bounding boxes
            human_boxes_file = os.path.join(human_boxes_folder, image_file.replace('.jpg', '.csv'))
            human_boxes = load_human_made_boxes(human_boxes_file)
            if human_boxes.empty:
                print(f"Warning: No human boxes found for {image_file}")
            if not set(box_columns).issubset(human_boxes.columns):
                # Malformed annotation file: there is nothing to compare against.
                continue

            # Load model-made bounding boxes
            model_boxes_file = os.path.join(model_boxes_folder, image_file + '.csv')
            model_boxes = load_model_made_boxes(model_boxes_file, img_width, img_height)
            if model_boxes.empty:
                print(f"Warning: No model boxes found for {image_file}")
            if not set(box_columns).issubset(model_boxes.columns):
                # Malformed prediction file: skip the image, do not score it.
                continue

            # Visit the most confident predictions first so they get first pick.
            if 'confidence' in model_boxes.columns:
                ranked_model_boxes = model_boxes.sort_values('confidence', ascending=False, kind='mergesort')
            else:
                ranked_model_boxes = model_boxes

            human_coords = human_boxes[box_columns].values.tolist()
            human_claimed = [False] * len(human_coords)
            matched_pairs = 0

            for model_coords in ranked_model_boxes[box_columns].values.tolist():
                best_position = None
                best_iou = 0
                for position, human_box in enumerate(human_coords):
                    if human_claimed[position]:
                        continue  # each human box may be matched at most once
                    iou = calculate_iou(human_box, model_coords)
                    if iou >= iou_threshold and (best_position is None or iou > best_iou):
                        best_position = position
                        best_iou = iou
                if best_position is not None:
                    human_claimed[best_position] = True
                    matched_pairs += 1

            # Update overall metrics (also reached when one side has zero boxes)
            total_model_boxes += len(model_boxes)
            total_human_boxes += len(human_boxes)
            total_matched_pairs += matched_pairs

        except Exception as e:
            # Best effort: a failure on one image must not abort the whole evaluation.
            print(f"Error processing {image_file}: {e}")
            continue

    # Calculate overall metrics
    precision = (
        total_matched_pairs / total_model_boxes if total_model_boxes > 0 else 0
    )
    recall = (
        total_matched_pairs / total_human_boxes if total_human_boxes > 0 else 0
    )
    f1_score = (
        2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    )

    return {
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score,
    }


In [7]:
def grid_search_best_f1_score(iou_thresholds):
    """
    Try each IoU threshold in turn and keep the one with the highest F1 score.

    Every candidate is scored by calling ``evaluate_predictions`` with it.
    A later candidate replaces the current best only when its F1 score is
    strictly higher, so the first of several tied candidates wins.

    Parameters:
        iou_thresholds: Iterable of IoU threshold values to test.

    Returns:
        A dictionary with 'best_f1_score' and 'best_iou_threshold'. Both stay
        at 0 when no candidate scores above 0 (or none are given).
    """
    winner = {
        'best_f1_score': 0,
        'best_iou_threshold': 0
    }

    for candidate in iou_thresholds:
        candidate_f1 = evaluate_predictions(candidate)['f1_score']
        if candidate_f1 > winner['best_f1_score']:
            winner['best_f1_score'] = candidate_f1
            winner['best_iou_threshold'] = candidate

    return winner


In [11]:
def _draw_labelled_box(draw, box, colour, label, font, line_width):
    """Outline ``box`` on ``draw`` and write ``label`` 24 px above its top-left corner."""
    draw.rectangle([box['x_min'], box['y_min'], box['x_max'], box['y_max']], outline=colour, width=line_width)
    draw.text((box['x_min'], box['y_min'] - 24), label, fill=colour, font=font)


def boxes_metrics_with_errors(iou_threshold, image_folder, human_boxes_folder, model_boxes_folder, output_folder):
    """
    Draw human and model boxes on every '.jpg' image and list the disagreements.

    For each image the human boxes are read from '<name>.csv' in
    ``human_boxes_folder`` and the model boxes from '<name>.jpg.csv' in
    ``model_boxes_folder``. Every human/model pair whose IoU reaches
    ``iou_threshold`` marks both boxes as matched. Unmatched human boxes are
    reported as missed detections and unmatched model boxes as false
    positives. The annotated image is saved under the same file name in
    ``output_folder``.

    Parameters:
        iou_threshold: Minimum IoU for a human/model pair to count as a match.
        image_folder: Folder containing the '.jpg' images.
        human_boxes_folder: Folder containing the human box CSV files.
        model_boxes_folder: Folder containing the model box CSV files.
        output_folder: Folder for the annotated images; created if missing.

    Returns:
        A DataFrame with one row per processed image and the columns 'image',
        'missed_detections', 'false_positives', 'matches', 'image_with_boxes',
        'human_boxes' and 'model_boxes'. Images for which either box table is
        empty are skipped.
    """
    # Ensure output folder exists (no error if it is already there)
    os.makedirs(output_folder, exist_ok=True)

    # List image files
    image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]

    # Create a list to store results for each image
    results = []

    # The label font does not depend on the image, so load it once up front.
    try:
        font = ImageFont.truetype("arial.ttf", size=24)
    except IOError:
        font = ImageFont.load_default()  # Fallback to default font if arial is not available

    print('Processing images')
    print('')

    # Process each image
    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)

        # The with-block closes the image file on every path, including `continue`.
        with Image.open(image_path) as img:
            img_width, img_height = img.size

            # Load human-made bounding boxes
            human_boxes_file = os.path.join(human_boxes_folder, image_file.replace('.jpg', '.csv'))
            human_boxes = load_human_made_boxes(human_boxes_file)

            # Load model-made bounding boxes (file names end in `.jpg.csv`)
            model_boxes_file = os.path.join(model_boxes_folder, image_file.replace('.jpg', '.jpg.csv'))
            model_boxes = load_model_made_boxes(model_boxes_file, img_width, img_height)

            if human_boxes.empty or model_boxes.empty:
                print(f"Skipping {image_file} due to missing boxes.")
                continue

            # Track matched indices
            human_boxes_matched = set()
            model_boxes_matched = set()

            draw = ImageDraw.Draw(img)

            # Human-made boxes in red
            for _, human_box in human_boxes.iterrows():
                _draw_labelled_box(draw, human_box, 'red', "human", font, 2)

            # Model-made boxes in green, with the confidence written above the "model" label
            for _, model_box in model_boxes.iterrows():
                _draw_labelled_box(draw, model_box, 'green', "model", font, 2)
                confidence_text = f"conf: {model_box['confidence']:.2f}"  # Format confidence to 2 decimal places
                draw.text((model_box['x_min'], model_box['y_min'] - 48), confidence_text, fill="green", font=font)

            # Compare each model box with each human box; every pair that
            # reaches the threshold marks both of its boxes as matched.
            for model_idx, model_box in model_boxes.iterrows():
                for human_idx, human_box in human_boxes.iterrows():
                    iou = calculate_iou(
                        [human_box['x_min'], human_box['y_min'], human_box['x_max'], human_box['y_max']],
                        [model_box['x_min'], model_box['y_min'], model_box['x_max'], model_box['y_max']]
                    )
                    if iou >= iou_threshold:
                        human_boxes_matched.add(human_idx)
                        model_boxes_matched.add(model_idx)

            # Identify errors: Missed detections (False Negatives) and False Positives
            missed_detections = human_boxes.loc[~human_boxes.index.isin(human_boxes_matched)]
            false_positives = model_boxes.loc[~model_boxes.index.isin(model_boxes_matched)]

            # Redraw missed detections with a thicker red outline
            for _, missed_box in missed_detections.iterrows():
                _draw_labelled_box(draw, missed_box, 'red', "missed", font, 3)

            # Redraw false positives with a thicker blue outline
            for _, false_positive_box in false_positives.iterrows():
                _draw_labelled_box(draw, false_positive_box, 'blue', "false positive", font, 3)

            # Save the image with boxes drawn
            output_image_path = os.path.join(output_folder, image_file)
            img.save(output_image_path)

        # Store the results for this image
        results.append({
            'image': image_file,
            'missed_detections': missed_detections,
            'false_positives': false_positives,
            'matches': len(human_boxes_matched),
            'image_with_boxes': output_image_path,  # Path to image with boxes
            'human_boxes': human_boxes[['x_min', 'y_min', 'x_max', 'y_max']].values.tolist(),
            'model_boxes': model_boxes[['x_min', 'y_min', 'x_max', 'y_max']].values.tolist()
        })

    # Convert results to a DataFrame
    results_df = pd.DataFrame(results)
    return results_df

# Helper functions
def load_human_made_boxes(file_path):
    """
    Read a CSV of human-made bounding boxes.

    Parameters:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The DataFrame read from the file, or an empty DataFrame (no rows, no
        columns) when any of 'x_min', 'y_min', 'x_max', 'y_max' is absent.
    """
    annotations = pd.read_csv(file_path)
    print("Human boxes columns:", annotations.columns)  # Debug print

    # Collect whichever coordinate columns the file lacks.
    absent = [name for name in ('x_min', 'y_min', 'x_max', 'y_max') if name not in annotations.columns]
    if absent:
        print(f"Warning: Missing columns in human boxes file {file_path}. Columns found: {annotations.columns}")
        return pd.DataFrame()

    return annotations

def load_model_made_boxes(file_path, img_width, img_height, min_confidence=None):
    """
    Read a CSV of model-made bounding boxes and convert them to pixel corners.

    The file stores each box as a normalised centre and size. The columns
    'x_min', 'y_min', 'x_max' and 'y_max' are added by scaling with the image
    size and clipping to the image bounds.

    Parameters:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.
        img_width: Image width used to scale and clip the x coordinates.
        img_height: Image height used to scale and clip the y coordinates.
        min_confidence: When given, keep only rows whose 'confidence' is
            greater than or equal to this value. The default ``None`` keeps
            every row.

    Returns:
        The DataFrame with the added corner columns, or an empty DataFrame
        (no rows, no columns) when a required column is missing.
    """
    boxes = pd.read_csv(file_path)
    print("Model boxes columns:", boxes.columns)  # Debug print

    # Check if the expected columns exist
    required_columns = ['x_center_normalised', 'y_center_normalised', 'width_normalised', 'height_normalised', 'confidence']
    if not all(col in boxes.columns for col in required_columns):
        print(f"Warning: Missing columns in model boxes file {file_path}. Columns found: {boxes.columns}")
        # Return here: indexing the missing columns below would raise KeyError
        # instead of handing the caller the empty DataFrame it checks for.
        return pd.DataFrame()

    # Convert normalized centre/size to absolute corner coordinates
    half_width = boxes['width_normalised'] / 2
    half_height = boxes['height_normalised'] / 2
    boxes['x_min'] = ((boxes['x_center_normalised'] - half_width) * img_width).clip(0, img_width)
    boxes['y_min'] = ((boxes['y_center_normalised'] - half_height) * img_height).clip(0, img_height)
    boxes['x_max'] = ((boxes['x_center_normalised'] + half_width) * img_width).clip(0, img_width)
    boxes['y_max'] = ((boxes['y_center_normalised'] + half_height) * img_height).clip(0, img_height)

    # Optional confidence filter; off by default so every detection is returned.
    if min_confidence is not None:
        boxes = boxes[boxes['confidence'] >= min_confidence]

    return boxes

def calculate_iou(box1, box2):
    """
    Intersection over Union (IoU) of two axis-aligned boxes.

    Parameters:
        box1, box2: Four-element sequences unpacked as (x_min, y_min, x_max, y_max).

    Returns:
        intersection area / union area, or 0 when the union area is not positive.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Overlap extent along each axis; clamped to 0 when the boxes do not overlap.
    overlap_width = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_height = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection_area = overlap_width * overlap_height

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union_area = area_a + area_b - intersection_area

    if union_area > 0:
        return intersection_area / union_area
    return 0

# Run the annotated-image comparison with a fixed IoU threshold.
# NOTE(review): `image_folder`, `human_boxes_folder` and `model_boxes_folder` are not
# assigned in any visible cell, and the recorded output of this cell is
# "NameError: name 'image_folder' is not defined". They must be defined
# (e.g. in a configuration cell) before this cell can run.
iou_threshold = 0.4

output_folder = 'output_images'

results_df = boxes_metrics_with_errors(iou_threshold, image_folder, human_boxes_folder, model_boxes_folder, output_folder)

# Print the per-image results table
print(results_df)


NameError: name 'image_folder' is not defined

In [None]:
def create_output_folder(base_folder='output_images', max_run=1000):
    """
    Create and return a new folder named '<base_folder>_run_<N>'.

    N starts at 1 and is increased until a path that does not yet exist is
    found; that folder is then created.

    Parameters:
    - base_folder: Prefix of the folder name (default is 'output_images').
    - max_run: Highest run number that will be tried after the first one.

    Returns:
    - The path of the folder that was created.

    Raises:
    - Exception: when every candidate up to max_run already exists.
    """
    attempt = 0
    while True:
        attempt += 1
        # The first candidate is always tried; the max_run limit only applies
        # from the second candidate onwards.
        if attempt > 1 and attempt > max_run:
            raise Exception(f"Could not find a unique output folder name after {max_run} attempts.")
        candidate = f"{base_folder}_run_{attempt}"
        if not os.path.exists(candidate):
            break

    os.makedirs(candidate)

    print(f"Created output folder: {candidate}")
    return candidate