In [None]:
# Import all necessary python packages
import boto3
from botocore.exceptions import ClientError
from google.cloud import vision, storage
from google.oauth2 import service_account
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient, StandardBlobTier, PublicAccess
import torch
import io, json
import cv2
import os, shutil
import random
import time
from ultralytics import YOLO
import matplotlib.pyplot as plt
from datetime import datetime
import json
import numpy as np
from IPython.display import Image
import psutil, gpustat, cpuinfo
import PIL

# Instantiate both local YOLO models once, from their checkpoint file names, so that
# detect_objects_with_yolov5 / detect_objects_with_yolov8 can reuse the same objects on every call.
yolov5_model = YOLO('yolov5s.pt')
yolov8_model = YOLO('yolov8s.pt')

In [None]:
# Specify the folder containing the images and bbox data
# NOTE(review): these are absolute, machine-specific Windows paths; they must be adapted before
# the notebook can run on another machine.
dataset_name = "Roboflow, Self Driving Car Image Dataset"
original_image_folder = "C:\\Users\\feren\\Downloads\\Self Driving Car.v2-fixed-large.coco\\OUTPUT\\train"

# Alternative dataset: uncomment the two lines below (and comment out the pair above) to switch.
#dataset_name = "NuImages"
#original_image_folder = "C:\\Users\\feren\\Downloads\\nuimages_output\\train"

# Working folder that the resize cell wipes and refills with resized images and copied JSON files
image_folder = "temp_images"
# Output location; presumably consumed by later cells — TODO confirm
output_path = "G:\\My Drive\\Thesis2024\\output"

# Specify the desired Y-axis resolution
# Every image is resized to this height; the width is scaled to keep the aspect ratio.
desired_y_resolution = 1200 #480 = VGA, 1200 = max

# Batch size for processing images
batch_size = 16
# The driver loop stops starting new batches once this many images have been processed
inference_limit = 200
# Minimum delay between two batch requests, in milliseconds (0 disables the wait)
request_time_limiter_ms = 0
# Number of images randomly sampled from image_folder for the run
input_images_number = 200

# Provider-side limits/thresholds; 0 keeps every detection so that thresholds can be tuned afterwards
GOOGLE_MAX_RESULTS = 100 # no threshold setting
REKOGNITION_CONFIDENCE_THRESHOLD = 0 # in percentage
YOLOV5_CONFIDENCE_THRESHOLD = 0.0
YOLOV8_CONFIDENCE_THRESHOLD = 0.0

In [None]:
# Change image resolution
# Rebuilds image_folder from scratch: every .png/.jpg in original_image_folder is resized to
# desired_y_resolution pixels in height (aspect ratio preserved) and every .json file is copied as is.

# Delete the image_folder and its contents if it exists, so stale files from a previous run cannot survive
if os.path.exists(image_folder):
    shutil.rmtree(image_folder)
# Create the image_folder directory if it doesn't exist
os.makedirs(image_folder, exist_ok=True)
# Iterate over the images in the original folder
for image_file in os.listdir(original_image_folder):
    # Check if the file is an image
    if image_file.endswith((".png", ".jpg")):
        image_path = os.path.join(original_image_folder, image_file)

        # Open through a context manager so the source file handle is always released,
        # even if reading or resizing fails part-way through a large folder
        with PIL.Image.open(image_path) as image:
            # Get the original image dimensions
            original_width, original_height = image.size

            # Calculate the new width while maintaining the aspect ratio
            new_width = int((desired_y_resolution / original_height) * original_width)

            # resize() returns a new in-memory image, so it stays usable after the source is closed
            resized_image = image.resize((new_width, desired_y_resolution))

        # Save the resized image to the image_folder under the same file name
        out_p = os.path.join(image_folder, image_file)
        resized_image.save(out_p)
    elif image_file.endswith(".json"):
        # Copy the JSON file to the image_folder
        shutil.copy2(os.path.join(original_image_folder, image_file), image_folder)

In [None]:
# Set up all API clients
# NOTE(review): AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION, GOOGLE_KEY_PATH,
# GOOGLE_PROJECT_ID, MICROSOFT_ENDPOINT and MICROSOFT_SUBSCRIPTION_KEY are not defined in any
# cell shown here; presumably they come from a removed credentials cell — confirm, and prefer
# loading them from environment variables rather than hard-coding them in the notebook.

# Amazon Rekognition
# Rekognition runs the label detection; S3 holds the images that Rekognition reads.
rekognition_client = boto3.client(
    "rekognition",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_REGION
)
s3_client = boto3.client(
    "s3",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_REGION
)

# Google Vision and Cloud Storage
# One service-account credential object is shared by the Vision client and the Storage client.
credentials = service_account.Credentials.from_service_account_file(GOOGLE_KEY_PATH)
google_client = vision.ImageAnnotatorClient(credentials=credentials)
storage_client = storage.Client(credentials=credentials, project=GOOGLE_PROJECT_ID)

# Microsoft Azure
azure_client = ComputerVisionClient(MICROSOFT_ENDPOINT, CognitiveServicesCredentials(MICROSOFT_SUBSCRIPTION_KEY))


In [None]:
# Amazon Rekognition API
def detect_objects_with_rekognition(image_paths: list) -> list:
    """Run Amazon Rekognition label detection on a list of local images.

    All images are first uploaded to S3 (each upload timed individually), then
    Rekognition is called once per uploaded object and the object is deleted again.

    Args:
        image_paths: Paths of the local image files to analyse.

    Returns:
        One dict per image with the keys "image_path", "object_key", "upload_time"
        (milliseconds) and "rekognition_response". The response is the raw
        detect_labels result extended with "inference_time" and "upload_time"
        (milliseconds), or an empty dict when the call raised ClientError.
    """
    bucket_name = "fankfurteucentralrekognition"  # export this later
    uploaded = []

    # Phase 1: push every image to S3, measuring each upload on its own
    for path in image_paths:
        key = os.path.basename(path)
        upload_started = time.time()
        s3_client.upload_file(path, bucket_name, key)
        elapsed_ms = (time.time() - upload_started) * 1000  # seconds -> milliseconds
        uploaded.append({"image_path": path, "object_key": key, "upload_time": elapsed_ms})

    # Phase 2: one detect_labels call per uploaded object
    for record in uploaded:
        try:
            call_started = time.time()
            labels = rekognition_client.detect_labels(
                Image={"S3Object": {"Bucket": bucket_name, "Name": record["object_key"]}},
                MinConfidence=REKOGNITION_CONFIDENCE_THRESHOLD
            )
            labels["inference_time"] = (time.time() - call_started) * 1000  # seconds -> milliseconds
            labels["upload_time"] = record["upload_time"]
            record["rekognition_response"] = labels
        except ClientError as e:
            # A failed call is reported and recorded as an empty response
            print("Error: {}".format(e))
            record["rekognition_response"] = {}

        # The S3 object is removed whether or not the call succeeded
        s3_client.delete_object(Bucket=bucket_name, Key=record["object_key"])

    return uploaded

def convert_rekognition_output(rekognition_outputs: list) -> list:
    """Convert raw Rekognition results into the notebook's common detection format.

    Args:
        rekognition_outputs: Dicts as produced by detect_objects_with_rekognition,
            each holding "image_path" and "rekognition_response".

    Returns:
        One dict per successfully analysed image with "image_id", "objects",
        "inference_time" and "upload_time". Each object has "class_name",
        "bbox" as [left, top, width, height] (values taken unchanged from the
        Rekognition BoundingBox) and "confidence" scaled from percent to 0..1.
        Images whose response is missing or empty are reported and skipped.
    """
    converted_outputs = []

    for rekognition_output in rekognition_outputs:
        response = rekognition_output.get("rekognition_response")
        # The detector stores an empty dict when the API call failed; such a response has no
        # timing fields, so it is handled exactly like a missing response instead of raising KeyError.
        if not response:
            print(f"Missing rekognition_response in output for image {rekognition_output.get('image_path', 'unknown')}")
            continue

        objects = []
        for label in response.get("Labels", []):
            # Only labels with located instances carry bounding boxes
            for instance in label.get("Instances", []):
                bbox = instance["BoundingBox"]
                objects.append({
                    "class_name": label["Name"],
                    "bbox": [bbox["Left"], bbox["Top"], bbox["Width"], bbox["Height"]],
                    "confidence": instance["Confidence"] / 100  # percent -> fraction
                })

        converted_outputs.append({
            "image_id": rekognition_output["image_path"],
            "objects": objects,
            "inference_time": response["inference_time"],
            "upload_time": response["upload_time"]
        })

    return converted_outputs


In [None]:
# Google Vision
def detect_objects_with_google_vision(image_paths: list) -> list:
    """Run Google Vision object localization on a batch of local images.

    The images are uploaded to a Cloud Storage bucket, annotated with a single
    batch_annotate_images call and then deleted from the bucket again.

    Args:
        image_paths: Paths of the local image files to analyse.

    Returns:
        One dict per image with "image_path" (the local path that was passed in),
        "blob_name", "upload_time" and "vision_response". Because upload and
        annotation are measured for the whole batch, "upload_time" and
        vision_response["inference_time"] are per-image averages in milliseconds.
        An empty input yields an empty list.
    """
    # Nothing to upload or annotate; this also avoids dividing by zero in the averages below
    if not image_paths:
        return []

    bucket_name = "objectdetectionbucket_szakdoga"
    responses = []

    # Batch upload all images to Google Cloud Storage
    bucket = storage_client.bucket(bucket_name)
    blobs = []
    start_upload_time = time.time()
    for image_path in image_paths:
        blob_name = os.path.basename(image_path)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(image_path)
        blobs.append(blob)
    total_upload_time = (time.time() - start_upload_time) * 1000  # Convert to milliseconds
    average_upload_time = total_upload_time / len(image_paths)  # Average upload time per image

    # Keep the local path under "image_path" (as the other providers do) and the
    # bucket-side name under "blob_name"
    for image_path, blob in zip(image_paths, blobs):
        responses.append({
            "image_path": image_path,
            "blob_name": blob.name,
            "upload_time": average_upload_time
        })

    # Perform batch inference on all images
    image_requests = [
        vision.AnnotateImageRequest(
            image=vision.Image(source=vision.ImageSource(gcs_image_uri=f"gs://{bucket_name}/{blob.name}")),
            features=[vision.Feature(type=vision.Feature.Type.OBJECT_LOCALIZATION, max_results=GOOGLE_MAX_RESULTS)]
        )
        for blob in blobs
    ]

    start_time = time.time()
    vision_responses = google_client.batch_annotate_images(requests=image_requests)
    total_inference_time = (time.time() - start_time) * 1000  # Convert to milliseconds
    average_inference_time = total_inference_time / len(image_paths)  # Average inference time per image

    for response, vision_response in zip(responses, vision_responses.responses):
        response["vision_response"] = {"response": vision_response, "inference_time": average_inference_time}

        # Clean up the blob
        blob = bucket.blob(response["blob_name"])
        blob.delete()

    return responses

def convert_google_vision_output(google_vision_outputs: list) -> list:
    """Convert raw Google Vision results into the notebook's common detection format.

    Args:
        google_vision_outputs: Dicts as produced by detect_objects_with_google_vision.

    Returns:
        One dict per image with "image_id", "objects", "inference_time" and
        "upload_time". Each object's "bbox" is [x, y, width, height], built from
        the first and third normalized vertices of the bounding polygon.
        Entries without a "vision_response" are reported and skipped.
    """
    converted_outputs = []

    for entry in google_vision_outputs:
        if "vision_response" not in entry:
            print(f"Missing vision_response in output for image {entry.get('image_path', 'unknown')}")
            continue

        annotation = entry["vision_response"]["response"]
        detections = []
        for found in annotation.localized_object_annotations:
            corners = found.bounding_poly.normalized_vertices
            # Vertex 0 and vertex 2 are used as opposite corners of the box
            first, third = corners[0], corners[2]
            detections.append({
                "class_name": found.name,
                "bbox": [first.x, first.y, third.x - first.x, third.y - first.y],
                "confidence": found.score
            })

        converted_outputs.append({
            "image_id": entry["image_path"],
            "objects": detections,
            "inference_time": entry["vision_response"]["inference_time"],
            "upload_time": entry["upload_time"]
        })

    return converted_outputs


In [None]:
# Microsoft Azure
def detect_objects_with_azure(image_paths: list) -> list:
    """Run Azure Computer Vision object detection on a list of local images.

    Every image is uploaded to Azure Blob Storage (each upload timed
    individually), analysed through its blob URL and then deleted again.

    Args:
        image_paths: Paths of the local image files to analyse.

    Returns:
        One dict per image with "image_path", "blob_name", "upload_time"
        (milliseconds) and "azure_response", which holds the analysis as a dict
        under "analysis" and the call duration in milliseconds under
        "inference_time".
    """
    container_name = "container1"  # export this later
    responses = []

    # Upload all images to Azure Blob Storage
    # NOTE(review): the connection string is a redacted placeholder; supply the real value
    # from configuration or an environment variable rather than hard-coding it here.
    blob_service_client = BlobServiceClient.from_connection_string("CENSORED")
    container_client = blob_service_client.get_container_client(container_name)
    for image_path in image_paths:
        blob_name = os.path.basename(image_path)
        start_upload_time = time.time()
        with open(image_path, "rb") as data:
            blob_client = container_client.get_blob_client(blob_name)
            blob_client.upload_blob(data, overwrite=True)
        upload_time = (time.time() - start_upload_time) * 1000  # Convert to milliseconds
        responses.append({
            "image_path": image_path,
            "blob_name": blob_name,
            "upload_time": upload_time
        })

    # Perform inference on all images
    for response in responses:
        # Look the blob up by the "blob_name" key of this record. Indexing with the leftover
        # loop variable blob_name used a file name as the dict key and raised KeyError.
        stored_blob_name = response["blob_name"]
        image_url = f"https://{blob_service_client.account_name}.blob.core.windows.net/{container_name}/{stored_blob_name}"
        start_time = time.time()
        analysis = azure_client.analyze_image(
            image_url,
            visual_features=[VisualFeatureTypes.objects],
            detection_model="detection_03",
        )
        inference_time = (time.time() - start_time) * 1000  # Convert to milliseconds
        response["azure_response"] = {"analysis": analysis.as_dict(), "inference_time": inference_time}

        # Clean up the blob
        container_client.delete_blob(stored_blob_name)

    return responses

def convert_azure_output(azure_outputs: list) -> list:
    """Convert raw Azure results into the notebook's common detection format.

    Azure reports rectangles in pixels, so each image is read from disk to
    obtain its dimensions and the rectangles are divided by width/height.

    Args:
        azure_outputs: Dicts as produced by detect_objects_with_azure.

    Returns:
        One dict per image with "image_id", "objects", "inference_time" and
        "upload_time". Each object's "bbox" is [x, y, width, height] relative to
        the image size. Entries without an "azure_response" or whose image
        cannot be read are reported and skipped.
    """
    converted_outputs = []

    for entry in azure_outputs:
        if "azure_response" not in entry:
            print(f"Missing azure_response in output for image {entry.get('image_path', 'unknown')}")
            continue

        frame = cv2.imread(entry["image_path"])
        if frame is None:
            print(f"Could not read image for {entry.get('image_path', 'unknown')}")
            continue

        # OpenCV arrays are laid out as (rows, columns, channels)
        image_height, image_width, _ = frame.shape

        detections = []
        for found in entry["azure_response"]["analysis"].get("objects", []):
            rect = found["rectangle"]
            relative_box = [
                rect["x"] / image_width,
                rect["y"] / image_height,
                rect["w"] / image_width,
                rect["h"] / image_height
            ]
            detections.append({
                "class_name": found["object_property"],
                "bbox": relative_box,
                "confidence": found["confidence"]
            })

        converted_outputs.append({
            "image_id": entry["image_path"],
            "objects": detections,
            "inference_time": entry["azure_response"]["inference_time"],
            "upload_time": entry["upload_time"]
        })

    return converted_outputs


In [None]:
# YOLOv5
def detect_objects_with_yolov5(image_paths: list) -> list:
    """Run the local YOLOv5 model on a batch of images.

    The inference size is taken from the first image's (height, width).
    NOTE(review): this presumably relies on all images in the batch sharing
    that size — confirm.

    Args:
        image_paths: Paths of the local image files to analyse.

    Returns:
        One dict per image with "results" (the model's result object) and
        "inference_time" (read from result.speed['inference']). An empty input
        yields an empty list.

    Raises:
        ValueError: If the first image cannot be read from disk.
    """
    # Nothing to do, and image_paths[0] below would raise IndexError
    if not image_paths:
        return []

    image = cv2.imread(image_paths[0])
    if image is None:
        # cv2.imread signals failure by returning None; fail with a message that names the file
        raise ValueError(f"Could not read image for {image_paths[0]}")

    results = yolov5_model(image_paths, conf=YOLOV5_CONFIDENCE_THRESHOLD, imgsz=image.shape[:2])
    return [
        {
            "results": result,
            "inference_time": result.speed['inference']  # Retrieve inference time from YOLOv5 output
        }
        for result in results
    ]


def convert_yolov5_output(yolov5_outputs: list) -> list:
    """Convert raw YOLOv5 results into the notebook's common detection format.

    Args:
        yolov5_outputs: Dicts as produced by detect_objects_with_yolov5, each
            holding "results" and "inference_time".

    Returns:
        One dict per image with "image_id" (the path stored on the result
        object), "objects", "inference_time" and "upload_time" (always 0 for a
        local model). Each object's "bbox" is [x, y, width, height] relative to
        the image size, stored as plain Python floats. Outputs that lack
        "results", whose image cannot be read, or whose processing raises are
        reported and skipped.
    """
    converted_outputs = []

    for yolov5_output in yolov5_outputs:
        # Reset for every item so the error message below can never hit an unbound name
        # or report the path of the previous image
        image_path = "unknown"
        try:
            if "results" not in yolov5_output:
                print(f"Missing results in output for image {yolov5_output.get('image_path', 'unknown')}")
                continue

            # Extract image path from the results object
            results = yolov5_output["results"]
            image_path = results.path if hasattr(results, 'path') else "unknown"
            if image_path == "unknown":
                print(f"Image path not found in YOLOv5 output: {yolov5_output}")

            # The image is read only to obtain its pixel dimensions for normalization
            image = cv2.imread(image_path)
            if image is None:
                print(f"Could not read image for {image_path}")
                continue

            image_height, image_width, _ = image.shape
            objects = []

            # Each row is read as [x1, y1, x2, y2, confidence, class id]
            for object_ in results.boxes.data.cpu().numpy():
                xyxy = object_[:4]
                conf = object_[4]
                class_id = int(object_[5])

                # Corner format in pixels -> [x, y, w, h] relative to the image; float() turns
                # numpy scalars into plain Python floats, as the cloud converters produce
                normalized_bbox = [
                    float(xyxy[0] / image_width),
                    float(xyxy[1] / image_height),
                    float((xyxy[2] - xyxy[0]) / image_width),
                    float((xyxy[3] - xyxy[1]) / image_height)
                ]

                objects.append({
                    "class_name": results.names[class_id],
                    "bbox": normalized_bbox,
                    "confidence": float(conf)
                })

            converted_outputs.append({
                "image_id": image_path,
                "objects": objects,
                "inference_time": yolov5_output.get("inference_time", 0),
                "upload_time": 0  # Add a default upload time of 0 for local solutions
            })
        except Exception as e:
            # Best effort: one broken result must not abort the conversion of the whole batch
            print(f"Error processing YOLOv5 output for image: {image_path} - {str(e)}")

    return converted_outputs


In [None]:
# YOLOv8
def detect_objects_with_yolov8(image_paths: list) -> list:
    """Run the local YOLOv8 model on a batch of images.

    The inference size is taken from the first image's (height, width).
    NOTE(review): this presumably relies on all images in the batch sharing
    that size — confirm.

    Args:
        image_paths: Paths of the local image files to analyse.

    Returns:
        One dict per image with "results" (the model's result object) and
        "inference_time" (read from result.speed['inference']). An empty input
        yields an empty list.

    Raises:
        ValueError: If the first image cannot be read from disk.
    """
    # Nothing to do, and image_paths[0] below would raise IndexError
    if not image_paths:
        return []

    image = cv2.imread(image_paths[0])
    if image is None:
        # cv2.imread signals failure by returning None; fail with a message that names the file
        raise ValueError(f"Could not read image for {image_paths[0]}")

    results = yolov8_model(image_paths, conf=YOLOV8_CONFIDENCE_THRESHOLD, imgsz=image.shape[:2])
    return [
        {
            "results": result,
            "inference_time": result.speed['inference']  # Retrieve inference time from YOLOv8 output
        }
        for result in results
    ]


def convert_yolov8_output(yolov8_outputs: list) -> list:
    """Convert raw YOLOv8 results into the notebook's common detection format.

    Args:
        yolov8_outputs: Dicts as produced by detect_objects_with_yolov8, each
            holding "results" and "inference_time".

    Returns:
        One dict per image with "image_id" (the path stored on the result
        object), "objects", "inference_time" and "upload_time" (always 0 for a
        local model). Each object's "bbox" is [x, y, width, height] relative to
        the image size, stored as plain Python floats. Outputs that lack
        "results", whose image cannot be read, or whose processing raises are
        reported and skipped.
    """
    converted_outputs = []

    for yolov8_output in yolov8_outputs:
        # Reset for every item so the error message below can never hit an unbound name
        # or report the path of the previous image
        image_path = "unknown"
        try:
            if "results" not in yolov8_output:
                print(f"Missing results in output for image {yolov8_output.get('image_path', 'unknown')}")
                continue

            # Extract image path from the results object
            results = yolov8_output["results"]
            image_path = results.path if hasattr(results, 'path') else "unknown"
            if image_path == "unknown":
                print(f"Image path not found in YOLOv8 output: {yolov8_output}")

            # The image is read only to obtain its pixel dimensions for normalization
            image = cv2.imread(image_path)
            if image is None:
                print(f"Could not read image for {image_path}")
                continue

            image_height, image_width, _ = image.shape
            objects = []

            # Each row is read as [x1, y1, x2, y2, confidence, class id]
            for object_ in results.boxes.data.cpu().numpy():
                xyxy = object_[:4]
                conf = object_[4]
                class_id = int(object_[5])

                # Corner format in pixels -> [x, y, w, h] relative to the image; float() turns
                # numpy scalars into plain Python floats, as the cloud converters produce
                normalized_bbox = [
                    float(xyxy[0] / image_width),
                    float(xyxy[1] / image_height),
                    float((xyxy[2] - xyxy[0]) / image_width),
                    float((xyxy[3] - xyxy[1]) / image_height)
                ]

                objects.append({
                    "class_name": results.names[class_id],
                    "bbox": normalized_bbox,
                    "confidence": float(conf)
                })

            converted_outputs.append({
                "image_id": image_path,
                "objects": objects,
                "inference_time": yolov8_output.get("inference_time", 0),
                "upload_time": 0  # Add a default upload time of 0 for local solutions
            })
        except Exception as e:
            # Best effort: one broken result must not abort the conversion of the whole batch
            print(f"Error processing YOLOv8 output for image: {image_path} - {str(e)}")

    return converted_outputs


In [None]:
def sort_bounding_boxes(output_dict):
    """Order the detections of one converted output from largest to smallest box.

    The "objects" list is sorted in place by bbox width * height, and the same
    dict is returned so the call can be used inline.
    """
    def _box_area(detection):
        # bbox is [x, y, width, height]
        _, _, box_width, box_height = detection["bbox"][:4]
        return box_width * box_height

    output_dict["objects"].sort(key=_box_area, reverse=True)
    return output_dict

In [None]:
def inferenceFromAllSources(image_paths: list) -> list:
    """Run detection and conversion for one batch of images and combine the results.

    Only the Google Vision provider is currently active; the other providers
    are parked inside the string literals below, which are never executed.

    Args:
        image_paths: Paths of the local image files in this batch.

    Returns:
        One dict per combined image, keyed by provider name ("google"), whose
        value is that provider's converted output with its objects sorted by
        box area in descending order.

    Raises:
        Exception: Any error from detection or conversion is printed and re-raised.
    """
    try:
        print("Calling detect on Google...")
        google_vision_responses = detect_objects_with_google_vision(image_paths)
        print("Google responses received")
        '''
        print("Calling detect on Azure...")
        #azure_responses = detect_objects_with_azure(image_paths)
        print("Azure responses received")

        print("Calling detect on AWS...")
        #rekognition_responses = detect_objects_with_rekognition(image_paths)
        print("AWS responses received")

        print("Calling detect on YOLOv5...")
        yolov5_responses = detect_objects_with_yolov5(image_paths)
        print("YOLOv5 responses received")

        print("Calling detect on YOLOv8...")
        yolov8_responses = detect_objects_with_yolov8(image_paths)
        print("YOLOv8 responses received")
        '''

        print("Calling conversion on Google...")
        google_vision_outputs = convert_google_vision_output(google_vision_responses)
        print("Google conversion completed")
        '''

        print("Calling conversion on Azure...")
        #azure_outputs = convert_azure_output(azure_responses)
        print("Azure conversion completed")

        print("Calling conversion on AWS...")
        #rekognition_outputs = convert_rekognition_output(rekognition_responses)
        print("AWS conversion completed")

        print("Calling conversion on YOLOv5...")
        yolov5_outputs = convert_yolov5_output(yolov5_responses)
        print("YOLOv5 conversion completed")

        print("Calling conversion on YOLOv8...")
        yolov8_outputs = convert_yolov8_output(yolov8_responses)
        print("YOLOv8 conversion completed")

        '''
        combined_responses = []
        for idx in range(len(image_paths)):
            try:
                print(f"Combining results for image {image_paths[idx]}")
                combined_responses.append({
                    "google": sort_bounding_boxes(google_vision_outputs[idx]),
                    #"azure": sort_bounding_boxes(azure_outputs[idx]),
                    #"rekognition": sort_bounding_boxes(rekognition_outputs[idx]),
                    #"yolov5": sort_bounding_boxes(yolov5_outputs[idx]),
                    #"yolov8": sort_bounding_boxes(yolov8_outputs[idx])
                })
            except KeyError as e:
                print(f"Missing key in response for image {image_paths[idx]}: {e}")
                continue
            except IndexError as e:
                # A converter may skip images, leaving its output list shorter than image_paths;
                # skip the position instead of discarding the whole batch
                print(f"Missing converted output for image {image_paths[idx]}: {e}")
                continue

        return combined_responses
    except Exception as e:
        print(f"Error in inferenceFromAllSources: {e}")
        # Bare raise keeps the original traceback intact
        raise


In [None]:
def print_dict(data, indent_level=0):
    """Pretty-print a nested dict, indenting two spaces per nesting level.

    Nested dicts are printed under their key; lists consisting only of dicts
    are printed item by item, each introduced by a "-" line. Every other value
    is printed on the key's line. Anything that is not a dict is printed as a
    single indented line.
    """
    pad = "  " * indent_level

    # Anything that is not a dict is emitted as one line
    if not isinstance(data, dict):
        print(f"{pad}{data}")
        return

    for name, content in data.items():
        if isinstance(content, dict):
            print(f"{pad}{name}:")
            print_dict(content, indent_level + 1)
            continue

        holds_only_dicts = isinstance(content, list) and all(isinstance(element, dict) for element in content)
        if not holds_only_dicts:
            print(f"{pad}{name}: {content}")
            continue

        print(f"{pad}{name}:")
        for element in content:
            print(f"{pad}  -")
            print_dict(element, indent_level + 2)

In [None]:
def draw_bounding_boxes(image_path, objects):
    """Display an image with labelled bounding boxes drawn on top of it.

    Args:
        image_path: Path of the image file to load.
        objects: Detections with "class_name" and "bbox"; the bbox is
            [x, y, width, height] relative to the image size.

    Each class gets one random colour that is reused for all of its boxes.
    If the image cannot be read, a message is printed and nothing is drawn.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; report it the way the converters do
        print(f"Could not read image for {image_path}")
        return

    # The image size is the same for every box, so look it up once
    height, width, _ = image.shape

    # Create a dictionary to store colors for each class
    class_colors = {}

    for obj in objects:
        class_name = obj["class_name"]
        bbox = obj["bbox"]

        # Generate a random color for each class if not already assigned
        if class_name not in class_colors:
            class_colors[class_name] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        color = class_colors[class_name]

        # Convert normalized coordinates to pixel coordinates
        left = int(bbox[0] * width)
        top = int(bbox[1] * height)
        box_width = int(bbox[2] * width)
        box_height = int(bbox[3] * height)

        # Draw bounding box rectangle
        cv2.rectangle(image, (left, top), (left + box_width, top + box_height), color, 2)

        # Put class name text above the rectangle
        cv2.putText(image, class_name, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Convert the image to a format compatible with display()
    _, encoded_image = cv2.imencode('.png', image)
    display(Image(data=encoded_image.tobytes()))

In [None]:
# This gets the image file names, necessary to run
# Load the JSON file containing the bbox data
# labels.json is expected inside image_folder; the resize cell copies every .json file there.
with open(os.path.join(image_folder, "labels.json"), "r") as file:
    original_bbox_data = json.load(file)

# Get a list of image file names
# Later cells sample from image_files, so this cell has to run before them.
image_files = [file for file in os.listdir(image_folder) if file.endswith(".png") or file.endswith(".jpg")]

# Randomly select an image file
selected_image = random.choice(image_files)
image_path = os.path.join(image_folder, selected_image)

# Find the corresponding bbox data for the selected image
# Entries are matched on their "image_id" field, which is compared against the bare file name;
# the result is None when no entry matches.
selected_bbox_data = next((data for data in original_bbox_data if data["image_id"] == selected_image), None)
image = cv2.imread(image_path)

# Convert the image to a format compatible with display()
_, encoded_image = cv2.imencode('.png', image)
display(Image(data=encoded_image.tobytes()))

In [None]:
def convert_class_names(objects):
    """Map provider- and dataset-specific class names onto a small common vocabulary.

    Every object's "class_name" is lower-cased in place. Objects whose name is
    a common class ("car", "truck", "person", "biker") or one of its known
    aliases get that common name and are returned; all other objects are left
    out of the result (but keep their lower-cased name).

    Args:
        objects: Detections, each with a "class_name" string.

    Returns:
        A new list containing the (mutated) objects that map to a common class,
        in their original order.
    """
    # Common class name -> alternative spellings that should collapse into it
    aliases_by_class = {
        "car": ["automobile", "taxi", "vehicle", "suv", "jeep", "sedan", "van", "land vehicle", "vehicle.car", "vehicle.emergency.police"],
        "truck": ["truck", "lorry", "bus", "shuttle bus", "pickup truck", "vehicle.truck", "vehicle.bus.bendy", "vehicle.bus.rigid", "vehicle.trailer", "vehicle.construction", "vehicle.emergency.ambulance"],
        "person": ["person", "pedestrian", "human",
                   "human.pedestrian.adult", "human.pedestrian.child", "human.pedestrian.construction_worker",
                   "human.pedestrian.personal_mobility", "human.pedestrian.police_officer", "human.pedestrian.stroller",
                   "human.pedestrian.wheelchair"],
        "biker": ["bicycle", "bike", "biker", "motorcycle", "motorbike", "vehicle.bicycle", "vehicle.motorcycle"],
        # Add more mappings as needed
    }

    # Flatten into a direct lookup; setdefault keeps the first class that claims a name,
    # which is the class a first-match scan over the mapping would pick
    canonical_name = {}
    for common_name, alt_names in aliases_by_class.items():
        canonical_name.setdefault(common_name, common_name)
        for alt_name in alt_names:
            canonical_name.setdefault(alt_name, common_name)

    recognised = []
    for entry in objects:
        lowered = entry["class_name"].lower()
        # Unknown names are still stored lower-cased on the original object
        entry["class_name"] = canonical_name.get(lowered, lowered)
        # Objects without a mapping are dropped; append them here as well to keep them instead
        if lowered in canonical_name:
            recognised.append(entry)

    return recognised


In [None]:
#----RUN INFERENCE SEQUENTIALLY ON IMAGES
# Samples images from image_files, sends them through inferenceFromAllSources batch by batch
# and collects the per-image results in all_inference_results.

# List to store the inference results for all images
all_inference_results = []

# Randomly select input_images_number of images from image_files.
# random.sample raises ValueError when asked for more items than exist, so the request is
# capped at the number of available images.
selected_images = random.sample(image_files, min(input_images_number, len(image_files)))
selected_image_paths = [os.path.join(image_folder, image_file) for image_file in selected_images]

# Initialize variables for failsafe and time limiting
inference_count = 0
last_request_time = 0

# Process images in batches
for i in range(0, len(selected_image_paths), batch_size):
    batch_paths = selected_image_paths[i:i + batch_size]

    # Check if the inference limit has been reached
    if inference_count >= inference_limit:
        print("Inference limit reached. Stopping the loop.")
        break

    # Check if enough time has passed since the last request
    current_time = time.time()
    elapsed_time = (current_time - last_request_time) * 1000  # Convert to milliseconds
    if elapsed_time < request_time_limiter_ms:
        # Wait for the remaining time to reach the request time limit
        time.sleep((request_time_limiter_ms - elapsed_time) / 1000)

    # Update the last request time
    last_request_time = time.time()

    try:
        # Call the inferenceFromAllSources function for the current batch
        batch_inference_result = inferenceFromAllSources(batch_paths)

        # Append the inference result to the list
        all_inference_results.extend(batch_inference_result)

        # Increment the inference count by the batch size
        inference_count += len(batch_paths)

        print(f"Inference completed for batch starting with {batch_paths[0]}. Total inference count: {inference_count}")
    except Exception as e:
        # A failed batch is reported and skipped; the remaining batches are still processed
        print(f"Error occurred during inference for batch starting with {batch_paths[0]}: {str(e)}")

# Print the number of images processed
print(f"Inference completed for {len(all_inference_results)} images.")
print_dict(all_inference_results)


In [None]:
from shapely.geometry import box
from shapely.ops import unary_union

def calculate_iou(inferred_objects, original_objects):
    """Compute one class-aware, area-based IoU score for a single image.

    Boxes are [x, y, width, height]. For every class, the inferred boxes and
    the original boxes are each merged into their geometric union, and the
    overlap between the two merged shapes is added to the intersection area.

    NOTE(review): the union term uses the plain sum of all box areas minus the
    intersection, so boxes that overlap within the same set are counted more
    than once — confirm that this is the intended metric.

    Args:
        inferred_objects: Detections with "class_name" and "bbox".
        original_objects: Ground-truth objects with "class_name" and "bbox".

    Returns:
        Intersection area divided by union area, or 0 when the union area is
        not positive (for example when both lists are empty).
    """
    def merge_boxes(boxes):
        # Merge the boxes of one class into a list of non-overlapping shapes
        polygons = [box(b[0], b[1], b[0] + b[2], b[1] + b[3]) for b in boxes]
        merged = unary_union(polygons)
        # Multi-part results expose their members through .geoms and cannot be iterated
        # directly on Shapely 2.x; a single Polygon has no .geoms and is wrapped in a list
        if hasattr(merged, "geoms"):
            return list(merged.geoms)
        return [merged]

    def calculate_area(polygons):
        return sum(p.area for p in polygons)

    inferred_areas = {}
    original_areas = {}

    total_inferred_area = 0
    total_original_area = 0

    # Group the boxes by class and accumulate the raw box areas
    for inf_obj in inferred_objects:
        inferred_areas.setdefault(inf_obj['class_name'], []).append(inf_obj['bbox'])
        total_inferred_area += inf_obj['bbox'][2] * inf_obj['bbox'][3]

    for org_obj in original_objects:
        original_areas.setdefault(org_obj['class_name'], []).append(org_obj['bbox'])
        total_original_area += org_obj['bbox'][2] * org_obj['bbox'][3]

    merged_inferred_areas = {class_name: merge_boxes(boxes) for class_name, boxes in inferred_areas.items()}
    merged_original_areas = {class_name: merge_boxes(boxes) for class_name, boxes in original_areas.items()}

    total_intersection_area = 0

    # Only boxes of the same class can intersect; a class present on one side only adds nothing
    for class_name in set(merged_inferred_areas.keys()).union(set(merged_original_areas.keys())):
        inferred_polygons = merged_inferred_areas.get(class_name, [])
        original_polygons = merged_original_areas.get(class_name, [])
        intersection_polygons = [p1.intersection(p2) for p1 in inferred_polygons for p2 in original_polygons]
        intersection_area = calculate_area(intersection_polygons)
        total_intersection_area += intersection_area

    total_union_area = total_inferred_area + total_original_area - total_intersection_area
    iou = total_intersection_area / total_union_area if total_union_area > 0 else 0
    return iou


In [None]:
def find_optimal_confidence_thresholds(inference_results, bbox_data, threshold_step=0.02):
    """Find, per inference method, the confidence threshold with the best average IoU.

    For every threshold from 0 up to 1 in steps of threshold_step, detections
    below the threshold are dropped and the IoU against the ground truth is
    averaged over all images that have ground-truth data.

    Args:
        inference_results: Per-image dicts keyed by method name; each value has
            "image_id" and "objects" (objects carry a "confidence").
        bbox_data: Ground-truth entries with "image_id" (bare file name) and "objects".
        threshold_step: Spacing between the candidate thresholds.

    Returns:
        Dict mapping each method name (taken from the first result) to its best
        threshold, rounded to two decimals. The threshold stays 0 when no
        candidate reaches an average IoU above 0. An empty inference_results
        yields an empty dict.
    """
    # Create a dictionary to store the optimal confidence thresholds for each method
    optimal_thresholds = {}

    # Without results there are no methods to tune, and inference_results[0] would raise IndexError
    if not inference_results:
        return optimal_thresholds

    # Iterate over each inference method
    for method in inference_results[0].keys():
        # Pair each image's detections with its ground truth once per method; the pairing
        # does not depend on the threshold, so it is kept out of the threshold loop
        matched_images = []
        for inference_result in inference_results:
            image_file = os.path.basename(inference_result[method]["image_id"])

            # Find the corresponding bbox data for the image
            bbox_data_item = next((data for data in bbox_data if data["image_id"] == image_file), None)

            if bbox_data_item is not None:
                matched_images.append((inference_result[method]["objects"], bbox_data_item["objects"]))

        max_avg_iou = 0
        optimal_threshold = 0

        # Iterate over different confidence threshold values
        for threshold in np.arange(0, 1 + threshold_step, threshold_step):
            total_iou = 0
            image_count = 0

            for detected_objects, original_objects in matched_images:
                # Filter objects based on the current confidence threshold
                filtered_objects = [obj for obj in detected_objects if obj["confidence"] >= threshold]

                # Calculate IoU for the current image
                total_iou += calculate_iou(filtered_objects, original_objects)
                image_count += 1

            # Calculate the average IoU for the current threshold
            avg_iou = total_iou / image_count if image_count > 0 else 0

            # Strict comparison: on ties the lowest threshold wins
            if avg_iou > max_avg_iou:
                max_avg_iou = avg_iou
                optimal_threshold = threshold

        # Store the optimal threshold for the current method
        optimal_thresholds[method] = round(optimal_threshold, 2)

    return optimal_thresholds

In [None]:
# Converts class names and calculates optimal confidence thresholds for all images, also loads original bbox data

# Load the JSON file containing the bbox data
# This reads the same labels.json as the image-listing cell, giving this cell a fresh,
# unconverted copy of the ground truth each time it runs.
with open(os.path.join(image_folder, "labels.json"), "r") as file:
    original_bbox_data = json.load(file)

def convert_class_names_in_results(inference_results, bbox_data):
    """Run convert_class_names over every detection list and its ground truth.

    Both arguments are modified in place and also returned.

    Args:
        inference_results: list of dicts mapping a method name to a result
            dict that holds at least "image_id" and "objects".
        bbox_data: list of ground-truth dicts holding "image_id" and "objects".

    Returns:
        The tuple (inference_results, bbox_data).
    """
    # Index the ground truth once instead of rescanning it for every result.
    # setdefault keeps the first entry per image id, like the former linear scan.
    bbox_by_image = {}
    for data in bbox_data:
        bbox_by_image.setdefault(data["image_id"], data)

    for inference_result in inference_results:
        if not inference_result:
            # No method produced a result, so there is no image id to look up.
            continue

        for method in inference_result:
            inference_result[method]["objects"] = convert_class_names(inference_result[method]["objects"])

        # The image id is read from the first method's entry.
        image_file = os.path.basename(inference_result[next(iter(inference_result))]["image_id"])
        bbox_data_item = bbox_by_image.get(image_file)

        if bbox_data_item is not None:
            bbox_data_item["objects"] = convert_class_names(bbox_data_item["objects"])

    return inference_results, bbox_data

# Convert class names in the inference results and original data.
# NOTE(review): the "objects" lists inside all_inference_results are replaced in place, so
# re-running this cell passes already-converted names through convert_class_names again -
# confirm that conversion is idempotent.
all_inference_results, original_bbox_data = convert_class_names_in_results(all_inference_results, original_bbox_data)

# Per method, pick the confidence threshold that gives the highest average IoU and show the mapping
optimal_thresholds = find_optimal_confidence_thresholds(all_inference_results, original_bbox_data)
print(optimal_thresholds)


In [None]:
# Build optimized_inference_results: a copy of all_inference_results that keeps only
# the detections whose confidence reaches the per-method optimal threshold
optimized_inference_results = []

for inference_result in all_inference_results:
    optimized_result = {}
    for method, method_result in inference_result.items():
        optimal_threshold = optimal_thresholds[method]

        # Keep detections at or above the threshold for this method
        filtered_objects = list(
            filter(lambda obj: obj["confidence"] >= optimal_threshold, method_result["objects"])
        )

        # "image_iou" is not copied here; the IoU cell below adds it
        optimized_result[method] = dict(
            image_id=method_result["image_id"],
            objects=filtered_objects,
            inference_time=method_result["inference_time"],
            upload_time=method_result["upload_time"],
        )

    optimized_inference_results.append(optimized_result)

In [None]:
# Calculates IoU for each image and stores it as "image_iou" on every method result
for inference_result in optimized_inference_results:
    if not inference_result:
        # Nothing to score: no method produced a result for this entry
        continue

    # All methods of one entry refer to the same image, so read the id from the first one
    image_file = os.path.basename(inference_result[next(iter(inference_result))]["image_id"])

    # Find the corresponding bbox data for the image
    bbox_data_item = next((data for data in original_bbox_data if data["image_id"] == image_file), None)

    if bbox_data_item is None:
        # Warn once per image instead of once per method
        print(f"No corresponding bbox data found for image: {image_file}")

    # Calculate IoU for each inference method
    for method in inference_result:
        if bbox_data_item is not None:
            image_iou = calculate_iou(inference_result[method]["objects"], bbox_data_item["objects"])
        else:
            # NaN marks "no ground truth" so the key always exists for the reporting
            # and plotting cells; note that it propagates through np.mean
            image_iou = float("nan")
        inference_result[method]["image_iou"] = image_iou

    # Path kept for the optional (commented-out) bounding-box drawing calls below;
    # the image itself is no longer loaded because nothing used it
    image_path = os.path.join(image_folder, image_file)

    for method in inference_result:
        print(method)
        print("Inference time:", inference_result[method]["inference_time"], "ms")
        print("IoU: ", inference_result[method]["image_iou"])
        #draw_bounding_boxes(image_path, inference_result[method]["objects"])

    # Display the original image with the original bboxes
    print("Original Image:")
    #draw_bounding_boxes(image_path, bbox_data_item["objects"])

In [None]:
def output_results(content, file=None):
    """Print one report line and append it to a text file.

    Args:
        content: value to report; it is formatted with str(), so non-string
            values are accepted as well.
        file: writable text stream that receives the line. When omitted, the
            notebook-level ``text_file`` handle is used, which must be open at
            call time (i.e. the call happens inside its ``with`` block).
    """
    print(content)
    target = text_file if file is None else file
    target.write(f"{content}\n")

# Get hardware information
def get_cpu_info():
    """Return the 'brand_raw' entry of the dict produced by cpuinfo.get_cpu_info()."""
    return cpuinfo.get_cpu_info()['brand_raw']

# Get image resolution from the first tested image (all images have the same resolution)
_first_result = optimized_inference_results[0]
_first_image_id = _first_result[next(iter(_first_result))]['image_id']
# Use the bare file name, as the other cells do, so an image_id that carries a
# directory prefix still resolves inside image_folder
image_path = os.path.join(image_folder, os.path.basename(_first_image_id))
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable or missing file by returning None
    raise FileNotFoundError(f"Could not read image to determine resolution: {image_path}")
# Slice instead of unpacking into `_`, which would shadow IPython's last-output variable
height, width = image.shape[:2]
image_resolution = f"{width}x{height}"

# Get hardware information
cpu_info = get_cpu_info()
ram_info = psutil.virtual_memory()
try:
    # new_query() returns a collection of per-GPU entries; the report reads
    # .name and .memory_total, which exist on an entry rather than on the
    # collection, so keep the first GPU (or False when none is listed)
    _gpus = list(gpustat.new_query())
    gpu_info = _gpus[0] if _gpus else False
except Exception:
    # Best effort: a missing driver or NVML library must not abort the report
    gpu_info = False

# Collect the per-image samples of every tracked metric per method, then average them
tracked_metrics = ('inference_time', 'upload_time', 'image_iou')
method_metrics = {}
for inference_result in optimized_inference_results:
    for method in inference_result:
        # First sighting of a method creates its empty sample lists
        samples = method_metrics.setdefault(method, {metric: [] for metric in tracked_metrics})
        for metric in tracked_metrics:
            samples[metric].append(float(inference_result[method][metric]))

# Store each mean next to its samples under an 'avg_' prefixed key
for method in method_metrics:
    for metric in tracked_metrics:
        method_metrics[method][f'avg_{metric}'] = np.mean(method_metrics[method][metric])

# Results go into a folder under output_path whose name carries the current timestamp
current_datetime = f"{datetime.now():%Y%m%d_%H%M%S}"
folder_name = "results_" + current_datetime
folder_path = os.path.join(output_path, folder_name)
os.makedirs(folder_path, exist_ok=True)

def _to_json_compatible(value):
    """json ``default`` hook: turn NumPy scalars and arrays into plain Python values.

    Raises:
        TypeError: for any other unsupported type, so the failure names the
            offending type instead of surfacing as a circular-reference error.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

# Convert NumPy values (e.g. numpy.float32) to regular Python objects via a JSON round trip
all_inference_results = json.loads(json.dumps(all_inference_results, default=_to_json_compatible))

# Save the original all_inference_results data as a JSON file
json_file_path = os.path.join(folder_path, "all_inference_results.json")
with open(json_file_path, "w") as json_file:
    json.dump(all_inference_results, json_file, indent=4)

# Write the run report to results.txt; output_results also echoes every line to the cell output
text_file_path = os.path.join(folder_path, "results.txt")
with open(text_file_path, "w") as text_file:
    # Run summary
    for report_line in (
        "BATCH INFERENCE",
        f"Number of Images Tested: {len(optimized_inference_results)}",
        f"Image resolution: {image_resolution}",
        f"Dataset used: {dataset_name}",
        f"Dataset path: {image_folder}",
    ):
        output_results(report_line)

    # Cloud service configuration
    for report_line in (
        "SaaS region/server data:",
        f"AWS_REGION: {AWS_REGION}",
        f"GOOGLE_PROJECT_ID: {GOOGLE_PROJECT_ID}",
        f"MICROSOFT_ENDPOINT: {MICROSOFT_ENDPOINT}",
    ):
        output_results(report_line)

    # Local hardware
    output_results("Hardware Information:")
    output_results(f"CPU: {cpu_info}")
    total_ram_gb = ram_info.total / 1024 ** 3  # bytes -> GiB
    output_results(f"RAM: {total_ram_gb:.2f} GB")
    output_results(
        f"GPU: {gpu_info.name} - {gpu_info.memory_total / 1024:.2f} GB" if gpu_info else "GPU: N/A"
    )

    # Per-method averages
    output_results("Averaged Results:")
    for method in method_metrics:
        metrics = method_metrics[method]
        for report_line in (
            f"{method}:",
            f" Average Inference Time: {metrics['avg_inference_time']:.2f} ms",
            f" Average Upload Time: {metrics['avg_upload_time']:.2f} ms",
            f" Average Image IoU: {metrics['avg_image_iou']:.4f}",
            f" Optimal confidence threshold used: {optimal_thresholds[method]:.2f}",
        ):
            output_results(report_line)


In [None]:
# Method names plus one list of averages per metric, all in the same method order
methods = list(method_metrics)
avg_inference_times, avg_upload_times, avg_image_ious = (
    [method_metrics[method][average_key] for method in methods]
    for average_key in ('avg_inference_time', 'avg_upload_time', 'avg_image_iou')
)

# Stacked bar chart: average inference time with the average upload time on top
bar_width = 0.35
x = np.arange(len(methods))
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x, avg_inference_times, bar_width, label='Inference Time')
ax.bar(x, avg_upload_times, bar_width, bottom=avg_inference_times, label='Upload Time')
ax.set_xlabel('Method')
ax.set_ylabel('Time (ms)')
ax.set_title('Average Inference Time and Upload Time by Method')
ax.set_xticks(x)
ax.set_xticklabels(methods, rotation=45)
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(folder_path, "avg_inference_upload_time.png"))
plt.show()
plt.close(fig)

# Bar chart: average image IoU per method
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(methods, avg_image_ious)
ax.set_xlabel('Method')
ax.set_ylabel('Average Image IoU')
ax.set_title('Average Image IoU by Method')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig(os.path.join(folder_path, "avg_image_iou.png"))
plt.show()
plt.close(fig)

# Per-call series: one value per inference result and method, in processing order
inference_times = {method: [] for method in methods}
upload_times = {method: [] for method in methods}
image_ious = {method: [] for method in methods}

for inference_result in optimized_inference_results:
    for method in methods:
        method_result = inference_result[method]
        inference_times[method].append(float(method_result['inference_time']))
        upload_times[method].append(float(method_result['upload_time']))
        image_ious[method].append(float(method_result['image_iou']))

# Inference and upload time per call: a line plus markers for every method
fig, ax = plt.subplots(figsize=(12, 6))
for method in methods:
    inference_calls = range(1, len(inference_times[method]) + 1)
    upload_calls = range(1, len(upload_times[method]) + 1)
    ax.plot(inference_calls, inference_times[method], label=f'{method} - Inference Time')
    ax.plot(upload_calls, upload_times[method], label=f'{method} - Upload Time')
    ax.scatter(inference_calls, inference_times[method])
    ax.scatter(upload_calls, upload_times[method])
ax.set_xlabel('Inference Call')
ax.set_ylabel('Time (ms)')
ax.set_title('Inference and Upload Time')
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(folder_path, "inference_upload_time.png"))
plt.show()
plt.close(fig)

# Image IoU per call: a line plus markers for every method
fig, ax = plt.subplots(figsize=(12, 6))
for method in methods:
    iou_calls = range(1, len(image_ious[method]) + 1)
    ax.plot(iou_calls, image_ious[method], label=method)
    ax.scatter(iou_calls, image_ious[method])
ax.set_xlabel('Inference Call')
ax.set_ylabel('Image IoU')
ax.set_title('Image IoU')
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(folder_path, "image_iou.png"))
plt.show()
plt.close(fig)

# Histograms to visualize variance of confidence values, inference times, and IoU throughout each inference of image
confidence_values = {method: [] for method in methods}
inference_times_hist = {method: [] for method in methods}
upload_times_hist = {method: [] for method in methods}
image_ious_hist = {method: [] for method in methods}

for inference_result in optimized_inference_results:
    for method in methods:
        for obj in inference_result[method]['objects']:
            confidence_values[method].append(obj['confidence'])
        inference_times_hist[method].append(float(inference_result[method]['inference_time']))
        upload_times_hist[method].append(float(inference_result[method]['upload_time']))
        image_ious_hist[method].append(float(inference_result[method]['image_iou']))


def _plot_method_histograms(method_names, layers, bins, xlabel, title_prefix, output_file, n_cols=3):
    """Draw one density-histogram panel per method, then save and show the figure.

    Args:
        method_names: methods to plot, one panel each.
        layers: list of (values_by_method, hist_kwargs) pairs; every pair is
            drawn as one histogram in each panel, in list order.
        bins: bin edges shared by all panels.
        xlabel: x-axis label of every panel.
        title_prefix: panel titles read "<title_prefix> - <method>".
        output_file: path the figure is saved to.
        n_cols: number of panel columns; rows are added as needed.
    """
    # Size the grid from the number of methods instead of assuming six panels
    n_rows = max(1, int(np.ceil(len(method_names) / n_cols)))
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axs = axs.flatten()
    for ax, method_name in zip(axs, method_names):
        for values_by_method, hist_kwargs in layers:
            ax.hist(values_by_method[method_name], bins=bins, density=True, **hist_kwargs)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        ax.set_title(f'{title_prefix} - {method_name}')
    # Hide panels that have no method assigned
    for spare_ax in axs[len(method_names):]:
        spare_ax.set_visible(False)
    plt.tight_layout()
    plt.savefig(output_file)
    plt.show()
    plt.close(fig)


# Confidence values and IoU both live in [0, 1]
unit_bins = np.linspace(0, 1, 21)

# Confidence Value Distribution
_plot_method_histograms(
    methods,
    [(confidence_values, {})],
    unit_bins,
    'Confidence Value',
    'Confidence Value Distribution',
    os.path.join(folder_path, "confidence_value_distribution.png"),
)

# Inference Time Distribution (upload time overlaid in red)
max_inference_time = max(max(times) for times in inference_times_hist.values())
max_upload_time = max(max(times) for times in upload_times_hist.values())
# The bins must span both series: samples beyond the last edge are not counted
bins = np.linspace(0, max(max_inference_time, max_upload_time), 21)
_plot_method_histograms(
    methods,
    [(inference_times_hist, {}), (upload_times_hist, {'alpha': 0.5, 'color': 'r'})],
    bins,
    'Time (ms)',
    'Time Distribution',
    os.path.join(folder_path, "time_distribution.png"),
)

# Image IoU Distribution
_plot_method_histograms(
    methods,
    [(image_ious_hist, {})],
    unit_bins,
    'Image IoU',
    'Image IoU Distribution',
    os.path.join(folder_path, "image_iou_distribution.png"),
)

In [None]:
# Image used for the optional single-image test (absolute, machine-specific path)
single_test_path_image = "G:\\My Drive\\Thesis2024\\data\\train\\1478897977253835405_jpg.rf.k1YHtF480ieDxDfcboNr.jpg"
# Disabled call: while this stays commented out, this cell does not define test_aio
#test_aio = inferenceFromAllSources(single_test_path_image)

In [None]:
# Show the single-image results. test_aio is only assigned by the commented-out
# inferenceFromAllSources call, so skip with a hint instead of raising NameError.
if "test_aio" not in globals():
    print("test_aio is not defined - run inferenceFromAllSources(single_test_path_image) first.")
else:
    for method in test_aio:
        print(method)
        print("Inference time:", test_aio[method]["inference_time"], "ms")
        #print("IoU: ", inference_result[method]["image_iou"])
        draw_bounding_boxes(single_test_path_image, test_aio[method]["objects"])

In [None]:
# Load a saved file
# Specify the path to the JSON file containing the saved inference results
json_file_path = os.path.join(folder_path, "all_inference_results.json")

# Read the JSON file and load the data into the all_inference_results variable
with open(json_file_path, "r") as json_file:
    all_inference_results = json.load(json_file)

# Report a short summary instead of dumping the whole structure into the cell output
print("Loaded data from JSON file:")
print(f"{len(all_inference_results)} inference results read from {json_file_path}")