### Label Studio pre-annotation

In [None]:
"""
This script runs predictions with the best YOLOv8 model on the images in a specified folder.
It generates JSON files compatible with Label Studio so the predictions can be validated there.
Once the predictions have been validated, the model can be retrained on the newly validated data.

Workflow:
1. Load the trained YOLO model.
2. Re-save the images as RGB, skipping any file that cannot be read.
3. Run inference on the images.
4. Format predictions in a way compatible with Label Studio's JSON schema.
5. Save one JSON task file per image, grouped into batch folders of 100 images, for synchronization with Label Studio.

Required libraries:
- `ultralytics` for the YOLOv8 model.
- `PIL` (Pillow) for image handling.
- `json` for creating the result files.
"""

import json  # To save results as JSON for Label Studio
import os  # File and directory handling
from math import floor  # For rounding down batch numbers
from urllib.parse import quote  # Percent-encode filenames embedded in task URLs

from PIL import Image  # Image processing
from ultralytics import YOLO  # YOLOv8 model from Ultralytics library

# Model weights and working folders
model = YOLO('runs/detect/train12/weights/best.pt')  # Detector loaded from the trained weights file
image_folder = 'auto-annotation/image_inference'  # Images that inference is run on
output_base_folder = 'auto-annotation/batched'  # Root folder for the batched JSON output

# YOLO class index -> label name. The position of each name in the tuple is
# its class index: 0 = bubble, 1 = narration, 2 = other, 3 = text, 4 = ui.
label_mapping = dict(enumerate(('bubble', 'narration', 'other', 'text', 'ui')))

# Function to re-encode every image in a folder as RGB, skipping unreadable files
def reload_and_save_images(folder_path):
    """
    Re-open every image in the folder and save it back in RGB mode.

    Files whose names end in .jpg, .jpeg or .png (case-insensitive) are opened,
    converted to RGB and written back to the same path. A file that cannot be
    opened, converted or saved is reported on stdout and skipped.

    Args:
    - folder_path (str): Path to the folder containing images to be fixed.

    Returns:
    - int: The number of images that were successfully re-saved. Every image
      that could be re-saved is counted, not only ones that were damaged.
    """
    fixed_count = 0  # Counter for successfully re-saved images
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue  # Not an image file this script handles
        path = os.path.join(folder_path, filename)
        try:
            # The context manager releases the source file handle even when
            # the conversion fails part-way through.
            with Image.open(path) as img:
                rgb_img = img.convert("RGB")  # New in-memory image in RGB mode
            # Write back only after the source handle has been closed.
            rgb_img.save(path)
            fixed_count += 1
        except Exception as e:
            # Best effort: report the file and carry on with the rest.
            print(f"Skipping {filename}: {e}")
    return fixed_count

# Re-save every readable image in the image folder as RGB
fixed_images_count = reload_and_save_images(image_folder)
# The count covers every image that was re-saved, not only damaged ones,
# so the message reports it as such.
print(f"Re-saved {fixed_images_count} images in the folder: {image_folder}")

# Prepare images for inference by listing image files. The extension list
# matches the one used by reload_and_save_images so that .jpeg files are not
# silently left out of inference.
image_extensions = ('.jpg', '.jpeg', '.png')
image_files = sorted(
    f for f in os.listdir(image_folder) if f.lower().endswith(image_extensions)
)  # Sorted so the batch an image lands in is stable between runs

print(f"Found {len(image_files)} images in the folder.")

# URL prefix under which Label Studio serves the images as local files.
# NOTE(review): this path ends in "auto-annotation/image/" while inference reads
# from `image_folder` ("auto-annotation/image_inference") - confirm the images
# are also served from this location.
LABEL_STUDIO_IMAGE_URL_PREFIX = "http://localhost:8080/data/local-files/?d=Users/handw/Documents/Portfolio-FTDS/YOLO-manga-bubble-detector/auto-annotation/image/"

# Number of consecutive entries of image_files that share one batch folder
BATCH_SIZE = 100


def _box_to_label_studio_result(xyxy, label, width, height):
    """
    Convert one pixel-space box into a Label Studio rectangle result.

    Args:
    - xyxy (sequence of float): Box corners as (x1, y1, x2, y2) in pixels.
    - label (str): Label name to attach to the rectangle.
    - width (int): Image width in pixels.
    - height (int): Image height in pixels.

    Returns:
    - dict: Result entry whose x, y, width and height are percentages of the
      image dimensions.
    """
    x1, y1, x2, y2 = xyxy
    return {
        "value": {
            "x": (x1 / width) * 100,
            "y": (y1 / height) * 100,
            "width": ((x2 - x1) / width) * 100,
            "height": ((y2 - y1) / height) * 100,
            "rectanglelabels": [label]
        },
        "from_name": "label",  # The name of the label input
        "to_name": "image",  # The name of the target (the image)
        "type": "rectanglelabels"  # The type of annotation
    }


# Process each image file for inference
for i, filename in enumerate(image_files):
    image_path = os.path.join(image_folder, filename)

    # Debugging output to show which image is being processed
    print(f"Processing image: {image_path}")

    try:
        # Run inference on the image; the first result belongs to this image
        results = model(image_path)[0]
    except Exception as e:
        print(f"Error processing {image_path}: {e}")  # Log any errors in processing
        continue  # Skip to the next image if an error occurs

    # Treat a missing boxes attribute as "no detections" so the loop below
    # never iterates over None.
    boxes = results.boxes if results.boxes is not None else []
    if len(boxes) == 0:
        print(f"No detections for {image_path}.")
    else:
        print(f"Detections for {image_path}: {len(boxes)} objects detected.")

    with Image.open(image_path) as img:  # Open image only to read its size
        width, height = img.size

    # One Label Studio result per detected box; class indices that are not in
    # label_mapping are labelled "unknown".
    predictions = [
        _box_to_label_studio_result(
            box.xyxy[0].tolist(),
            label_mapping.get(int(box.cls[0].item()), "unknown"),
            width,
            height,
        )
        for box in boxes
    ]

    # Structure the task as a Label Studio JSON object. The filename is
    # percent-encoded because it is embedded in a URL query string, where
    # characters such as spaces, '+', '%', '#' or '&' would otherwise change
    # the meaning of the URL.
    task = {
        "data": {
            "image": f"{LABEL_STUDIO_IMAGE_URL_PREFIX}{quote(filename)}"
        },
        "predictions": [
            {
                "model_version": "yolo-best.pt",  # Model version used
                "result": predictions  # The list of predictions
            }
        ]
    }

    # Group output by position in image_files: entries 0-99 go to batch_1,
    # 100-199 to batch_2, and so on. Images skipped because of an inference
    # error still occupy their position.
    batch_folder = os.path.join(output_base_folder, f"batch_{i // BATCH_SIZE + 1}")
    os.makedirs(batch_folder, exist_ok=True)  # Create the batch folder if it doesn't exist

    # Save the task as a JSON file named after the image (extension replaced)
    json_filename = f"{os.path.splitext(filename)[0]}.json"
    json_path = os.path.join(batch_folder, json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(task, f, indent=2)  # Save the task as a formatted JSON
