[![](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=yellow&logoColor=white&label=%20&style=flat-square)](https://colab.research.google.com/github/ap-mdi-it/ml-courses/blob/main/book/ml_principles/labo/taco.ipynb)

Navigeer naar deze notebook op GitHub: [https://github.com/ap-mdi-it/ml-courses/blob/main/book/ml_principles/labo/taco.ipynb](https://github.com/ap-mdi-it/ml-courses/blob/main/book/ml_principles/labo/taco.ipynb)  
  
Via bovenstaande link kan je deze notebook openen in Google Colaboratory. In die omgeving kunnen we gebruikmaken van gratis quota voor GPU's (en TPU's). GPU-acceleratie is hier sterk aanbevolen voor zowel model training als model inference.  
  
Deze notebook is gebaseerd op: [https://www.kaggle.com/code/shuvosharkr/taco-trash-detection-yolov8s](https://www.kaggle.com/code/shuvosharkr/taco-trash-detection-yolov8s)

# Colab setup
- Als je de notebook voor het eerst opent op Colab, kies je in het menu rechts bovenaan `Connect > Change runtime type`: `Python 3` en `T4 GPU`.
- Pas nadat de GPU Runtime is opgestart ga je verder met onderstaande installatie van het [ultralytics](https://github.com/ultralytics/ultralytics) package.
- ⚠️ Bij deze waarschuwing `Warning: This notebook was not authored by Google` selecteer je `Run anyway`.

In [None]:
# Install the ultralytics package (source of the YOLO class imported below)
# into the notebook runtime via a shell escape.
!pip install ultralytics

In [None]:
import glob
import json
import os
import shutil
from pprint import pprint

import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
import yaml
from PIL import Image, ImageDraw
from ultralytics import YOLO


In [None]:
# Set the datasets directory of the Ultralytics package on Colab by
# rewriting its JSON settings file in place.
settings_file = "/root/.config/Ultralytics/settings.json"

with open(settings_file) as f:
    settings = json.load(f)

# NOTE(review): Colab's working directory is normally "/content" (no
# trailing "s") - confirm that "/contents" is the intended value.
settings["datasets_dir"] = "/contents"

with open(settings_file, "w") as f:
    f.write(json.dumps(settings))

# Dataset

In [None]:
# Fetch the TACO dataset (YOLO format) through kagglehub and print the
# value returned by the download call.
dataset_handle = "vencerlanz09/taco-dataset-yolo-format"
path = kagglehub.dataset_download(dataset_handle)

print(path)

In [None]:
# Work on a copy under ./taco instead of moving the downloaded directory
# (avoids permission issues). An already existing copy is reused as-is.
destination = "./taco"
needs_copy = not os.path.exists(destination)
if needs_copy:
    shutil.copytree(path, destination)

# From here on `path` refers to the local copy.
path = destination
print(path)

In [None]:
# Update the dataset metadata in data.yaml (see taco#1.ipynb)
data_yaml = path + "/data.yaml"
pristine_yaml = data_yaml + ".orig"

# This cell rewrites data.yaml in place. Reading back the rewritten file would
# undo the label swap below every second time the cell is executed, so keep an
# untouched copy on the first run and always start from that copy.
if not os.path.exists(pristine_yaml):
    shutil.copyfile(data_yaml, pristine_yaml)

with open(pristine_yaml) as f:
    meta = yaml.safe_load(f)

# Update image paths so they point into the local dataset copy
meta["train"] = path + "/train/images"
meta["val"] = path + "/valid/images"
meta["test"] = path + "/test/images"

# Swap the "Bottle cap" and "Bottle" class names
bottle_cap_idx = meta["names"].index("Bottle cap")
bottle_idx = meta["names"].index("Bottle")
meta["names"][bottle_cap_idx], meta["names"][bottle_idx] = (
    meta["names"][bottle_idx],
    meta["names"][bottle_cap_idx],
)

# Write the updated metadata back to data.yaml
with open(data_yaml, "w") as f:
    yaml.dump(meta, f)

pprint(meta)

# Training (hyper) Parameters

In [None]:
# Training hyper-parameters used by the training cell

# Schedule and input geometry
epochs, batch_size = 50, 32
imgsz = 640  # Image size

# Optimizer configuration
optimizer_type = "AdamW"  # AdamW optimizer (recommended for better regularization)
lr = weight_decay = 1e-4  # Same value for learning rate and weight decay

# Model Training

In [None]:
# Initialize a YOLO model from the pre-trained "yolov8s.pt" weights
model = YOLO("yolov8s.pt")

# NOTE(review): the later cells read their artifacts from ./runs/detect/train
# rather than from this directory - verify that `save_dir` is honoured by
# model.train().
save_dir = "./runs/train/exp"
os.makedirs(save_dir, exist_ok=True)

# Train the model with the hyper-parameters defined above.
# NOTE(review): no cosine learning-rate option is passed in this call, so the
# schedule is whatever the library uses by default - confirm the intended one.
model.train(
    data=path + "/data.yaml",
    epochs=epochs,
    batch=batch_size,
    imgsz=imgsz,
    optimizer=optimizer_type,
    lr0=lr,  # Initial learning rate
    weight_decay=weight_decay,
    save=True,  # Save model weights during training
    save_period=1,  # Checkpoint interval in epochs (1 = every epoch)
    val=True,  # Evaluate on validation set
    save_dir=save_dir,
)

In [None]:
# Reload the best checkpoint of the training run and validate it
best_model = YOLO("./runs/detect/train/weights/best.pt")
val_results = best_model.val()

# Report the aggregate box metrics, each with four decimals
print("Best Validation Metrics from Best Model:")
val_box = val_results.box
for caption, value in (
    ("Precision", val_box.mp),
    ("Recall", val_box.mr),
    ("mAP@50", val_box.map50),
    ("mAP@50-95", val_box.map),
):
    print(f"{caption}: {value:.4f}")


# log file testing

In [None]:
# Load the training log (results.csv) of the run into a DataFrame
log_file = "./runs/detect/train/results.csv"
log_data = pd.read_csv(log_file)

# Inspect the column names, then the first few rows
for preview in (log_data.columns, log_data.head()):
    print(preview)


# Graphs

In [None]:
# Columns needed for the loss curves
loss_columns = [
    "train/box_loss",
    "train/cls_loss",
    "train/dfl_loss",
    "val/box_loss",
    "val/cls_loss",
    "val/dfl_loss",
]
required_columns = ["epoch", *loss_columns]

# Convert the columns to numeric; entries that cannot be parsed become NaN
for column in required_columns:
    log_data[column] = pd.to_numeric(log_data[column], errors="coerce")

# Drop incomplete rows first and only then cast the epoch to int: casting a
# column that still contains NaN to int raises an error.
log_data = log_data.dropna(subset=required_columns).astype({"epoch": int})

# Plot the training and validation losses
plt.figure(figsize=(12, 8))

# (column, legend label, line style, marker): solid lines for training,
# dashed lines for validation, one marker shape per loss type
curves = [
    ("train/box_loss", "Train Box Loss", "-", "o"),
    ("train/cls_loss", "Train Class Loss", "-", "x"),
    ("train/dfl_loss", "Train DFL Loss", "-", "s"),
    ("val/box_loss", "Val Box Loss", "--", "o"),
    ("val/cls_loss", "Val Class Loss", "--", "x"),
    ("val/dfl_loss", "Val DFL Loss", "--", "s"),
]
for column, curve_label, line_style, marker in curves:
    plt.plot(
        log_data["epoch"],
        log_data[column],
        label=curve_label,
        linestyle=line_style,
        marker=marker,
    )

# Customize the plot
plt.title("Training and Validation Losses over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(log_data["epoch"])  # Ensure that the epoch ticks are shown as integers
plt.legend(loc="upper right")

# Show the plot
plt.show()


# Validation results

In [None]:
# Evaluate the trained model on the validation set and print the result object
results = model.val()
validation_summary = f"Validation Results: {results}"
print(validation_summary)

In [None]:
# Report the aggregate box metrics of the validation run
print("Validation Results:")
validation_box = results.box
for caption, value in (
    ("Mean Precision:", validation_box.mp),  # Mean Precision
    ("Mean Recall:", validation_box.mr),  # Mean Recall
    ("mAP 50:", validation_box.map50),  # Mean Average Precision at IoU 0.5
    ("mAP 50-95:", validation_box.map),  # Mean Average Precision at IoU 0.5-0.95
):
    print(caption, value)


# Validation Metrics plot

In [None]:
# Look up the four scalar metrics in the results dictionary
scores = results.results_dict
precision = scores["metrics/precision(B)"]
recall = scores["metrics/recall(B)"]
map50 = scores["metrics/mAP50(B)"]
map50_95 = scores["metrics/mAP50-95(B)"]

# One bar per metric (a single snapshot, not a curve over time)
metrics = ["Precision", "Recall", "mAP50", "mAP50-95"]
values = [precision, recall, map50, map50_95]
bar_colors = ["b", "r", "g", "purple"]

plt.figure(figsize=(8, 6))
plt.bar(metrics, values, color=bar_colors)
plt.ylabel("Values")
plt.title("Model Evaluation Metrics")
plt.show()

# Testing

In [None]:
# Evaluate the best checkpoint on the test split.
# The dataset config is the data.yaml inside the dataset directory (the same
# file that is passed to model.train); a bare "data.yaml" would be looked up
# relative to the current working directory instead.
best_model = YOLO("./runs/detect/train/weights/best.pt")
test_results = best_model.val(data=path + "/data.yaml", split="test")

In [None]:
# Print test metrics
print(f"Test Precision: {test_results.box.mp:.4f}")
print(f"Test Recall: {test_results.box.mr:.4f}")
print(f"Test mAP@50: {test_results.box.map50:.4f}")
print(f"Test mAP@50-95: {test_results.box.map:.4f}")

In [None]:
# Function to parse ground truth annotations in YOLO format
def parse_annotation(annotation_path):
    """Parse a YOLO-style annotation file.

    Each non-empty line is read as ``class_id`` followed by the box values
    (``x_center y_center width height`` in YOLO format). Blank lines are
    skipped.

    Args:
        annotation_path: Path of the annotation (``.txt``) file.

    Returns:
        A ``(labels, boxes)`` tuple. ``labels`` is the list of integer class
        IDs and ``boxes`` the list of float lists holding the values that
        follow the class ID on each line. Both lists are empty when the file
        does not exist (a message is printed in that case).
    """
    if not os.path.exists(annotation_path):
        print(f"Annotation file {annotation_path} not found.")
        return [], []  # Return empty lists if annotation file is missing

    labels = []
    boxes = []
    with open(annotation_path, "r") as file:
        for line in file:
            parts = line.split()
            if not parts:
                # Blank line (e.g. a trailing empty line): nothing to parse
                continue
            labels.append(int(parts[0]))  # Class ID
            boxes.append([float(x) for x in parts[1:]])  # Box values
    return labels, boxes


def _draw_yolo_boxes(draw, boxes, labels, image_size, outline, text_fill):
    """Draw normalized YOLO boxes and their labels on a PIL drawing context.

    Args:
        draw: ``ImageDraw`` object of the target image.
        boxes: Iterable of ``[x_center, y_center, width, height]`` entries,
            expressed as fractions of the image size.
        labels: Label text per box; "Unknown" is used when a label is missing.
        image_size: ``(width, height)`` of the target image in pixels.
        outline: Colour of the rectangle outline.
        text_fill: Colour of the label text.
    """
    img_width, img_height = image_size
    for i, (x_center, y_center, width, height) in enumerate(boxes):
        # Convert centre/size fractions to pixel corner coordinates
        x1 = int((x_center - width / 2) * img_width)
        y1 = int((y_center - height / 2) * img_height)
        x2 = int((x_center + width / 2) * img_width)
        y2 = int((y_center + height / 2) * img_height)
        draw.rectangle([x1, y1, x2, y2], outline=outline, width=2)
        draw.text((x1, y1), labels[i] if i < len(labels) else "Unknown", fill=text_fill)


# Load the YOLO model
model = YOLO("./runs/detect/train/weights/best.pt")

# Path to the test images and corresponding labels (annotations)
test_image_dir = path + "/test/images/"
test_label_dir = path + "/test/labels/"

# Get the list of test images. Both ".jpg" and ".JPG" are collected; the set
# removes duplicates on platforms where the two patterns match the same files.
test_images = sorted(
    set(glob.glob(os.path.join(test_image_dir, "*.jpg")))
    | set(glob.glob(os.path.join(test_image_dir, "*.JPG")))
)

# Output directory to save inference results
output_dir = "./inference_results/"
os.makedirs(output_dir, exist_ok=True)

# Class names used to label the ground truth boxes.
# NOTE(review): this list is hard-coded - verify that its order matches the
# `names` entry of the data.yaml the model was trained with.
class_names = [
    "Aluminium foil",
    "Bottle cap",
    "Bottle",
    "Broken glass",
    "Can",
    "Carton",
    "Cigarette",
    "Cup",
    "Lid",
    "Other litter",
    "Other plastic",
    "Paper",
    "Plastic bag - wrapper",
    "Plastic container",
    "Pop tab",
    "Straw",
    "Styrofoam piece",
    "Unlabeled litter",
]

# Loop through each image and perform inference
for img_path in test_images:
    img_name = os.path.basename(img_path)

    # Annotation file (YOLO format): same base name with a ".txt" extension
    annotation_path = os.path.join(test_label_dir, os.path.splitext(img_name)[0] + ".txt")

    # Perform inference without verbose output
    results = model(img_path, verbose=False)
    prediction = results[0]

    # Get actual labels (ground truth) from annotation file
    actual_labels, actual_boxes = parse_annotation(annotation_path)
    actual_labels_names = [class_names[label] for label in actual_labels]

    # Save the result image with predictions
    result_img_path = os.path.join(output_dir, img_name)
    prediction.save(result_img_path)

    # Extract predicted labels and bounding boxes
    if prediction.boxes is None or len(prediction.boxes.cls) == 0:
        predicted_labels = ["No prediction"]
        predicted_boxes = []
    else:
        predicted_labels = [prediction.names[int(cls)] for cls in prediction.boxes.cls]
        # Use the normalized boxes (xywhn): the drawing code scales by the
        # image size, so pixel-valued boxes (xywh) would land far off-image.
        predicted_boxes = prediction.boxes.xywhn.cpu().numpy()

    # Open the saved prediction image and the original image (for the ground
    # truth); the context manager closes both files after plotting.
    with Image.open(result_img_path) as img_predicted, Image.open(img_path) as img_actual:
        draw_predicted = ImageDraw.Draw(img_predicted)
        draw_actual = ImageDraw.Draw(img_actual)

        # Draw predicted bounding boxes (blue) on the predicted image
        if len(predicted_boxes) == 0:
            draw_predicted.text((10, 10), "No prediction", fill="red")
        else:
            _draw_yolo_boxes(
                draw_predicted,
                predicted_boxes,
                predicted_labels,
                img_predicted.size,
                outline="blue",
                text_fill="blue",
            )

        # Draw ground truth bounding boxes (green) on the actual image,
        # scaled by that image's own size
        _draw_yolo_boxes(
            draw_actual,
            actual_boxes,
            actual_labels_names,
            img_actual.size,
            outline="green",
            text_fill="white",
        )

        # Display images side by side
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))

        axes[0].imshow(img_predicted)
        axes[0].set_title("\n".join(predicted_labels), fontsize=14, wrap=True)
        axes[0].axis("off")

        axes[1].imshow(img_actual)
        axes[1].set_title("\n".join(actual_labels_names), fontsize=14, wrap=True)
        axes[1].axis("off")

        plt.show()
        # Release the figure so figures do not accumulate across the loop
        plt.close(fig)
