# Minecraft Object Detection Project

Notebook for EDA, fine-tuning FCOS and YOLO, and benchmarking on the Minecraft mobs dataset.


## Roadmap

1. Environment setup and configuration checks
2. Dataset verification, EDA, and visualization
3. Baseline inference with pretrained FCOS and YOLO models
4. Fine-tuning experiments and metric tracking
5. Comparative evaluation, video inference, and reporting


In [None]:
# --- Environment bootstrap ---
import os
import sys
from pathlib import Path

# Treat the notebook's working directory as the project root.
PROJECT_ROOT = Path.cwd()
SRC_DIR = PROJECT_ROOT.joinpath("src")

# Put ``src/`` at the front of the import path only when it is not already
# listed, so re-running this cell does not stack duplicate entries.
if sys.path.count(str(SRC_DIR)) == 0:
    sys.path[:0] = [str(SRC_DIR)]

print(f"Project root: {PROJECT_ROOT}")


## 0. Dataset Inspection & EDA

> TODO: Load COCO annotations, validate splits, analyze class distribution, and visualize labeled samples.


In [None]:
# Dataset paths and annotation loading
from src.data.dataset_checks import (
    ensure_annotation_files_exist,
    load_coco_annotations,
    validate_image_annotation_counts,
    ensure_class_coverage,
    summarize_class_distribution,
)
from src.visualization.plots import plot_class_distribution, visualize_bboxes

# Dataset location; the helper's result is indexed by split name below.
DATASET_ROOT = PROJECT_ROOT.joinpath("datasets", "minecraft")
annotation_paths = ensure_annotation_files_exist(DATASET_ROOT)

# Load the annotations of each split.
train_annotations = load_coco_annotations(annotation_paths["train"])
val_annotations = load_coco_annotations(annotation_paths["val"])
test_annotations = load_coco_annotations(annotation_paths["test"])

# Run the class-coverage check on every split, in train/val/test order.
for _loaded_split in (train_annotations, val_annotations, test_annotations):
    ensure_class_coverage(_loaded_split)

print("Annotation files verified and class coverage confirmed.")


In [None]:
# Summaries for each split
import pandas as pd

# Build one summary row per split: image count, annotation count, and the
# average number of annotations per image (0 when the split has no images).
split_summaries = []
for split_name, annotations in {
    "train": train_annotations,
    "val": val_annotations,
    "test": test_annotations,
}.items():
    num_images, num_ann = validate_image_annotation_counts(annotations)
    if num_images:
        ann_per_image = num_ann / num_images
    else:
        ann_per_image = 0
    split_summaries.append(
        dict(
            split=split_name,
            images=num_images,
            annotations=num_ann,
            ann_per_image=ann_per_image,
        )
    )

summary_df = pd.DataFrame(split_summaries)
display(summary_df)



In [None]:
# Class distribution plot
# Summarise the training split's class distribution, hand the plot helper its
# output path, and leave the counts as the cell's displayed value.
class_counts = summarize_class_distribution(train_annotations)
class_distribution_png = PROJECT_ROOT / "artifacts" / "metrics" / "class_distribution.png"
plot_class_distribution(class_counts, output_path=class_distribution_png)
class_counts


In [None]:
# Visualise a sample from the test split
from random import choice

# Category id -> name lookup passed to the visualiser.
category_id_to_name = dict(
    (category["id"], category["name"]) for category in test_annotations["categories"]
)

# Pick one test image at random and collect every annotation that points at it.
image_info = choice(test_annotations["images"])
image_id = image_info["id"]
image_annotations = list(
    filter(lambda ann: ann["image_id"] == image_id, test_annotations["annotations"])
)

image_path = DATASET_ROOT.joinpath("test", "images", image_info["file_name"])
output_path = PROJECT_ROOT.joinpath("artifacts", "inference", "eda_test_sample.jpg")
visualize_bboxes(image_path, image_annotations, category_id_to_name, output_path=output_path)
# Show both paths as the cell output.
image_path, output_path


### Implementation Plan

- **Data pipeline**: maintain dual-format annotations (COCO + YOLO) and expose validation utilities for integrity checks.
- **FCOS branch**: customise MMDetection config (`configs/fcos/fcos_minecraft.py`), fine-tune with mixed precision, and log metrics to `artifacts/fcos/`.
- **YOLO branch**: leverage Ultralytics YOLOv8s with the generated YOLO labels, track outputs in `artifacts/yolo/`.
- **Benchmarking**: consolidate metrics (mAP, mAP@50, precision, recall, F1, FPS) into comparison tables and figures.
- **Inference layer**: produce image galleries and video runs for both detectors, persisting artefacts under `artifacts/inference/` and `artifacts/videos/`.
- **Reporting**: compile visualisations, qualitative examples, and conclusions into a PDF report and summarise findings in the project README.


## 1. FCOS Baseline & Fine-tuning

> TODO: Configure MMDetection, run pretrained inference, launch fine-tuning, and log metrics.


In [None]:
# FCOS fine-tuning helper setup
import torch
from src.training.fcos_trainer import FCOSConfig, FCOSFineTuner

# Config file and output directory used by the FCOS helper.
fcos_config_path = PROJECT_ROOT.joinpath("configs", "fcos", "fcos_minecraft.py")
fcos_work_dir = PROJECT_ROOT.joinpath("artifacts", "fcos")

# Use the first ``fcos*.pth`` under checkpoints/ in sorted order, or None when
# nothing matches.
checkpoint_candidates = sorted(PROJECT_ROOT.joinpath("checkpoints").glob("fcos*.pth"))
fcos_checkpoint = next(iter(checkpoint_candidates), None)

# Prefer the first CUDA device when one is available.
if torch.cuda.is_available():
    fcos_device = "cuda:0"
else:
    fcos_device = "cpu"

fcos_cfg = FCOSConfig(
    config_path=fcos_config_path,
    work_dir=fcos_work_dir,
    checkpoint_path=fcos_checkpoint,
    device=fcos_device,
)

print(
    f"Using FCOS config: {fcos_config_path}",
    f"Work dir: {fcos_work_dir}",
    f"Pretrained checkpoint: {fcos_checkpoint}",
    sep="\n",
)
fcos_trainer = FCOSFineTuner(fcos_cfg)



In [None]:
# FCOS pretrained inference example
# Pretrained inference needs a local checkpoint; stop early with instructions
# when the setup cell did not find one.
if fcos_checkpoint is None:
    raise FileNotFoundError(
        "Place a pretrained FCOS checkpoint in the checkpoints/ folder (e.g. fcos_r50_caffe_fpn_gn-head_1x_coco.pth)"
    )

# Run the detector on the first image listed in the validation annotations.
fcos_sample_image = DATASET_ROOT.joinpath(
    "val", "images", val_annotations["images"][0]["file_name"]
)
fcos_pretrained_output = PROJECT_ROOT.joinpath("artifacts", "inference", "test_pretrained.jpg")
fcos_trainer.inference_on_image(
    fcos_sample_image,
    fcos_pretrained_output,
    checkpoint_path=fcos_checkpoint,
)
fcos_pretrained_output


In [None]:
# YOLO fine-tuning helper setup
from src.training.yolo_trainer import YOLOConfig, YOLOFineTuner

# Use the local weights file when present; otherwise pass the bare model name.
_local_yolo_weights = PROJECT_ROOT.joinpath("checkpoints", "yolov8s.pt")
if _local_yolo_weights.exists():
    yolo_weights = _local_yolo_weights
else:
    print("Weights not found locally; Ultralytics will download yolov8s.pt on demand.")
    yolo_weights = "yolov8s.pt"

# Prefer the first CUDA device when one is available.
if torch.cuda.is_available():
    yolo_device = "cuda:0"
else:
    yolo_device = "cpu"

yolo_cfg = YOLOConfig(
    weights=str(yolo_weights),
    data_yaml=DATASET_ROOT.joinpath("data_coco.yaml"),
    project_dir=PROJECT_ROOT.joinpath("artifacts", "yolo"),
    device=yolo_device,
    epochs=50,
    imgsz=512,
    batch=16,
    name="minecraft_yolov8s",
)

yolo_trainer = YOLOFineTuner(yolo_cfg)



In [None]:
# YOLO pretrained inference example
# Prepare the trainer, then run it on the first image listed in the validation
# annotations; the returned path is shown as the cell output.
yolo_trainer.setup()
yolo_sample_image = DATASET_ROOT.joinpath(
    "val", "images", val_annotations["images"][0]["file_name"]
)
yolo_inference_dir = PROJECT_ROOT.joinpath("artifacts", "inference", "yolo_val")
yolo_output_path = yolo_trainer.inference_on_image(
    yolo_sample_image,
    yolo_inference_dir,
)
yolo_output_path


In [None]:
# YOLO fine-tuning (run when ready)
# yolo_trainer.train()



In [None]:
# YOLO metrics summary (populate after training)
from src.evaluation.metrics import load_yolo_metrics, summarise_yolo_results

# Ask the trainer for the path of its validation metrics; the summary is only
# built when that file already exists.
yolo_results_path = yolo_trainer.validation_metrics()
if yolo_results_path.exists():
    yolo_results_df = load_yolo_metrics(yolo_results_path)
    yolo_summary = summarise_yolo_results(yolo_results_df)
    # A bare expression nested inside ``if`` is never rendered by the notebook
    # (only a cell's last top-level expression is), so display it explicitly.
    display(yolo_summary)
else:
    print("Run YOLO fine-tuning to generate artifacts/yolo/results.csv.")



In [None]:
# Metrics comparison table
from dataclasses import asdict

# One row per model whose summary variable already exists in this session.
comparison_rows = [
    {"model": model_label, **asdict(globals()[summary_name])}
    for model_label, summary_name in (
        ("fcos", "fcos_summary"),
        ("yolov8s", "yolo_summary"),
    )
    if summary_name in globals()
]

if not comparison_rows:
    print("Populate fcos_summary and yolo_summary after training runs.")
else:
    comparison_df = pd.DataFrame(comparison_rows)
    # Write the table next to the other metric artifacts, creating the folder
    # on first use.
    metrics_csv = PROJECT_ROOT.joinpath("artifacts", "metrics", "metrics_comparison.csv")
    metrics_csv.parent.mkdir(parents=True, exist_ok=True)
    comparison_df.to_csv(metrics_csv, index=False)
    display(comparison_df)



In [None]:
# Image inference utilities for qualitative comparison
from itertools import islice

def _resolve_fcos_checkpoint() -> Path | None:
    """Return the preferred FCOS checkpoint that exists on disk, or ``None``.

    Lookup order:

    1. The fixed names ``best_coco_bbox_mAP.pth``, ``epoch_12.pth`` and
       ``latest.pth`` inside ``fcos_cfg.work_dir``.
    2. Any ``best_*.pth`` and then any ``epoch_*.pth`` found in the work
       directory, highest trailing number first. This covers runs whose epoch
       count is not 12 and best-checkpoint files that carry an epoch suffix.
       NOTE(review): the exact file names depend on the installed MMDetection
       version -- confirm against a real work directory.
    3. The pretrained ``fcos_checkpoint`` chosen in the setup cell.

    Returns:
        Path of the first existing candidate, or ``None`` when none exists.
    """
    work_dir = Path(fcos_cfg.work_dir)

    def _trailing_number(path: Path) -> int:
        # "epoch_7" -> 7; names without a numeric suffix sort last.
        suffix = path.stem.rpartition("_")[2]
        return int(suffix) if suffix.isdigit() else -1

    candidates = [
        work_dir / "best_coco_bbox_mAP.pth",
        work_dir / "epoch_12.pth",
        work_dir / "latest.pth",
    ]
    # Discovered checkpoints rank below the fixed names (so existing setups
    # resolve exactly as before) but above the pretrained fallback, so a
    # fine-tuned model is never silently replaced by the pretrained weights.
    candidates.extend(sorted(work_dir.glob("best_*.pth"), key=_trailing_number, reverse=True))
    candidates.extend(sorted(work_dir.glob("epoch_*.pth"), key=_trailing_number, reverse=True))
    candidates.append(fcos_checkpoint)

    for candidate in candidates:
        # ``fcos_checkpoint`` may be None when no pretrained file was found.
        if candidate and Path(candidate).exists():
            return Path(candidate)
    return None


def generate_fcos_gallery(split: str = "test", limit: int = 5) -> list[Path]:
    """Run FCOS inference on the first ``limit`` images of ``split``.

    Each image is passed to ``fcos_trainer.inference_on_image`` with an output
    path of the same file name under ``artifacts/inference/fcos``.

    Args:
        split: One of ``"train"``, ``"val"`` or ``"test"``.
        limit: Maximum number of images, taken in annotation order.

    Returns:
        The output paths handed to the trainer, in processing order.

    Raises:
        FileNotFoundError: If no FCOS checkpoint can be resolved.
        KeyError: If ``split`` is not a known split name.
    """
    weights = _resolve_fcos_checkpoint()
    if weights is None:
        raise FileNotFoundError("No trained FCOS checkpoint found. Run training first.")

    gallery_dir = PROJECT_ROOT / "artifacts" / "inference" / "fcos"
    gallery_dir.mkdir(parents=True, exist_ok=True)

    annotations_by_split = {
        "train": train_annotations,
        "val": val_annotations,
        "test": test_annotations,
    }
    selected = annotations_by_split[split]
    images_dir = DATASET_ROOT / split / "images"

    rendered: list[Path] = []
    for entry in islice(selected["images"], limit):
        file_name = entry["file_name"]
        target = gallery_dir / file_name
        fcos_trainer.inference_on_image(images_dir / file_name, target, checkpoint_path=weights)
        rendered.append(target)
    return rendered


def generate_yolo_gallery(split: str = "test", limit: int = 5, conf: float = 0.25) -> list[Path]:
    """Run YOLO inference on the first ``limit`` images of ``split``.

    Each image is passed to ``yolo_trainer.inference_on_image`` together with
    the ``artifacts/inference/yolo`` directory. A notice is printed when the
    trainer has no recorded run (``last_run_dir`` is ``None``).

    Args:
        split: One of ``"train"``, ``"val"`` or ``"test"``.
        limit: Maximum number of images, taken in annotation order.
        conf: Confidence value forwarded to the trainer.

    Returns:
        Whatever ``inference_on_image`` returned for each image, in order.

    Raises:
        KeyError: If ``split`` is not a known split name.
    """
    if yolo_trainer.last_run_dir is None:
        print("YOLO model has not been fine-tuned yet; using current weights for inference.")

    gallery_dir = PROJECT_ROOT / "artifacts" / "inference" / "yolo"
    gallery_dir.mkdir(parents=True, exist_ok=True)

    annotations_by_split = {
        "train": train_annotations,
        "val": val_annotations,
        "test": test_annotations,
    }
    selected = annotations_by_split[split]
    images_dir = DATASET_ROOT / split / "images"

    return [
        yolo_trainer.inference_on_image(images_dir / entry["file_name"], gallery_dir, conf=conf)
        for entry in islice(selected["images"], limit)
    ]



In [None]:
# Video inference (execute after fine-tuning)
# Input clip and the two per-model output files, which share one folder.
minecraft_video = DATASET_ROOT.joinpath("video.mp4")
fcos_video_output = PROJECT_ROOT.joinpath("artifacts", "videos", "fcos_inference.mp4")
yolo_video_output = fcos_video_output.with_name("yolo_inference.mp4")

# fcos_trainer.inference_on_video(minecraft_video, fcos_video_output)
# yolo_trainer.inference_on_video(minecraft_video, yolo_video_output)



In [None]:
# Utility functions to benchmark FPS on image batches
import time
from mmdet.apis import inference_detector


def benchmark_fcos_fps(split: str = "val", limit: int = 50, score_thr: float = 0.25) -> float:
    """Measure FCOS inference throughput in images per second.

    The first ``limit`` images of ``split`` are run through
    ``inference_detector`` one at a time. One untimed warm-up pass is run
    first so the measurement excludes cold-start work, and CUDA is
    synchronised around the timed loop so queued GPU work is counted.
    Image loading from disk is part of the timed work.

    Args:
        split: One of ``"train"``, ``"val"`` or ``"test"``.
        limit: Maximum number of images to time, taken in annotation order.
        score_thr: Accepted for interface compatibility; the timing loop does
            not use it.

    Returns:
        Images processed per second, or ``0.0`` when there is nothing to time.

    Raises:
        FileNotFoundError: If no FCOS checkpoint can be resolved.
        KeyError: If ``split`` is not a known split name.
    """
    import torch  # local import so the function does not rely on another cell

    checkpoint = _resolve_fcos_checkpoint()
    if checkpoint is None:
        raise FileNotFoundError("FCOS checkpoint not available for FPS benchmark.")

    model = fcos_trainer._ensure_model(checkpoint)
    split_annotations = {
        "train": train_annotations,
        "val": val_annotations,
        "test": test_annotations,
    }[split]

    images_dir = DATASET_ROOT / split / "images"
    image_paths = [
        images_dir / info["file_name"]
        for info in islice(split_annotations["images"], limit)
    ]
    if not image_paths:
        return 0.0

    sync_cuda = torch.cuda.is_available()

    # Warm-up: keep one-off initialisation out of the measurement.
    inference_detector(model, str(image_paths[0]))
    if sync_cuda:
        torch.cuda.synchronize()

    start = time.perf_counter()
    for image_path in image_paths:
        inference_detector(model, str(image_path))
    if sync_cuda:
        # CUDA kernels run asynchronously; wait for them before stopping the clock.
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start
    return len(image_paths) / elapsed if elapsed else 0.0


def benchmark_yolo_fps(split: str = "val", limit: int = 50, conf: float = 0.25) -> float:
    """Measure YOLO inference throughput in images per second.

    The first ``limit`` images of ``split`` are run through
    ``yolo_trainer.model.predict`` one at a time, using the device and image
    size from ``yolo_cfg``. One untimed warm-up pass is run first so the
    measurement excludes cold-start work, and CUDA is synchronised around the
    timed loop, matching the FCOS benchmark. Image loading from disk is part
    of the timed work.

    Args:
        split: One of ``"train"``, ``"val"`` or ``"test"``.
        limit: Maximum number of images to time, taken in annotation order.
        conf: Confidence value forwarded to ``predict``.

    Returns:
        Images processed per second, or ``0.0`` when there is nothing to time.

    Raises:
        KeyError: If ``split`` is not a known split name.
    """
    import torch  # local import so the function does not rely on another cell

    # Load the model on demand when the setup cell has not been run.
    if yolo_trainer.model is None:
        yolo_trainer.setup()
    assert yolo_trainer.model is not None

    split_annotations = {
        "train": train_annotations,
        "val": val_annotations,
        "test": test_annotations,
    }[split]

    images_dir = DATASET_ROOT / split / "images"
    image_paths = [
        images_dir / info["file_name"]
        for info in islice(split_annotations["images"], limit)
    ]
    if not image_paths:
        return 0.0

    # Same settings for the warm-up and the timed calls.
    predict_kwargs = dict(
        device=yolo_cfg.device,
        imgsz=yolo_cfg.imgsz,
        conf=conf,
        save=False,
        verbose=False,
    )
    sync_cuda = torch.cuda.is_available()

    # Warm-up: keep one-off initialisation out of the measurement.
    yolo_trainer.model.predict(source=str(image_paths[0]), **predict_kwargs)
    if sync_cuda:
        torch.cuda.synchronize()

    start = time.perf_counter()
    for image_path in image_paths:
        yolo_trainer.model.predict(source=str(image_path), **predict_kwargs)
    if sync_cuda:
        # Wait for any queued GPU work before stopping the clock.
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start
    return len(image_paths) / elapsed if elapsed else 0.0



In [None]:
# FCOS fine-tuning (run when ready)
# fcos_trainer.train()



In [None]:
# FCOS metrics summary (populate after training)
from src.evaluation.metrics import load_fcos_metrics, summarise_fcos_results

# Ask the trainer for the path of its exported metrics log; the summary is
# only built when that file exists and contains entries.
fcos_log_path = fcos_trainer.export_metrics()
if fcos_log_path.exists():
    fcos_log_entries = load_fcos_metrics(fcos_log_path)
    if fcos_log_entries:
        fcos_summary = summarise_fcos_results(fcos_log_entries)
        # A bare expression nested inside ``if`` is never rendered by the
        # notebook (only a cell's last top-level expression is), so display
        # it explicitly.
        display(fcos_summary)
    else:
        print("No validation entries found in log.json yet.")
else:
    print("Run training to generate artifacts/fcos/log.json.")



## 2. YOLO Baseline & Fine-tuning

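> TODO: Prepare Ultralytics dataset YAML, run pretrained inference, fine-tune YOLOv8s, and capture metrics. (Note: the YOLO setup, pretrained-inference, fine-tuning, and metrics cells currently sit under section 1 above.)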


## 3. Comparative Evaluation & Reporting

> TODO: Summarize metric tables, plot comparisons, run video inference, and export report artifacts.
