In [None]:
!pip install -q ultralytics

from pathlib import Path
import os, zipfile, shutil, random

# Seed Python's module-level `random` generator once, up front, so that any
# later call into the global `random` functions is repeatable after a fresh
# "Restart & Run All".
random.seed(0)  # for reproducible 80/20 split

# Third-party import: the YOLO model wrapper from the `ultralytics` package
# installed at the top of this cell.
from ultralytics import YOLO

# Simple confirmation that the cell ran to completion.
print("âœ… Ultralytics installed, libs imported.")


In [None]:
# Adjust names here if your uploaded zip names are different
LABEL_ZIP = "pheasants1200.zip"
IMAGE_ZIP = "phesants1200images.zip"  # note spelling

label_zip_path = Path(LABEL_ZIP)
image_zip_path = Path(IMAGE_ZIP)

# Fail early and say exactly which archive(s) are missing and where we looked
# (the paths are relative, so they resolve against the current working directory).
missing_zips = [str(p) for p in (label_zip_path, image_zip_path) if not p.exists()]
if missing_zips:
    raise FileNotFoundError(
        f"Missing zip file(s): {', '.join(missing_zips)} (looked in {Path.cwd()}). "
        "Check the names and upload again."
    )


def _extract_zip(zip_path):
    """Extract ``zip_path`` into a sibling directory named after the archive.

    The target directory is the archive path with its suffix removed
    (``foo.zip`` -> ``foo``); it is created if it does not exist yet.

    Args:
        zip_path: ``pathlib.Path`` of the archive to extract.

    Returns:
        ``pathlib.Path`` of the directory the archive was extracted into.
    """
    out_dir = zip_path.with_suffix("")
    out_dir.mkdir(exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(out_dir)
    return out_dir


def _is_data_file(path):
    """Return True for regular files that are not macOS archive metadata.

    Archives created on macOS commonly carry a ``__MACOSX/`` folder with
    ``._<name>`` companion files that share the real files' extensions.
    They are not images or labels, so they must not take part in pairing.
    """
    return (
        path.is_file()
        and "__MACOSX" not in path.parts
        and not path.name.startswith("._")
    )


# Extract labels
label_dir = _extract_zip(label_zip_path)
print(f"âœ… Extracted labels -> {label_dir}")

# Extract images
image_dir = _extract_zip(image_zip_path)
print(f"âœ… Extracted images -> {image_dir}")

# Collect all label and image files (recursively) from those two dirs only
image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}
label_ext = ".txt"

# Extensions are compared case-insensitively; sorting makes the lists deterministic
# regardless of the order in which the filesystem yields directory entries.
image_files = sorted(
    p for p in image_dir.rglob("*")
    if _is_data_file(p) and p.suffix.lower() in image_exts
)
label_files = sorted(
    p for p in label_dir.rglob("*")
    if _is_data_file(p) and p.suffix.lower() == label_ext
)

print(f"\nFound {len(image_files)} images and {len(label_files)} label files.")


In [None]:
# Root of YOLO dataset
DATA_ROOT = Path("pheasants_yolo")

# Create the staging ("all") folders and the final train/val folders.
for sub in ["images/all", "labels/all", "images/train", "images/val", "labels/train", "labels/val"]:
    (DATA_ROOT / sub).mkdir(parents=True, exist_ok=True)


def _copy_flat(files, dest_dir):
    """Copy ``files`` into ``dest_dir`` using only their base names.

    Because sub-folders are flattened, two sources with the same base name end
    up at the same destination and the later copy overwrites the earlier one.

    Returns:
        List of base names that occurred more than once (empty if none).
    """
    seen_names = set()
    clashes = []
    for src in files:
        if src.name in seen_names:
            clashes.append(src.name)
        seen_names.add(src.name)
        shutil.copy2(src, dest_dir / src.name)
    return clashes


# Copy all images and labels into "all" folders
for kind, files, sub in (("image", image_files, "images/all"), ("label", label_files, "labels/all")):
    duplicate_names = _copy_flat(files, DATA_ROOT / sub)
    if duplicate_names:
        # Surface the otherwise silent overwrite so the user can fix the archive layout.
        print(
            f"WARNING: {len(duplicate_names)} {kind} file name(s) occur in more than one "
            f"sub-folder and were overwritten, e.g. {duplicate_names[0]}"
        )

# Pair by stem (only keep those that have BOTH image and label)
image_stems = {p.stem for p in (DATA_ROOT / "images/all").iterdir() if p.is_file()}
label_stems = {p.stem for p in (DATA_ROOT / "labels/all").iterdir() if p.is_file()}
common_ids = sorted(image_stems & label_stems)

print(f"âœ… Found {len(common_ids)} matching image-label pairs.")

if len(common_ids) == 0:
    raise RuntimeError("No matching stems between images and labels. Check your filenames.")

# 80/20 split
# Shuffle with a private, freshly seeded generator instead of the global `random`
# state: the split then depends only on the (sorted) ids, so re-running this cell
# on its own reproduces the same train/val assignment rather than dealing files
# into different folders than the previous run did.
split_rng = random.Random(0)
split_rng.shuffle(common_ids)
n_total = len(common_ids)
n_train = int(0.8 * n_total)
train_ids = set(common_ids[:n_train])
val_ids   = set(common_ids[n_train:])

print(f"ðŸ“Š Split: {len(train_ids)} train, {len(val_ids)} val")

def move_image(stem, src_dir, dst_dir, exts):
    """Move the first image named ``stem`` + one of ``exts`` from src_dir to dst_dir.

    The exact names ``<stem><ext>`` are tried first, in the iteration order of
    ``exts``. If none exists, the directory is scanned for a file with the same
    stem whose extension matches one of ``exts`` ignoring case, so that e.g.
    ``IMG_01.JPG`` is still found on a case-sensitive filesystem.

    Args:
        stem: File name without extension.
        src_dir: Directory to look in.
        dst_dir: Directory to move the file into (the file name is kept).
        exts: Iterable of extensions including the leading dot, e.g. ``".jpg"``.

    Returns:
        True if a file was moved, False if no matching file was found.
    """
    src_dir = Path(src_dir)
    dst_dir = Path(dst_dir)
    exts = tuple(exts)  # iterated more than once below

    # Fast path: exact name with the extension spelled as given.
    match = None
    for ext in exts:
        candidate = src_dir / f"{stem}{ext}"
        if candidate.is_file():
            match = candidate
            break

    # Slow path: same stem, extension differing only in letter case.
    if match is None and src_dir.is_dir():
        wanted = {ext.lower() for ext in exts}
        for entry in sorted(src_dir.iterdir()):
            if entry.is_file() and entry.stem == stem and entry.suffix.lower() in wanted:
                match = entry
                break

    if match is None:
        return False

    shutil.move(str(match), str(dst_dir / match.name))
    return True

all_img_dir = DATA_ROOT / "images/all"
all_lbl_dir = DATA_ROOT / "labels/all"

# Move every pair out of the staging folders into its split.
# A label is moved only after its image was moved, so a split never receives a
# label without its image; anything that could not be moved is reported.
images_not_moved = []
labels_not_found = []
for split_name, split_ids in (("train", train_ids), ("val", val_ids)):
    img_dst_dir = DATA_ROOT / "images" / split_name
    lbl_dst_dir = DATA_ROOT / "labels" / split_name
    for sid in sorted(split_ids):
        if not move_image(sid, all_img_dir, img_dst_dir, image_exts):
            images_not_moved.append(sid)
            continue
        lbl_src = all_lbl_dir / f"{sid}.txt"
        if lbl_src.exists():
            shutil.move(str(lbl_src), str(lbl_dst_dir / lbl_src.name))
        else:
            labels_not_found.append(sid)

if images_not_moved:
    print(
        f"WARNING: {len(images_not_moved)} image(s) could not be moved and were left in "
        f"{all_img_dir} together with their labels, e.g. {images_not_moved[0]}"
    )
if labels_not_found:
    print(
        f"WARNING: {len(labels_not_found)} image(s) were moved without a '<stem>.txt' label, "
        f"e.g. {labels_not_found[0]}"
    )

# Clean up 'all' folders if empty
for sub in ["images/all", "labels/all"]:
    staging_dir = DATA_ROOT / sub
    if staging_dir.exists() and not any(staging_dir.iterdir()):
        staging_dir.rmdir()

# Report how many files ended up in each split folder.
print("\nâœ… Final counts:")
for title, sub, pattern in (
    ("Train images", "images/train", "*"),
    ("Train labels", "labels/train", "*.txt"),
    ("Val images", "images/val", "*"),
    ("Val labels", "labels/val", "*.txt"),
):
    print(f" {title}:", len(list((DATA_ROOT / sub).glob(pattern))))


In [None]:
# Detect distinct class IDs from all label files
# The first whitespace-separated field of every non-empty line is read as the class ID.
class_ids = set()
malformed_lines = 0  # lines whose first field is not an integer

for lbl_file in sorted((DATA_ROOT / "labels").rglob("*.txt")):
    with open(lbl_file, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # blank line
            try:
                class_ids.add(int(fields[0]))
            except ValueError:
                # Keep going, but count the line so the problem is reported below
                # instead of being dropped silently.
                malformed_lines += 1

if malformed_lines:
    print(f"WARNING: ignored {malformed_lines} label line(s) whose first field is not an integer class ID.")

if not class_ids:
    raise RuntimeError("No class IDs found in label files. Check that labels are in YOLO format.")

# IDs are treated as zero-based indices, so the class count is the highest ID plus one.
num_classes = max(class_ids) + 1
print(f"âœ… Detected {num_classes} classes with IDs: {sorted(class_ids)}")

# Build data.yaml
yaml_lines = [
    f"path: {DATA_ROOT.resolve()}",  # absolute path
    "train: images/train",
    "val: images/val",
    "names:",
]
# Placeholder names; change to 'male', 'female', etc. if you like
yaml_lines.extend(f"  {i}: class_{i}" for i in range(num_classes))

data_yaml_path = DATA_ROOT / "data.yaml"
# Explicit UTF-8 so the file content does not depend on the platform's locale encoding.
data_yaml_path.write_text("\n".join(yaml_lines), encoding="utf-8")

print(f"\nâœ… Wrote data.yaml at: {data_yaml_path}")
print("\n----- data.yaml -----")
# read_text() opens and closes the file itself, so no handle is left open.
print(data_yaml_path.read_text(encoding="utf-8"))


In [None]:
# The dataset description is handed to the trainer as a plain string path.
data_yaml = str(data_yaml_path)

# Start from the pretrained YOLOv8 nano checkpoint
# (swap in yolov8s.pt / m / l for a larger model).
model = YOLO("yolov8n.pt")

# All training settings gathered in one place; adjust epochs as you like.
train_args = dict(
    data=data_yaml,
    epochs=50,
    imgsz=640,
    batch=16,
    name="pheasants1200_yolov8n",
    project="runs_pheasants",
)
results = model.train(**train_args)

print("\nâœ… Training done. Check 'runs_pheasants/pheasants1200_yolov8n' for weights and plots.")


In [None]:
# Evaluate the model on the val split listed in data.yaml, then print the
# heading and the metrics object on separate lines.
metrics = model.val(data=data_yaml)
print("\nâœ… Validation metrics:", metrics, sep="\n")


In [None]:
from glob import glob

# Take the first 8 entries (sorted by path) of the validation image folder as a
# quick visual sanity check.
val_pattern = DATA_ROOT / "images/val" / "*"
val_images = sorted(glob(str(val_pattern)))[:8]

print("Sample val images:")
for img_path in val_images:
    print(" -", img_path)

# Inference settings; save=True writes annotated copies of the images to disk
# (under runs/detect/... according to the original note).
predict_args = {
    "source": val_images,
    "imgsz": 640,
    "conf": 0.25,
    "save": True,
}
pred_results = model.predict(**predict_args)

print("\nâœ… Predictions saved under 'runs/detect' in the file browser.")
