# Initial preparations

In [None]:
!pip install ultralytics
from ultralytics import YOLO

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write training checkpoints to it.
from google.colab import drive
drive.mount('/content/drive')

# Unzip the dataset & add augmented data

In [None]:
!unzip ./drive/MyDrive/vang/classification_dataset.zip -d .

In [None]:
!pip install torchvision opencv-python tqdm

In [None]:
import os
import random
from pathlib import Path
from PIL import Image, ImageEnhance, ImageFilter
import torchvision.transforms as T
from tqdm import tqdm

# --- CONFIG ---
# Training split of the dataset; each sub-directory is handled as one class.
DATA_DIR = Path("classification_dataset/train")
# Classes holding fewer images than this are topped up with augmented copies.
MIN_IMAGES = 200
# File suffixes counted as images (compared against the lower-cased suffix).
EXTS = [".jpg", ".jpeg", ".png"]

# --- Augmentation pipeline ---
# Random geometric and colour transforms, applied in the listed order to every
# source image before the optional blur/sharpen step in apply_custom_augment.
# NOTE(review): RandomRotation and RandomAffine(degrees=10) both rotate, so the
# two rotations stack; the exposed borders presumably receive torchvision's
# default fill value — confirm this is acceptable for the classifier.
base_transforms = T.Compose([
    T.RandomResizedCrop(512, scale=(0.8, 1.0)),  # random crop (80-100% of the area) resized to 512
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.2),
    T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
    T.RandomRotation(degrees=15),
    T.RandomAffine(degrees=10, translate=(0.05, 0.05), scale=(0.95, 1.05), shear=5),
])

def apply_custom_augment(img):
    """Return a randomly augmented version of ``img``.

    The image is first passed through ``base_transforms``. Afterwards a
    Gaussian blur (radius drawn from 0.5-1.5) and a sharpness change (factor
    drawn from 0.5-2.0) are each applied independently with probability 0.3.
    """
    result = base_transforms(img)

    # Optional blur.
    if random.random() < 0.3:
        blur_radius = random.uniform(0.5, 1.5)
        result = result.filter(ImageFilter.GaussianBlur(radius=blur_radius))

    # Optional sharpness change (factors below 1 soften, above 1 sharpen).
    if random.random() < 0.3:
        sharpness_factor = random.uniform(0.5, 2.0)
        result = ImageEnhance.Sharpness(result).enhance(sharpness_factor)

    return result

# --- Count images ---
def count_images(class_dir):
    """Count the entries of ``class_dir`` whose lower-cased suffix is in ``EXTS``."""
    return sum(1 for entry in class_dir.iterdir() if entry.suffix.lower() in EXTS)

# --- Augment one class ---
def augment_class_dir(class_dir, before):
    """Top up ``class_dir`` with augmented images until it holds ``MIN_IMAGES``.

    Source images are picked at random from the files that are in the folder
    when the function starts; files written during this call are never used
    as sources.

    Args:
        class_dir: ``Path`` of one class folder. Augmented files are written
            next to the originals as ``<source stem>_aug_<n>.jpg``.
        before: Number of images already present in the folder.

    Returns:
        int: Image count after augmentation. It is lower than ``MIN_IMAGES``
        only when the folder has no readable source image left.
    """
    sources = [f for f in class_dir.iterdir() if f.suffix.lower() in EXTS]
    count = before
    i = 0

    # Stop once no usable source remains: an empty folder would make
    # random.choice raise, and a folder of unreadable files would spin forever.
    while count < MIN_IMAGES and sources:
        src_img_path = random.choice(sources)
        try:
            # The context manager closes the file handle; convert() returns an
            # independent, fully loaded image that stays valid afterwards.
            with Image.open(src_img_path) as src:
                img = src.convert('RGB')
        except Exception as err:
            # Best effort: any decode failure just drops this source. Unlike a
            # bare `except`, this leaves KeyboardInterrupt free to stop the cell.
            print(f"⚠️ Skipping unreadable image {src_img_path}: {err}")
            sources.remove(src_img_path)
            continue

        aug_path = class_dir / f"{src_img_path.stem}_aug_{i}.jpg"
        i += 1
        if aug_path.exists():
            # Left over from an earlier run: overwriting it would bump `count`
            # without adding a file, so try the next index instead.
            continue

        apply_custom_augment(img).save(aug_path, quality=95)
        count += 1

    return count

# --- Main ---
def main():
    """Augment under-populated class folders and print a before/after report.

    Visits the sub-directories of ``DATA_DIR`` in sorted order; every class
    with fewer than ``MIN_IMAGES`` images is handed to ``augment_class_dir``.
    """
    print(f"📊 Scanning dataset in {DATA_DIR}")

    summary = []
    class_folders = (entry for entry in sorted(DATA_DIR.iterdir()) if entry.is_dir())
    for folder in class_folders:
        class_name = folder.name
        before = count_images(folder)
        after = before
        if before < MIN_IMAGES:
            print(f"📈 Augmenting '{class_name}': {before} → {MIN_IMAGES}")
            after = augment_class_dir(folder, before)
        summary.append((class_name, before, after))

    print("\n✅ Dataset Balancing Report:")
    for name, before, after in summary:
        # A warning sign marks classes that are still below the target.
        status = "⚠️" if after < MIN_IMAGES else "✅"
        print(f"{status} {name.ljust(20)} : {str(before).rjust(4)} → {str(after).rjust(4)}")


if __name__ == "__main__":
    main()


# Starting the training

## Initial run

In [None]:
# Baseline run: the 's' YOLOv8 classification model with only the dataset,
# epoch count, image size, run name and batch size set explicitly.
# NOTE(review): no `project` is given, so results presumably land in the
# default local runs directory rather than on Drive — confirm.
model = YOLO('yolov8s-cls')

model.train(
    data='classification_dataset',
    epochs=20,
    imgsz=384,
    name='cls_run_2',
    batch=16
)


## Second run: epochs 1-11 (Colab ran out of resources)

In [None]:
# Larger ('l') classification model, trained on the augmented dataset with
# checkpoints written to Google Drive.
model = YOLO('yolov8l-cls.pt')


model.train(
    data='classification_dataset',    # dataset root
    project='/content/drive/MyDrive/yolo_classification_checkpoints',
    name='cls_drive_run_w_augmented',

    # Core training loop
    epochs=50,                        # upper bound; early stopping (patience) may end sooner
    # NOTE(review): the earlier note here said "T4 can handle ~12 @ 640", which
    # does not match batch=16 / imgsz=512 — re-measure before relying on it.
    batch=16,                         # drop to 8 if OOM
    imgsz=512,                        # high-res for subtle features

    # Checkpointing & early stop
    save_period=5,                    # checkpoint every 5 epochs
    patience=10,                      # early stop after 10 stagnating epochs

    # Learning rate schedule
    lr0=0.001,                        # initial learning rate
    lrf=0.01,                         # final LR as a fraction of lr0 (per Ultralytics docs)
    cos_lr=True,                   # cosine decay

    optimizer='AdamW',                # smoother convergence than SGD

    # Augmentation
    # NOTE(review): Ultralytics documents `augment` as a predict-time option;
    # it presumably does not switch training augmentation on or off — confirm.
    augment=True,
    fliplr=0.5,                       # horizontal flips
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
    # NOTE(review): verify that translate/scale/degrees are honoured by the
    # classification augmentation pipeline — TODO confirm.
    translate=0.1, scale=0.5,
    degrees=10,

    # Data loading & verbosity
    workers=4,                        # parallel data loading
    device='cuda:0',
    verbose=True,
)


## Third run: epochs 11-13 (Colab stopped working)

In [None]:
# Continue the interrupted run from a saved checkpoint.
# NOTE(review): 'last_aug_1.pt' is presumably a copy of the previous run's
# last.pt — confirm its origin.
# NOTE(review): with resume=True Ultralytics is documented to restore the
# training arguments stored in the checkpoint, so most values below
# (epochs, name, lr, augmentation) may be ignored — confirm before editing them.
model = YOLO('last_aug_1.pt')


model.train(
    data='classification_dataset',    # dataset root
    project='/content/drive/MyDrive/yolo_classification_checkpoints',
    name='cls_drive_run_w_augmented',

    # Core training loop
    epochs=39,                        # presumably the epochs left of the original 50 — see resume note
    batch=16,                         # drop to 8 if OOM
    imgsz=512,                        # high-res for subtle features

    # Checkpointing & early stop
    save_period=5,                    # checkpoint every 5 epochs
    patience=10,  # early stop after 10 stagnating epochs
    resume=True,                      # pick up optimizer/epoch state from the checkpoint
    # Learning rate schedule
    lr0=0.001,
    lrf=0.01,
    cos_lr=True,                   # cosine decay

    optimizer='AdamW',                # smoother convergence than SGD

    # Augmentation
    # NOTE(review): `augment` is documented as a predict-time option — confirm
    # it has any effect during training.
    augment=True,
    fliplr=0.5,                       # horizontal flips
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
    translate=0.1, scale=0.5,
    degrees=10,

    # Data loading & verbosity
    workers=4,                        # parallel data loading
    device='cuda:0',
    verbose=True,
)


## Final training run epochs 13-18

In [None]:
# Second continuation of the same training run from a newer checkpoint.
# NOTE(review): 'last_aug_2.pt' is presumably the last.pt saved by the
# previous cell — confirm its origin.
# NOTE(review): with resume=True Ultralytics is documented to restore the
# training arguments stored in the checkpoint, so the new `name` and `epochs`
# below may be ignored and output may keep going to the original run
# directory — confirm.
model = YOLO('last_aug_2.pt')


model.train(
    data='classification_dataset',    # dataset root
    project='/content/drive/MyDrive/yolo_classification_checkpoints',
    name='cls_drive_run_w_augmented_v3',

    # Core training loop
    epochs=34,                        # presumably the epochs left of the original 50 — see resume note
    batch=16,                         # drop to 8 if OOM
    imgsz=512,                        # high-res for subtle features

    # Checkpointing & early stop
    save_period=5,                    # checkpoint every 5 epochs
    patience=10,  # early stop after 10 stagnating epochs
    resume=True,                      # pick up optimizer/epoch state from the checkpoint
    # Learning rate schedule
    lr0=0.001,
    lrf=0.01,
    cos_lr=True,                   # cosine decay

    optimizer='AdamW',                # smoother convergence than SGD

    # Augmentation
    # NOTE(review): `augment` is documented as a predict-time option — confirm
    # it has any effect during training.
    augment=True,
    fliplr=0.5,                       # horizontal flips
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
    translate=0.1, scale=0.5,
    degrees=10,

    # Data loading & verbosity
    workers=4,                        # parallel data loading
    device='cuda:0',
    verbose=True,
)


# Testing the model

In [None]:
!pip install pillow numpy

In [None]:
import cv2
import os
from ultralytics import YOLO
from PIL import Image
import numpy as np
import torch


def detect_and_crop_objects(model, image_path, confidence_threshold=0.5, padding=10,
                            iou_threshold=0.45, max_detections=300):
    """Run a detection model on an image and return one padded crop per box.

    Args:
        model: Loaded detection model. It is called as
            ``model(image_path, conf=..., iou=..., max_det=...)`` and its
            results are expected to expose ``boxes.xyxy``.
        image_path: Location of the image to read (string or path-like).
        confidence_threshold: Forwarded to the model as ``conf``.
        padding: Pixels added on every side of a box before cropping; the
            padded box is clamped to the image bounds.
        iou_threshold: Forwarded to the model as ``iou``.
        max_detections: Forwarded to the model as ``max_det``.

    Returns:
        list: RGB ``PIL.Image`` crops in detection order. The list is empty
        when the image cannot be read or nothing usable is detected.
    """
    # str() keeps path-like inputs working with OpenCV builds that only
    # accept plain strings.
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return []

    print(f"Image dimensions: {image.shape}")
    img_height, img_width = image.shape[:2]

    results = model(image_path, conf=confidence_threshold, iou=iou_threshold, max_det=max_detections)

    cropped_images = []
    for result in results:
        boxes = result.boxes
        if boxes is None:
            continue

        for box in boxes.xyxy.cpu().numpy():
            x1, y1, x2, y2 = map(int, box)

            # Grow the box by `padding`, staying inside the image.
            x1 = max(0, x1 - padding)
            y1 = max(0, y1 - padding)
            x2 = min(img_width, x2 + padding)
            y2 = min(img_height, y2 + padding)

            # Integer truncation or clamping can collapse a box to zero area;
            # an empty crop would make cv2.cvtColor raise.
            if x2 <= x1 or y2 <= y1:
                continue

            cropped_object = image[y1:y2, x1:x2]

            # OpenCV arrays are BGR, PIL expects RGB.
            cropped_rgb = cv2.cvtColor(cropped_object, cv2.COLOR_BGR2RGB)
            cropped_images.append(Image.fromarray(cropped_rgb))

    return cropped_images

def classify_image(model, image):
    """
    Return the top-1 prediction of a YOLO classification model for one image.

    Args:
        model: Loaded YOLO classification model; it is called as ``model(image)``
        image: PIL Image object to classify

    Returns:
        dict: ``'class_id'`` (int), ``'class_name'`` (str) and ``'confidence'``
        (float) of the highest-scoring class, taken from the first result that
        carries probabilities. When no result does, ``class_id`` is -1,
        ``class_name`` is ``'unknown'`` and ``confidence`` is 0.0.
    """
    for prediction in model(image):
        scores = prediction.probs
        if scores is None:
            continue

        best_index = int(torch.argmax(scores.data).item())
        best_score = float(torch.max(scores.data).item())

        # Fall back to a synthetic label when the model has no name for the id.
        label = model.names.get(best_index, f"class_{best_index}")

        return {
            'class_id': best_index,
            'class_name': label,
            'confidence': best_score,
        }

    # Nothing produced classification probabilities.
    return {'class_id': -1, 'class_name': 'unknown', 'confidence': 0.0}


def visualize_detections(model_path, image_path, output_path="detections_visualized.jpg", confidence_threshold=0.5):
    """
    Save a copy of the image with the detected bounding boxes drawn on it.

    Args:
        model_path: Weights file passed to ``YOLO(...)``; the model is loaded
            on every call.
        image_path: Image to run detection on.
        output_path: Where the annotated image is written.
        confidence_threshold: Forwarded to the model as ``conf``.

    Returns:
        None. A message reports whether the file was written.
    """
    model = YOLO(model_path)

    results = model(image_path, conf=confidence_threshold)

    # Every result is written to the same output_path, so with several results
    # only the last one remains on disk.
    for result in results:
        annotated_image = result.plot()
        # cv2.imwrite signals failure through its return value, so check it
        # before claiming success.
        if cv2.imwrite(output_path, annotated_image):
            print(f"Annotated image saved to: {output_path}")
        else:
            print(f"Error: Could not save annotated image to {output_path}")




In [None]:
# Two-stage inference: a detection model finds the objects, then the
# classification model labels each crop.
model = YOLO("./detection.pt")
# Best checkpoint of the augmented classification run stored on Drive.
cls_model = YOLO("./drive/MyDrive/yolo_classification_checkpoints/cls_drive_run_w_augmented/weights/best.pt")

# Positional arguments: confidence_threshold=0.5, padding=10.
crops = detect_and_crop_objects(model, "./f35.jpg", 0.5, 10)

# Print one classification dict per detected object.
for crop in crops:
  print(classify_image(cls_model, crop))

