In [2]:
import os
import numpy as np
import pandas as pd
import cv2
from ultralytics import YOLO
from tqdm import tqdm  # nice progress bars
import time
import matplotlib.pyplot as plt
import random

import torch.nn as nn


# RAW MODELS TRAINING

from glob import glob
import gc
import seaborn as sns

# Pytorch
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.models import resnet50, ResNet50_Weights
from torchvision import datasets, transforms, models

from sklearn.metrics import classification_report, confusion_matrix

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import seaborn as sns
import csv


YOLO Transfer Train

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

detector = YOLO("yolo11n.pt")  # load a pretrained model (recommended for training)
# detector.train(
#     data='dataset/data.yaml', 
#     epochs=100, 
#     imgsz=640, 
#     batch=16, 
#     name='yolo_transfer_model', 
#     device=0, 
#     patience=50, 
# ) OLD ONE
detector.train(data='dataset/data.yaml', 
               epochs=100, 
               imgsz=640, 
               batch=16, 
               name='yolo_transfer_model', 
               device=0, 
               patience=50, 
               pretrained=False,
               hsv_h=0.015,
               hsv_s=0.7,
               hsv_v=0.4,
               degrees=90, 
               translate=0.2,
               scale=0.5,
               shear=20,
               perspective=0.0,
               flipud=1,
               fliplr=1,
               bgr=0.0,
               mosaic=1.0,
               mixup=0.0,
               cutmix=0.0,
               copy_paste=0.0,
               copy_paste_mode = 'flip',
               auto_augment = 'randaugment',
               erasing=0.4,
               ) # Train the mode

detector = YOLO("runs/detect/yolo_transfer_model/weights/best.pt")
metrics = detector.val()
print(metrics)


Using device: cuda
Ultralytics 8.3.228 üöÄ Python-3.13.9 torch-2.9.1+cu128 CUDA:0 (NVIDIA RTX 3500 Ada Generation Laptop GPU, 11874MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset/data.yaml, degrees=90, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=1, flipud=1, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo_transfer_model2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, over

FileNotFoundError: [Errno 2] No such file or directory: 'runs/detect/yolo_transfer_model/weights/best.pt'

In [None]:
# Clean Up GPU

# 1. Run Python garbage collector
gc.collect()

# 2. Clear PyTorch cached memory
torch.cuda.empty_cache()

# 3. Reset PyTorch CUDA memory allocator
torch.cuda.synchronize()

In [5]:
detector = YOLO("runs/detect/yolo_transfer_model2/weights/best.pt")
metrics = detector.val()
print(metrics)


Ultralytics 8.3.228 üöÄ Python-3.13.9 torch-2.9.1+cu128 CUDA:0 (NVIDIA RTX 3500 Ada Generation Laptop GPU, 11874MiB)
YOLO11n summary (fused): 100 layers, 2,584,492 parameters, 0 gradients, 6.3 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 199.8¬±60.0 MB/s, size: 41.0 KB)
[K[34m[1mval: [0mScanning /home/cormac/cv/dataset/valid/labels.cache... 1095 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1095/1095 6.4Mit/s 0.0s0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 69/69 19.3it/s 3.6s0.1s
                   all       1095       1341      0.812        0.7      0.752      0.441
                  Ants         96        178       0.77      0.646      0.731      0.281
                  Bees         99        110      0.949      0.842       0.87       0.43
               Beetles         89        100      0.678      0.505      0.577      0.291
  

YOLO Raw Train

In [None]:
detector = YOLO("yolo11n.yaml") # Create a new model from YAML

detector.train(data='dataset/data.yaml', 
               epochs=100, 
               imgsz=640, 
               batch=16, 
               name='yolo_raw_model', 
               device=0, 
               patience=50, 
               pretrained=False,
               hsv_h=0.015,
               hsv_s=0.7,
               hsv_v=0.4,
               degrees=90, 
               translate=0.2,
               scale=0.5,
               shear=20,
               perspective=0.0,
               flipud=1,
               fliplr=1,
               bgr=0.0,
               mosaic=1.0,
               mixup=0.0,
               cutmix=0.0,
               copy_paste=0.0,
               copy_paste_mode = 'flip',
               auto_augment = 'randaugment',
               erasing=0.4,
               ) # Train the mode

detector = YOLO("runs/detect/yolo_raw_model/weights/best.pt")
metrics = detector.val()
print(metrics)

In [None]:
# Clean Up GPU

# 1. Run Python garbage collector
gc.collect()

# 2. Clear PyTorch cached memory
torch.cuda.empty_cache()

# 3. Reset PyTorch CUDA memory allocator
torch.cuda.synchronize()


In [None]:
# Function To Evaluate Model

def evaluate(model, dataloader, class_names, device, name):
    all_preds = []
    all_labels = []


    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1).cpu()


            all_preds.extend(preds)
            all_labels.extend(labels)


    print("\n===== Classification Report =====\n")
    print(classification_report(all_labels, all_preds, target_names=class_names))


    cm = confusion_matrix(all_labels, all_preds)


    plt.figure(figsize=(7, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.tight_layout()
    plt.savefig(f"{name}confusion_matrix.png")
    print("Confusion matrix saved as confusion_matrix.png")


    per_class_accuracy = cm.diagonal() / cm.sum(axis=1)
    print("\nPer-class accuracy:")
    for cls, acc in zip(class_names, per_class_accuracy):
        print(f"{cls}: {acc:.4f}")

    
def trainResNet50Model(train_loader, val_loader, test_loader, dataset, weights, name, num_epochs=70, learning_rate=0.001):
    logfile = f"resnet_{name}.csv"

    # Create and write header
    with open(logfile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["epoch", "train_loss", "val_loss", "val_acc"])


    resnet_model = models.resnet50(weights=weights)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = resnet_model.to(device)

    # Define loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        resnet_model.train()
        train_loss = 0

        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            imgs, labels = imgs.to(device), labels.to(device)

            optimiser.zero_grad()
            outputs = resnet_model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimiser.step()

            train_loss += loss.item()

        # Validation
        resnet_model.eval()
        val_loss = 0
        val_correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = resnet_model(imgs)

                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = outputs.max(1)
                val_correct += predicted.eq(labels).sum().item()
                total += labels.size(0)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss/len(train_loader):.4f}")
        print(f"Val Loss:   {val_loss/len(val_loader):.4f}")
        print(f"Val Acc:    {100*val_correct/total:.2f}%\n")

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss   = val_loss / len(val_loader)
        val_acc        = 100 * val_correct / total

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Val Loss:   {avg_val_loss:.4f}")
        print(f"Val Acc:    {val_acc:.2f}%\n")

        with open(logfile, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([epoch+1, avg_train_loss, avg_val_loss, val_acc])


        torch.save(model.state_dict(), f"resnet50_{name}_custom.pth")
        evaluate(model, test_loader, dataset.classes, device, name)

        # Stop GPU Exploding
        del model
        del optimiser
        del train_loader
        del val_loader
        gc.collect()
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        return




In [None]:
res_data_root = "cropped_dataset"     
batch_size = 32               # Higher batch is okay for ResNet50
num_workers = 8              # Use 0 if RAM is small; 2 is safe
num_epochs = 70               # Enough for transfer learning
learning_rate = 1e-4
img_size = 224                # ResNet50 requires 224x224 input

temp_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

dataset = datasets.ImageFolder("cropped_dataset/train", transform=temp_transform)
loader = DataLoader(dataset, batch_size=64, shuffle=False)

mean = 0.
std = 0.
total_images = 0

for images, _ in loader:
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1), -1)
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)
    total_images += batch_samples

mean /= total_images
std /= total_images

print("MEAN:", mean)
print("STD:", std)


# Data transformations (augmentation + normalization)
train_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    
    # --- YOLO-style color jitter ---
    transforms.ColorJitter(
        hue=0.015,        # hsv_h
        saturation=0.7,   # hsv_s
        brightness=0.4    # hsv_v
    ),
    
    # --- Spatial augmentations ---
    transforms.RandomRotation(90),          # degrees=90
    transforms.RandomAffine(
        degrees=0,
        translate=(0.2, 0.2),                # translate=0.2
        scale=(0.5, 1.5),                    # scale=0.5 (min)
        shear=20                             # shear=20
    ),
    transforms.RandomHorizontalFlip(p=1.0),  # fliplr=1
    transforms.RandomVerticalFlip(p=1.0),    # flipud=1
    
    # --- YOLO autoaugment equivalent ---
    transforms.RandAugment(),                # auto_augment = "randaugment"
    
    # --- Random erasing applied after tensor ---
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.4),         # erasing=0.4
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Load datasets

train_ds = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=train_tf)
val_ds   = datasets.ImageFolder(os.path.join(res_data_root, "valid"), transform=val_tf)
test_ds  = datasets.ImageFolder(os.path.join(res_data_root, "test"),  transform=val_tf)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=num_workers)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader   = DataLoader(test_ds,   batch_size=batch_size, shuffle=False, num_workers=num_workers)


In [None]:
resnetRawModel = trainResNet50Model(train_loader, val_loader, test_loader, dataset, None, name='raw', num_epochs=70, learning_rate=0.001)


In [None]:
res_data_root = "cropped_dataset"   
batch_size = 32               # Higher batch is okay for ResNet50
num_workers = 2               # Use 0 if RAM is small; 2 is safe
num_epochs = 10               # Enough for transfer learning
learning_rate = 1e-4
img_size = 224                # ResNet50 requires 224x224 input

# Load pretrained weights
weights = ResNet50_Weights.IMAGENET1K_V2

# ImageNet normalisation stats from pretrained weights
imagenet_mean = weights.transforms().mean
imagenet_std = weights.transforms().std

# training transfomrations
train_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    
    # --- YOLO-style color jitter ---
    transforms.ColorJitter(
        hue=0.015,        # hsv_h
        saturation=0.7,   # hsv_s
        brightness=0.4    # hsv_v
    ),
    
    # --- Spatial augmentations ---
    transforms.RandomRotation(90),          # degrees=90
    transforms.RandomAffine(
        degrees=0,
        translate=(0.2, 0.2),                # translate=0.2
        scale=(0.5, 1.5),                    # scale=0.5 (min)
        shear=20                             # shear=20
    ),
    transforms.RandomHorizontalFlip(p=1.0),  # fliplr=1
    transforms.RandomVerticalFlip(p=1.0),    # flipud=1
    
    # --- YOLO autoaugment equivalent ---
    transforms.RandAugment(),                # auto_augment = "randaugment"
    
    # --- Random erasing applied after tensor ---
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.4),         # erasing=0.4
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
])

# validation + test transformation
test_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
])

# Create PyTorch datasets and dataloaders using folder structure
train_ds = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=train_tf)
val_ds   = datasets.ImageFolder(os.path.join(res_data_root, "valid"), transform=test_tf)
test_ds  = datasets.ImageFolder(os.path.join(res_data_root, "test"),  transform=test_tf)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=num_workers)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=num_workers)

classes = train_ds.classes
num_classes = len(classes)
print("Classes found:", classes)



In [None]:
resnetTransferModel = trainResNet50Model(train_loader, val_loader, test_loader, test_ds, weights, name='transfer', num_epochs=70, learning_rate=0.001)