In [1]:
# RAW MODELS TRAINING
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
from ultralytics import YOLO
from tqdm import tqdm # Progress bar
import time
import gc
import matplotlib.pyplot as plt
import seaborn as sns

# Pytorch
import torch
from torchvision import models
import torchvision.transforms as transforms
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

from sklearn.metrics import classification_report, confusion_matrix


In [2]:
# GPU Information
# Needed to make sure nvidia-smi userspace was installed properly
print("Checking GPU information...")
if torch.cuda.is_available():
    print("GPU is available")
    print(f"Device Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory Free: {torch.cuda.mem_get_info()[0]/1024**3:.2f} GB")
    print(f"GPU Memory Total: {torch.cuda.mem_get_info()[1]/1024**3:.2f} GB")
    print(f"Memory Allocated: {torch.cuda.memory_allocated(0)/1024**3:.2f} GB")
    print(f"Memory Cached: {torch.cuda.memory_reserved(0)/1024**3:.2f} GB")
else:
    print("GPU is not available")

Checking GPU information...
GPU is available
Device Name: NVIDIA RTX 3500 Ada Generation Laptop GPU
GPU Memory Free: 8.79 GB
GPU Memory Total: 11.60 GB
Memory Allocated: 0.00 GB
Memory Cached: 0.00 GB


In [None]:
# RAW Training the Yolo Detector Model
detector = YOLO("yolo11n.yaml") # Create a new model from YAML
detector.train(data='dataset/data.yaml', 
               epochs=100, 
               imgsz=640, 
               batch=16, 
               name='yolo_raw_model', 
               device=0, 
               resume=True,
               patience=50, 
               pretrained=False,    
               hsv_h=0.015,
               hsv_s=0.7,
               hsv_v=0.4,
               degrees=0, # maybe you wanna rotate this guy selena ? 
               translate=0.1,
               scale=0.5,
               shear=0,
               perspective=0.0,
               flipud=0.0,
               fliplr=0.5,
               bgr=0.0,
               mosaic=1.0,
               mixup=0.0,
               cutmix=0.0,
               copy_paste=0.0,
               copy_paste_mode = 'flip',
               auto_augment = 'randaugment',
               erasing=0.4,
               flip=True,
               ) # Train the mode





Ultralytics 8.3.228 üöÄ Python-3.13.9 torch-2.9.1+cu128 CUDA:0 (NVIDIA RTX 3500 Ada Generation Laptop GPU, 11874MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs/detect/yolo_raw_model/weights/best.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo_raw_model, nbs=64, nms=False, opset=None, optimize=False, optimize

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f0cf0dbaeb0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,  

In [4]:
detector = YOLO("runs/detect/yolo_raw_model/weights/best.pt")
metrics = detector.val()
print(metrics)


Ultralytics 8.3.228 üöÄ Python-3.13.9 torch-2.9.1+cu128 CUDA:0 (NVIDIA RTX 3500 Ada Generation Laptop GPU, 11874MiB)
YOLO11n summary (fused): 100 layers, 2,584,492 parameters, 0 gradients, 6.3 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 3672.0¬±1034.1 MB/s, size: 40.4 KB)
[K[34m[1mval: [0mScanning /home/cormac/cv/dataset/valid/labels.cache... 1095 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1095/1095 3.2Mit/s 0.0s0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 69/69 17.5it/s 3.9s0.1s
                   all       1095       1341      0.757      0.684      0.718       0.43
                  Ants         96        178      0.796      0.646      0.696      0.278
                  Bees         99        110      0.823        0.8      0.837      0.406
               Beetles         89        100      0.606       0.56      0.579      0.278

In [3]:
def evaluate(model, dataloader, class_names, device):
    all_preds = []
    all_labels = []


    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1).cpu()


            all_preds.extend(preds)
            all_labels.extend(labels)


    print("\n===== Classification Report =====\n")
    print(classification_report(all_labels, all_preds, target_names=class_names))


    cm = confusion_matrix(all_labels, all_preds)


    plt.figure(figsize=(7, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.tight_layout()
    plt.savefig("confusion_matrix.png")
    print("Confusion matrix saved as confusion_matrix.png")


    per_class_accuracy = cm.diagonal() / cm.sum(axis=1)
    print("\nPer-class accuracy:")
    for cls, acc in zip(class_names, per_class_accuracy):
        print(f"{cls}: {acc:.4f}")



In [4]:
# RAW Training the Resnet Classifier Model



temp_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

dataset = datasets.ImageFolder("cropped_dataset/train", transform=temp_transform)
loader = DataLoader(dataset, batch_size=64, shuffle=False)

mean = 0.
std = 0.
total_images = 0

for images, _ in loader:
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1), -1)
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)
    total_images += batch_samples

mean /= total_images
std /= total_images

print("MEAN:", mean)
print("STD:", std)


# Data transformations (augmentation + normalization)
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Load datasets
train_dataset = datasets.ImageFolder('cropped_dataset/train', transform=train_transforms)
val_dataset = datasets.ImageFolder('cropped_dataset/valid', transform=val_transforms)
test_dataset = datasets.ImageFolder('cropped_dataset/test', transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)



MEAN: tensor([0.4917, 0.4541, 0.3436])
STD: tensor([0.1954, 0.1931, 0.1816])


In [None]:
# Model Training
import csv

logfile = "training_log.csv"

# Create and write header
with open(logfile, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["epoch", "train_loss", "val_loss", "val_acc"])


resnet_model = models.resnet50(pretrained=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet_model.to(device)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 100

for epoch in range(num_epochs):
    resnet_model.train()
    train_loss = 0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        imgs, labels = imgs.to(device), labels.to(device)

        optimiser.zero_grad()
        outputs = resnet_model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        train_loss += loss.item()

    # Validation
    resnet_model.eval()
    val_loss = 0
    val_correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = resnet_model(imgs)

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = outputs.max(1)
            val_correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss/len(train_loader):.4f}")
    print(f"Val Loss:   {val_loss/len(val_loader):.4f}")
    print(f"Val Acc:    {100*val_correct/total:.2f}%\n")

    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss   = val_loss / len(val_loader)
    val_acc        = 100 * val_correct / total

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {avg_train_loss:.4f}")
    print(f"Val Loss:   {avg_val_loss:.4f}")
    print(f"Val Acc:    {val_acc:.2f}%\n")

    with open(logfile, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_train_loss, avg_val_loss, val_acc])


    torch.save(model.state_dict(), "resnet50_custom.pth")


Epoch 1/100:  28%|‚ñà‚ñà‚ñä       | 67/239 [02:18<1:19:52, 27.86s/it]

In [None]:

evaluate(model, test_loader, dataset.classes, device)