In [3]:
# RAW MODELS TRAINING
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
from ultralytics import YOLO
from tqdm import tqdm # Progress bar
import time
import gc
import matplotlib.pyplot as plt
import seaborn as sns

# Pytorch
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.models import resnet50, ResNet50_Weights
from torchvision import datasets, transforms, models

from sklearn.metrics import classification_report, confusion_matrix

import csv


In [None]:
# GPU information
# Doubles as a sanity check that the nvidia-smi userspace was installed properly.
print("Checking GPU information...")
if not torch.cuda.is_available():
    print("GPU is not available")
else:
    bytes_per_gib = 1024**3
    print("GPU is available")
    print(f"Device Name: {torch.cuda.get_device_name(0)}")
    # mem_get_info() returns (free, total) in bytes; query it once for both lines
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    print(f"GPU Memory Free: {free_bytes/bytes_per_gib:.2f} GB")
    print(f"GPU Memory Total: {total_bytes/bytes_per_gib:.2f} GB")
    print(f"Memory Allocated: {torch.cuda.memory_allocated(0)/bytes_per_gib:.2f} GB")
    print(f"Memory Cached: {torch.cuda.memory_reserved(0)/bytes_per_gib:.2f} GB")

Raw Training YOLO Model

In [None]:
# RAW training of the YOLO detector: the model is built from its architecture
# YAML, so it starts from randomly initialised weights.
#
# Changes from the previous version of this cell:
#   * `resume=True` was dropped: a model created from a YAML has no checkpoint
#     to resume. To continue an interrupted run, load that run's
#     `weights/last.pt` with YOLO(...) and call `.train(resume=True)` instead.
#   * `flip=True` was dropped: flipping is controlled by `flipud` / `fliplr`,
#     and `flip` is not a documented Ultralytics train argument.
#   * `flipud` / `fliplr` are probabilities. At 1 every image was always flipped
#     both ways (a fixed 180 degree turn), so the network never saw an upright
#     sample. 0.5 flips half of the images on each axis.
#
# An earlier run used a milder geometric setup:
#   degrees=0, translate=0.1, shear=0, flipud=0.0, fliplr=0.5
detector = YOLO("yolo11n.yaml")  # Build an untrained model from the YAML definition

detector.train(
    data='dataset/data.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    name='yolo_raw_model',
    device=0,
    patience=50,          # Early-stopping patience in epochs
    pretrained=False,     # Train from scratch
    # Colour augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    # Geometric augmentation
    degrees=90,
    translate=0.2,
    scale=0.5,
    shear=20,
    perspective=0.0,
    flipud=0.5,           # Probability of a vertical flip
    fliplr=0.5,           # Probability of a horizontal flip
    # Remaining augmentation switches
    bgr=0.0,
    mosaic=1.0,
    mixup=0.0,
    cutmix=0.0,
    copy_paste=0.0,
    copy_paste_mode='flip',
    auto_augment='randaugment',
    erasing=0.4,
)





Transfer Learning YOLO Model

In [None]:

# Transfer learning for the YOLO detector: start from the pretrained
# yolo11n.pt weights and fine-tune them on the custom dataset.
#
# Changes from the previous version of this cell:
#   * `resume=True` was dropped: with a stock checkpoint it asks Ultralytics to
#     resume that checkpoint's own (already finished) training run rather than
#     start a fine-tuning run. To continue an interrupted run, load that run's
#     `weights/last.pt` with YOLO(...) and call `.train(resume=True)` instead.
#   * `pretrained=True`, so the setting matches the transfer-learning intent.
#   * `flip=True` was dropped: flipping is controlled by `flipud` / `fliplr`,
#     and `flip` is not a documented Ultralytics train argument.
#   * `flipud` / `fliplr` are probabilities. At 1 every image was always flipped
#     both ways (a fixed 180 degree turn), so the network never saw an upright
#     sample. 0.5 flips half of the images on each axis.
detector = YOLO("yolo11n.pt")  # Load the pretrained checkpoint

detector.train(
    data='dataset/data.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    name='yolo_transfer_model',
    device=0,
    patience=50,          # Early-stopping patience in epochs
    pretrained=True,      # Fine-tune from the loaded weights
    # Colour augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    # Geometric augmentation
    degrees=90,
    translate=0.2,
    scale=0.5,
    shear=20,
    perspective=0.0,
    flipud=0.5,           # Probability of a vertical flip
    fliplr=0.5,           # Probability of a horizontal flip
    # Remaining augmentation switches
    bgr=0.0,
    mosaic=1.0,
    mixup=0.0,
    cutmix=0.0,
    copy_paste=0.0,
    copy_paste_mode='flip',
    auto_augment='randaugment',
    erasing=0.4,
)

In [None]:
# Validate the best checkpoint written by the raw (from-scratch) YOLO run.
raw_best_weights = "runs/detect/yolo_raw_model/weights/best.pt"
detector = YOLO(raw_best_weights)
metrics = detector.val()
print(metrics)

In [2]:
def evaluate(model, dataloader, class_names, device):
    """Evaluate a classifier on a dataloader and report per-class metrics.

    Runs ``model`` over every batch without gradient tracking, prints a
    scikit-learn classification report, saves a confusion-matrix heatmap to
    ``confusion_matrix.png`` and prints the per-class accuracy.

    Args:
        model: Callable mapping an image batch to per-class scores with the
            class dimension at index 1. If it exposes ``eval()`` it is switched
            to evaluation mode first.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        class_names: Sequence of class names; position ``i`` names label ``i``.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        None. Results are printed and the heatmap is written to disk.
    """
    # Dropout / batch-norm must be in inference mode for meaningful metrics.
    switch_to_eval = getattr(model, "eval", None)
    if callable(switch_to_eval):
        switch_to_eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            # Store plain Python ints rather than 0-d tensors.
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(torch.as_tensor(labels).cpu().tolist())

    # Pin the label set so the report and the matrix always line up with
    # class_names, even when a class is missing from this split.
    label_ids = list(range(len(class_names)))

    print("\n===== Classification Report =====\n")
    print(classification_report(
        all_labels,
        all_preds,
        labels=label_ids,
        target_names=class_names,
        zero_division=0,
    ))

    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)

    fig, ax = plt.subplots(figsize=(7, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title("Confusion Matrix")
    fig.tight_layout()
    fig.savefig("confusion_matrix.png")
    print("Confusion matrix saved as confusion_matrix.png")

    # Row sums are the number of true samples per class; a class without any
    # samples gets NaN instead of triggering a division by zero.
    support = cm.sum(axis=1)
    per_class_accuracy = np.divide(
        cm.diagonal(),
        support,
        out=np.full(len(label_ids), np.nan),
        where=support > 0,
    )
    print("\nPer-class accuracy:")
    for cls, acc in zip(class_names, per_class_accuracy):
        print(f"{cls}: {acc:.4f}")



In [1]:
# RAW training of the ResNet classifier: configuration, dataset statistics,
# transforms and data loaders.
res_data_root = "cropped_dataset"     # Root of the YOLO-cropped dataset (train/valid/test folders)
batch_size = 32
num_workers = 8               # DataLoader worker processes; lower this if RAM is limited
num_epochs = 70               # Training from random initialisation needs many epochs
learning_rate = 1e-4
img_size = 224                # Side length every image is resized to

# Un-normalised transform, used only to measure the training-set statistics.
temp_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor()
])

dataset = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=temp_transform)
loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=num_workers)

# Per-channel mean / std over every pixel of the training set.
# The sums are accumulated in float64 and the std is derived from
# E[x^2] - E[x]^2, so it is the std of the dataset itself rather than an
# average of per-image stds.
channel_sum = 0.
channel_sq_sum = 0.
total_pixels = 0
total_images = 0

for images, _ in loader:
    images = images.double()
    total_images += images.size(0)
    total_pixels += images.size(0) * images.size(2) * images.size(3)
    channel_sum += images.sum(dim=(0, 2, 3))
    channel_sq_sum += images.pow(2).sum(dim=(0, 2, 3))

if total_images == 0:
    raise RuntimeError(f"No training images found under {res_data_root!r}")

mean = (channel_sum / total_pixels).float()
# clamp guards against a tiny negative variance caused by rounding
std = (channel_sq_sum / total_pixels - (channel_sum / total_pixels) ** 2).clamp(min=0).sqrt().float()

print("MEAN:", mean)
print("STD:", std)


# Data transformations (augmentation + normalization)
train_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Validation and test images are only resized and normalised (no augmentation).
val_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Load datasets
train_ds = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=train_tf)
val_ds   = datasets.ImageFolder(os.path.join(res_data_root, "valid"), transform=val_tf)
test_ds  = datasets.ImageFolder(os.path.join(res_data_root, "test"),  transform=val_tf)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=num_workers)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=num_workers)


NameError: name 'transforms' is not defined

In [None]:
# Model Training: ResNet50 from random initialisation.
# Per-epoch metrics are appended to a CSV log and the current weights are
# written to resnet50_raw_custom.pth after every epoch.

logfile = "training_log_resnet_raw.csv"

# Create the log (overwriting any previous one) and write the header
with open(logfile, "w", newline="") as f:
    csv.writer(f).writerow(["epoch", "train_loss", "val_loss", "val_acc"])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classification head to the dataset instead of the default 1000
# outputs, so the network can only predict classes that exist.
# NOTE(review): anything that loads resnet50_raw_custom.pth must build the model
# with the same num_classes.
num_classes = len(train_ds.classes)
resnet_model = models.resnet50(weights=None, num_classes=num_classes)  # weights=None: no pretrained weights
model = resnet_model.to(device)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        imgs, labels = imgs.to(device), labels.to(device)

        optimiser.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        train_loss += loss.item()

    # ---- Validation pass ----
    model.eval()
    val_loss = 0.0
    val_correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            val_loss += criterion(outputs, labels).item()

            predicted = outputs.argmax(dim=1)
            val_correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

    # Losses are averaged over batches; accuracy is over samples.
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss   = val_loss / len(val_loader)
    val_acc        = 100 * val_correct / total

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {avg_train_loss:.4f}")
    print(f"Val Loss:   {avg_val_loss:.4f}")
    print(f"Val Acc:    {val_acc:.2f}%\n")

    with open(logfile, "a", newline="") as f:
        csv.writer(f).writerow([epoch+1, avg_train_loss, avg_val_loss, val_acc])

    # Overwritten every epoch, so the file always holds the latest weights.
    torch.save(model.state_dict(), "resnet50_raw_custom.pth")


In [None]:

# Report test-set metrics for the trained classifier. Class names come from the
# test dataset itself so they always match the labels it yields.
model.eval()
evaluate(model, test_loader, test_ds.classes, device)

Transfer Learning ResNet

In [None]:
# Transfer-learning configuration for the ResNet classifier
res_data_root = "cropped_dataset"     # Your YOLO-cropped dataset
batch_size = 32               # Higher batch is okay for ResNet50
num_workers = 2               # Use 0 if RAM is small; 2 is safe
num_epochs = 10               # Enough for transfer learning
learning_rate = 1e-4
img_size = 224                # ResNet50 requires 224x224 input

# Pretrained weights and the normalisation statistics of their preprocessing preset
weights = ResNet50_Weights.IMAGENET1K_V2
imagenet_mean = weights.transforms().mean
imagenet_std = weights.transforms().std

# Training pipeline: resize, light augmentation, then tensor conversion + normalisation
train_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])
# Validation / test pipeline: the same steps without augmentation
test_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# One ImageFolder per split; the folder layout defines the classes
train_ds, val_ds, test_ds = (
    datasets.ImageFolder(os.path.join(res_data_root, split_name), transform=split_tf)
    for split_name, split_tf in (("train", train_tf), ("valid", test_tf), ("test", test_tf))
)

# Only the training loader shuffles
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader, test_loader = (
    DataLoader(split_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    for split_ds in (val_ds, test_ds)
)

classes = train_ds.classes
num_classes = len(classes)
print("Classes found:", classes)

In [None]:

# RAW Training the Resnet Classifier Model
# NOTE(review): this cell repeats the raw-training data setup inside the
# transfer-learning section. Running it replaces the transfer-learning values
# of num_workers / num_epochs and swaps the ImageNet normalisation in
# train_tf / the loaders for dataset-specific statistics. Confirm that is
# intended before training the transfer model.
res_data_root = "cropped_dataset"     # Root of the YOLO-cropped dataset (train/valid/test folders)
batch_size = 32
num_workers = 8               # DataLoader worker processes; lower this if RAM is limited
num_epochs = 70               # Training from random initialisation needs many epochs
learning_rate = 1e-4
img_size = 224                # Side length every image is resized to

# Un-normalised transform, used only to measure the training-set statistics.
temp_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor()
])

dataset = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=temp_transform)
loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=num_workers)

# Per-channel mean / std over every pixel of the training set.
# The sums are accumulated in float64 and the std is derived from
# E[x^2] - E[x]^2, so it is the std of the dataset itself rather than an
# average of per-image stds.
channel_sum = 0.
channel_sq_sum = 0.
total_pixels = 0
total_images = 0

for images, _ in loader:
    images = images.double()
    total_images += images.size(0)
    total_pixels += images.size(0) * images.size(2) * images.size(3)
    channel_sum += images.sum(dim=(0, 2, 3))
    channel_sq_sum += images.pow(2).sum(dim=(0, 2, 3))

if total_images == 0:
    raise RuntimeError(f"No training images found under {res_data_root!r}")

mean = (channel_sum / total_pixels).float()
# clamp guards against a tiny negative variance caused by rounding
std = (channel_sq_sum / total_pixels - (channel_sum / total_pixels) ** 2).clamp(min=0).sqrt().float()

print("MEAN:", mean)
print("STD:", std)


# Data transformations (augmentation + normalization)
train_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Validation and test images are only resized and normalised (no augmentation).
val_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist())
])

# Load datasets
train_ds = datasets.ImageFolder(os.path.join(res_data_root, "train"), transform=train_tf)
val_ds   = datasets.ImageFolder(os.path.join(res_data_root, "valid"), transform=val_tf)
test_ds  = datasets.ImageFolder(os.path.join(res_data_root, "test"),  transform=val_tf)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=num_workers)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Model Training

        
