In [None]:
pip install --upgrade ultralytics

In [None]:
## Train one YOLO classification model per fold (k-fold cross-validation)

import os
import gc
import torch
from ultralytics import YOLO

# ---------------- CONFIG ----------------
# NOTE: BASE_DIR is a machine-specific absolute path; adjust it when running elsewhere.
BASE_DIR    = r"C:\Users\jazzb\ImageDetection-Yolov11"  # root path where fold_1, …, fold_5 live
MODEL_FILE  = "yolo11n-cls.pt"                         # pretrained classification model
FOLDS       = 5                                        # number of fold_<n> folders to train on
EPOCHS      = 100                                      # maximum epochs per fold
IMGSZ       = 640                                      # training image size
BATCH       = 8                                        # reduced for lower VRAM usage
PATIENCE    = 5                                        # early-stopping patience (epochs)
DEVICE      = "cuda" if torch.cuda.is_available() else "cpu"
MEM_FRACTION = 0.6                                     # 60% GPU memory cap
# ----------------------------------------

print(f"Using device: {DEVICE}\n")

# Cap this process's share of GPU 0 memory before any training starts.
if DEVICE == "cuda":
    torch.cuda.set_per_process_memory_fraction(MEM_FRACTION, device=0)

for fold in range(1, FOLDS + 1):
    fold_dir = os.path.join(BASE_DIR, f"fold_{fold}")
    if not os.path.isdir(os.path.join(fold_dir, 'train')):
        raise FileNotFoundError(f"{fold_dir}/train not found")

    print(f"\n=== Training Fold {fold}/{FOLDS} on folder {fold_dir} ===")

    # A fresh model is loaded from the pretrained weights for every fold.
    model = YOLO(MODEL_FILE)
    try:
        # Outputs are written to <fold_dir>/results (project=fold_dir, name='results');
        # exist_ok=True reuses that folder on re-runs.
        model.train(
            data=fold_dir,
            task='classify',
            epochs=EPOCHS,
            imgsz=IMGSZ,
            batch=BATCH,
            patience=PATIENCE,
            device=DEVICE,
            project=fold_dir,
            name='results',
            exist_ok=True,
            dropout=0.2,
            weight_decay=0.0005,
            amp=False
        )
    finally:
        # Drop the reference to this fold's model BEFORE collecting garbage;
        # otherwise the model stays alive and its memory cannot be released
        # ahead of the next fold.
        del model
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

    print(f"--- Fold {fold} complete. Best weights in {fold_dir}/results ---")


In [None]:
## Summarize each fold's results and pick the best-generalizing fold

import pandas as pd
import os
import shutil

# Root folder that holds the fold_* sub-folders: the directory the notebook is run from.
base_dir = os.getcwd()

# Fold sub-folders to inspect (fold_1 … fold_5).
fold_dirs = [f"fold_{i+1}" for i in range(5)]

# Columns, besides the accuracy column, that a results.csv must provide.
REQUIRED_COLUMNS = ("epoch", "train/loss", "val/loss", "lr/pg0")


def find_accuracy_column(columns):
    """Return the name of the accuracy column among `columns`, or None.

    A column whose name contains "top1" is preferred; otherwise the first
    column whose name contains "accuracy" (case-insensitive) is returned.
    """
    candidates = [col for col in columns if "accuracy" in str(col).lower()]
    for col in candidates:
        if "top1" in str(col).lower():
            return col
    return candidates[0] if candidates else None


def summarize_fold(df, fold, accuracy_column=None):
    """Summarize one fold's training log at its highest-accuracy epoch.

    Args:
        df: DataFrame read from the fold's results.csv.
        fold: Label stored in the "Fold" field of the summary.
        accuracy_column: Name of the accuracy column; detected with
            find_accuracy_column() when omitted.

    Returns:
        dict with the keys Fold, Epoch, Accuracy, Train Loss, Validation Loss,
        Loss Gap and Learning Rate.

    Raises:
        ValueError: if the accuracy column or a required column is missing,
            or the accuracy column holds no numeric value.
    """
    # Header names may carry padding whitespace; normalize before any lookup.
    df = df.rename(columns=lambda name: str(name).strip())

    if accuracy_column is None:
        accuracy_column = find_accuracy_column(df.columns)
    else:
        accuracy_column = str(accuracy_column).strip()
    if accuracy_column is None or accuracy_column not in df.columns:
        raise ValueError("'accuracy' column not found")

    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(f"missing column(s): {missing}")

    accuracy = pd.to_numeric(df[accuracy_column], errors="coerce")
    if not accuracy.notna().any():
        raise ValueError(f"no valid values in '{accuracy_column}'")

    best_row = df.loc[accuracy.idxmax()]
    train_loss = best_row["train/loss"]
    val_loss = best_row["val/loss"]

    return {
        "Fold": fold,
        "Epoch": int(best_row["epoch"]),
        "Accuracy": best_row[accuracy_column],
        "Train Loss": train_loss,
        "Validation Loss": val_loss,
        # Train/validation loss gap is used as a simple overfitting indicator.
        "Loss Gap": abs(train_loss - val_loss),
        "Learning Rate": best_row["lr/pg0"],
    }


def rank_folds(summary_df):
    """Return a copy of `summary_df` with ranking columns added.

    Adds "Accuracy Rank" (1 = highest accuracy), "Loss Gap Rank" (1 = smallest
    gap; a NaN gap ranks last) and "Overall Score" (sum of both; lower is better).
    """
    ranked = summary_df.copy()
    ranked["Accuracy Rank"] = ranked["Accuracy"].rank(ascending=False)
    ranked["Loss Gap Rank"] = ranked["Loss Gap"].rank(ascending=True, na_option="bottom")
    ranked["Overall Score"] = ranked["Accuracy Rank"] + ranked["Loss Gap Rank"]
    return ranked


def select_best_fold(ranked_df):
    """Return the row with the lowest "Overall Score".

    Rank sums tie often with few folds; ties go to the fold with the higher
    accuracy (then to the earliest row) instead of depending on fold order alone.
    """
    best_score = ranked_df["Overall Score"].min()
    tied = ranked_df[ranked_df["Overall Score"] == best_score]
    return tied.loc[tied["Accuracy"].idxmax()]


results = []

for fold in fold_dirs:
    csv_path = os.path.join(base_dir, fold, "results", "results.csv")
    print(f"Checking: {csv_path} -> Exists: {os.path.exists(csv_path)}")  # Debug line

    if not os.path.exists(csv_path):
        print(f"❌ Missing results.csv in {csv_path}")
        continue

    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        print(f"⚠️ Empty results.csv at {csv_path}")
        continue

    df.columns = df.columns.str.strip()
    print(f"🔍 Columns in {csv_path}:\n{df.columns.tolist()}")  # Debugging line

    # Look for the accuracy column
    accuracy_column = find_accuracy_column(df.columns)
    if accuracy_column is None:
        print(f"⚠️ 'accuracy' column not found in {csv_path}")
        continue
    print(f"✅ Found column: {accuracy_column}")

    try:
        results.append(summarize_fold(df, fold, accuracy_column))
    except ValueError as err:
        # Skip unusable logs instead of aborting the whole comparison.
        print(f"⚠️ Skipping {fold}: {err}")

results_df = pd.DataFrame(results)

if not results_df.empty:
    print(f"\n📊 {len(fold_dirs)}-Fold Cross-Validation Summary:")
    print(results_df.to_string(index=False))

    # Combined score: prioritize high accuracy + low overfitting
    results_df = rank_folds(results_df)

    best_fold = select_best_fold(results_df)
    print(f"\n🎯 Selected Best Generalizing Fold: {best_fold['Fold']} (Epoch {best_fold['Epoch']})")
    print(f"   Accuracy: {best_fold['Accuracy']:.4f}, Loss Gap: {best_fold['Loss Gap']:.4f}")

    # Weights of the selected fold are copied to <base_dir>/final_model/best_fold.pt
    best_model_path = os.path.join(base_dir, best_fold["Fold"], "results", "weights", "best.pt")
    dest_dir = os.path.join(base_dir, "final_model")
    os.makedirs(dest_dir, exist_ok=True)

    if os.path.exists(best_model_path):
        shutil.copy(best_model_path, os.path.join(dest_dir, "best_fold.pt"))
        print(f"📦 Copied best.pt to: {os.path.join(dest_dir, 'best_fold.pt')}")
    else:
        print(f"❌ Missing best.pt at: {best_model_path}")
else:
    print("\n⚠️ No valid results found. Please check the folder structure and CSV files.")


In [None]:
# PURE CLASSIFICATION + NO_DAMAGE BY CONFIDENCE THRESHOLD

from ultralytics import YOLO
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import os

# ---------------- CONFIG ----------------
weights = "fold_1/results/weights/best.pt"   # trained classification weights to evaluate
val_root = "Road_Damage/val"                 # images are read from <val_root>/<class>/images
folders  = [                                 # class folders to evaluate
    "potholes",
    "crack_issues",
    "alligator_crack_issues",
    "ravelling",
    "open_manhole"
]
CONF_THRESH = 0.3                            # top-1 confidence below this => "no_damage"
NO_DAMAGE_LABEL = "no_damage"
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
# ----------------------------------------

# 1. Reload the trained classification weights
model = YOLO(weights)

# 2. Copy the model's {index: name} dict and make sure it has a "no_damage" entry.
#    An existing "no_damage" class is reused; adding a duplicate name would make
#    the name -> index inversion below drop the model's own index from the report.
names = model.names.copy()
existing_no_damage = [idx for idx, label in names.items() if label == NO_DAMAGE_LABEL]
if existing_no_damage:
    no_damage_idx = existing_no_damage[0]
else:
    no_damage_idx = max(names.keys()) + 1
    names[no_damage_idx] = NO_DAMAGE_LABEL

# 3. Invert for lookup (class name -> class index)
name_to_idx = {v: k for k, v in names.items()}

# Fail early, with context, if a validation folder is not one of the model's classes.
unknown_folders = [folder for folder in folders if folder not in name_to_idx]
if unknown_folders:
    raise KeyError(
        f"Validation folders not among the model's classes: {unknown_folders}; "
        f"model classes: {list(names.values())}"
    )

# 4. Classify every validation image and collect true/predicted class indices
y_true, y_pred = [], []

for folder in folders:
    true_idx = name_to_idx[folder]
    img_dir  = os.path.join(val_root, folder, "images")
    for fn in sorted(os.listdir(img_dir)):
        if not fn.lower().endswith(IMAGE_EXTENSIONS):
            continue

        path = os.path.join(img_dir, fn)

        # Run classification (verbose=False avoids one log line per image)
        res = model(path, imgsz=640, verbose=False)[0]

        # Use top1 and top1conf directly
        top_idx  = int(res.probs.top1)
        top_conf = float(res.probs.top1conf)

        # Assign no_damage if confidence is too low
        pred_idx = no_damage_idx if top_conf < CONF_THRESH else top_idx

        # Both lists are appended together, after inference, so they stay aligned.
        y_true.append(true_idx)
        y_pred.append(pred_idx)

if not y_true:
    raise RuntimeError(f"No validation images found under {val_root}")

# 5. Print classification report (include labels for all classes)
sorted_idx = sorted(names)  # every class index, including no_damage

print("\n📊 Classification Report:")
print(classification_report(
    y_true,
    y_pred,
    labels=sorted_idx,
    target_names=[names[i] for i in sorted_idx],
    zero_division=0
))

# 6. Build & save the confusion matrix
cm = confusion_matrix(
    y_true,
    y_pred,
    labels=sorted_idx
)
disp = ConfusionMatrixDisplay(
    cm,
    display_labels=[names[i] for i in sorted_idx]
)

fig, ax = plt.subplots(figsize=(8, 7))
disp.plot(ax=ax, cmap="Blues", xticks_rotation="vertical")
plt.title("Validation Confusion Matrix (incl. no_damage)")
plt.tight_layout()

out_path = "confusion_matrix_with_no_damage.png"
plt.savefig(out_path, dpi=150)
print(f"Saved confusion matrix to {out_path}")
# The figure is closed after saving, so it is written to disk rather than left open.
plt.close(fig)
