In [1]:
from ultralytics import YOLO
import cv2
import os
import glob
import os
import glob
import random
import shutil
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from ultralytics import YOLO
import torch
from pathlib import Path

In [2]:
# Dataset locations, all derived from BASE_DIR
BASE_DIR = "Normal_cups"  # Change to your dataset path
IMAGE_DIR = os.path.join(BASE_DIR, "images")
LABEL_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "yolo_dataset")
MODEL_PATH = "yolov5"  # Change if using another YOLOv5 model

# Build the <split>/<kind> folder tree under OUTPUT_DIR (no-op if it already exists)
for split_name in ("train", "val"):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(OUTPUT_DIR, split_name, kind), exist_ok=True)

# Load images & labels

# Extensions treated as images; any other entry in IMAGE_DIR (sub-directories,
# stray metadata files, ...) is skipped instead of being counted as a sample.
IMAGE_EXTENSIONS = {
    ".bmp", ".dng", ".jpeg", ".jpg", ".mpo", ".pfm", ".png", ".tif", ".tiff", ".webp",
}


def _image_level_class(label_path):
    """Return the image-level class derived from a YOLO label file.

    Args:
        label_path: Path of the annotation file; it may not exist.

    Returns:
        1 if any annotation line starts with class id 1 (Tim Hortons cup),
        otherwise 0. A missing or empty file also yields 0 (normal cup /
        background class).

    Raises:
        ValueError: If a non-blank line does not start with an integer class id.
    """
    if not (os.path.exists(label_path) and os.path.getsize(label_path) > 0):
        return 0
    with open(label_path, "r") as file:
        # Skip blank lines: a trailing empty line would otherwise raise IndexError.
        class_ids = [int(line.split()[0]) for line in file if line.strip()]
    return 1 if 1 in class_ids else 0


# Sort so the row order (and therefore the seeded train/val split built from
# this frame) does not depend on the filesystem's listing order.
image_paths = sorted(
    path
    for path in glob.glob(os.path.join(IMAGE_DIR, "*"))
    if os.path.isfile(path) and os.path.splitext(path)[1].lower() in IMAGE_EXTENSIONS
)
image_list = []
labels = []

for img_path in image_paths:
    base_name = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(LABEL_DIR, base_name + ".txt")

    image_list.append((img_path, label_path))
    labels.append(_image_level_class(label_path))

# Convert to DataFrame for processing: one row per image with its label path and class
df = pd.DataFrame(image_list, columns=["image", "label"])
df["class"] = labels

# Print dataset summary
print(f"Total Images: {len(df)}")
print(f"Class Distribution: {df['class'].value_counts()}")


Total Images: 1669
Class Distribution: class
1    1026
0     643
Name: count, dtype: int64


In [3]:
import shutil
import os
import pandas as pd
from sklearn.model_selection import StratifiedKFold

# Root folder that receives the train/val copies of the dataset.
# NOTE(review): this rebinds OUTPUT_DIR to a machine-specific absolute path.
OUTPUT_DIR = "C:/Users/Chinmay Nagesh/Desktop/Yolo/SYDE770/Normal_cups/yolo_dataset"

# Class-balanced splitter; with 5 folds each fold holds roughly 20% of the rows
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Only the first fold is used: it serves as the validation set (80-20 split)
train_idx, val_idx = next(skf.split(df["image"], df["class"]))
train_data = df.iloc[train_idx]
val_data = df.iloc[val_idx]

# Function to copy files
def copy_files(data, split, output_dir=None):
    """Copy the images and labels listed in ``data`` into one dataset split.

    Images go to ``<output_dir>/<split>/images`` and label files to
    ``<output_dir>/<split>/labels``; both folders are created if missing.
    A row whose label file is missing or empty gets an empty label file so
    that every copied image has a matching ``.txt`` in the labels folder.

    Args:
        data: DataFrame with an "image" column (image path) and a "label"
            column (annotation path, which may not exist on disk).
        split: Name of the split sub-folder, e.g. "train" or "val".
        output_dir: Dataset root. Defaults to the module-level OUTPUT_DIR.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR

    images_dir = os.path.join(output_dir, split, "images")
    labels_dir = os.path.join(output_dir, split, "labels")
    # Create the targets here rather than relying on an earlier cell having
    # created them under the same root.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for image_path, label_path in zip(data["image"], data["label"]):
        shutil.copy(image_path, os.path.join(images_dir, os.path.basename(image_path)))

        label_target = os.path.join(labels_dir, os.path.basename(label_path))
        if os.path.exists(label_path) and os.path.getsize(label_path) > 0:
            shutil.copy(label_path, label_target)
        else:
            # Create an empty label file if no annotation exists
            with open(label_target, "w"):
                pass

# Populate both splits on disk
copy_files(train_data, "train")
copy_files(val_data, "val")

# Create YOLO dataset.yaml file.
# The dataset root comes from OUTPUT_DIR so the YAML cannot drift from the
# folder the files were just copied into.
yaml_content = f"""
path: {OUTPUT_DIR}
train: train/images
val: val/images
nc: 2
names: ["Normal Cup", "Tim Hortons Cup"]
"""

yaml_path = f"{OUTPUT_DIR}/dataset.yaml"
with open(yaml_path, "w") as f:
    f.write(yaml_content)

# Train YOLOv5 (No MLflow integration in this cell)
import subprocess
import sys

# An argument list avoids shell quoting problems (the dataset path contains a
# space), and sys.executable runs train.py with the kernel's own interpreter.
command = [
    sys.executable, "yolov5/train.py",
    "--imgsz", "320",
    "--batch-size", "32",
    "--epochs", "5",
    "--data", yaml_path,
    "--weights", "yolov5s.pt",
    "--device", "cpu",
    "--freeze", "10",
]
# check=True raises CalledProcessError on a non-zero exit code, so the success
# message below is only printed when training actually finished.
subprocess.run(command, check=True)

print("Training completed! Model saved in YOLO output folder.")


Training completed! Model saved in YOLO output folder.


Extracting the final-epoch training results for MLflow logging

In [4]:
import pandas as pd
import os

# Path to results.csv in latest exp folder
# NOTE(review): the run folder is hard-coded; update it if training wrote to a
# different expN directory.
latest_exp= 'yolov5/runs/train/exp2'
results_path = os.path.join(latest_exp, "results.csv")

# Fail fast if the results file is missing. Otherwise the lookups below would
# run against whatever `df` an earlier cell left behind and fail with a
# misleading KeyError.
if not os.path.exists(results_path):
    raise FileNotFoundError(f"results.csv not found at {results_path}")

df = pd.read_csv(results_path)
# Strip whitespace around header names so the exact-match lookups below work
df.columns = df.columns.str.strip()

if df.empty:
    raise ValueError(f"{results_path} contains no rows")

# The last row of results.csv is the one whose metrics get logged
final_epoch = df.iloc[-1]

train_box_loss = final_epoch["train/box_loss"]
train_obj_loss = final_epoch["train/obj_loss"]
train_cls_loss = final_epoch["train/cls_loss"]
val_box_loss = final_epoch["val/box_loss"]
mAP_50 = final_epoch["metrics/mAP_0.5"]
mAP_50_95 = final_epoch["metrics/mAP_0.5:0.95"]
precision = final_epoch["metrics/precision"]
recall = final_epoch["metrics/recall"]

MLflow section

In [6]:
import mlflow
import dagshub
import pandas as pd
import shutil
import os

# Initialize MLflow tracking
dagshub.init(repo_owner='chinmay-nagesh',
             repo_name='SYDE770-dagshub',
             mlflow=True)

mlflow.set_tracking_uri("https://dagshub.com/chinmay-nagesh/SYDE770-dagshub.mlflow")
mlflow.set_experiment("YOLOv5 Training")

# Start MLflow run to track hyperparameters and metrics
with mlflow.start_run() as run:
    # Log Hyperparameters
    # NOTE(review): these values are repeated by hand and presumably mirror the
    # flags passed to train.py; confirm they match the run being logged.
    mlflow.log_params({
        "img_size": 320,
        "batch_size": 32,
        "epochs": 5,
        "weights": "yolov5s.pt",
        "device": "cpu",
        "freeze": 10,
    })

    # Log Metrics (values computed by the results.csv cell)
    mlflow.log_metrics({
        "train_box_loss": train_box_loss,
        "train_obj_loss": train_obj_loss,
        "train_cls_loss": train_cls_loss,
        "val_box_loss": val_box_loss,
        "mAP_50": mAP_50,
        "mAP_50_95": mAP_50_95,
        "precision": precision,
        "recall": recall,
    })

    # Log Model
    best_model_path = "yolov5/runs/train/exp2/weights/best.pt"
    model_save_path = "yolov5_model"
    model_name = "yolov5"

    if os.path.exists(best_model_path):
        # Keep a local copy of the weights next to the notebook
        os.makedirs(model_save_path, exist_ok=True)
        local_weights = shutil.copy(best_model_path, os.path.join(model_save_path, "best.pt"))

        # Log Model as an artifact in MLflow
        mlflow.log_artifact(local_weights, artifact_path="models/yolov5")

        # register_model creates the registered model when it does not exist
        # yet and otherwise adds a new version, so no fallback is needed.
        # Any failure (network, auth, ...) now surfaces instead of being swallowed.
        mlflow.register_model(f"runs:/{run.info.run_id}/models/yolov5/best.pt", model_name)
        print(f"New version of {model_name} registered in MLflow.")
        print("Metrics and model logged to MLflow.")
    else:
        # Don't claim the model was logged when the weights file is missing
        print(f"WARNING: {best_model_path} not found; model was not logged or registered.")
        print("Metrics logged to MLflow.")


Registered model 'yolov5' already exists. Creating a new version of this model...
2025/03/11 23:41:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: yolov5, version 2
Created version '2' of model 'yolov5'.


New version of yolov5 registered in MLflow.
Metrics and model logged to MLflow.
🏃 View run fun-pug-728 at: https://dagshub.com/chinmay-nagesh/SYDE770-dagshub.mlflow/#/experiments/1/runs/ea0f2dac654a417ca27550e261535965
🧪 View experiment at: https://dagshub.com/chinmay-nagesh/SYDE770-dagshub.mlflow/#/experiments/1
