In [10]:
import os
from dotenv import load_dotenv
from roboflow import Roboflow
import yaml
import mlflow
from ultralytics import YOLO

ModuleNotFoundError: No module named 'ultralytics'

In [None]:
# ----------- JUST RUN IT ONCE! ----------- #
# Read the Roboflow API key from the project's .env file, then download
# version 1 of the SnapCal project in the "yolov8" export format.
load_dotenv('../.env')
ROBOFLOW_API_KEY = os.environ.get('ROBOFLOW_API_KEY')

rf = Roboflow(api_key=ROBOFLOW_API_KEY)
workspace = rf.workspace("snapcal-4vbih")
project = workspace.project("snapcal-3lsxh")
version = project.version(1)
dataset = version.download("yolov8")
# ----------- JUST RUN IT ONCE! ----------- #
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in snapcal-1 to yolov8:: 100%|██████████| 220948/220948 [01:58<00:00, 1864.67it/s]





Extracting Dataset Version Zip to snapcal-1 in yolov8:: 100%|██████████| 3510/3510 [00:28<00:00, 121.63it/s]


In [6]:
# --- Configuration ---
# Path (relative to this notebook) of the dataset description YAML file.
DATASET_PATH_YAML = "../data/processed/dataset.yaml"

In [7]:
# --- 1. MLflow Configuration ---
# Select the MLflow experiment that subsequent runs in this notebook belong to.
EXPERIMENT_NAME = "SnapCal Component Detection"
mlflow.set_experiment(EXPERIMENT_NAME)

NameError: name 'mlflow' is not defined

In [None]:
# --- 2. Model Training ---
# Train a YOLOv8 model inside a single MLflow run and record the
# hyper-parameters, final detection metrics and the trained model on that run.
# Requires the config cell (DATASET_PATH_YAML) and the imports cell to have run.
with mlflow.start_run() as run:
    run_id = run.info.run_id
    print(f"Starting model training with MLflow Run ID: {run_id}")

    # --- Log Parameters ---
    # Log key parameters that you might want to change in future experiments.
    params = {
        "epochs": 50,
        "batch_size": 16,
        "image_size": 640,
        "model_type": "yolov8n.pt" # 'n' for nano, the smallest model
    }
    mlflow.log_params(params)
    print(f"Logged Parameters: {params}")

    # --- Load Pre-trained Model ---
    # Start from pre-trained YOLOv8 weights rather than training from scratch.
    model = YOLO(params["model_type"])

    # --- Train the Model ---
    # NOTE(review): Ultralytics has its own MLflow integration that may also
    # touch the active run during training — confirm it does not end this run
    # before the metrics below are logged.
    results = model.train(
        # Must match the name defined in the configuration cell
        # (DATASET_PATH_YAML); the previous DATASET_YAML_PATH was never defined
        # and raised NameError.
        data=DATASET_PATH_YAML,
        epochs=params["epochs"],
        batch=params["batch_size"],
        imgsz=params["image_size"],
        project="training_runs", # Local folder to save Ultralytics' output
        name=run_id # Name the output folder with the MLflow run ID for easy mapping
    )

    # --- Log Metrics ---
    # Pull the headline box-detection metrics off the object returned by
    # train() and log them to MLflow for comparison across runs.
    final_metrics = {
        "mAP50-95": results.box.map,    # Main metric: Mean Average Precision
        "mAP50": results.box.map50,      # mAP at 0.5 IoU threshold
        "precision": results.box.mp,     # Mean Precision
        "recall": results.box.mr       # Mean Recall
    }
    mlflow.log_metrics(final_metrics)
    print(f"Logged Final Metrics: {final_metrics}")

    # --- Log the Model as an Artifact ---
    # Log the trained model to this MLflow run under the artifact path "model".
    # NOTE(review): mlflow.pytorch.log_model is documented to expect a
    # torch.nn.Module — confirm the YOLO wrapper is accepted by the installed
    # mlflow/ultralytics versions.
    mlflow.pytorch.log_model(model, "model")
    print("Trained model has been logged as an artifact.")

    print("\n--- Training complete and all results logged to MLflow! ---")