In [1]:
import os
from dotenv import load_dotenv
from roboflow import Roboflow
import yaml
import mlflow
from ultralytics import YOLO

In [None]:
# ----------- ONE-TIME DATASET DOWNLOAD (safe to re-run) ----------- #
# Downloads version 1 of the Roboflow project in YOLOv8 format.
# The download is skipped when a data.yaml already exists in the target
# folder, so "Restart Kernel -> Run All" does not re-fetch the dataset.
# The target folder is the same path the configuration cell reads from.
DOWNLOAD_DIR = os.path.abspath('../data/processed/snapcal-1')

load_dotenv('../.env')
ROBOFLOW_API_KEY = os.getenv('ROBOFLOW_API_KEY')

if os.path.exists(os.path.join(DOWNLOAD_DIR, 'data.yaml')):
    print(f"Dataset already present at {DOWNLOAD_DIR}; skipping download.")
else:
    # Fail early with an actionable message instead of handing None to the client.
    if not ROBOFLOW_API_KEY:
        raise RuntimeError(
            "ROBOFLOW_API_KEY is not set. Add it to ../.env or export it "
            "in the environment before running this cell."
        )
    rf = Roboflow(api_key=ROBOFLOW_API_KEY)
    project = rf.workspace("snapcal-4vbih").project("snapcal-3lsxh")
    version = project.version(1)
    # Download straight into the folder the rest of the notebook expects.
    dataset = version.download("yolov8", location=DOWNLOAD_DIR)
# ------------------------------------------------------------------ #
                

In [8]:
# --- Dataset locations ---
# The dataset folder is given relative to the notebook's working directory
# and resolved to an absolute path; data.yaml sits directly inside it.
_DATASET_RELATIVE_DIR = '../data/processed/snapcal-1'
DATASET_DIR = os.path.abspath(_DATASET_RELATIVE_DIR)
DATASET_YAML_PATH = os.path.join(DATASET_DIR, 'data.yaml')

In [9]:
# Build the image-folder path of the train and valid splits, then report
# whether each one is present on disk.
train_dir, val_dir = (
    os.path.join(DATASET_DIR, split_name, 'images')
    for split_name in ('train', 'valid')
)
print(f"Train dir exists: {os.path.exists(train_dir)}")
print(f"Valid dir exists: {os.path.exists(val_dir)}")

Train dir exists: True
Valid dir exists: True


In [10]:
# Train YOLOv8 on the dataset and record parameters, final metrics and the
# trained model in MLflow. Training only starts when data.yaml and both
# image folders (train and valid) are present.
#
# NOTE(review): Ultralytics ships its own MLflow callback; if it is enabled in
# the Ultralytics settings it may also log to, or end, the active run during
# training -- confirm and disable it if the metrics below end up in another run.
required_paths = {
    "data.yaml": DATASET_YAML_PATH,
    "train images": train_dir,
    "valid images": val_dir,
}
missing_paths = {
    label: path for label, path in required_paths.items() if not os.path.exists(path)
}

if not missing_paths:
    print("✅ All paths verified! Starting training...")

    # --- 1. MLflow Configuration ---
    mlflow.set_experiment("SnapCal Component Detection")

    # --- 2. Model Training ---
    with mlflow.start_run() as run:
        run_id = run.info.run_id
        print(f"Starting model training with MLflow Run ID: {run_id}")

        # --- Log Parameters ---
        params = {
            "epochs": 50,
            "batch_size": 16,
            "image_size": 640,
            "model_type": "yolov8n.pt"
        }
        mlflow.log_params(params)
        print(f"Logged Parameters: {params}")

        # --- Load Pre-trained Model ---
        model = YOLO(params["model_type"])

        # --- Train the Model ---
        # The MLflow run id doubles as the output folder name so the files
        # under training_runs/ can be matched to the tracked run.
        results = model.train(
            data=DATASET_YAML_PATH,  # Using absolute path
            epochs=params["epochs"],
            batch=params["batch_size"],
            imgsz=params["image_size"],
            project="training_runs",
            name=run_id
        )

        # --- Log Metrics ---
        # Cast to plain floats so the logged and printed values do not depend
        # on the numeric type the metrics object happens to return.
        final_metrics = {
            "mAP50-95": float(results.box.map),
            "mAP50": float(results.box.map50),
            "precision": float(results.box.mp),
            "recall": float(results.box.mr)
        }
        mlflow.log_metrics(final_metrics)
        print(f"Logged Final Metrics: {final_metrics}")

        # --- Log the Model as an Artifact ---
        # Log the underlying torch module rather than the YOLO wrapper object,
        # which also carries trainer state that is not part of the network.
        mlflow.pytorch.log_model(model.model, "model")

        # Also keep the best checkpoint file so it can be reloaded with YOLO(path).
        # presumably model.trainer.best points at weights/best.pt -- verify
        # against the installed Ultralytics version.
        trainer = getattr(model, "trainer", None)
        best_weights = getattr(trainer, "best", None)
        if best_weights is not None and os.path.exists(best_weights):
            mlflow.log_artifact(str(best_weights), artifact_path="weights")
        print("Trained model has been logged as an artifact.")

        print("\n--- Training complete and all results logged to MLflow! ---")
else:
    print("❌ Dataset paths not found. Please check your directory structure.")
    # Name each missing path so the problem can be fixed without guesswork.
    for label, path in missing_paths.items():
        print(f"   missing {label}: {path}")

✅ All paths verified! Starting training...
Starting model training with MLflow Run ID: 3da8ec1c309c4fd0add67ccda360af36
Logged Parameters: {'epochs': 50, 'batch_size': 16, 'image_size': 640, 'model_type': 'yolov8n.pt'}
New https://pypi.org/project/ultralytics/8.3.189 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.2 🚀 Python-3.10.12 torch-2.3.0+cu121 CPU (11th Gen Intel Core(TM) i5-11300H 3.10GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/workspaces/snapcal/data/processed/snapcal-1/data.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=training_runs, name=3da8ec1c309c4fd0add67ccda360af36, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, m

100%|██████████| 755k/755k [00:00<00:00, 4.06MB/s]


Overriding model.yaml nc=80 with nc=231

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralyti

[34m[1mtrain: [0mScanning /workspaces/snapcal/data/processed/snapcal-1/train/labels... 1152 images, 13 backgrounds, 0 corrupt: 100%|██████████| 1152/1152 [00:11<00:00, 102.84it/s]


KeyboardInterrupt: 