# Production Training Pipeline

**Input**: `4_training_data/` (Unzipped Roboflow Export)
**Output**: `models/best.pt`

**Logic**:
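1.  Finds the most recently modified dataset folder in `4_training_data` (a folder counts as a dataset only if it contains `data.yaml`).
2.  Rewrites the `train`/`val`/`test` entries in `data.yaml` to absolute paths so they resolve on Colab.
3.  Loads the previous `models/best.pt` (falls back to the stock `yolov8n.pt` weights if it does not exist).
4.  Retrains (fine-tunes) on the new data.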

In [None]:
# Step 1: Install Dependencies
# `ultralytics` provides the YOLO class that Step 4 imports for training.
# NOTE(review): `roboflow` is not imported by any cell visible in this notebook
# (the dataset is read from an already-unzipped export) — confirm it is still needed.
!pip install ultralytics roboflow

In [None]:
# Step 2: Setup Paths & Auto-Detect Dataset
#
# Mounts Google Drive, switches into the repository folder and selects the
# dataset to train on. Defines REPO_ROOT, TRAIN_ROOT and DATASET_DIR for the
# cells that follow.
import os
import glob
from google.colab import drive

drive.mount('/content/drive')

REPO_ROOT = "/content/drive/MyDrive/Vision-Project-Prod-Pipeline"
os.chdir(REPO_ROOT)
print(f"Working Directory: {os.getcwd()}")

# Look for datasets in 4_training_data
TRAIN_ROOT = os.path.join(REPO_ROOT, "4_training_data")
possible_datasets = glob.glob(os.path.join(TRAIN_ROOT, "*"))

# Filter for folders that look like datasets (contain data.yaml)
valid_datasets = [
    d
    for d in possible_datasets
    if os.path.isdir(d) and os.path.exists(os.path.join(d, "data.yaml"))
]

if not valid_datasets:
    # Stop here instead of just printing: otherwise the next cells either hit
    # a NameError on DATASET_DIR or, worse, silently reuse a stale DATASET_DIR
    # left over from an earlier run in the same kernel.
    raise FileNotFoundError(
        "ERROR: No valid dataset found in '4_training_data'. "
        "Valid dataset must contain 'data.yaml'."
    )

# Pick latest modified
DATASET_DIR = max(valid_datasets, key=os.path.getmtime)
print(f"FOUND DATASET: {DATASET_DIR}")

In [None]:
# Step 3: Fix YAML Paths
#
# Rewrites the split entries of the selected dataset's data.yaml so they are
# absolute paths under DATASET_DIR, then writes the file back in place.
import yaml

yaml_path = os.path.join(DATASET_DIR, "data.yaml")

with open(yaml_path, 'r', encoding='utf-8') as f:
    # An empty file parses to None; fall back to an empty mapping so the
    # assignments below do not fail with a TypeError.
    data = yaml.safe_load(f) or {}

# Update paths to be absolute.
# Maps data.yaml key -> split folder name inside the dataset ("val" lives in "valid").
split_folders = {'train': 'train', 'val': 'valid', 'test': 'test'}
for split_key, folder in split_folders.items():
    images_dir = os.path.join(DATASET_DIR, folder, "images")
    if os.path.isdir(images_dir):
        data[split_key] = images_dir
    elif split_key == 'test':
        # The test split is optional: drop the entry rather than leave the
        # YAML pointing at a directory that does not exist.
        data.pop(split_key, None)
        print(f"Note: no test split found at {images_dir}; omitting 'test' from data.yaml")
    else:
        # Training cannot proceed without train/val images, so fail now with a
        # clear message instead of deep inside the training call.
        raise FileNotFoundError(
            f"Required '{split_key}' images folder not found: {images_dir}"
        )

with open(yaml_path, 'w', encoding='utf-8') as f:
    # sort_keys=False keeps the original key order of the export.
    yaml.safe_dump(data, f, sort_keys=False)

print("Fixed data.yaml paths!")
# .get() so a YAML without 'names' does not raise after the file was already rewritten.
print("Classes:", data.get('names'))

In [None]:
# Step 4: Train
#
# Fine-tunes from the existing models/best.pt when present (otherwise starts
# from the stock yolov8n.pt weights), then publishes the newly trained best
# weights to models/best.pt, which is the documented output of this pipeline.
import shutil

from ultralytics import YOLO

MODEL_PATH = os.path.join(REPO_ROOT, "models/best.pt")
if not os.path.exists(MODEL_PATH):
    print("Warning: No existing best.pt found. Starting fresh from yolov8n.pt")
    model = YOLO("yolov8n.pt")
else:
    print(f"Loading custom model: {MODEL_PATH}")
    model = YOLO(MODEL_PATH)

results = model.train(
    data=yaml_path,
    epochs=50,
    imgsz=640,
    project=os.path.join(DATASET_DIR, "runs"),
    name="production_train_run"
)

# Publish the new best weights to models/best.pt.
# The path is taken from the trainer rather than hard-coded, because the run
# directory name may be auto-incremented when "production_train_run" already exists.
trainer = getattr(model, "trainer", None)
new_best_path = getattr(trainer, "best", None)

if new_best_path is not None and os.path.exists(new_best_path):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    if os.path.exists(MODEL_PATH):
        # Keep the previous production model so a bad run can be rolled back.
        backup_path = os.path.join(os.path.dirname(MODEL_PATH), "best_previous.pt")
        shutil.copy2(MODEL_PATH, backup_path)
        print(f"Backed up previous model to: {backup_path}")
    shutil.copy2(str(new_best_path), MODEL_PATH)
    print(f"Copied new best model: {new_best_path} -> {MODEL_PATH}")
else:
    print("Warning: Training finished but no best.pt was found; models/best.pt was not updated.")