In [None]:
# ================================================================
# YOLOv5 → Addax (Windows-safe .pt)  —  One-Cell Converter
# ---------------------------------------------------------------
# What this does:
#  1) Mounts Google Drive
#  2) Locates your YOLOv5 checkpoint (best.pt) on Drive
#     - If not found, prompts for upload
#  3) Loads the model safely (PyTorch 2.6+ compatible)
#  4) Embeds class names, with BOTH int and string keys in model.names
#     - Addax indexes names with "0" (str), so we ensure both 0 and "0" work
#  5) Removes any pathlib.PosixPath objects (Windows unpickling fix)
#  6) Strips training-only keys and saves a clean file:
#         /MyDrive/AddaxExports/<stem>_addaxwin.pt
#
# Edit the CONFIG section below if your Drive layout is different.
# This cell is self-contained and safe to share with clients.
# ================================================================

# ------------------------
# CONFIG — EDIT HERE
# ------------------------
# All values below are plain strings/lists read by the rest of the cell.
#
# (A) Path to dataset.yaml, the first place class names are looked up.
#     Class-name priority: dataset.yaml -> names stored in the model -> MANUAL_NAMES below.
DATA_YAML  = "/content/drive/MyDrive/combined_copy/dataset.yaml"  # <-- YOUR example

# (B) Where to look for the YOLOv5 checkpoint on Drive (your example paths, can edit).
#     SEARCH_GLOB is tried first; FALLBACK_PT is only used when the glob matches nothing.
SEARCH_GLOB = "/content/drive/MyDrive/yolov5_runs/train/*/weights/best.pt"   # common YOLOv5 run path
FALLBACK_PT = "/content/drive/MyDrive/combined_copy/best.pt"                 # optional fixed path

# (C) If neither path finds a file, the cell prompts for an upload.
#     To pin one exact checkpoint instead, put its full path in SEARCH_GLOB
#     (a pattern without wildcards matches only that file) or in FALLBACK_PT.

# (D) Last-resort class names: used only when neither dataset.yaml nor the
#     loaded model provides any names.
MANUAL_NAMES = ["Cyclist", "Equestrian", "Pedestrian", "Vehicle"]  # <-- YOUR 4 classes

# (E) Where to save the Addax-ready file on Drive (created if it does not exist):
SAVE_DIR = "/content/drive/MyDrive/AddaxExports"                    # <-- YOUR example
# ------------------------

# ========== NO EDITS NEEDED BELOW THIS LINE ==========
# Standard library
import glob
import hashlib
import os
import pathlib
import shutil
import sys
import time
from pathlib import Path

# Third-party / Colab runtime
import torch
import yaml
from google.colab import drive

# Mount Drive first: the checkpoint lookup and the export directory both use Drive paths.
drive.mount('/content/drive')

# Create the export directory (and any missing parents); a no-op if it already exists.
Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)

def log(msg):
    """Print *msg* on its own line, prefixed with the ``[AddaxPT]`` tag."""
    line = f"[AddaxPT] {msg}"
    print(line)

# 1) Locate source checkpoint on Drive (or upload)
# Resolution order: newest SEARCH_GLOB match -> FALLBACK_PT -> interactive upload.
hits = sorted(glob.glob(SEARCH_GLOB))
if hits:
    # Choose by modification time. A lexicographic sort ranks ".../exp9/..."
    # after ".../exp10/...", so "last in sorted order" is not the latest run.
    SRC = max(hits, key=os.path.getmtime)
    if len(hits) > 1:
        log(f"Found {len(hits)} checkpoints matching SEARCH_GLOB; using the most recently modified.")
elif os.path.exists(FALLBACK_PT):
    SRC = FALLBACK_PT
else:
    SRC = None

if not SRC:
    from google.colab import files
    log("No best.pt found on Drive. Please upload your YOLOv5 .pt now (e.g., best.pt).")
    up = files.upload()
    # NOTE(review): assumes files.upload() stores files in the current working
    # directory under the returned key names — confirm. Resolving against the
    # working directory avoids hard-coding /content.
    SRC = next((os.path.abspath(k) for k in up if k.lower().endswith(".pt")), None)
    if not SRC:
        # Raise explicitly: an `assert` would be stripped when Python runs with -O.
        raise FileNotFoundError("No .pt uploaded; cannot proceed.")

log(f"Source: {SRC}  |  size: {round(os.path.getsize(SRC)/1e6, 2)} MB")

# Copy to local for faster/safer I/O
local_pt = "/content/best_local.pt"
if os.path.abspath(SRC) != os.path.abspath(local_pt):
    shutil.copyfile(SRC, local_pt)
    log(f"Copied to: {local_pt}")
else:
    # shutil.copyfile raises SameFileError when source and destination coincide.
    log(f"Source is already the local working copy: {local_pt}")

# 2) Make sure YOLOv5 code is available (for attempt_load & class allowlisting)
if not Path("/content/yolov5").exists():
    !git clone -q https://github.com/ultralytics/yolov5 /content/yolov5
sys.path.append("/content/yolov5")

# Allowlist YOLOv5 DetectionModel for torch.load (PyTorch 2.6 safe loading)
from models.yolo import DetectionModel
try:
    from torch.serialization import add_safe_globals
    add_safe_globals([DetectionModel])
except Exception:
    pass

# 3) Resolve class names
# `names` stays None when dataset.yaml is missing, unreadable, or has no usable
# `names` entry; later steps then fall back to the model or MANUAL_NAMES.
names = None
if os.path.exists(DATA_YAML):
    try:
        with open(DATA_YAML, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        raw_names = data.get("names") if isinstance(data, dict) else None
        if isinstance(raw_names, dict):
            # Mapping form ({0: "a", 1: "b"}): order by class index and require a
            # gap-free 0..N-1 range so list positions equal class ids.
            by_index = {int(k): v for k, v in raw_names.items()}
            if sorted(by_index) != list(range(len(by_index))):
                raise ValueError("'names' mapping keys must be exactly 0..N-1")
            raw_names = [by_index[i] for i in range(len(by_index))]
        if isinstance(raw_names, (list, tuple)) and raw_names:
            names = [str(x) for x in raw_names]
            log(f"Loaded class names from dataset.yaml: {names}")
        else:
            log("dataset.yaml has no usable 'names' entry. Will fallback.")
    except Exception as e:
        # Any parse/shape problem is non-fatal: the fallbacks take over.
        log(f"Warning: Failed to read dataset.yaml ({e}). Will fallback.")

# 4) Load the model
# Preferred path: YOLOv5's attempt_load. Fallback: unpickle the checkpoint
# directly with torch.load and take its EMA weights (or plain model).
# `ckpt` stays None unless the fallback produced a checkpoint dict.
model, ckpt = None, None
try:
    from models.experimental import attempt_load
    try:
        # NOTE(review): current YOLOv5 appears to name the target-device
        # parameter `device` — confirm against the cloned revision.
        model = attempt_load(local_pt, device="cpu", inplace=True, fuse=True)  # robust path
    except TypeError:
        # Presumably an older YOLOv5 whose signature uses `map_location` instead.
        model = attempt_load(local_pt, map_location="cpu", inplace=True, fuse=True)
    log("Loaded model via attempt_load ✅")
except Exception as e:
    log(f"attempt_load failed ({e}); trying safe torch.load")
    model = None
    # SECURITY: weights_only=False runs full unpickling, which can execute
    # arbitrary code. Only use this cell with checkpoints you trust.
    ckpt = torch.load(local_pt, map_location="cpu", weights_only=False)  # trusted file
    if isinstance(ckpt, dict):
        # Compare against None explicitly instead of relying on module truthiness.
        ema = ckpt.get("ema")
        model = ema if ema is not None else ckpt.get("model")
    elif isinstance(ckpt, torch.nn.Module):
        # The file holds a bare module rather than a checkpoint dict.
        model, ckpt = ckpt, None
    if model is None:
        # Raise explicitly: an `assert` would be stripped when Python runs with -O.
        raise RuntimeError("Checkpoint missing 'model' or 'ema'.")

# If names still unknown, try inside the model then fallback to MANUAL_NAMES
if names is None:
    model_names = getattr(model, "names", None)
    if isinstance(model_names, dict) and model_names:
        # Collapse the mapping to one entry per class index. A checkpoint that
        # already carries both 0 and "0" keys (e.g. a previously converted file)
        # would otherwise yield every class twice and double `nc`.
        by_index = {}
        for key, value in model_names.items():
            try:
                by_index.setdefault(int(key), str(value))
            except (TypeError, ValueError):
                continue  # key is not a class index
        names = [by_index[i] for i in sorted(by_index)]
        if not names:
            # No index-like keys at all: keep the values in their stored order.
            names = [str(v) for v in model_names.values()]
    elif model_names:
        names = [str(n) for n in model_names]

    if names:
        log(f"Using class names from checkpoint: {names}")
    else:
        names = [str(n) for n in MANUAL_NAMES]
        log(f"Using MANUAL_NAMES fallback: {names}")

# Build names mapping with BOTH int and str keys (Addax expects string indexing too)
# Integer keys are inserted first, followed by their string twins ("0", "1", ...).
name_map = dict(enumerate(names))
name_map.update((str(i), n) for i, n in enumerate(names))

# Store the mapping and the class count on the model itself.
model.names = name_map
model.nc = len(names)

# Optional: fuse layers (may already be fused) and set to eval/float
# Both steps are best-effort, but a failure is reported instead of being
# silently dropped, so an unfused or non-float export is visible to the user.
try:
    if hasattr(model, "fuse"):
        model.fuse()
except Exception as e:
    log(f"Warning: layer fusion skipped ({e}).")
try:
    model.float().eval()
except Exception as e:
    log(f"Warning: could not switch model to float/eval ({e}).")

# 5) Sanitize: recursively convert ANY pathlib.Path/PurePath to strings
def to_serializable(obj, depth=0, max_depth=10):
    """Return *obj* with every pathlib path replaced by its string form.

    Containers are rebuilt; other objects are edited in place:

    * ``pathlib.PurePath`` (and subclasses) -> ``str``
    * ``dict`` -> new plain ``dict`` with sanitized keys and values
    * ``list`` / ``tuple`` / ``set`` -> new container of the same type
      (namedtuples are rebuilt field by field)
    * any other object with a ``__dict__`` that is not a ``torch.nn.Module``
      -> its attributes are sanitized in place and the same object is returned
    * everything else, including ``torch.nn.Module`` instances -> returned as-is

    Args:
        obj: Value to sanitize.
        depth: Current nesting level; callers normally leave this at 0.
        max_depth: Deepest nesting level that is still processed. Anything
            nested deeper is returned unchanged, which also bounds recursion
            on self-referencing structures.

    Returns:
        The sanitized value. Any error raised while sanitizing makes the
        function return the value it was given, unchanged.
    """
    if depth > max_depth:
        return obj
    child_depth = depth + 1
    try:
        # Path is a PurePath subclass, so this single check covers both.
        if isinstance(obj, pathlib.PurePath):
            return str(obj)
        if isinstance(obj, dict):
            # Forward max_depth so a caller-supplied limit applies at every level.
            return {
                to_serializable(k, child_depth, max_depth): to_serializable(v, child_depth, max_depth)
                for k, v in obj.items()
            }
        if isinstance(obj, (list, tuple, set)):
            items = [to_serializable(v, child_depth, max_depth) for v in obj]
            if isinstance(obj, tuple) and hasattr(obj, "_fields"):
                # namedtuple constructors take one positional argument per field.
                return type(obj)(*items)
            return type(obj)(items)
        if hasattr(obj, "__dict__") and not isinstance(obj, torch.nn.Module):
            for k in list(vars(obj)):
                try:
                    setattr(obj, k, to_serializable(getattr(obj, k), child_depth, max_depth))
                except Exception:
                    # Deliberate best-effort: skip attributes that cannot be reassigned.
                    pass
            return obj
    except Exception:
        return obj
    return obj

# Clean checkpoint dict
# Reuse the loaded checkpoint dict when there is one (it is updated in place);
# otherwise start from an empty dict. Either way it carries the prepared model.
if isinstance(ckpt, dict):
    clean = ckpt
else:
    clean = {}
clean["model"] = model

# Common attributes that might contain Path objects
for attr in ("yaml", "cfg", "args"):
    if not hasattr(model, attr):
        continue
    try:
        setattr(model, attr, to_serializable(getattr(model, attr)))
    except Exception:
        pass

# Sanitize entire checkpoint
# Iterate over a snapshot of the items because values are reassigned in the loop.
for k, v in list(clean.items()):
    clean[k] = to_serializable(v)

# Strip training-only baggage
# Keys that exist are blanked to None; keys that are absent are not added.
clean.update({
    key: None
    for key in ("optimizer", "updates", "wandb_id", "training_results", "ema")
    if key in clean
})
clean.setdefault("epoch", -1)
clean.setdefault("best_fitness", None)
clean["date"] = time.strftime("%Y-%m-%d %H:%M:%S")

# Save as Windows-safe Addax file
# The output name is the source file's stem plus the "_addaxwin.pt" suffix.
stem = Path(SRC).stem
OUT = os.path.join(SAVE_DIR, f"{stem}_addaxwin.pt")
torch.save(clean, OUT)

# Quick verification
def md5(p):
    """Return the hexadecimal MD5 digest of the file at path *p*.

    The file is read in 1 MiB pieces so it is never held in memory whole.
    """
    digest = hashlib.md5()
    chunk_size = 1 << 20
    with open(p, "rb") as fh:
        while True:
            block = fh.read(chunk_size)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

# Reload the file that was just written and confirm the embedded class info.
# weights_only=False is used because the file contains a pickled model object.
chk = torch.load(OUT, map_location="cpu", weights_only=False)
m2 = chk["model"]
saved_names = getattr(m2, "names", None)
# Check EVERY class under both the int and the str key, not only class 0.
# An empty class list counts as a failure instead of raising IndexError.
ok_names = (
    bool(names)
    and isinstance(saved_names, dict)
    and getattr(m2, "nc", None) == len(names)
    and all(
        saved_names.get(i) == n and saved_names.get(str(i)) == n
        for i, n in enumerate(names)
    )
)

print("\n================= DONE =================")
print("Saved Addax file :", OUT)
print("Size (MB)        :", round(os.path.getsize(OUT)/1e6, 2), "| MD5:", md5(OUT))
print("Class check      :", "OK" if ok_names else "CHECK FAILED")
print("Classes          :", names)
print("Upload this file to Addax. (It includes both int & str keys in model.names and no PosixPath.)")
