In [None]:
# @title Cell 1: Final Combined Installation and Verification
# This is the ONLY cell you need for installations.
# It removes the libraries listed below, reinstalls them, and then prints
# version / CUDA information so a broken environment is noticed before any
# data or training cell runs.

# 1. Uninstall to ensure a clean slate
#    (-y answers "yes" to every confirmation prompt so the cell never blocks).
print("Uninstalling all relevant libraries...")
!pip uninstall -y torch torchvision torchaudio ultralytics fiftyone tflite-runtime roboflow datasets

# 2. Install GPU-enabled PyTorch
#    The wheels are fetched from the package index passed via --index-url
#    (the "cu124" index of download.pytorch.org).
print("\nInstalling GPU-enabled PyTorch...")
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# 3. Install all other dependencies in one go
#    --quiet trims pip's output. No versions are pinned, so the resolved
#    versions can differ between runs.
print("\nInstalling Ultralytics and other libraries...")
!pip install ultralytics roboflow fiftyone datasets tflite-runtime --quiet

# 4. Verify the installation
#    torch is imported only after the reinstall above so the freshly
#    installed package is the one being reported.
print("\nVerifying the environment...")
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"Is CUDA available? {torch.cuda.is_available()}")

# ultralytics.checks() prints its own environment summary.
print("\nRunning Ultralytics checks:")
import ultralytics
ultralytics.checks()

In [None]:
# @title Cell 2: Class Setup and Directory Initialization
# ----------------------------
# CLASS SETUP & DIRECTORY INIT
# ----------------------------
import os

# Master label vocabulary. A class id used in the generated YOLO label files is
# the position of the name in this list, so the order must never change once
# labels have been written.
MASTER_CLASS_LIST = [
    # 0-6: ground cover
    "grass", "dirt", "sand", "mulch", "pavement", "concrete", "gravel",
    # 7-13: vegetation and natural obstacles
    "tree", "shrub", "flower", "planter", "stump", "rock", "hill",
    # 14-20: water
    "water_feature", "ditch", "pool", "lake", "river", "fountain", "waterfall",
    # 21-31: boundaries and built structures
    "field", "curb", "edging", "fence", "gate", "retaining_wall", "railing",
    "bench", "bridge", "stairs", "path",
    # 32-36: signs, poles and lights
    "sign", "pole", "lamp_post", "streetlight", "traffic_light",
    # 37-40: people and animals
    "person", "animal", "dog", "cat",
    # 41-51: loose objects and yard equipment
    "bicycle", "toy", "tool", "hose", "sprinkler", "swing_set", "slide",
    "sandbox", "trampoline", "furniture", "decoration",
    # 52-55: vehicles
    "vehicle", "car", "bus", "truck",
    # 56-58: street furniture
    "mailbox", "trash_bin", "recycling_bin",
]

# Reverse lookup: class name -> class id.
master_index = dict(zip(MASTER_CLASS_LIST, range(len(MASTER_CLASS_LIST))))

# Root of the merged dataset; every later cell writes below this directory.
BASE_DIR = "/content/mower_dataset"
for split in ("train", "val"):
    for subdir in ("images", "labels"):
        os.makedirs(f"{BASE_DIR}/{subdir}/{split}", exist_ok=True)


In [None]:
# @title Cell 3: Kaggle setup for COCO
from google.colab import drive
import os
import shutil

# Make Google Drive visible under /content/drive.
drive.mount('/content/drive')

# Where the Kaggle API token is expected in Drive, and where it is installed
# on the Colab VM.
kaggle_drive_path = "/content/drive/MyDrive/kaggle.json"
kaggle_colab_path = "/root/.kaggle/kaggle.json"

# The destination folder has to exist before the token can be placed in it.
if not os.path.exists("/root/.kaggle"):
    os.makedirs("/root/.kaggle")

if not os.path.exists(kaggle_drive_path):
    # No token in Drive: explain what is missing, then ask for a manual upload.
    print(f"Error: {kaggle_drive_path} not found in your Google Drive.")
    print("Please upload kaggle.json to the root of your MyDrive or manually upload it.")
    from google.colab import files
    uploaded = files.upload()  # upload kaggle.json
    # Every uploaded file is moved onto the same destination path.
    for fn in uploaded:
        shutil.move(fn, kaggle_colab_path)
    os.chmod(kaggle_colab_path, 0o600)
    print("kaggle.json uploaded manually.")
else:
    # Token found in Drive: install it with owner-only permissions.
    shutil.copy(kaggle_drive_path, kaggle_colab_path)
    os.chmod(kaggle_colab_path, 0o600)
    print("kaggle.json copied from Google Drive.")

In [None]:
# @title Cell 4: Download COCO 2017 (YOLOv8 format from Kaggle)
# Downloads the archive into /content with the Kaggle CLI (presumably using
# the token installed at /root/.kaggle/kaggle.json by Cell 3) and extracts it
# into /content/coco2017, which is where Cell 5 looks for the split folders.
!kaggle datasets download -d paragmraw/coco-2017-dataset-yolov8-format -p /content
!unzip -q /content/coco-2017-dataset-yolov8-format.zip -d /content/coco2017
# NOTE: this message is printed unconditionally; the exit status of the two
# shell commands above is not checked.
print("COCO 2017 dataset downloaded and extracted!")

In [None]:
# @title Cell 5: COCO mapping and filtering
import glob
import os
import random
import shutil

from tqdm import tqdm

# Fail fast when Cell 2 has not been run: both names are needed below, and
# discovering that only after part of the dataset has been processed is wasteful.
for _required_name in ("BASE_DIR", "master_index"):
    if _required_name not in globals():
        raise NameError(f"{_required_name} is not defined. Please run Cell 2 first.")

# COCO class id -> master class id.
# The targets are looked up by name in master_index so they cannot drift out of
# sync with MASTER_CLASS_LIST (hand-written numbers previously pointed at the
# wrong classes, e.g. "person" was written as the id of "traffic_light").
# NOTE(review): the source ids assume the dataset keeps the standard 80-class
# COCO ordering (chair = 56, couch = 57); confirm against the dataset's own
# class list.
COCO_TO_MASTER = {
    0: master_index["person"],      # person
    1: master_index["bicycle"],     # bicycle
    2: master_index["car"],         # car
    3: master_index["vehicle"],     # motorcycle -> vehicle
    5: master_index["bus"],         # bus
    7: master_index["truck"],       # truck
    15: master_index["cat"],        # cat
    16: master_index["dog"],        # dog
    56: master_index["furniture"],  # chair -> furniture
    57: master_index["furniture"],  # couch -> furniture (was labelled "chair")
    60: master_index["furniture"],  # dining table -> furniture
    58: master_index["planter"],    # potted plant -> planter
    77: master_index["toy"],        # teddy bear -> toy
}
COCO_IMG_LIMIT = None  # Adjust for RAM/time; set to None for full dataset

for split in ["train", "val"]:
    img_dir = f"/content/coco2017/{split}/images"
    lbl_dir = f"/content/coco2017/{split}/labels"
    imgs = glob.glob(f"{img_dir}/*.jpg")
    if COCO_IMG_LIMIT:
        imgs = random.sample(imgs, min(COCO_IMG_LIMIT, len(imgs)))
    for img_path in tqdm(imgs, desc=f"COCO {split}"):
        base = os.path.splitext(os.path.basename(img_path))[0]
        lbl_path = os.path.join(lbl_dir, base + ".txt")
        # Images without a label file carry no annotations we could keep.
        if not os.path.exists(lbl_path):
            continue
        new_lines = []
        with open(lbl_path, "r") as f:
            for line in f:
                arr = line.strip().split()
                if not arr:
                    continue
                cls = int(arr[0])
                if cls in COCO_TO_MASTER:
                    new_cls = COCO_TO_MASTER[cls]
                    # Keep only the first four values after the class id.
                    # NOTE(review): this is a bounding box only if the rows are
                    # "class xc yc w h"; polygon rows would need converting.
                    new_lines.append(" ".join([str(new_cls)] + arr[1:5]))
        # Only images that still have at least one mapped object are copied.
        if new_lines:
            out_img = f"{BASE_DIR}/images/{split}/{base}.jpg"
            out_lbl = f"{BASE_DIR}/labels/{split}/{base}.txt"
            shutil.copy(img_path, out_img)
            with open(out_lbl, "w") as f:
                f.write("\n".join(new_lines))

print("COCO dataset processed!")

In [None]:
# @title Cell 6: OpenImages dataset download

import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.openimages as fouo

# 1. Every class name known to the Open Images helper, as a set for fast lookup
oi_valid_classes = set(fouo.get_classes())

# 2. Yard / obstacle classes we would like to pull in
OI_YARD_CLASSES = [
    "Flower", "Fountain", "Stairs", "Person", "Dog", "Cat", "Bicycle",
    "Car", "Bus", "Truck", "Motorcycle", "Bench", "Tree", "Lamp", "Wheelchair", "Table", "Chair",
]

# 3. Drop any requested name the helper does not know, keeping the order
oi_classes_to_load = []
for oi_class in OI_YARD_CLASSES:
    if oi_class in oi_valid_classes:
        oi_classes_to_load.append(oi_class)
print("Attempting to load from OpenImages:", oi_classes_to_load)

# 4. Download a shuffled sample of the train split with detection labels
oi_zoo_options = dict(
    split="train",
    label_types=["detections"],
    classes=oi_classes_to_load,
    max_samples=2500,  # Increased sample size for more variety
    shuffle=True,
)
oi_dataset = foz.load_zoo_dataset("open-images-v6", **oi_zoo_options)

# 5. Write the sample to disk in YOLO format for the integration cell
oi_dataset.export(
    export_dir="/content/fo_openimages_yolo",
    dataset_type=fo.types.YOLOv5Dataset,
)
print("OpenImages export complete.")

In [None]:
# @title Cell 7: Process and Integrate OpenImages Data
# Imported here so the cell does not depend on imports made by earlier cells.
import glob
import os
import shutil

import yaml
from tqdm import tqdm

print("Processing and integrating OpenImages dataset...")

# Directory where fiftyone exported the data
oi_export_dir = "/content/fo_openimages_yolo"
# Flat layout this cell originally expected.
oi_img_dir = f"{oi_export_dir}/data/images"
oi_lbl_dir = f"{oi_export_dir}/data/labels"

# Check if the export directory exists
if not os.path.isdir(oi_export_dir):
    print("OpenImages export directory not found. Skipping integration.")
else:
    # Read the class mapping from the exported dataset.yaml
    with open(f"{oi_export_dir}/dataset.yaml", 'r') as f:
        oi_yaml = yaml.safe_load(f)
    oi_names = oi_yaml['names']

    # "names" may be a list (position = class id) or a mapping {class id: name};
    # normalise both shapes to (class id, name) pairs.
    if isinstance(oi_names, dict):
        oi_indexed_names = [(int(idx), name) for idx, name in oi_names.items()]
    else:
        oi_indexed_names = list(enumerate(oi_names))

    # Create a mapping from OI class name to our master class index
    OI_NAME_TO_MASTER = {
        "Tree": master_index["tree"],
        "Flower": master_index["flower"],
        "Person": master_index["person"],
        "Car": master_index["car"],
        "Bus": master_index["bus"],
        "Truck": master_index["truck"],
        "Bicycle": master_index["bicycle"],
        "Cat": master_index["cat"],
        "Dog": master_index["dog"],
        "Stairs": master_index["stairs"],
        "Fountain": master_index["fountain"],
        "Bench": master_index["bench"],
        "Lamp": master_index["lamp_post"],
        "Chair": master_index["furniture"],
        "Table": master_index["furniture"],
        "Motorcycle": master_index["vehicle"],
        "Wheelchair": master_index["vehicle"]
    }

    # Map from the numeric index in the OI export to our master index;
    # exported classes we have no target for are left out and later skipped.
    oi_idx_to_master_idx = {
        oi_idx: OI_NAME_TO_MASTER[name]
        for oi_idx, name in oi_indexed_names
        if name in OI_NAME_TO_MASTER
    }

    # Collect (image, label) pairs from both layouts:
    #   1. the flat data/images + data/labels layout assumed originally, and
    #   2. images/<split>/ + labels/<split>/, which is where the YOLOv5 export
    #      is documented to place files (see FiftyOne YOLOv5Dataset docs).
    oi_pairs = [
        (
            img_path,
            os.path.join(oi_lbl_dir, os.path.splitext(os.path.basename(img_path))[0] + ".txt"),
        )
        for img_path in sorted(glob.glob(f"{oi_img_dir}/*.jpg"))
    ]
    oi_split_img_root = os.path.join(oi_export_dir, "images")
    for img_path in sorted(glob.glob(f"{oi_split_img_root}/**/*.jpg", recursive=True)):
        rel_stem = os.path.splitext(os.path.relpath(img_path, oi_split_img_root))[0]
        oi_pairs.append((img_path, os.path.join(oi_export_dir, "labels", rel_stem + ".txt")))
    imgs = [img_path for img_path, _ in oi_pairs]

    if not oi_pairs:
        # Make an empty result visible instead of reporting success on nothing.
        print(f"Warning: no .jpg images found under {oi_export_dir}; nothing was added.")

    # Process and copy the files
    oi_added = 0
    for img_path, lbl_path in tqdm(oi_pairs, desc="OpenImages Process"):
        base = os.path.splitext(os.path.basename(img_path))[0]

        if not os.path.exists(lbl_path):
            continue

        new_lines = []
        with open(lbl_path, 'r') as f:
            for line in f:
                arr = line.strip().split()
                if not arr:
                    continue
                oi_cls_idx = int(arr[0])
                if oi_cls_idx in oi_idx_to_master_idx:
                    master_cls_idx = oi_idx_to_master_idx[oi_cls_idx]
                    # Ensure we only take the 4 bounding box coordinates
                    new_lines.append(f"{master_cls_idx} " + " ".join(arr[1:5]))

        if new_lines:
            # Copy to train split; the "oi_" prefix keeps names distinct from
            # files contributed by the other sources.
            out_img = f"{BASE_DIR}/images/train/oi_{base}.jpg"
            out_lbl = f"{BASE_DIR}/labels/train/oi_{base}.txt"
            shutil.copy(img_path, out_img)
            with open(out_lbl, 'w') as f:
                f.write("\n".join(new_lines))
            oi_added += 1

    print(f"Added {oi_added} OpenImages images to the training set.")
    print("OpenImages dataset processed and added to training set!")

In [None]:
# @title Cell 8: Roboflow Fence dataset (requires API key)
from roboflow import Roboflow
from google.colab import userdata
# The API key is read from Colab's secret store rather than being hard-coded.
ROBOFLOW_API_KEY = userdata.get('ROBOFLOW_API_KEY')
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
fence_ds = rf.workspace("uji-thesis").project("broken-fence-detection").version(1).download("yolov8", location=f"{BASE_DIR}/fence")


def _label_row_to_bbox(fields):
    """Return the four box values (xc, yc, w, h) of one YOLO label row as strings.

    fields: the whitespace-split row, class id first.
    Returns None when the row has too few values to describe a box.

    Rows with at least three coordinate pairs are treated as a polygon
    ("class x1 y1 x2 y2 ... xn yn") and converted to the enclosing box;
    simply taking the first four values of such a row would yield two vertices,
    not a box. Shorter rows are assumed to be "class xc yc w h [extras]" and
    their box values are passed through unchanged.
    NOTE(review): a box row carrying exactly two extra columns is
    indistinguishable from a triangle; confirm no such rows exist.
    """
    coords = fields[1:]
    if len(coords) >= 6 and len(coords) % 2 == 0:
        xs = [float(value) for value in coords[0::2]]
        ys = [float(value) for value in coords[1::2]]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        return [
            f"{(x_min + x_max) / 2:.6f}",
            f"{(y_min + y_max) / 2:.6f}",
            f"{x_max - x_min:.6f}",
            f"{y_max - y_min:.6f}",
        ]
    if len(coords) >= 4:
        return coords[:4]
    return None


for split in ["train", "valid"]:
    img_dir = f"{BASE_DIR}/fence/{split}/images"
    lbl_dir = f"{BASE_DIR}/fence/{split}/labels"
    imgs = glob.glob(f"{img_dir}/*.jpg")
    for img_path in tqdm(imgs, desc=f"Fence {split}"):
        base = os.path.splitext(os.path.basename(img_path))[0]
        lbl_path = os.path.join(lbl_dir, base + ".txt")
        if not os.path.exists(lbl_path):
            continue
        new_lines = []
        with open(lbl_path, "r") as f:
            for line in f:
                arr = line.strip().split()
                if not arr:
                    continue
                bbox = _label_row_to_bbox(arr)
                if bbox is None:
                    # Malformed row: writing it out would corrupt the label file.
                    continue
                # Every object of this dataset is relabelled as "fence".
                new_lines.append(f"{master_index['fence']} " + " ".join(bbox))
        if new_lines:
            # Add all Roboflow data to the training set
            out_img = f"{BASE_DIR}/images/train/{base}_rf.jpg"
            out_lbl = f"{BASE_DIR}/labels/train/{base}_rf.txt"
            shutil.copy(img_path, out_img)
            with open(out_lbl, "w") as f:
                f.write("\n".join(new_lines))

print("Fence dataset processed!")

In [None]:
# @title Cell 9: ADE20K via Kaggle (awsaf49/ade20k-dataset)
# Download and extract ADE20K only when the archive is not already on disk, so
# re-running the cell does not fetch it again.
# NOTE(review): the guard looks at the zip file only, not at the extracted
# /content/ade20k folder. `os` is also used here before this cell's own
# `import os` further down, so the line relies on an earlier cell's import.
if not os.path.exists("/content/ade20k-dataset.zip"):
    !kaggle datasets download -d awsaf49/ade20k-dataset -p /content
    !unzip -q /content/ade20k-dataset.zip -d /content/ade20k

import os
import glob
import shutil
from PIL import Image
import numpy as np
from tqdm import tqdm

# ADE20K category mapping for yard/obstacle/terrain: ADE mask value -> master
# class name. Targets are written as names and resolved through master_index so
# they cannot drift out of sync with MASTER_CLASS_LIST (hand-written numbers
# previously pointed at the wrong classes, e.g. "person" was written as the id
# of "traffic_light" and "car" as the id of "decoration").
_ADE_TO_MASTER_NAME = {
    1: "retaining_wall",    # wall
    5: "tree",              # tree
    7: "pavement",          # road, route
    10: "grass",            # grass
    12: "pavement",         # sidewalk, pavement
    13: "person",           # person
    14: "dirt",             # earth, ground
    16: "furniture",        # table
    18: "shrub",            # plant
    21: "car",              # car
    22: "water_feature",    # water
    30: "field",            # field
    33: "fence",            # fence
    35: "rock",             # rock, stone
    39: "railing",          # railing, rail
    44: "sign",             # signboard, sign
    47: "sand",             # sand
    53: "path",             # path
    54: "stairs",           # stairs, steps
    60: "stairs",           # stairway, staircase
    61: "river",            # river
    62: "bridge",           # bridge
    67: "flower",           # flower
    69: "hill",             # hill
    70: "bench",            # bench
    73: "tree",             # palm tree
    # NOTE(review): labelled "shrub" by the original author; confirm that ADE
    # category 80 really is a shrub class in the dataset's category list.
    80: "shrub",            # shrub
    81: "bus",              # bus
    83: "lamp_post",        # light
    84: "truck",            # truck
    88: "streetlight",      # streetlight
    94: "pole",             # pole
    95: "dirt",             # land, ground, soil
    105: "fountain",        # fountain
    109: "toy",             # plaything, toy
    110: "pool",            # swimming pool
    114: "waterfall",       # waterfall, falls
    127: "animal",          # animal
    128: "bicycle",         # bicycle
    129: "lake",            # lake
    137: "traffic_light",   # traffic light
    139: "trash_bin",       # ashcan, trash can
}

# ADE mask value -> master class id, the form the processing loop consumes.
ADE_TO_MASTER = {
    ade_class: master_index[master_name]
    for ade_class, master_name in _ADE_TO_MASTER_NAME.items()
}
print("ADE20K category mapping loaded!")

# Training images and their per-pixel category masks.
img_dir = "/content/ade20k/ADEChallengeData2016/images/training"
mask_dir = "/content/ade20k/ADEChallengeData2016/annotations/training"

# Both listings are sorted so that the i-th image is paired with the i-th mask.
imgs = sorted(glob.glob(f"{img_dir}/*.jpg"))
masks = sorted(glob.glob(f"{mask_dir}/*.png"))

print(f"Found {len(imgs)} images and {len(masks)} masks!")

assert len(imgs) == len(masks), "Mismatch in image and mask counts!"

print("Processing ADE20K images...")

for img_path, mask_path in tqdm(zip(imgs, masks), total=len(imgs), desc="ADE20K images"):
    img = Image.open(img_path).convert("RGB")
    W, H = img.size  # PIL reports (width, height)
    mask = np.array(Image.open(mask_path))

    # One box per mapped category: the extent of ALL pixels carrying that
    # category value, so separate instances of a class share a single box.
    objs = []
    for ade_class, master_id in ADE_TO_MASTER.items():
        rows, cols = np.nonzero(mask == ade_class)
        # Fewer than two pixels cannot span a box.
        if cols.size < 2 or rows.size < 2:
            continue
        left, right = cols.min(), cols.max()
        top, bottom = rows.min(), rows.max()
        # Discard extents that are degenerate along either axis.
        if not (left < right and top < bottom):
            continue
        # Normalise centre and size to [0, 1] as the YOLO format expects.
        x_c = (left + right) / 2.0 / W
        y_c = (top + bottom) / 2.0 / H
        bw = (right - left) / W
        bh = (bottom - top) / H
        objs.append(f"{master_id} {x_c:.6f} {y_c:.6f} {bw:.6f} {bh:.6f}")

    # Images without any mapped category are not added to the dataset.
    if objs:
        base = os.path.splitext(os.path.basename(img_path))[0]
        out_img = f"{BASE_DIR}/images/train/ade_{base}.jpg"
        out_lbl = f"{BASE_DIR}/labels/train/ade_{base}.txt"
        img.save(out_img)
        with open(out_lbl, "w") as f:
            f.write("\n".join(objs))

print("ADE20K Kaggle dataset processed!")

In [None]:
# ----------------------------
# @title Cell 10: WRITE DATA CONFIG FILE
# ----------------------------
# Dataset description consumed by the training cell: dataset root, the two
# image folders relative to it, the class count and the ordered class names.
data_yaml_lines = [
    f"path: {BASE_DIR}\n",
    "train: images/train\n",
    "val: images/val\n",
    f"nc: {len(MASTER_CLASS_LIST)}\n",
    "names: " + str(MASTER_CLASS_LIST) + "\n",
]
with open(f"{BASE_DIR}/data.yaml", "w") as f:
    f.writelines(data_yaml_lines)
    print(f"data.yaml saved at {BASE_DIR}/data.yaml")


In [None]:
# @title Cell 10a: Save all files to Google Drive for resuming later if session ends

import os
import shutil
import zipfile
import time # Import time for timestamp

# Function to save all files in BASE_DIR to Google Drive
def save_to_drive():
    """Zip everything under BASE_DIR and copy the archive to Google Drive.

    The archive is built in /tmp under a timestamped name, copied into the
    Drive backup folder, and the temporary file is removed afterwards whether
    or not the copy succeeded. Failures are reported with print(); nothing is
    raised to the caller.
    """
    drive_backup_dir = "/content/drive/MyDrive/mower_dataset_backup"
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    zip_filename = f"mower_dataset_backup_{timestamp}.zip"
    temp_zip_path = f"/tmp/{zip_filename}" # Save zip to a temporary location
    print(f"Creating zip archive of {BASE_DIR} at {temp_zip_path}...")

    try:
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for dirpath, _subdirs, filenames in os.walk(BASE_DIR):
                # Archive names are relative to BASE_DIR so the zip does not
                # embed the /content/... prefix.
                rel_dir = os.path.relpath(dirpath, BASE_DIR)
                if rel_dir != '.':
                    # Explicit directory entry (keeps empty folders); the base
                    # directory itself is not added.
                    zipf.write(dirpath, rel_dir)
                for filename in filenames:
                    abs_path = os.path.join(dirpath, filename)
                    zipf.write(abs_path, os.path.relpath(abs_path, BASE_DIR))
        print("Zip archive created successfully.")

        # The Drive folder may not exist yet on the first backup.
        os.makedirs(drive_backup_dir, exist_ok=True)
        drive_dst_path = os.path.join(drive_backup_dir, zip_filename)

        print(f"Copying zip archive to Google Drive at {drive_dst_path}...")
        shutil.copy2(temp_zip_path, drive_dst_path)
        print("Data successfully saved to Google Drive.")

    except Exception as e:
        print(f"Error saving data to Google Drive: {e}")

    finally:
        # The local archive is only a staging copy.
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
            print(f"Removed temporary zip file: {temp_zip_path}")

# Run the backup, but only when Cell 2 has defined BASE_DIR in this session.
if 'BASE_DIR' not in globals():
    print("Error: BASE_DIR is not defined. Please run Cell 2 first.")
else:
    save_to_drive()

In [None]:
# @title Cell 10b: Recover saved backup from Google Drive if resuming later

import os
import shutil
import zipfile
from google.colab import drive
import glob
import time # Import time for sorting by modification date

# Function to recover data from Google Drive backup
def recover_from_drive(
    drive_backup_dir="/content/drive/MyDrive/mower_dataset_backup",
    target_base_dir="/content/mower_dataset",  # This should match BASE_DIR
    mount_point="/content/drive",
):
    """Restore the newest dataset backup zip from Google Drive.

    Args:
        drive_backup_dir: folder that holds the mower_dataset_backup_*.zip files.
        target_base_dir: directory that is replaced by the archive's contents.
        mount_point: where Google Drive is (or will be) mounted.

    Returns:
        None. Progress and failures are reported with print(); nothing is
        raised to the caller.

    The archive is extracted into a staging directory next to target_base_dir
    first, and the existing target is removed only after extraction succeeded,
    so a missing, truncated or corrupt backup can no longer destroy the data
    that is already on disk.
    """
    # Ensure Google Drive is mounted
    print("Checking Google Drive mount status...")
    if not os.path.exists(mount_point):
        print("Google Drive not mounted. Attempting to mount...")
        try:
            # Imported here so the function does not rely on a cell-level import.
            from google.colab import drive
            drive.mount(mount_point)
            print("Google Drive mounted successfully.")
        except Exception as e:
            print(f"Error mounting Google Drive: {e}")
            print("Cannot proceed with recovery without Google Drive mounted.")
            return # Exit function if mount fails
    else:
        print("Google Drive is already mounted.")

    print(f"Attempting to recover data from zip backup in {drive_backup_dir}...")

    if not os.path.exists(drive_backup_dir):
        print(f"Backup directory not found at {drive_backup_dir}. Skipping recovery.")
        print("If this is your first run or you haven't saved a backup, this is expected.")
        return

    # Find the latest zip file based on modification time
    list_of_files = glob.glob(f"{drive_backup_dir}/mower_dataset_backup_*.zip")
    if not list_of_files:
        print(f"No zip backup files found in {drive_backup_dir}. Skipping recovery.")
        return

    latest_zip_path = max(list_of_files, key=os.path.getmtime)
    print(f"Found latest backup zip: {latest_zip_path}")

    temp_zip_path = "/tmp/latest_mower_dataset_backup.zip"
    # Extraction target; sits next to the real directory so the final move is
    # a rename within the same parent folder.
    staging_dir = f"{target_base_dir}.restore_tmp"

    try:
        # Copy the zip file to a temporary location in Colab
        print(f"Copying zip file to temporary location: {temp_zip_path}...")
        shutil.copy2(latest_zip_path, temp_zip_path)
        print("Zip file copied successfully.")

        # Start from an empty staging directory (a previous failed run may
        # have left one behind).
        if os.path.exists(staging_dir):
            shutil.rmtree(staging_dir)
        os.makedirs(staging_dir)

        print(f"Extracting zip archive to {target_base_dir}...")
        with zipfile.ZipFile(temp_zip_path, 'r') as zipf:
            zipf.extractall(staging_dir)

        # Extraction succeeded: only now is it safe to drop the old data.
        if os.path.exists(target_base_dir):
            print(f"Removing existing directory: {target_base_dir}")
            shutil.rmtree(target_base_dir)
        shutil.move(staging_dir, target_base_dir)
        print("Data successfully recovered and extracted.")

        # Optionally, verify some files exist
        if os.path.exists(f"{target_base_dir}/data.yaml"):
            print("Verified data.yaml exists in recovered directory.")
        else:
            print("Warning: data.yaml not found in the recovered directory.")

    except Exception as e:
        print(f"Error recovering data from Google Drive: {e}")

    finally:
        # Clean up the temporary zip file
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
            print(f"Removed temporary zip file: {temp_zip_path}")
        # A staging directory still present here means the restore failed
        # part-way; remove the partial extraction.
        if os.path.isdir(staging_dir):
            shutil.rmtree(staging_dir, ignore_errors=True)

# Try to restore a previous session's dataset from Drive.
recover_from_drive()

# Later cells need BASE_DIR. When this cell is run in a fresh session without
# Cell 2, define it here and create the empty folder skeleton; folders that a
# successful recovery already produced are left untouched (exist_ok=True).
if 'BASE_DIR' not in globals():
    BASE_DIR = "/content/mower_dataset"
    print(f"BASE_DIR was not set, initializing to {BASE_DIR}")
    for split in ("train", "val"):
        for subdir in ("images", "labels"):
            os.makedirs(f"{BASE_DIR}/{subdir}/{split}", exist_ok=True)
    print("Initialized necessary dataset directories.")

In [None]:
# ----------------------------
# @title Cell 11: TRAIN MODELS WITH CHECKPOINT RESUME
# ----------------------------
import torch
import os
import shutil
import random
import glob
import gc
import re
from tqdm import tqdm # Import tqdm here as it's used later
from ultralytics import YOLO
from google.colab import drive

# Mount Google Drive for automated backups
# (must happen before BACKUP_DIR is created, since that folder lives inside
# the mounted Drive tree).
drive.mount('/content/drive')
# Folder that receives the periodic checkpoint copies and that the resume
# logic below searches for earlier checkpoints.
BACKUP_DIR = "/content/drive/MyDrive/mower_model_checkpoints"
os.makedirs(BACKUP_DIR, exist_ok=True)

# --- (IMPROVED) Define the backup callback function ---
def backup_checkpoint_callback(trainer):
    """Copy the trainer's latest checkpoint to Google Drive every 5 epochs.

    Args:
        trainer: the training object passed to the callback. The attributes
            read here are ``epoch`` (zero-based), ``save_dir`` (run directory,
            whose base name may carry a suffix such as '2') and ``last``
            (path of the most recently written checkpoint).

    Returns:
        None. Copy failures and a missing checkpoint are reported with
        print() and never interrupt training.

    The backup is named ``<run directory name>_epoch_<epoch + 1>.pt`` and is
    written to BACKUP_DIR.
    NOTE(review): the file copied is whatever ``trainer.last`` contains when the
    callback fires; which epoch that reflects depends on the event this
    callback is registered for.
    """
    epoch_number = trainer.epoch + 1
    # Nothing to do (and no paths to build) on non-backup epochs.
    if epoch_number % 5 != 0:
        return

    src_path = trainer.last
    if not os.path.exists(src_path):
        print(f"⚠️ [Backup] Epoch {epoch_number}: checkpoint {src_path} not found; nothing backed up.")
        return

    # Use the run name from save_dir, which reflects the actual directory name.
    run_name = os.path.basename(trainer.save_dir)
    backup_file_name = f"{run_name}_epoch_{epoch_number}.pt"
    dst_path = os.path.join(BACKUP_DIR, backup_file_name)

    try:
        shutil.copy2(src_path, dst_path)
        print(f"✅ [Backup] Epoch {epoch_number} for '{run_name}' saved to Google Drive at {dst_path}")
    except Exception as e:
        # Best effort: a failed backup must not abort the training run.
        print(f"❌ [Backup] Failed to save checkpoint {backup_file_name} to Google Drive: {e}")


# --- (NEW) Function to find the latest checkpoint ---
def find_latest_checkpoint(backup_dir, base_model_name):
    """Return the path of the most advanced backed-up checkpoint for a model.

    Args:
        backup_dir: directory containing the backed-up ``.pt`` files.
        base_model_name: run name without any suffix, e.g. 'pi_model_yolov8n'.

    Returns:
        The full path of the matching file with the highest epoch number, or
        None when the directory does not exist or holds no matching file.

    A file matches when it is named
    ``<base_model_name>[<digits>][_<digits>]_epoch_<N>.pt``; the optional parts
    cover run directories that received a numeric suffix. When several files
    share the highest epoch (for example from different runs), the most
    recently modified one wins, so the result does not depend on directory
    listing order.
    """
    pattern = re.compile(rf"^{re.escape(base_model_name)}(\d+)?(_\d+)?_epoch_(\d+)\.pt$")

    if not os.path.isdir(backup_dir):
        print(f"Backup directory not found: {backup_dir}")
        return None

    print(f"Searching for checkpoints matching pattern '{pattern.pattern}' in {backup_dir}")
    # (epoch, modification time, path): tuples compare in exactly the order
    # needed to pick highest epoch first, newest file second.
    found_checkpoints = []
    for filename in os.listdir(backup_dir):
        match = pattern.match(filename)
        if match is None:
            continue
        path = os.path.join(backup_dir, filename)
        # Group 3 is the epoch number; it is the only mandatory group.
        found_checkpoints.append((int(match.group(3)), os.path.getmtime(path), path))

    if not found_checkpoints:
        print(f"👍 No existing checkpoint found matching pattern '{pattern.pattern}'. Starting new training.")
        return None

    latest_epoch, _mtime, latest_checkpoint = max(found_checkpoints)
    print(f"🔎 Found latest checkpoint for '{base_model_name}' at epoch {latest_epoch}: {latest_checkpoint}")
    return latest_checkpoint

# --- Clear GPU function ---
def clear_gpu():
    """Run the garbage collector, then release PyTorch's cached GPU memory.

    Collection happens first so that memory held by objects that are only
    kept alive by reference cycles is freed before the cache is emptied;
    emptying the cache first would leave that memory reserved.
    """
    gc.collect()
    torch.cuda.empty_cache()
    print("Cleared GPU cache.")

# --- Auto-create validation set if empty ---
# BASE_DIR has to be checked before it is used to build the paths below.
if 'BASE_DIR' not in globals():
    raise NameError("BASE_DIR is not defined. Please run Cell 2 first.")

train_img_dir = f"{BASE_DIR}/images/train"
val_img_dir = f"{BASE_DIR}/images/val"
train_lbl_dir = f"{BASE_DIR}/labels/train"
val_lbl_dir = f"{BASE_DIR}/labels/val"

# The destination folders must exist before files can be moved into them.
os.makedirs(val_img_dir, exist_ok=True)
os.makedirs(val_lbl_dir, exist_ok=True)

if os.listdir(val_img_dir):
    print("Validation set already exists and is not empty.")
else:
    print("Validation set is empty or not found. Creating one...")
    # Sorted so that, together with the fixed seed below, the same images are
    # selected on every run.
    all_imgs = sorted(
        img for img in os.listdir(train_img_dir)
        if img.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    # Hold out 10% of the training images: at least 1, at most 1500.
    num_val = min(1500, max(1, len(all_imgs) // 10))
    if len(all_imgs) > num_val:
        # Dedicated generator: reproducible without touching the global
        # random state (42 matches the seed used for training).
        val_imgs_to_move = random.Random(42).sample(all_imgs, num_val)

        for img_name in tqdm(val_imgs_to_move, desc="Moving validation images"):
            src_img = os.path.join(train_img_dir, img_name)
            dst_img = os.path.join(val_img_dir, img_name)
            shutil.move(src_img, dst_img)

            label_name = os.path.splitext(img_name)[0] + ".txt"
            src_lbl = os.path.join(train_lbl_dir, label_name)
            dst_lbl = os.path.join(val_lbl_dir, label_name)
            if os.path.exists(src_lbl):
                shutil.move(src_lbl, dst_lbl)
            else:
                # No label in the source: create an empty label file so the
                # image is still paired with one in the validation split.
                with open(dst_lbl, 'w') as f:
                    pass
        print(f"Moved {num_val} images/labels to validation set.")
    else:
        print(f"Not enough images ({len(all_imgs)}) to create a validation set of size {num_val}. Skipping validation set creation.")


# --- Train Pi model with resume logic ---
clear_gpu()
pi_model_name = "pi_model_yolov8n"
# Total number of epochs for the run, whether it starts fresh or resumes.
# Defined unconditionally so it exists on every code path.
total_epochs_pi = 50

# Look for a checkpoint in the Drive backup folder that belongs to this model.
pi_checkpoint_path = find_latest_checkpoint(BACKUP_DIR, pi_model_name)

# Epoch number encoded in the checkpoint's file name (0 = starting fresh).
start_epoch_pi = 0
if pi_checkpoint_path:
    match = re.search(rf"{re.escape(pi_model_name)}(\d+)?(_\d+)?_epoch_(\d+)\.pt$", pi_checkpoint_path)
    if match:
        # Group 3 is the epoch number.
        start_epoch_pi = int(match.group(3))
        print(f"Resuming Pi model training from epoch {start_epoch_pi}. Total epochs set to {total_epochs_pi}.")
    else:
        print(f"Could not parse epoch from checkpoint filename: {pi_checkpoint_path}. Starting new training.")
        pi_checkpoint_path = None # Treat as no valid checkpoint

# Load the checkpoint when one was found; otherwise build the model from its
# architecture definition.
model_pi = YOLO(pi_checkpoint_path) if pi_checkpoint_path else YOLO("yolov8n.yaml")
# Back up on "on_model_save" so the copy is taken after the checkpoint for the
# current epoch has been written. With "on_train_epoch_end" the file on disk is
# presumably still the previous epoch's save, so a backup named "epoch N" would
# not contain epoch N (see the Ultralytics callbacks documentation).
model_pi.add_callback("on_model_save", backup_checkpoint_callback)

pi_train_args = {
    "data": f"{BASE_DIR}/data.yaml",
    "epochs": total_epochs_pi,
    "imgsz": 640,
    "batch": .7,
    "workers": 6,
    "patience": 10,
    "seed": 42,
    "project": "mower_model",
    "name": pi_model_name,
    "resume": bool(pi_checkpoint_path) # Explicitly tell trainer to resume
}

# Only set device if CUDA is available
if torch.cuda.is_available():
    pi_train_args["device"] = 0 # Use GPU 0

model_pi.train(**pi_train_args)

# Drop the last reference to the model first; only then can collecting garbage
# and emptying the cache actually give its memory back.
del model_pi
gc.collect()
clear_gpu()

# --- Train Coral model with resume logic ---
coral_model_name = "coral_model_yolov8n"
# Total number of epochs for the run, whether it starts fresh or resumes.
# Defined unconditionally so it exists on every code path.
total_epochs_coral = 50

# Look for a checkpoint in the Drive backup folder that belongs to this model.
coral_checkpoint_path = find_latest_checkpoint(BACKUP_DIR, coral_model_name)

# Epoch number encoded in the checkpoint's file name (0 = starting fresh).
start_epoch_coral = 0
if coral_checkpoint_path:
    match = re.search(rf"{re.escape(coral_model_name)}(\d+)?(_\d+)?_epoch_(\d+)\.pt$", coral_checkpoint_path)
    if match:
        # Group 3 is the epoch number.
        start_epoch_coral = int(match.group(3))
        print(f"Resuming Coral model training from epoch {start_epoch_coral}. Total epochs set to {total_epochs_coral}.")
    else:
        print(f"Could not parse epoch from checkpoint filename: {coral_checkpoint_path}. Starting new training.")
        coral_checkpoint_path = None # Treat as no valid checkpoint

# Load the checkpoint when one was found; otherwise build the model from its
# architecture definition.
model_coral = YOLO(coral_checkpoint_path) if coral_checkpoint_path else YOLO("yolov8n.yaml")
# Back up on "on_model_save" so the copy is taken after the checkpoint for the
# current epoch has been written. With "on_train_epoch_end" the file on disk is
# presumably still the previous epoch's save, so a backup named "epoch N" would
# not contain epoch N (see the Ultralytics callbacks documentation).
model_coral.add_callback("on_model_save", backup_checkpoint_callback)

coral_train_args = {
    "data": f"{BASE_DIR}/data.yaml",
    "epochs": total_epochs_coral,
    "imgsz": 640,
    "batch": .7,
    "workers": 6,
    "patience": 10,
    "seed": 42,
    "project": "mower_model",
    "name": coral_model_name,
    "resume": bool(coral_checkpoint_path) # Explicitly tell trainer to resume
}

# Only set device if CUDA is available
if torch.cuda.is_available():
    coral_train_args["device"] = 0 # Use GPU 0

model_coral.train(**coral_train_args)

In [None]:
# @title Cell 12: Back up only necessary files to Export Models for Notebook 2

import time

# --- New Cell: Backup mower_dataset and mower_model folders to Google Drive ---

def backup_specific_folders_to_drive():
    """Zip /content/mower_dataset and /content/mower_model and copy the zip to Drive.

    The archive is built in /tmp under a timestamped name, copied to
    /content/drive/MyDrive/mower_backups, and the temporary file is removed
    afterwards whether or not the copy succeeded. A folder that does not exist
    is skipped with a warning. Failures are reported with print(); nothing is
    raised to the caller.
    """
    # Imported locally so this cell works on its own: the cell itself only
    # imports `time`, and the optional backup/recovery cells that import
    # zipfile may never have been run in this session.
    import os
    import shutil
    import zipfile

    # Ensure Google Drive is mounted
    print("Checking Google Drive mount status...")
    if not os.path.exists('/content/drive'):
        print("Google Drive not mounted. Attempting to mount...")
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            print("Google Drive mounted successfully.")
        except Exception as e:
            print(f"Error mounting Google Drive: {e}")
            print("Cannot proceed with backup without Google Drive mounted.")
            return # Exit function if mount fails
    else:
        print("Google Drive is already mounted.")

    drive_backup_dir_base = "/content/drive/MyDrive/mower_backups" # Use a dedicated backup dir
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    zip_filename = f"mower_backup_{timestamp}.zip"
    temp_zip_path = f"/tmp/{zip_filename}" # Save zip to a temporary location
    print(f"Creating zip archive at {temp_zip_path}...")

    # Define the folders to back up (relative to /content)
    folders_to_backup = ["mower_dataset", "mower_model"]

    try:
        # Create the zip file
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for folder_name in folders_to_backup:
                folder_path = f"/content/{folder_name}"
                if not os.path.exists(folder_path):
                    print(f"Warning: Folder '{folder_name}' not found at {folder_path}. Skipping.")
                    continue

                print(f"Adding folder '{folder_name}' to zip...")
                for root, dirs, files in os.walk(folder_path):
                    rel_root = os.path.relpath(root, folder_path)
                    # relpath yields "." for the folder itself; joining that
                    # produced "<folder>/." and defeated the "skip the base
                    # folder" check. Compare the relative part instead.
                    if rel_root != ".":
                        # Explicit directory entry (keeps empty sub-folders).
                        zipf.write(root, os.path.join(folder_name, rel_root))

                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname_file = os.path.join(folder_name, os.path.relpath(file_path, folder_path))
                        zipf.write(file_path, arcname_file)

        print("Zip archive created successfully.")

        # Ensure the target directory exists in Drive
        os.makedirs(drive_backup_dir_base, exist_ok=True)
        drive_dst_path = os.path.join(drive_backup_dir_base, zip_filename)

        # Copy the zip file to Google Drive
        print(f"Copying zip archive to Google Drive at {drive_dst_path}...")
        shutil.copy2(temp_zip_path, drive_dst_path)
        print("Specific folders successfully saved to Google Drive.")

    except Exception as e:
        print(f"Error saving specific folders to Google Drive: {e}")

    finally:
        # Clean up the temporary zip file
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
            print(f"Removed temporary zip file: {temp_zip_path}")

# Call the function to perform the backup
# (runs as soon as this cell executes; the function reports its own errors
# with print() instead of raising).
backup_specific_folders_to_drive()

