In [None]:
import zipfile
from pathlib import Path
import gdown
from ultralytics import YOLO
import yaml

In [None]:
# --- Dataset source (Google Drive) ---
# File ID taken from the Drive sharing link, e.g.
# https://drive.google.com/file/d/1ZK9J4sU1jen3CPZgY0gDsTfz9Qoh9msN/view?usp=drive_link
google_drive_file_id = '1ZK9J4sU1jen3CPZgY0gDsTfz9Qoh9msN'
google_drive_file_name = '20250602z_mju-waste_yolo.zip'

# --- Local layout ---
# Every path below is derived from project_root, so relocating the notebook's
# working area only requires changing this one value.
project_root = Path('/content')
dataset_root_name = 'yolo_dataset'

output_path = project_root / google_drive_file_name   # where the downloaded zip is saved
extract_dir = project_root / dataset_root_name        # where the zip is unpacked
dataset_root_path = extract_dir                       # dataset root == extraction directory
yaml_path = dataset_root_path / 'data.yaml'           # dataset config expected inside the archive


In [None]:
# Download the dataset archive from Google Drive and extract it.
# If a valid zip is already present at output_path the download is skipped, so
# re-running the notebook does not fetch the archive again. Delete the file to
# force a fresh download.
if output_path.exists() and zipfile.is_zipfile(output_path):
    print(f"Using existing archive: {output_path}\n")
else:
    downloaded = gdown.download(id=google_drive_file_id, output=str(output_path), quiet=False)
    # Depending on the gdown version a failed download may return None instead
    # of raising; also make sure what we received really is a zip archive so the
    # failure is reported here rather than as an obscure ZipFile error below.
    if downloaded is None or not zipfile.is_zipfile(output_path):
        raise RuntimeError(
            f"Download of Google Drive file {google_drive_file_id!r} failed or did not "
            f"produce a valid zip archive at {output_path}. "
            "Check the file ID and its sharing permissions."
        )
    print(f"Dataset downloaded from google drive: {output_path}\n")

print("Extracting...\n")
with zipfile.ZipFile(output_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)
print(f"Dataset extracted to: {extract_dir}\n")

In [None]:
# Update the `path` entry in data.yaml so it points at the directory the
# dataset was actually extracted to.
print("Updating data.yaml path...\n")
with yaml_path.open('r', encoding='utf-8') as file:
    data = yaml.safe_load(file)

# An empty file loads as None (and a list/scalar document is equally unusable);
# fail with a clear message instead of a TypeError on the assignment below.
if not isinstance(data, dict):
    raise ValueError(
        f"Expected a YAML mapping in {yaml_path}, got {type(data).__name__}"
    )

data['path'] = str(dataset_root_path)

with yaml_path.open('w', encoding='utf-8') as file:
    # sort_keys=False keeps the file's original key order rather than
    # alphabetizing it, so the rewritten file differs only in `path`.
    yaml.dump(data, file, sort_keys=False)

print(f"Updated path in data.yaml to: {data['path']}")

In [None]:
# Sanity check that it's possible to train a model with the dataset.
# Reuse the path from the configuration cell instead of hard-coding it again,
# so this cell follows any change to the dataset location.
dataset_yaml = yaml_path
if not dataset_yaml.is_file():
    raise FileNotFoundError(
        f"Dataset config not found: {dataset_yaml}. Run the download/extract cells first."
    )

model = YOLO('yolo11n.pt')
# A single epoch is enough here: the goal is only to confirm the dataset loads
# and training runs end to end, not to produce a usable model.
results = model.train(data=str(dataset_yaml), epochs=1, imgsz=640)

print(f"Training complete. Results saved to: {model.trainer.save_dir}")