In [None]:
pip install ultralytics

In [None]:
import zipfile
from pathlib import Path
import gdown
from ultralytics import YOLO
import yaml
import cv2
from random import sample
import matplotlib.pyplot as plt
import os

In [None]:
# --- Dataset source (Google Drive) ---
# The file id comes from the sharing link in Google Drive:
# https://drive.google.com/file/d/10JylvIoTHO3hV8-aOnVlKIAzb_gZJfRN/view?usp=drive_link
google_drive_file_id = '10JylvIoTHO3hV8-aOnVlKIAzb_gZJfRN'
google_drive_file_name = '20250616_mju_waste_yolo_2475.zip'

# --- Local layout ---
project_root = Path('/content')
dataset_root_name = 'yolo_dataset'

output_path = project_root.joinpath(google_drive_file_name)  # target file for the downloaded zip
extract_dir = project_root.joinpath(dataset_root_name)       # directory the zip is extracted into
dataset_root_path = Path('/content', dataset_root_name)
yaml_path = dataset_root_path.joinpath('data.yaml')


In [None]:
# Show which dataset root the following cells will operate on
print(f"working dir is {dataset_root_path}")

In [None]:
# Download from Google Drive and extract
# Skip the download when the archive is already present so re-running the
# notebook is cheap; delete the zip file to force a fresh download.
if output_path.exists():
    print(f"Using previously downloaded archive: {output_path}\n")
else:
    downloaded = gdown.download(id=google_drive_file_id, output=str(output_path), quiet=False)
    # Some gdown versions signal failure by returning None instead of raising;
    # stop here rather than failing later with a confusing zip error.
    if downloaded is None:
        raise RuntimeError(
            f"Download failed for Google Drive file id {google_drive_file_id!r}; "
            "check the id and the file's sharing permissions."
        )
    print(f"Dataset downloaded from google drive: {output_path}\n")

print("Extracting...\n")
with zipfile.ZipFile(output_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)
print(f"Dataset extracted to: {extract_dir}\n")

In [None]:
  # If necessary, delete the old data.yaml.
  # Only remove the file when it exists, so this cell does not raise
  # FileNotFoundError (and break "Run All") when there is nothing to delete.
  stale_yaml_path = dataset_root_path / 'data.yaml'
  if stale_yaml_path.exists():
      stale_yaml_path.unlink()
      print(f"Deleted file: {stale_yaml_path}")
  else:
      print(f"Nothing to delete, file not found: {stale_yaml_path}")

In [None]:
# Re-point dataset_root_path at the dataset folder below the extraction directory
# (presumably the top-level folder contained in the zip - verify after extracting).
dataset_root_path = Path('/content', dataset_root_name, "20250616_mju_waste_yolo_2475")
dataset_root_path

In [None]:
# Write a fresh data.yaml listing the image split directories and the class names
categories = [{'id': 0, 'name': 'trash'}]  # Replace this with your actual categories

# Every split directory sits under <dataset root>/images/<split>
images_dir = dataset_root_path / 'images'
train_path = images_dir / 'train'
val_path = images_dir / 'val'
test_path = images_dir / 'test'

# Assemble the mapping that gets serialised to YAML
class_names = [cat['name'] for cat in categories]
data = dict(
    train=str(train_path),
    val=str(val_path),
    test=str(test_path),
    nc=len(class_names),
    names=class_names,
)

with (dataset_root_path / 'data.yaml').open('w') as file:
    yaml.dump(data, file, default_flow_style=False)

print("data.yaml created successfully.")


In [None]:
# OPTIONAL: Sanity check that it's possible to train a model with the dataset.
# Runs a single epoch starting from the 'yolo11n.pt' weights.
dataset_yaml = dataset_root_path / 'data.yaml'
train_args = dict(data=str(dataset_yaml), epochs=1, imgsz=640)

model = YOLO('yolo11n.pt')
results = model.train(**train_args)

save_dir = model.trainer.save_dir
print(f"Training complete. Results saved to: {save_dir}")

In [None]:
# Get the latest results
# NOTE: 'runs/detect' is resolved relative to the current working directory.
runs_detect_dir = Path('runs/detect')
if not runs_detect_dir.is_dir():
    raise FileNotFoundError(
        f"No training runs directory found at: {runs_detect_dir.resolve()} - run the training cell first."
    )

train_dirs = [d for d in runs_detect_dir.iterdir() if d.is_dir() and d.name.startswith("train")]
if not train_dirs:
    # Without this guard, train_dirs[0] would fail with a bare IndexError.
    raise FileNotFoundError(
        f"No 'train*' run directories found in: {runs_detect_dir.resolve()} - run the training cell first."
    )

train_dirs.sort(key=lambda d: d.stat().st_mtime, reverse=True)  # newest first, by modification time
latest_train_dir = train_dirs[0]
print(latest_train_dir)

best_model_path = latest_train_dir / 'weights' / 'best.pt'
if not best_model_path.is_file():
    raise FileNotFoundError(f"Expected weights file is missing: {best_model_path}")
print(f"Loading {best_model_path}")


In [None]:
# Load the model and try it out
model = YOLO(best_model_path)
train_images_path = dataset_root_path / "images" / "train"
image_files = list(train_images_path.glob('*.jpg'))

# Never ask for more images than exist: random.sample raises ValueError
# when the requested sample size exceeds the population size.
sample_images = sample(image_files, min(10, len(image_files)))
if not sample_images:
    print(f"No .jpg images found in {train_images_path}")

for image_path in sample_images:
    result = model(image_path)[0]
    # Results.plot() returns a BGR array while matplotlib expects RGB;
    # reverse the channel axis so the colours are displayed correctly.
    annotated_image = result.plot()[..., ::-1]

    plt.figure(figsize=(8, 6))
    plt.imshow(annotated_image)
    plt.title(f'Predictions: {image_path.name}')
    plt.axis('off')
    plt.show()