## 1 Environment Setup

In [1]:
# Import libraries
from ultralytics import YOLO
from pathlib import Path
import torch
import yaml
import os
from IPython.display import Image, display
import matplotlib.pyplot as plt

print(" Imports successful!")

 Imports successful!


## 2 Environment Check

In [2]:
# Check GPU availability
print("=" * 60)
print("ENVIRONMENT CHECK")
print("=" * 60)

print(f"\n PyTorch version: {torch.__version__}")

if torch.cuda.is_available():
  gpu_name = torch.cuda.get_device_name(0)
  gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
  print(f" GPU Available: {gpu_name}")
  print(f"  CUDA Version: {torch.version.cuda}")
  print(f"  GPU Memory: {gpu_memory:.1f} GB")
  device = 0
  batch_size = 16
  estimated_time = "3-4 hours"
else:
  print(" No GPU detected - Training will use CPU")
  print("  This will be significantly slower!")
  device = 'cpu'
  batch_size = 4
  estimated_time = "10-14 hours"

print(f"\n Configuration:")
print(f"  Device: {device}")
print(f"  Batch size: {batch_size}")
print(f"  Estimated time: {estimated_time}")

ENVIRONMENT CHECK

 PyTorch version: 2.9.1
 No GPU detected - Training will use CPU
  This will be significantly slower!

 Configuration:
  Device: cpu
  Batch size: 4
  Estimated time: 10-14 hours


## 3 Dataset Configuration

In [3]:
DATA_YAML = Path.home() / "waste-classification-vn" / "yolo-dataset-merged" / "data.yaml"

print(f" Dataset path: {DATA_YAML}")
print(f"  Exists: {DATA_YAML.exists()}")

if not DATA_YAML.exists():
  print("\n ERROR: data.yaml not found!")
  print("  Please update DATA_YAML path above")
else:
  print("\n Dataset configuration found!")

 Dataset path: /Users/caoduong22102004gmail.com/waste-classification-vn/yolo-dataset-merged/data.yaml
  Exists: True

 Dataset configuration found!


In [5]:
# Verify dataset structure
with open(DATA_YAML, encoding='utf-8') as f:
  data_config = yaml.safe_load(f)

print("=" * 60)
print("DATASET VERIFICATION")
print("=" * 60)

print(f"\n Classes: {data_config['nc']}")
print(f"\n Class names:")
for i, name in enumerate(data_config['names']):
  print(f"  {i}: {name}")

# Count files
dataset_path = Path(data_config['path'])
train_images = dataset_path / 'images' / 'train'
val_images = dataset_path / 'images' / 'val'

train_count = len(list(train_images.glob('*.*')))
val_count = len(list(val_images.glob('*.*')))

print(f"\n Dataset size:")
print(f"  Train: {train_count:,} images")
print(f"  Val:  {val_count:,} images")
print(f"  Total: {train_count + val_count:,} images")

if train_count == 0 or val_count == 0:
  print("\n ERROR: No images found!")
else:
  print("\n Dataset ready for training!")

DATASET VERIFICATION

 Classes: 9

 Class names:
  0: Nh·ª±a
  1: Pin
  2: V·∫£i
  3: Kim lo·∫°i
  4: R√°c th·∫£i
  5: Th·ªßy tinh
  6: Gi·∫•y
  7: H·ªôp gi·∫•y
  8: H·ªØu c∆°

 Dataset size:
  Train: 6,548 images
  Val:  1,637 images
  Total: 8,185 images

 Dataset ready for training!


## Model Configuration

In [6]:
# Training configuration
print("=" * 60)
print("MODEL CONFIGURATION")
print("=" * 60)

# Model selection
MODEL_SIZE = 'yolov8s.pt' 
print(f"\nüéØ Model: YOLOv8s")
print(f"  Parameters: 11.2M")
print(f"  File size: ~22 MB")
print(f"  Expected mAP@0.5: 85-90%")
print(f"  Inference speed: 50-70 FPS (GPU)")

# Training parameters
EPOCHS = 150
IMG_SIZE = 640
PATIENCE = 25

print(f"\n Training parameters:")
print(f"  Epochs: {EPOCHS}")
print(f"  Image size: {IMG_SIZE}x{IMG_SIZE}")
print(f"  Batch size: {batch_size}")
print(f"  Early stopping: {PATIENCE} epochs")
print(f"  Device: {device}")

MODEL CONFIGURATION

üéØ Model: YOLOv8s
  Parameters: 11.2M
  File size: ~22 MB
  Expected mAP@0.5: 85-90%
  Inference speed: 50-70 FPS (GPU)

 Training parameters:
  Epochs: 150
  Image size: 640x640
  Batch size: 4
  Early stopping: 25 epochs
  Device: cpu


## Initialize Model

In [7]:
# Load pretrained YOLOv8s
print(" Loading YOLOv8s pretrained weights...")
model = YOLO(MODEL_SIZE)
print(" Model loaded successfully!")

# Display model info
print(f"\nModel summary:")
model.info()

 Loading YOLOv8s pretrained weights...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 21.5MB 25.1MB/s 0.9s0.8s<0.1ss1
 Model loaded successfully!

Model summary:
YOLOv8s summary: 129 layers, 11,166,560 parameters, 0 gradients, 28.8 GFLOPs


(129, 11166560, 0, 28.816844800000002)

In [8]:
# PRODUCTION-OPTIMIZED TRAINING
print("=" * 70)
print(" STARTING TRAINING")
print("=" * 70)
print(f"\nEstimated time: {estimated_time}")
print("You can monitor progress below...\n")

results = model.train(
  # Data
  data=str(DATA_YAML),
  
  # Training duration
  epochs=EPOCHS,
  patience=PATIENCE,
  
  # Input
  imgsz=IMG_SIZE,
  batch=batch_size,
  
  # Hardware
  device=device,
  workers=8,
  
  # Project organization
  project='runs/detect',
  name='waste_production_v1',
  exist_ok=False,
  pretrained=True,
  
  # Checkpointing
  save=True,
  save_period=10,
  
  # Validation
  val=True,
  plots=True,
  
  # === DATA AUGMENTATION ===
  # Color augmentation
  hsv_h=0.015,
  hsv_s=0.7,
  hsv_v=0.4,
  
  # Geometric augmentation
  degrees=15.0,
  translate=0.1,
  scale=0.5,
  shear=0.0,
  perspective=0.0,
  
  # Flip augmentation
  flipud=0.0,
  fliplr=0.5,
  
  # Advanced augmentation
  mosaic=1.0,
  mixup=0.0,
  copy_paste=0.0,
  
  # === HYPERPARAMETERS ===
  lr0=0.01,
  lrf=0.01,
  optimizer='SGD',
  momentum=0.937,
  weight_decay=0.0005,
  cos_lr=True,
  warmup_epochs=3.0,
  warmup_momentum=0.8,
  warmup_bias_lr=0.1,
  
  # === LOSS WEIGHTS ===
  box=7.5,
  cls=0.5,
  dfl=1.5,
  
  # === OPTIMIZATIONS ===
  close_mosaic=10,
  amp=True,
  fraction=1.0,
  
  # === SETTINGS ===
  iou=0.7,
  verbose=True,
  seed=42,
)

print("\n" + "=" * 70)
print(" TRAINING COMPLETE!")
print("=" * 70)

 STARTING TRAINING

Estimated time: 10-14 hours
You can monitor progress below...

New https://pypi.org/project/ultralytics/8.4.6 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.252 Python-3.11.14 torch-2.9.1 CPU (Apple M4)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/Users/caoduong22102004gmail.com/waste-classification-vn/yolo-dataset-merged/data.yaml, degrees=15.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=150, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=3

KeyboardInterrupt: 

## Evaluate Results

In [None]:
# Validate on test set
print("=" * 70)
print("MODEL EVALUATION")
print("=" * 70)

metrics = model.val()

# Overall performance
print(f"\n Overall Performance:")
print(f"  mAP@0.5:   {metrics.box.map50:.4f} ({metrics.box.map50*100:.2f}%)")
print(f"  mAP@0.5:0.95: {metrics.box.map:.4f} ({metrics.box.map*100:.2f}%)")
print(f"  Precision:  {metrics.box.mp:.4f} ({metrics.box.mp*100:.2f}%)")
print(f"  Recall:    {metrics.box.mr:.4f} ({metrics.box.mr*100:.2f}%)")

# Per-class performance
print(f"\n Per-Class Performance (mAP@0.5):")
print("  " + "-" * 50)

for i, (name, ap) in enumerate(zip(metrics.names.values(), metrics.box.maps)):
 status = "" if ap > 0.80 else "" if ap > 0.70 else ""
  print(f"  {status} {i}: {name:15s} ‚Üí {ap:.4f} ({ap*100:.2f}%)")

# Performance assessment
print(f"\n Performance Assessment:")
if metrics.box.map50 >= 0.85:
  print(f"  EXCELLENT! (‚â•85% mAP)")
  print(f"  ‚Üí Production-ready")
elif metrics.box.map50 >= 0.75:
  print(f"  GOOD! (75-85% mAP)")
  print(f"  ‚Üí Acceptable for production")
else:
  print(f"   NEEDS IMPROVEMENT (<75% mAP)")

## Visualize Training Results

In [None]:
# Display training curves
save_dir = Path(results.save_dir)

print(" Training Visualizations:\n")

# Results plot
results_plot = save_dir / 'results.png'
if results_plot.exists():
  print("Training & Validation Metrics:")
  display(Image(filename=str(results_plot)))

# Confusion matrix
confusion_matrix = save_dir / 'confusion_matrix.png'
if confusion_matrix.exists():
  print("\nConfusion Matrix:")
  display(Image(filename=str(confusion_matrix)))

# Predictions
val_batch = save_dir / 'val_batch0_pred.jpg'
if val_batch.exists():
  print("\nSample Predictions:")
  display(Image(filename=str(val_batch)))

## Model Paths

In [None]:
# Get model file paths
best_model = save_dir / 'weights' / 'best.pt'
last_model = save_dir / 'weights' / 'last.pt'

print("=" * 70)
print(" MODEL FILES")
print("=" * 70)

print(f"\n Save directory: {save_dir}")
print(f"\n Best model: {best_model}")
print(f"  Size: {best_model.stat().st_size / 1e6:.1f} MB")

print(f"\n Last model: {last_model}")
print(f"  Size: {last_model.stat().st_size / 1e6:.1f} MB")

print(f"\n Training plots: {save_dir}")

##  Test Model on Sample Image

In [None]:
# Load best model for inference
best_model_loaded = YOLO(str(best_model))

print("üß™ Testing model on sample validation image...\n")

# Get a random validation image
val_images_list = list(val_images.glob('*.jpg'))
if val_images_list:
  test_image = val_images_list[0]
  
  # Run inference
  results_test = best_model_loaded(str(test_image))
  
  # Display results
  for r in results_test:
    print(f"Detections: {len(r.boxes)} objects found")
    
    # Show image with predictions
    im_array = r.plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(im_array[..., ::-1]) # BGR to RGB
    plt.axis('off')
    plt.title('Sample Detection Result')
    plt.show()
    
    # Print detections
    if len(r.boxes) > 0:
      print("\nDetected objects:")
      for box in r.boxes:
        cls = int(box.cls[0])
        conf = float(box.conf[0])
        name = r.names[cls]
        print(f"  {name}: {conf:.2%}")
else:
  print("No validation images found!")

## Export Model for Deployment

In [None]:
# Export to different formats
print("=" * 70)
print(" EXPORTING MODEL")
print("=" * 70)

# Export to ONNX (recommended for production)
print("\n Exporting to ONNX format...")
onnx_path = best_model_loaded.export(format='onnx')
print(f"  ONNX model: {onnx_path}")

# Export to TensorFlow Lite (for mobile)
print("\n Exporting to TFLite format (for mobile)...")
try:
  tflite_path = best_model_loaded.export(format='tflite')
  print(f"  TFLite model: {tflite_path}")
except Exception as e:
  print(f"   TFLite export failed: {e}")

print("\n Export complete!")

## Summary & Next Steps

In [None]:
print("=" * 70)
print(" TRAINING PIPELINE COMPLETE!")
print("=" * 70)

print(f"\n Final Results:")
print(f"  Model: YOLOv8s")
print(f"  mAP@0.5: {metrics.box.map50*100:.2f}%")
print(f"  Precision: {metrics.box.mp*100:.2f}%")
print(f"  Recall: {metrics.box.mr*100:.2f}%")

print(f"\n Model Files:")
print(f"  PyTorch: {best_model}")
print(f"  ONNX: {onnx_path}")

