# YOLOv8 Training from Google Drive
This notebook mounts Google Drive, checks the dataset layout under `MyDrive/microplastic`, and trains a YOLOv8 model for 10 epochs. Training outputs are saved directly to your Drive so you can close the Colab session later if desired.

Expected Drive structure (already provided by you):
- MyDrive/microplastic/train/images
- MyDrive/microplastic/train/labels
- MyDrive/microplastic/valid/images
- MyDrive/microplastic/valid/labels
- MyDrive/microplastic/data.yaml

Run the cells in order, from top to bottom. Before you start, make sure the Colab runtime type is set to GPU (Runtime → Change runtime type).

In [None]:
# Mount Google Drive
# The mount point is /content/drive; the dataset path below is built on top of it.
from google.colab import drive
drive.mount('/content/drive')
# Base dataset directory on Drive
# base_drive is the dataset root that the later cells join their
# train/valid/data.yaml/runs paths onto, so this cell must run first.
base_drive = '/content/drive/MyDrive/microplastic'
print('Base drive path set to', base_drive)

In [None]:
# Install ultralytics (YOLOv8) and verify environment
!pip install -U ultralytics --quiet
import os, glob
import torch
from ultralytics import YOLO
print('torch:', torch.__version__, 'cuda available=', torch.cuda.is_available())
import ultralytics
print('ultralytics version =', ultralytics.__version__)

In [None]:
# Verify dataset layout and file counts under Drive paths
# Each split ('train', 'valid') has an images/ and a labels/ folder under base_drive.
train_images, train_labels = (
    os.path.join(base_drive, 'train', kind) for kind in ('images', 'labels')
)
valid_images, valid_labels = (
    os.path.join(base_drive, 'valid', kind) for kind in ('images', 'labels')
)
# Dataset description file, located at the dataset root.
data_yaml = os.path.join(base_drive, 'data.yaml')

def _ignore_case(pattern):
    """Rewrite a glob pattern so that its letters match in either case.

    Example: '*.jpg' becomes '*.[jJ][pP][gG]'. Characters inside an existing
    '[...]' class are left untouched so user-supplied classes keep their meaning.
    """
    pieces = []
    in_class = False
    for ch in pattern:
        if ch == '[':
            in_class = True
        elif ch == ']':
            in_class = False
        elif ch.isalpha() and not in_class:
            ch = '[' + ch.lower() + ch.upper() + ']'
        pieces.append(ch)
    return ''.join(pieces)


def count_files(path, patterns=('*.jpg', '*.png')):
    """Count the distinct entries under `path` that match any of `patterns`.

    Args:
        path: Directory to search. A missing directory yields 0.
        patterns: Iterable of glob patterns, matched case-insensitively so that
            files such as 'IMG_001.JPG' are counted on case-sensitive filesystems.

    Returns:
        Number of unique matching paths; a file matching several patterns is
        counted once.
    """
    if not os.path.isdir(path):
        return 0
    # Escape the directory part so characters like '[' in folder names are
    # treated literally instead of as glob syntax.
    root = glob.escape(path)
    matched = set()
    for pat in patterns:
        matched.update(glob.glob(os.path.join(root, _ignore_case(pat))))
    return len(matched)

# Report, for every dataset folder, whether it exists and how many files it holds.
print('train_images exists:', os.path.exists(train_images), 'count=', count_files(train_images))
print('train_labels exists:', os.path.exists(train_labels), 'count=', len(glob.glob(os.path.join(train_labels, '*.txt'))))
print('valid_images exists:', os.path.exists(valid_images), 'count=', count_files(valid_images))
print('valid_labels exists:', os.path.exists(valid_labels), 'count=', len(glob.glob(os.path.join(valid_labels, '*.txt'))))
print('data.yaml exists:', os.path.exists(data_yaml))
if os.path.exists(data_yaml):
    # Show the existing config so class names and paths can be checked by eye.
    print('\nContents of data.yaml:')
    with open(data_yaml) as f:
        print(f.read())
else:
    # No config on Drive: write a minimal one that points at the Drive folders.
    print('data.yaml not found at', data_yaml)
    print('Creating a minimal data.yaml pointing to the Drive folders...')
    data = {
        'path': base_drive,
        'train': 'train/images',
        'val': 'valid/images',
        # There is no separate test split, so the validation images are reused.
        'test': 'valid/images',
        # NOTE(review): assumes a single-class dataset; adjust if yours has more classes.
        'names': ['Microplastic']
    }
    # Imported here because yaml is only needed on this fallback path.
    import yaml
    with open(data_yaml, 'w') as f:
        yaml.dump(data, f, default_flow_style=False)
    print('Wrote', data_yaml)

## Train YOLOv8 for 10 epochs
This cell launches training using the Ultralytics YOLOv8 API. Outputs will be saved to Drive under `MyDrive/microplastic/runs`.
Adjust `model_size`, `batch` and `imgsz` as needed for your GPU.

In [None]:
# Training cell - set params before running
from ultralytics import YOLO
import os

# --- Tunable settings -------------------------------------------------------
data_yaml = os.path.join(base_drive, 'data.yaml')
model_size = 'yolov8s.pt'  # change to yolov8n/8m/8l if desired
epochs, imgsz, batch = 10, 640, 16
# Train on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
project = os.path.join(base_drive, 'runs')  # saved to Drive
name = 'yolov8_drive_exp'

print('Training with', model_size, 'for', epochs, 'epochs. Data yaml:', data_yaml)
print('Project (outputs) will be saved to', project)

# --- Run training -----------------------------------------------------------
train_args = dict(
    data=data_yaml,
    epochs=epochs,
    imgsz=imgsz,
    batch=batch,
    device=device,
    project=project,
    name=name,
)
model = YOLO(model_size)
model.train(**train_args)

print('Training finished. Check the Drive folder:', project)

## Quick inference using the trained weights saved to Drive
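After training finishes, the best weights are saved under `MyDrive/microplastic/runs/yolov8_drive_exp/weights/best.pt`. Note that re-running the training cell does not overwrite this folder: Ultralytics creates a new run folder (`yolov8_drive_exp2`, `yolov8_drive_exp3`, …) each time. Use the cell below to run a quick prediction on a validation image.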

In [None]:
# Quick inference example (update the paths if you changed 'name' or 'project')
from ultralytics import YOLO
import glob, os

runs_dir = os.path.join(base_drive, 'runs')
# Location of the best weights produced by the first training run.
weights_path = os.path.join(runs_dir, 'yolov8_drive_exp', 'weights', 'best.pt')
# Re-running training makes Ultralytics write to yolov8_drive_exp2, yolov8_drive_exp3, ...
# Pick the most recently written best.pt so inference never silently uses stale weights.
candidates = glob.glob(
    os.path.join(glob.escape(runs_dir), 'yolov8_drive_exp*', 'weights', 'best.pt')
)
if candidates:
    weights_path = max(candidates, key=os.path.getmtime)

if os.path.exists(weights_path):
    print('Found weights at', weights_path)
    model = YOLO(weights_path)
    # Sort so the same validation image is chosen on every run.
    sample = sorted(glob.glob(os.path.join(base_drive, 'valid', 'images', '*')))
    if len(sample) > 0:
        print('Running inference on', sample[0])
        res = model.predict(sample[0], save=True)
        print('Saved predictions to /content/runs/predict or Drive run folder.')
    else:
        print('No validation images found to run inference on.')
else:
    print('Weights not found at', weights_path, '\nMake sure the training cell has finished and outputs were written to Drive.')