# Aethr - YOLO11 Training (Google Colab Web + Google Drive)

Use this notebook directly in **Google Colab web**.

Flow:
1. Mount Google Drive.
2. Clone/pull the repo into Colab runtime.
3. Point to the dataset in Drive (`data/raw/.../data.yaml`).
4. Train YOLO11 on a T4 GPU.
5. Save `best.pt` and `best.onnx` back to Drive.

In [None]:
from pathlib import Path
import os
from google.colab import drive

drive.mount('/content/drive')

PROJECT_ROOT = Path('/content/aethr')
REPO_URL = 'https://github.com/VishwaJaya01/aethr.git'

if not PROJECT_ROOT.exists():
    !git clone {REPO_URL} {PROJECT_ROOT}

os.chdir(PROJECT_ROOT)
!git pull
print('PROJECT_ROOT:', PROJECT_ROOT)

In [None]:
!pip install -q -r requirements.txt
!nvidia-smi

## Dataset Configuration (Google Drive)

Recommended Drive structure:

```text
MyDrive/
  aethr/
    data/
      raw/
        solar-panel-defects-v1/
          data.yaml
          train/images
          train/labels
          valid/images
          valid/labels
          test/images (optional)
          test/labels (optional)
```

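If your dataset is still zipped in Drive, set `EXTRACT_FROM_ZIP = True` in the next cell. The zip is expected at `MyDrive/aethr/solar-panel-defects-v1.zip` (see `DRIVE_DATASET_ZIP`); extraction replaces any existing `solar-panel-defects-v1/` folder.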

In [None]:
from pathlib import Path
import shutil
import zipfile

# Everything this notebook reads from or writes to Drive lives under this folder.
DRIVE_WORKDIR = Path('/content/drive/MyDrive/aethr')
DRIVE_WORKDIR.mkdir(parents=True, exist_ok=True)

DRIVE_DATA_ROOT = DRIVE_WORKDIR / 'data' / 'raw'
DRIVE_DATASET_DIR = DRIVE_DATA_ROOT / 'solar-panel-defects-v1'
DRIVE_DATASET_ZIP = DRIVE_WORKDIR / 'solar-panel-defects-v1.zip'  # Optional
EXTRACT_FROM_ZIP = False

if EXTRACT_FROM_ZIP:
    if not DRIVE_DATASET_ZIP.exists():
        raise FileNotFoundError(f'Dataset zip not found: {DRIVE_DATASET_ZIP}')

    # Open the archive BEFORE deleting anything: ZipFile raises BadZipFile for a
    # corrupt or non-zip file, so an unreadable archive can no longer wipe an
    # existing extracted dataset.
    with zipfile.ZipFile(DRIVE_DATASET_ZIP, 'r') as zf:
        # Start from an empty target so files from an older extraction cannot
        # linger next to the new ones.
        if DRIVE_DATASET_DIR.exists():
            shutil.rmtree(DRIVE_DATASET_DIR)
        DRIVE_DATASET_DIR.mkdir(parents=True, exist_ok=True)
        zf.extractall(DRIVE_DATASET_DIR)

    # Flatten single nested folder if zip contains one top-level directory.
    # '__MACOSX' is a metadata folder added by macOS-created archives; it is
    # ignored here so it does not prevent the real dataset folder from being
    # recognised as the only top-level directory.
    nested_dirs = [
        p for p in DRIVE_DATASET_DIR.iterdir()
        if p.is_dir() and p.name != '__MACOSX'
    ]
    if len(nested_dirs) == 1 and (nested_dirs[0] / 'data.yaml').exists():
        nested = nested_dirs[0]
        for item in nested.iterdir():
            shutil.move(str(item), str(DRIVE_DATASET_DIR / item.name))
        # rmdir only succeeds on an empty directory, which doubles as a check
        # that every item was moved out.
        nested.rmdir()

# Fail early with an actionable message when the dataset is missing, instead of
# letting a later cell fail on the absent file.
DATA_YAML = DRIVE_DATASET_DIR / 'data.yaml'
if not DATA_YAML.exists():
    raise FileNotFoundError(
        f'data.yaml not found at {DATA_YAML}. '
        'Place/export Roboflow YOLO dataset into Drive first.'
    )

print('Using data.yaml:', DATA_YAML)

In [None]:
import yaml

# Parse the dataset config and print a short summary as a sanity check before
# starting a training run.
data_cfg = yaml.safe_load(DATA_YAML.read_text(encoding='utf-8'))
class_names = data_cfg.get('names', [])

print(f'data.yaml keys: {sorted(data_cfg)}')
print(f'class count: {len(class_names)}')
print(f'class names: {class_names}')

In [None]:
from ultralytics import YOLO

# Training run outputs are written under the Drive work directory.
RUNS_DIR = DRIVE_WORKDIR / 'runs'
RUNS_DIR.mkdir(parents=True, exist_ok=True)

# Training settings gathered in one place so they are easy to tweak.
# See the Ultralytics train settings reference for the meaning of each key.
train_kwargs = dict(
    data=str(DATA_YAML),
    epochs=80,
    imgsz=640,
    batch=16,
    patience=20,
    project=str(RUNS_DIR),
    name='aethr_colab',
    device=0,
)

model = YOLO('yolo11s.pt')
results = model.train(**train_kwargs)

print(f'Training finished. Save dir: {results.save_dir}')

In [None]:
from pathlib import Path
import shutil
from ultralytics import YOLO

# Locate the checkpoint written by the training cell and stop early if it is
# missing, before any directory is created or file copied.
save_dir = Path(results.save_dir)
best_pt = save_dir.joinpath('weights', 'best.pt')
if not best_pt.exists():
    raise FileNotFoundError(f'Trained best.pt not found: {best_pt}')

# Primary artifact location on Drive.
DRIVE_MODELS_DIR = DRIVE_WORKDIR / 'models'
DRIVE_MODELS_DIR.mkdir(parents=True, exist_ok=True)
best_pt_drive = DRIVE_MODELS_DIR / 'best.pt'
shutil.copy2(best_pt, best_pt_drive)
print(f'Copied best.pt to: {best_pt_drive}')

# Secondary copy inside the cloned repo, for API/Streamlit tests run in this
# Colab session.
repo_models_dir = PROJECT_ROOT / 'models'
repo_models_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(best_pt, repo_models_dir / 'best.pt')

# Export the trained weights to ONNX and mirror the result to both locations.
export_model = YOLO(str(best_pt))
export_path = Path(export_model.export(format='onnx', imgsz=640))
if not export_path.exists():
    print('ONNX export path not found. Check Ultralytics export logs.')
else:
    best_onnx_drive = DRIVE_MODELS_DIR / 'best.onnx'
    for target_dir in (DRIVE_MODELS_DIR, repo_models_dir):
        shutil.copy2(export_path, target_dir / 'best.onnx')
    print(f'Copied ONNX to: {best_onnx_drive}')

# List what ended up in the Drive models folder.
print('Drive models directory contents:')
for entry in sorted(DRIVE_MODELS_DIR.glob('*')):
    print('-', entry.name)