<a href="https://colab.research.google.com/github/d1vv1/ppe-detection-pipeline/blob/main/ppe_train_in_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# **Access Secrets**
Load secrets into the environment.

In [None]:
import os

from google.colab import userdata

# Pull every credential this notebook uses out of Colab's secret store.
# They are kept as plain Python variables for the cells further down.

# Weights & Biases
WANDB_API_KEY = userdata.get('WANDB_API_KEY')

# Backblaze B2 (credentials for the DVC remote)
B2_ACCESS_KEY_ID = userdata.get('B2_ACCESS_KEY_ID')
B2_SECRET_ACCESS_KEY = userdata.get('B2_SECRET_ACCESS_KEY')

# GitHub identity and access token
GITHUB_USER = userdata.get('GITHUB_USER')
GITHUB_EMAIL = userdata.get('GITHUB_EMAIL')
GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')

# Configure W&B: only this key is exported as an environment variable.
os.environ.update({'WANDB_API_KEY': WANDB_API_KEY})

## Clone repo

In [None]:
# Cell 2: Clone Your Repo
# This URL format is how you use the token for auth
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/ppe-detection-pipeline.git"

!git clone {REPO_URL}
%cd ppe-detection-pipeline

## Install dependencies

In [None]:
!pip install "dvc[s3]"
!pip install -r requirements.txt
!pip install -r api/requirements.txt # Installs ultralytics, etc.

## Configure DVC Remote

In [None]:
!dvc remote modify --local myremote access_key_id {B2_ACCESS_KEY_ID}
!dvc remote modify --local myremote secret_access_key {B2_SECRET_ACCESS_KEY}

## Mount G-drive and unzip

In [None]:
from google.colab import drive
drive.mount('/content/drive')

print("Google Drive mounted. Unzipping ppe-dataset.zip...")

!unzip /content/drive/MyDrive/ppe-dataset.zip -d data/

print("Dataset unzipped successfully.")

## Run the training

**Options:**
- Set `resume=True` to continue from last checkpoint if training was interrupted
- Set `google_drive_backup` to backup checkpoints to Google Drive (recommended for Colab)
- Checkpoints are saved every epoch by default for Colab safety

In [None]:
# Option 1: run the training script with its default settings
# (checkpoints saved every epoch):
# !python scripts/train.py

# Option 2 (RECOMMENDED for Colab): call train_model directly and back the
# checkpoints up to Google Drive, so a run can be resumed even if Colab
# disconnects.
from scripts.train import train_model

train_options = {
    'model_size': 'm',
    'epochs': 25,
    'imgsz': 640,
    'batch': 8,
    # Save a checkpoint every epoch (the default; good for Colab).
    'save_period': 1,
    # Enable validation (uses valid/images from data.yaml).
    'val': True,
    # Flip to True to resume from the last checkpoint.
    'resume': False,
    # Where checkpoints are backed up on Google Drive.
    'google_drive_backup': '/content/drive/MyDrive/ppe-models',
}

train_model(**train_options)

# If training gets interrupted, resume with:
# train_model(resume=True, google_drive_backup='/content/drive/MyDrive/ppe-models')

## Save and push results

In [None]:
# Configure Git with your identity
!git config --global user.name {GITHUB_USER}
!git config --global user.email {GITHUB_EMAIL}

# Version the new model file
!dvc add models/best.pt

# Add the new .dvc file to Git
!git add models/best.pt.dvc .gitignore

# Commit the change
!git commit -m "Auto-train: Colab training run"

# Push the new commit to GitHub
!git push

# Push the new model file to Backblaze
!dvc push