### Step 1: Install dependencies
##### Install opendatasets and ultralytics to enable dataset downloads and YOLO training.

In [None]:
!pip install opendatasets -q
!pip install ultralytics -q

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25h

### Step 2: Verify installation
##### Verify Python, Torch, CUDA, GPU, RAM, and disk setup before starting training.

In [None]:
import ultralytics
# Print a one-shot environment summary (library, Python and torch versions,
# CUDA device, CPU count, RAM and disk usage) before any training starts.
ultralytics.checks()

Ultralytics 8.3.233 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 38.1/112.6 GB disk)


### Step 3: Upload Kaggle API key
##### Upload kaggle.json to authenticate and access Kaggle datasets.

In [None]:
!pip install kaggle -q

In [None]:
from google.colab import files
# Bind the result instead of leaving the call as the cell's last expression:
# files.upload() returns a dict of {filename: file bytes}, and displaying it
# would write the contents of kaggle.json (the API key) into the cell output.
uploaded = files.upload()  # Select kaggle.json

# Fail early with a clear message if the upload was cancelled or the browser
# renamed the file; the following cells expect exactly "kaggle.json".
assert "kaggle.json" in uploaded, "Expected a file named kaggle.json to be uploaded"

In [None]:
# Create the per-user Kaggle config directory; -p makes this a no-op if it already exists.
!mkdir -p ~/.kaggle

In [None]:
!mv kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

### Step 4: Download dataset
##### Use opendatasets.download() to fetch the BDD100K YOLO dataset locally.

In [None]:
import opendatasets as od

# Kaggle page of the BDD100K dataset already converted to YOLO format.
dataset_url = "https://www.kaggle.com/datasets/a7madmostafa/bdd100k-yolo"

# Fetch the dataset into the current working directory.
od.download(dataset_url)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: omersalem
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/a7madmostafa/bdd100k-yolo
Downloading bdd100k-yolo.zip to ./bdd100k-yolo


100%|██████████| 5.33G/5.33G [01:21<00:00, 70.1MB/s]





### Step 5: Validate dataset structure
##### Creates `check_data(path, split)` to count images and labels in each split, ensuring dataset consistency.

In [None]:
from glob import glob

def check_data(path, split):
    """Count the entries in a split's ``images`` and ``labels`` folders.

    Args:
        path: Dataset root directory.
        split: Name of the split sub-folder (for example ``"train"``).

    Returns:
        A ``(num_images, num_labels)`` tuple. Each count is the number of
        entries matched by ``<path>/<split>/<folder>/*``; a missing folder
        yields 0.
    """
    image_count, label_count = (
        len(glob(f"{path}/{split}/{folder}/*")) for folder in ("images", "labels")
    )
    return image_count, label_count

### Step 6: Verify dataset splits
##### Iterates through **train**, **val**, and **test** folders, counting images and labels to confirm dataset integrity.

In [None]:
path = "/content/bdd100k-yolo"

# Print the image and label counts for each split so a mismatch between the
# two numbers is visible at a glance.
for split in ['train', 'val', 'test']:
    images, labels = check_data(path, split)
    print(f"{split.capitalize()} → Images: {images}, Labels: {labels}")

Train → Images: 70000, Labels: 70000
Val → Images: 10000, Labels: 10000
Test → Images: 20000, Labels: 20000


### Step 7: Update dataset YAML paths  
##### Reads the existing `data.yaml` and replaces old Kaggle paths with Colab paths, ensuring YOLO points to the correct dataset location.  


In [None]:
yaml_path = "/content/bdd100k-yolo/data.yaml"

# Read the dataset config and point every Kaggle-style path at the Colab
# download location. The replacement is a no-op when the file has already been
# rewritten, so this cell is safe to re-run.
with open(yaml_path, "r", encoding="utf-8") as f:
    data = f.read().replace("/kaggle/input/bdd100k-yolo", "/content/bdd100k-yolo")

# Write the patched text back to the same file.
with open(yaml_path, "w", encoding="utf-8") as f:
    f.write(data)

print("✅ YAML updated successfully!")

✅ YAML updated successfully!


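### Step 8: Continue training from the last checkpoint  
##### Loads the weights from the previous run's `last.pt` checkpoint and uses them to initialize a new training run with updated parameters (a fresh run with its own name, not a `resume=True` continuation of the earlier one).  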


In [None]:
from ultralytics import YOLO

data_path = "/content/bdd100k-yolo/data.yaml"

# Load the last checkpoint as a pretrained model
model = YOLO("/content/last.pt")

# Start a new training run.
# The return value is bound to a name so the cell does not end on a bare
# expression: otherwise the returned metrics object is echoed as cell output,
# dumping its full repr (including long curve arrays) into the notebook.
results = model.train(
    data=data_path,
    epochs=9,
    imgsz=640,
    batch=16,
    device=0,
    project="/content/runs",
    name="bdd100k_full_train_v7",
    freeze=0,
    workers=4,
)

Ultralytics 8.3.233 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/bdd100k-yolo/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=9, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=0, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/content/last.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=bdd100k_full_train_v7, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, p

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7827d6951dc0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.0

### Step 9: Archive and download results  
##### Compresses the training output folder into a `.zip` file and downloads it for local storage.  


In [None]:
from google.colab import files
import shutil

# Zip the folder
# Training output folder and the archive path (without extension) to create.
run_dir = "/content/runs/bdd100k_full_train_v7"
archive_base = "/content/bdd100k_full_train_v7"

# Pack the run folder into <archive_base>.zip.
shutil.make_archive(archive_base, 'zip', run_dir)

# Send the resulting zip to the browser for local storage.
files.download(f"{archive_base}.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>