## Installations

In [None]:
!pip install ultralytics boto3 --quiet

## Imports

In [None]:
import os
import random
import boto3

## Set up environment and AWS credentials

In [None]:
# Export the AWS credentials and region as environment variables so that the
# boto3 client created later can pick them up.
# NOTE: every value below is a placeholder - fill it in before running, and
# never commit real keys.
aws_settings = {
    'AWS_ACCESS_KEY_ID': 'YOUR_AWS_ACCESS_KEY',
    'AWS_SECRET_ACCESS_KEY': 'YOUR_AWS_SECRET_KEY',
    'AWS_DEFAULT_REGION': 'YOUR_AWS_REGION',  # e.g. 'us-east-1'
}
os.environ.update(aws_settings)

# S3 location of the dataset: bucket name and key prefix (e.g. 'vehicles/').
bucket, prefix = '<bucket-name>', '<datset_prefix>/'

## Create folder structure directly

In [None]:
# Build the YOLO directory layout under yolo_data/: an images/ tree and a
# labels/ tree, each with a train and a val subfolder.
folders = [
    f'{kind}/{subset}'
    for kind in ('images', 'labels')
    for subset in ('train', 'val')
]
for folder in folders:
    target_dir = f'yolo_data/{folder}'
    # exist_ok keeps the cell re-runnable once the folders are in place.
    os.makedirs(target_dir, exist_ok=True)

## Connect to S3

In [None]:
# Create the S3 client and a paginator for the ListObjectsV2 operation, so
# that listings are consumed page by page instead of in a single response.
s3 = boto3.client(service_name='s3')
paginator = s3.get_paginator(operation_name='list_objects_v2')

## Collect image files only (.jpg or .png)

In [None]:
# Walk every page of the S3 listing under `prefix` and keep the keys that
# look like images. The extension check is case-insensitive so that objects
# such as 'IMG_0001.JPG' or 'frame.PNG' are not silently left out of the
# dataset; the key itself is stored unchanged.
image_extensions = ('.jpg', '.png')
image_files = []
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    # A page may carry no 'Contents' entry; treat that as an empty page.
    for obj in page.get('Contents', []):
        key = obj['Key']
        if key.lower().endswith(image_extensions):
            image_files.append(key)

## Shuffle and split images

In [None]:
# Split the collected keys 90% / 10% into training and validation sets.
# The keys are sorted and then shuffled with a fixed seed so the split is the
# same on every run. With an unseeded shuffle, re-running the notebook moves
# images between subsets and, because earlier downloads stay on disk, can
# leave validation images in the training folder as well.
split_seed = 42
image_files.sort()
random.Random(split_seed).shuffle(image_files)
split_index = int(len(image_files) * 0.9)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

def download_pair(key, subset):
    """Download one image and, when present, its label file from S3.

    The label is looked up under the same key as the image with the extension
    replaced by ``.txt``. Files are written to ``yolo_data/images/<subset>/``
    and ``yolo_data/labels/<subset>/`` under the image's base name, so keys
    from different S3 "folders" that share a base name overwrite each other.

    Args:
        key: S3 object key of the image.
        subset: Name of the split folder, e.g. ``'train'`` or ``'val'``.

    Raises:
        s3.exceptions.ClientError: If the image download fails, or if the
            label download fails for any reason other than the label object
            not existing (for example access denied or throttling).
    """
    # Derive the label key and local file names by swapping the extension.
    filename = os.path.basename(key)
    label_key = key.rsplit('.', 1)[0] + '.txt'
    label_filename = filename.rsplit('.', 1)[0] + '.txt'

    # Download image
    s3.download_file(bucket, key, f'yolo_data/images/{subset}/{filename}')

    # Download label if exists
    try:
        s3.download_file(bucket, label_key, f'yolo_data/labels/{subset}/{label_filename}')
    except s3.exceptions.ClientError as err:
        # Only a "not found" response means the label is missing; any other
        # client error is a real failure and must not be reported as such.
        error_code = err.response.get('Error', {}).get('Code')
        if error_code not in ('404', 'NoSuchKey', 'NotFound'):
            raise
        print(f"Warning: No label found for {filename}")

## Download all image-label pairs directly into their folders

In [None]:
# Fetch every image/label pair, routing each key to the folder of its split:
# training keys first, then validation keys.
for subset, subset_keys in (('train', train_files), ('val', val_files)):
    for key in subset_keys:
        download_pair(key, subset)

## Creating dataset.yaml and training YOLOv8s

In [None]:
from ultralytics import YOLO

# Absolute path of the dataset root. An empty `path:` entry leaves Ultralytics
# to derive the root from the (relative) location of the YAML file, and
# relative roots are looked up under its configured datasets directory rather
# than the notebook's working directory, so the images may not be found.
# An absolute path is unambiguous.
root_dir = os.path.abspath('yolo_data')
train_images = "images/train"
val_images = "images/val"

# `train` and `val` are relative to `path`; `names` maps class ids to labels.
dataset_yaml = f"""
path: {root_dir}
train: {train_images}
val: {val_images}

names:
  0: Auto
  1: Bicycle
  2: Two-Wheeler
  3: Bus
  4: Hatchback
  5: LCV
  6: Mini-Bus
  7: MUV
  8: Others
  9: Sedan
  10: SUV
  11: Tempo-Traveller
  12: Truck
  13: Van
"""

# Make sure the dataset root exists so this step also works when run on its
# own, then write the config next to the images/ and labels/ folders.
os.makedirs(root_dir, exist_ok=True)
with open(os.path.join(root_dir, 'dataset.yaml'), 'w') as f:
    f.write(dataset_yaml)

# Load the pretrained YOLOv8 small checkpoint and fine-tune it on the dataset
# described by yolo_data/dataset.yaml.
model = YOLO('yolov8s.pt')

model.train(
    data='yolo_data/dataset.yaml',
    epochs=50,
    # The image size must be a multiple of the model stride (32). 1080 is not,
    # so Ultralytics would warn and round it up to 1088 anyway; state the
    # effective value explicitly.
    imgsz=1088,
    batch=16,
    name='yolov8s_vehicle_detector_classifier'
)

## Training loop

In [None]:
from ultralytics import YOLO

# Load the pretrained YOLOv8 large checkpoint.
model = YOLO('yolov8l.pt')

# Fine-tune it on the dataset described by yolo_data/dataset.yaml.
model.train(
    data='yolo_data/dataset.yaml',
    epochs=50,
    # The image size must be a multiple of the model stride (32). 1080 is not,
    # so Ultralytics would warn and round it up to 1088 anyway; state the
    # effective value explicitly.
    imgsz=1088,
    batch=16,
    name='yolov8l_vehicle_detector_classifier'
)

## Example Inference

In [None]:
# Run the model on a single validation image and display the first result.
# NOTE: 'example.jpg' is a placeholder file name - point it at a real image.
results = model.predict('yolo_data/images/val/example.jpg')
first_result = results[0]
first_result.show()