In [11]:
import os
import json
import shutil
from pathlib import Path
import yaml
from sklearn.model_selection import train_test_split
import cv2
import numpy as np

In [13]:
class ASLDatasetPreparator:
    def __init__(self, dataset_path, output_path):
        """
        Initialize the dataset preparator
        dataset_path: Path to the Kaggle ASL dataset
        output_path: Path where the YOLO format dataset will be saved
        """
        self.dataset_path = Path(dataset_path)
        self.output_path = Path(output_path)
        
        # Create ASL label map
        self.asl_label_map = self._create_label_map()

    def _create_label_map(self):
        """Create label map from dataset directory structure"""
        # Get all subdirectories in the dataset path - each should be a letter/class
        class_dirs = [d for d in self.dataset_path.iterdir() if d.is_dir()]
        label_map = {}
        
        for idx, class_dir in enumerate(sorted(class_dirs)):
            label_map[class_dir.name] = idx
            
        return label_map

    def create_directory_structure(self):
        """Create YOLO directory structure"""
        dirs = ['images/train', 'images/val', 'images/test',
                'labels/train', 'labels/val', 'labels/test']
        
        for dir_path in dirs:
            (self.output_path / dir_path).mkdir(parents=True, exist_ok=True)

    def get_all_images(self):
        """Get all images from the dataset with their corresponding labels"""
        image_data = []
        
        # Iterate through each class directory
        for class_dir in self.dataset_path.iterdir():
            if class_dir.is_dir():
                class_label = class_dir.name
                class_idx = self.asl_label_map[class_label]
                
                # Get all images for this class
                for img_path in class_dir.glob('*.jpg'):  # Adjust extension if needed
                    image_data.append((img_path, class_idx))
        
        return image_data

    def create_yolo_annotation(self, img_path, label):
        """Create YOLO format annotation for a single image"""
        # Read image to get dimensions
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"Warning: Could not read image {img_path}")
            return None
            
        img_height, img_width = img.shape[:2]
        
        # For ASL, we'll consider the hand takes up most of the image
        # You might want to adjust these values based on your dataset
        x_center = 0.5  # center of image
        y_center = 0.5
        width = 0.8     # 80% of image width
        height = 0.8    # 80% of image height
        
        return f"{label} {x_center} {y_center} {width} {height}"

    def process_dataset(self, train_split=0.8, val_split=0.1):
        """Process the dataset and convert to YOLO format"""
        # Create directory structure
        self.create_directory_structure()
        
        # Get all images with their labels
        image_data = self.get_all_images()
        
        if not image_data:
            raise ValueError("No images found in the dataset directory!")
        
        # Split dataset
        train_data, temp_data = train_test_split(image_data, train_size=train_split, random_state=42)
        val_data, test_data = train_test_split(temp_data, 
                                             train_size=val_split/(1-train_split), 
                                             random_state=42)
        
        # Process each split
        splits = {
            'train': train_data,
            'val': val_data,
            'test': test_data
        }
        
        # Process and save each split
        for split_name, split_data in splits.items():
            print(f"Processing {split_name} split: {len(split_data)} images")
            for img_path, label in split_data:
                # Create destination paths
                dest_img_path = self.output_path / f'images/{split_name}' / img_path.name
                dest_label_path = self.output_path / f'labels/{split_name}' / f'{img_path.stem}.txt'
                
                # Copy image
                shutil.copy2(img_path, dest_img_path)
                
                # Create and save annotation
                annotation = self.create_yolo_annotation(img_path, label)
                if annotation:
                    with open(dest_label_path, 'w') as f:
                        f.write(annotation)

    def create_data_yaml(self):
        """Create data.yaml configuration file"""
        data_yaml = {
            'path': str(self.output_path.absolute()),
            'train': 'images/train',
            'val': 'images/val',
            'test': 'images/test',
            'names': {v: k for k, v in self.asl_label_map.items()},
            'nc': len(self.asl_label_map)
        }
        
        with open(self.output_path / 'data.yaml', 'w') as f:
            yaml.dump(data_yaml, f, sort_keys=False)

In [15]:
def create_training_config():
    """Create training configuration file"""
    config = {
        'path': './asl_dataset',  # Path to data.yaml
        'train': {
            'epochs': 100,
            'batch_size': 16,
            'imgsz': 640,
            'device': '',  # auto-detect
            'workers': 8,
            'optimizer': 'SGD',
            'lr0': 0.01,
            'lrf': 0.01,
            'momentum': 0.937,
            'weight_decay': 0.0005,
            'warmup_epochs': 3.0,
            'warmup_momentum': 0.8,
            'warmup_bias_lr': 0.1,
            'box': 0.05,
            'cls': 0.5,
            'hsv_h': 0.015,
            'hsv_s': 0.7,
            'hsv_v': 0.4,
            'degrees': 0.0,
            'translate': 0.1,
            'scale': 0.5,
            'shear': 0.0,
            'perspective': 0.0,
            'flipud': 0.0,
            'fliplr': 0.5,
            'mosaic': 1.0,
            'mixup': 0.0,
            'copy_paste': 0.0
        }
    }
    
    with open('training_config.yaml', 'w') as f:
        yaml.dump(config, f, sort_keys=False)

In [17]:
# Initialize dataset preparator
dataset_path = '../dataset/asl_alphabet_train/asl_alphabet_train/'  # Update this path
output_path = '../dataset/asl_alphabet_train/output/'  # Update this path

print(f"Looking for dataset in: {dataset_path}")
print(f"Output will be saved to: {output_path}")

preparator = ASLDatasetPreparator(dataset_path, output_path)

Looking for dataset in: ../dataset/asl_alphabet_train/asl_alphabet_train/
Output will be saved to: ../dataset/asl_alphabet_train/output/


In [19]:
# Process dataset
print("Converting dataset to YOLO format...")
preparator.process_dataset()

Converting dataset to YOLO format...
Processing train split: 69600 images
Processing val split: 8700 images
Processing test split: 8700 images


In [21]:
# Create data.yaml
print("Creating data.yaml...")
preparator.create_data_yaml()

Creating data.yaml...


In [23]:
# Create training configuration
print("Creating training configuration...")
create_training_config()

Creating training configuration...


In [None]:
from ultralytics import YOLO

# Load a model
model = YOLO('YOLOv10n_gestures.pt')  # load a pretrained model (recommended for training)

# Train the model
results = model.train(
    data='../dataset/asl_alphabet_train/output/data.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    name='asl_model'
)

New https://pypi.org/project/ultralytics/8.3.22 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.18  Python-3.11.7 torch-2.4.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1650, 4096MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=YOLOv10n_gestures.pt, data=../dataset/asl_alphabet_train/output/data.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=asl_model, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=Non

100%|███████████████████████████████████████████████████████████████████████████████| 755k/755k [00:00<00:00, 1.99MB/s]

Overriding model.yaml nc=34 with nc=29

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 





  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1      9856  ultralytics.nn.modules.block.SCDown          [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1     36096  ultralytics.nn.modules.block.SCDown          [128, 256, 3, 2]              
  8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256,

100%|█████████████████████████████████████████████████████████████████████████████| 5.35M/5.35M [00:01<00:00, 5.07MB/s]


[34m[1mAMP: [0mchecks passed 


[34m[1mtrain: [0mScanning C:\Users\abhis\Documents\981B\dataset\asl_alphabet_train\output\labels\train... 69600 images, 0 backgro[0m


[34m[1mtrain: [0mNew cache created: C:\Users\abhis\Documents\981B\dataset\asl_alphabet_train\output\labels\train.cache


[34m[1mval: [0mScanning C:\Users\abhis\Documents\981B\dataset\asl_alphabet_train\output\labels\val... 8700 images, 0 backgrounds,[0m


[34m[1mval: [0mNew cache created: C:\Users\abhis\Documents\981B\dataset\asl_alphabet_train\output\labels\val.cache
Plotting labels to runs\detect\asl_model\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 95 weight(decay=0.0), 108 weight(decay=0.0005), 107 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\asl_model[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      3.11G        nan        nan        nan         48        640:  25%|██▌       | 1094/4350 [24:10<1:28:0