In [None]:
pip install opencv-python matplotlib torch torchvision pycocotools


In [None]:
pip install ultralytics


In [8]:
import torch
print(torch.cuda.is_available())


False


#### DATASET FOLDER STRUCTURE CREATION

In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Configuration
original_folders = [r"D:\Shivani\sit\MV\MissingTSMini\task1train540p", r"D:\Shivani\sit\MV\MissingTSMini\task2train540p"]  # Your current folders
dataset_path = 'dataset'                   # New structured dataset
test_size = 0.15                           # 15% for test
val_size = 0.15                            # 15% for validation

# Create new directory structure
os.makedirs(os.path.join(dataset_path, 'train', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_path, 'train', 'labels'), exist_ok=True)
os.makedirs(os.path.join(dataset_path, 'val', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_path, 'val', 'labels'), exist_ok=True)
os.makedirs(os.path.join(dataset_path, 'test', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_path, 'test', 'labels'), exist_ok=True)

# Collect all files
all_files = []
for folder in original_folders:
    for file in os.listdir(folder):
        if file.endswith('.jpg') or file.endswith('.png'):
            base_name = os.path.splitext(file)[0]
            all_files.append((
                os.path.join(folder, file),                 # Image path
                os.path.join(folder, f"{base_name}.txt")     # Label path
            ))

# Split into train/val/test
train_files, test_files = train_test_split(all_files, test_size=test_size, random_state=42)
train_files, val_files = train_test_split(train_files, test_size=val_size/(1-test_size), random_state=42)

# Function to copy files
def copy_files(file_list, split_name):
    for img_path, label_path in file_list:
        # Copy image
        shutil.copy(
            img_path,
            os.path.join(dataset_path, split_name, 'images', os.path.basename(img_path))
        )
        # Copy label
        if os.path.exists(label_path):
            shutil.copy(
                label_path,
                os.path.join(dataset_path, split_name, 'labels', os.path.basename(label_path))
            )

# Execute copying
copy_files(train_files, 'train')
copy_files(val_files, 'val')
copy_files(test_files, 'test')

print("Dataset reorganization complete!")
print(f"Train: {len(train_files)} samples")
print(f"Val: {len(val_files)} samples")
print(f"Test: {len(test_files)} samples")

Dataset reorganization complete!
Train: 1400 samples
Val: 300 samples
Test: 300 samples


#### READING ALL THE CLASSES

In [4]:
import os
from collections import defaultdict

def get_unique_classes_with_names(dataset_root):
    """
    Extract unique classes with their textual names and counts.
    
    Args:
        dataset_root (str): Path to dataset containing label files
        
    Returns:
        dict: {class_id: {'name': str, 'count': int}}
        list: Malformed lines for debugging
    """
    class_info = defaultdict(lambda: {'name': None, 'count': 0})
    malformed_lines = []
    
    for root, _, files in os.walk(dataset_root):
        for file in files:
            if file.endswith('.txt'):
                with open(os.path.join(root, file), 'r') as f:
                    for line_num, line in enumerate(f, 1):
                        line = line.strip()
                        if not line:
                            continue
                            
                        # Parse both formats:
                        # 1. "class_id x y w h" (YOLO)
                        # 2. "class_name: x,y,w,h" (IDD)
                        if ':' in line:
                            try:
                                class_part, coords = line.split(':')
                                class_name = class_part.strip()
                                # Create numeric ID from name hash
                                class_id = abs(hash(class_name)) % (10**8)
                                class_info[class_id]['name'] = class_name
                                class_info[class_id]['count'] += 1
                            except ValueError:
                                malformed_lines.append(f"{file}:{line_num} -> {line}")
                        else:
                            try:
                                parts = line.split()
                                class_id = int(parts[0])
                                class_info[class_id]['count'] += 1
                            except (ValueError, IndexError):
                                malformed_lines.append(f"{file}:{line_num} -> {line}")
    
    # Generate report
    print(f"Found {len(class_info)} unique classes:")
    for class_id, info in sorted(class_info.items()):
        name = info['name'] or f'unnamed_class_{class_id}'
        print(f"ID: {class_id:3d} | {name:20s} | Samples: {info['count']}")
    
    if malformed_lines:
        print("\nWarning: Malformed lines detected:")
        for ml in malformed_lines[:5]:  # Print first 5 malformed lines
            print(ml)
        if len(malformed_lines) > 5:
            print(f"... and {len(malformed_lines)-5} more")
    
    return dict(class_info), malformed_lines

# Usage
class_dict, errors = get_unique_classes_with_names(r'C:\Users\DELL\Documents\GitHub\AutonomousDrivingGanMV\dataset')

Found 4 unique classes:
ID: 3123981 | left-hand-curve      | Samples: 540
ID: 4899163 | side-road-left       | Samples: 380
ID: 66837424 | right-hand-curve     | Samples: 540
ID: 88135902 | gap-in-median        | Samples: 540


#### Dataset.yaml creation

In [5]:
import os
import yaml
from collections import defaultdict

def generate_yaml_config(dataset_root):
    class_counts = defaultdict(int)
    
    # Scan all label files
    for split in ['train', 'val', 'test']:
        label_dir = os.path.join(dataset_root, split, 'labels')
        if os.path.exists(label_dir):
            for label_file in os.listdir(label_dir):
                if label_file.endswith('.txt'):
                    with open(os.path.join(label_dir, label_file), 'r') as f:
                        for line in f:
                            line = line.strip()
                            if ':' in line:
                                class_name = line.split(':')[0].strip()
                                class_counts[class_name] += 1
    
    # Create class mapping
    names = {i: name for i, name in enumerate(sorted(class_counts.keys()))}
    
    # Generate YAML
    config = {
        'path': os.path.abspath(dataset_root),
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
        'names': names,
        'nc': len(names),
        'stats': {
            'class_distribution': dict(sorted(class_counts.items()))
        }
    }
    
    with open('dataset.yaml', 'w') as f:
        yaml.dump(config, f, sort_keys=False)
    
    print("Generated dataset.yaml successfully!")

generate_yaml_config('./dataset')

Generated dataset.yaml successfully!


#### Model

In [7]:
from ultralytics import YOLO

model = YOLO('yolov8m.pt')  # or yolov8s.pt, yolov8m.pt
model.train(data=r'C:\Users\DELL\Documents\GitHub\AutonomousDrivingGanMV\dataset.yaml', epochs=10 , device=0 , verbose=True , imgsz=640,project=r"train\runs",name="exp1",batch=16  )



Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\DELL\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:12<00:00, 4.31MB/s]


Ultralytics 8.3.109  Python-3.11.7 torch-2.6.0+cpu 


ValueError: Invalid CUDA 'device=0' requested. Use 'device=cpu' or pass valid CUDA device(s) if available, i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.

torch.cuda.is_available(): False
torch.cuda.device_count(): 0
os.environ['CUDA_VISIBLE_DEVICES']: None
See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no CUDA devices are seen by torch.
