In [None]:
# https://www.kaggle.com/datasets/deeppythonist/american-sign-language-dataset

# https://www.kaggle.com/datasets/vaishnaviasonawane/indian-sign-language-dataset

In [None]:
# Create the local working directory for the merged dataset.
# `-p` makes the command a no-op when the directory already exists, so the
# cell can be re-run without a "File exists" error.
!mkdir -p dataset
# Show the current working directory, i.e. where `dataset` was created.
!pwd

# American Sign Language

In [None]:
import kagglehub

# Fetch the latest version of the ASL dataset through kagglehub and keep the
# returned location in `path`.
path = kagglehub.dataset_download(
    "deeppythonist/american-sign-language-dataset"
)
print("Path to dataset files:", path)

In [None]:
# ASL classes mapped to "rock":     a, e, m, n, s, t
# ASL classes mapped to "paper":    5
# ASL classes mapped to "scissors": v, 2, k, u, h
# ASL classes mapped to "invalid":  0 1 3 4 6 7 8 9 b c d f g i j l o p q r w x y z

In [None]:
import os
from IPython.display import Image, display

base_path = '/kaggle/input/american-sign-language-dataset/ASL_Gestures_36_Classes/test'

# One class folder per digit (0-9) and per lowercase letter (a-z).
subfolders = list('0123456789abcdefghijklmnopqrstuvwxyz')

# File suffixes treated as images when looking for a sample to preview.
image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

# Preview one image per class folder; report folders that are missing or
# contain no image files.
for class_name in subfolders:
    class_dir = os.path.join(base_path, class_name)

    if not os.path.isdir(class_dir):
        print(f"Subfolder not found: {class_name}")
        continue

    candidates = [entry for entry in os.listdir(class_dir)
                  if entry.lower().endswith(image_suffixes)]
    if not candidates:
        print(f"No image files found in subfolder: {class_name}")
        continue

    # Show whichever image the directory listing returned first.
    print(f"Displaying image from subfolder: {class_name}")
    display(Image(filename=os.path.join(class_dir, candidates[0])))


In [None]:
import os
import shutil

base_path = '/kaggle/input/american-sign-language-dataset/ASL_Gestures_36_Classes'

# One class folder per digit (0-9) and per lowercase letter (a-z).
subfolders = list('0123456789abcdefghijklmnopqrstuvwxyz')

# Which ASL classes feed each rock/paper/scissors/invalid bucket.
rock_letters = ['a', 'e', 'm', 'n', 's', 't']
paper_letters = ['5']
scissors_letters = ['v', '2', 'k', 'u', 'h']
invalid_letters = ['0', '1', '3', '4', '6', '7', '8', '9',
                   'b', 'c', 'd', 'f', 'g', 'i', 'j',
                   'l', 'o', 'p', 'q', 'r', 'w', 'x', 'y', 'z']

output_base_path = '/content/dataset/'
rock_output_path = os.path.join(output_base_path, 'rock')
paper_output_path = os.path.join(output_base_path, 'paper')
scissors_output_path = os.path.join(output_base_path, 'scissors')
invalid_output_path = os.path.join(output_base_path, 'invalid')

# Make sure every bucket directory is present before copying.
for bucket_dir in (rock_output_path, paper_output_path,
                   scissors_output_path, invalid_output_path):
    os.makedirs(bucket_dir, exist_ok=True)

# Class name -> bucket directory. Entries are inserted from lowest to highest
# priority so that rock > paper > scissors > invalid wins on any overlap.
bucket_for_class = {}
for class_names, bucket_dir in (
    (invalid_letters, invalid_output_path),
    (scissors_letters, scissors_output_path),
    (paper_letters, paper_output_path),
    (rock_letters, rock_output_path),
):
    bucket_for_class.update(dict.fromkeys(class_names, bucket_dir))

# Walk both dataset splits and copy every image into its bucket.
for split in ('train', 'test'):
    split_path = os.path.join(base_path, split)

    if not os.path.exists(split_path):
        print(f"Split folder not found: {split_path}")
        continue

    for class_name in subfolders:
        class_dir = os.path.join(split_path, class_name)
        if not os.path.isdir(class_dir):
            print(f"Source subfolder not found: {class_dir}")
            continue

        pictures = [entry for entry in os.listdir(class_dir)
                    if entry.lower().endswith(('.png', '.jpg', '.jpeg'))]

        target_folder = bucket_for_class.get(class_name)
        if not target_folder:
            print(f"Folder '{class_name}' does not match any known category. Skipping.")
            continue

        for picture in pictures:
            # The split and class are baked into the new name so files from
            # different folders cannot overwrite each other.
            shutil.copy(
                os.path.join(class_dir, picture),
                os.path.join(target_folder, f"{split}_{class_name}_{picture}"),
            )
        print(f"Copied images from '{split}/{class_name}' to '{target_folder}'")

print("Segregation complete.")


ASL dataset segregation completed

# Indian Sign Language

In [None]:
import kagglehub

# Fetch the latest version of the ISL dataset through kagglehub. This rebinds
# `path`, which the ISL cells below use to locate the data.
path = kagglehub.dataset_download(
    "vaishnaviasonawane/indian-sign-language-dataset"
)
print("Path to dataset files:", path)

In [None]:
import os
from IPython.display import Image, display

base_path = path + '/data'
# Class folders looked up in the ISL dataset: digits 1-9, then uppercase A-Z.
subfolders = list('123456789') + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# File suffixes treated as images when looking for a sample to preview.
image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

# Preview one image per class folder; report folders that are missing or
# contain no image files.
for class_name in subfolders:
    class_dir = os.path.join(base_path, class_name)

    if not os.path.isdir(class_dir):
        print(f"Subfolder not found: {class_name}")
        continue

    candidates = [entry for entry in os.listdir(class_dir)
                  if entry.lower().endswith(image_suffixes)]
    if not candidates:
        print(f"No image files found in subfolder: {class_name}")
        continue

    # Show whichever image the directory listing returned first.
    print(f"Displaying image from subfolder: {class_name}")
    display(Image(filename=os.path.join(class_dir, candidates[0])))


In [None]:
import os
# PIL's Image module is imported under an alias on purpose: the display cells
# use the name `Image` for IPython.display.Image, and rebinding it here would
# make those cells fail if they were re-run after this one.
from PIL import Image as PILImage

# Inspect the pixel dimensions of one ISL sample (class "1", file "0.jpg").
# The location is derived from `path` (the kagglehub download location) so it
# matches the directory the other ISL cells read from.
image_path = os.path.join(path, 'data', '1', '0.jpg')
with PILImage.open(image_path) as img:
    print(f"Image size: {img.size}")  # Output will be (width, height)


In [None]:
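# ISL classes mapped to "rock":     G
# ISL classes mapped to "paper":    5
# ISL classes mapped to "scissors": V, 2
# ISL classes mapped to "invalid":  all remaining classes

# ISL class folders: 1-9 and A-Z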

In [None]:
import os
import shutil

# Segregation logic for ISL
isl_rock_letters = ['G']
isl_paper_letters = ['5']
isl_scissors_letters = ['V', '2']
# Invalid ISL: 1-9 except 5,2,  A-Z except G, V
isl_invalid_letters = [str(i) for i in range(1, 10) if str(i) not in isl_paper_letters + isl_scissors_letters] + \
                      [chr(ord('A') + i) for i in range(26) if chr(ord('A') + i) not in isl_rock_letters + isl_scissors_letters]

# Upper bound on how many images are taken from each ISL class folder.
isl_max_images_per_class = 100

# Ensure the output paths are created
isl_output_base_path = '/content/dataset/'
isl_rock_output_path = os.path.join(isl_output_base_path, 'rock')
isl_paper_output_path = os.path.join(isl_output_base_path, 'paper')
isl_scissors_output_path = os.path.join(isl_output_base_path, 'scissors')
isl_invalid_output_path = os.path.join(isl_output_base_path, 'invalid')

for isl_bucket_dir in (isl_rock_output_path, isl_paper_output_path,
                       isl_scissors_output_path, isl_invalid_output_path):
    os.makedirs(isl_bucket_dir, exist_ok=True)

# Base path for the downloaded ISL dataset
isl_base_path = path + '/data'

# List of subfolders in the ISL dataset (1-9, A-Z)
isl_subfolders = [str(i) for i in range(1, 10)] + [chr(ord('A') + i) for i in range(26)]

# Loop through the subfolders of the ISL dataset
if os.path.isdir(isl_base_path):
    for folder_name in isl_subfolders:
        folder_path = os.path.join(isl_base_path, folder_name)

        if not os.path.isdir(folder_path):
            print(f"ISL source subfolder not found: {folder_path}")
            continue

        # os.listdir() returns names in arbitrary order, so sort before
        # slicing: the same images are then picked on every run and machine.
        image_files = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )[:isl_max_images_per_class]

        target_folder = None
        # Determine the target folder based on the letter/number
        if folder_name in isl_rock_letters:
            target_folder = isl_rock_output_path
        elif folder_name in isl_paper_letters:
            target_folder = isl_paper_output_path
        elif folder_name in isl_scissors_letters:
            target_folder = isl_scissors_output_path
        elif folder_name in isl_invalid_letters:
            target_folder = isl_invalid_output_path
        else:
            print(f"Folder '{folder_name}' from ISL dataset does not match any known category. Skipping.")

        if target_folder:
            for image_file in image_files:
                source_path = os.path.join(folder_path, image_file)
                # Prefix with ISL_ to distinguish from ASL and avoid collisions
                destination_filename = f"ISL_{folder_name}_{image_file}"
                destination_path = os.path.join(target_folder, destination_filename)
                shutil.copy(source_path, destination_path)
            print(f"Copied up to {isl_max_images_per_class} images from ISL '{folder_name}' to '{target_folder}'")
else:
    print(f"ISL base path not found: {isl_base_path}")

print("ISL Segregation complete.")
# These counts cover everything currently in each output folder, including
# any files placed there by earlier cells that write to the same directories.
print(f"Total rock images: {len(os.listdir(isl_rock_output_path))}")
print(f"Total paper images: {len(os.listdir(isl_paper_output_path))}")
print(f"Total scissors images: {len(os.listdir(isl_scissors_output_path))}")
print(f"Total invalid images: {len(os.listdir(isl_invalid_output_path))}")


In [None]:
# Mount Google Drive at /content/drive; the cells below read from and write to
# paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import random
from PIL import Image, ImageFilter
import shutil

# Define the base path for the RPSense_Dataset
rpsense_base_path = '/content/drive/MyDrive/RPSense_Dataset/dataset/train'

# Define the output directory for invalid images
output_invalid_path = '/content/dataset/invalid'
os.makedirs(output_invalid_path, exist_ok=True)

# List of folders corresponding to rock, paper, scissors
rps_folders = ['rock', 'paper', 'scissors']

num_images_to_select = 250
# Gaussian blur radius applied to every selected image (adjust as needed).
blur_radius = 5
# Fixed seed: together with the sorted listing below, this makes every run pick
# the same images, so re-running the cell overwrites the same output files
# instead of adding a different random batch to the invalid folder.
blur_selection_seed = 42

print("Processing RPSense_Dataset for invalid class generation...")

for folder_name in rps_folders:
    folder_path = os.path.join(rpsense_base_path, folder_name)

    if not os.path.isdir(folder_path):
        print(f"Source folder not found: {folder_path}")
        continue

    # os.listdir() order is arbitrary; sort so the seeded sample is stable.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    if not image_files:
        print(f"No image files found in {folder_path}. Skipping.")
        continue

    # One RNG per class, so a change in one folder does not alter the
    # selection made for the others.
    blur_rng = random.Random(f"{blur_selection_seed}:{folder_name}")
    selected_images = blur_rng.sample(image_files, min(num_images_to_select, len(image_files)))

    print(f"Selected {len(selected_images)} images from '{folder_name}' for blurring.")

    for image_file in selected_images:
        source_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(source_path) as img:
                # Convert to RGB if necessary (some operations might fail on
                # paletted images). A separate name is used so the object
                # managed by the `with` statement is never rebound.
                rgb_img = img if img.mode == 'RGB' else img.convert('RGB')

                blurred_img = rgb_img.filter(ImageFilter.GaussianBlur(radius=blur_radius))

                # Prefix to avoid potential filename conflicts
                destination_filename = f"RPSense_{folder_name}_{image_file}"
                destination_path = os.path.join(output_invalid_path, destination_filename)

                blurred_img.save(destination_path)

        except Exception as e:
            # Best effort: report the failing file and keep going.
            print(f"Error processing image {source_path}: {e}")

print("RPSense_Dataset invalid class generation complete.")
print(f"Total invalid images in {output_invalid_path}: {len(os.listdir(output_invalid_path))}")



In [None]:
import os
import shutil
import random

# Source base folder
source_base_path = '/content/dataset'

# Destination base folder on Google Drive
dest_base_path = '/content/drive/MyDrive/RPSense_Dataset/dataset'

# Subfolders and target split sizes
subfolders = ['rock', 'paper', 'scissors', 'invalid']
split_counts = {'train': 5100, 'test': 300}  # validation = rest

# Fixed seed for the shuffle. With an unseeded shuffle every re-run would copy
# a different partition on top of the previous one, so the same image could end
# up in more than one of train/test/validation on Drive.
split_seed = 42

# Ensure subfolders exist in destination folders, create 'invalid' folders if missing
for split in ['train', 'test', 'validation']:
    for subfolder in subfolders:
        os.makedirs(os.path.join(dest_base_path, split, subfolder), exist_ok=True)

# Slice boundaries shared by every class.
train_end = split_counts['train']
test_end = train_end + split_counts['test']

# Process each subfolder
for subfolder in subfolders:
    source_path = os.path.join(source_base_path, subfolder)

    if not os.path.isdir(source_path):
        print(f"Source subfolder not found: {source_path}")
        continue

    # Sort first (os.listdir() order is arbitrary), then shuffle with a
    # per-class seeded RNG so the partition is identical on every run.
    image_files = sorted(
        f for f in os.listdir(source_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    random.Random(f"{split_seed}:{subfolder}").shuffle(image_files)

    train_files = image_files[:train_end]
    test_files = image_files[train_end:test_end]
    val_files = image_files[test_end:]

    split_map = {
        'train': train_files,
        'test': test_files,
        'validation': val_files
    }

    for split_name, files in split_map.items():
        if not files:
            # Happens when the class has fewer images than the fixed counts.
            print(f"Warning: no '{subfolder}' images left for the {split_name} split.")

        for file_name in files:
            src_file = os.path.join(source_path, file_name)
            dest_file = os.path.join(dest_base_path, split_name, subfolder, file_name)

            try:
                shutil.copy(src_file, dest_file)
            except Exception as e:
                # Best effort: report the failing file and keep going.
                print(f"Error copying {file_name} to {split_name}/{subfolder}: {e}")

    print(f"Copied {len(train_files)} to train, {len(test_files)} to test, {len(val_files)} to validation for '{subfolder}'.")

print("Dataset split and copy complete.")


In [None]:
base_path = '/content/drive/MyDrive/RPSense_Dataset/dataset/'
print("\nChecking file extensions in  Gdrive:")

# For every directory that holds at least one file, report the distinct
# (lower-cased) file extensions found there.
for current_dir, _child_dirs, names in os.walk(base_path):
    if not names:
        continue
    print(f"In directory: {current_dir}")
    extensions = {os.path.splitext(name)[1].lower() for name in names}
    print(f"  File extensions found: {list(extensions)}")

In [None]:
base_path = '/content/drive/MyDrive/RPSense_Dataset/dataset/'
subfolders = ['train', 'test', 'validation']
classes = ['paper', 'rock', 'scissors', 'invalid']

# Print how many directory entries each class folder holds, per split.
for split_name in subfolders:
    split_dir = os.path.join(base_path, split_name)
    print(f"Files in {split_dir}:")

    if not os.path.exists(split_dir):
        print("  Folder not found")
        continue

    for label in classes:
        label_dir = os.path.join(split_dir, label)
        if os.path.exists(label_dir):
            status = f"{len(os.listdir(label_dir))} files"
        else:
            status = "Folder not found"
        print(f"  {label}: {status}")



# 24,304 images

In [None]:
def get_dir_size_in_gb(directory_path):
    """Return the combined size of the files under a directory, in gigabytes.

    The tree rooted at ``directory_path`` is walked recursively and the sizes
    of all files are added up; symbolic links are left out of the total.

    Args:
        directory_path: Path of the directory to measure.

    Returns:
        The total size in bytes divided by 1024**3, or 0 when
        ``directory_path`` does not exist.
    """
    if not os.path.exists(directory_path):
        return 0

    byte_count = sum(
        os.path.getsize(file_path)
        for parent, _child_dirs, names in os.walk(directory_path)
        for file_path in (os.path.join(parent, name) for name in names)
        if not os.path.islink(file_path)  # symbolic links are not counted
    )
    return byte_count / (1024 ** 3)  # bytes -> GB

# Measure the dataset folder referenced by `base_path` and report its size
# with two decimal places.
dataset_size_gb = get_dir_size_in_gb(base_path)
print("Size of the dataset at {}: {:.2f} GB".format(base_path, dataset_size_gb))

Move images from train to validation for a better split

In [None]:
import os
import shutil
import random

# Define the source and destination paths
source_train_folder = '/content/drive/MyDrive/RPSense_Dataset/dataset/train'
destination_val_folder = '/content/drive/MyDrive/RPSense_Dataset/dataset/validation'

# Number of images to move per class
num_images_to_move = 500

# Fixed seed so that, starting from the same train folder, the same images are
# chosen on every run.
move_seed = 42

# List of classes (subfolders)
classes = ['paper', 'rock', 'scissors', 'invalid']

# NOTE: this cell is not idempotent. Moved files leave the train folder, so
# each additional execution moves another batch of up to `num_images_to_move`.
print(f"Moving {num_images_to_move} images from each class in '{source_train_folder}' to '{destination_val_folder}'...")

for class_name in classes:
    source_class_path = os.path.join(source_train_folder, class_name)
    destination_class_path = os.path.join(destination_val_folder, class_name)

    # Skip missing class folders instead of letting os.listdir() raise.
    if not os.path.isdir(source_class_path):
        print(f"Source class folder not found: {source_class_path}. Skipping.")
        continue

    # Make sure the destination exists; otherwise every move for this class
    # would fail.
    os.makedirs(destination_class_path, exist_ok=True)

    # os.listdir() order is arbitrary; sort so the seeded sample is stable.
    image_files = sorted(
        f for f in os.listdir(source_class_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    if not image_files:
        print(f"No image files found in {source_class_path}. Skipping.")
        continue

    # Select random images to move
    # Ensure we don't try to move more images than are available
    move_rng = random.Random(f"{move_seed}:{class_name}")
    images_to_move = move_rng.sample(image_files, min(num_images_to_move, len(image_files)))

    print(f"Selected {len(images_to_move)} images from '{class_name}' for moving to validation.")

    # Move the selected images
    for image_file in images_to_move:
        source_file_path = os.path.join(source_class_path, image_file)
        destination_file_path = os.path.join(destination_class_path, image_file)

        try:
            shutil.move(source_file_path, destination_file_path)
        except Exception as e:
            # Best effort: report the failing file and keep going.
            print(f"Error moving {image_file} from '{class_name}': {e}")

print("\nImage moving complete.")

# Re-count the entries in every split/class folder after the move.
print("\nChecking counts after moving:")
base_path = '/content/drive/MyDrive/RPSense_Dataset/dataset/'
subfolders = ['train', 'test', 'validation']
classes = ['paper', 'rock', 'scissors', 'invalid']

for split_name in subfolders:
    split_dir = os.path.join(base_path, split_name)
    print(f"Files in {split_dir}:")

    if not os.path.exists(split_dir):
        print("  Folder not found")
        continue

    for label in classes:
        label_dir = os.path.join(split_dir, label)
        summary = (f"{len(os.listdir(label_dir))} files"
                   if os.path.exists(label_dir)
                   else "Folder not found")
        print(f"  {label}: {summary}")

# Dataset Completed for training