In [1]:
import os
import shutil
from pathlib import Path

def organize_images(base_dir=None):
    """Copy the images listed in train.txt / val.txt into train/ and val/ folders.

    Layout expected under ``base_dir``:
        sign_data/resized_images/   source images
        train.txt, val.txt          one image path per line (only the final
                                    path component, the file name, is used)
    The ``train`` and ``val`` folders are created under ``base_dir`` when
    missing and the listed images are copied (not moved) into them.

    Args:
        base_dir: Root data folder. Defaults to the notebook's original
            hard-coded location when omitted.

    Returns:
        None. Progress and per-file warnings are printed.
    """
    # Define paths
    if base_dir is None:
        base_dir = "C://Users//Access//Documents//data//data"
    base_dir = Path(base_dir)
    images_dir = base_dir / 'sign_data' / 'resized_images'
    train_txt = base_dir / 'train.txt'
    val_txt = base_dir / 'val.txt'
    train_dir = base_dir / 'train'
    val_dir = base_dir / 'val'

    # Create train and val folders if they don't exist
    train_dir.mkdir(exist_ok=True)
    val_dir.mkdir(exist_ok=True)

    # Function to copy images based on txt file
    def copy_images(txt_path, dest_dir):
        """Copy every image named in ``txt_path`` to ``dest_dir``; return the copy count."""
        if not txt_path.exists():
            print(f"Warning: {txt_path} not found!")
            return 0

        copied_count = 0
        with open(txt_path, 'r') as f:
            for line in f:
                entry = line.strip()
                if not entry:
                    # A blank line would resolve to images_dir itself and make
                    # shutil.copy2 fail on a directory, so skip it.
                    continue

                # Get the image filename from the path in txt file
                image_name = Path(entry).name
                src_path = images_dir / image_name

                # is_file() (rather than exists()) also rejects directories
                if src_path.is_file():
                    shutil.copy2(src_path, dest_dir / image_name)
                    copied_count += 1
                else:
                    print(f"Warning: {src_path} not found in images folder")

        return copied_count

    # Copy training images
    print("Organizing training images...")
    train_count = copy_images(train_txt, train_dir)
    print(f"Copied {train_count} images to {train_dir}")

    # Copy validation images
    print("Organizing validation images...")
    val_count = copy_images(val_txt, val_dir)
    print(f"Copied {val_count} images to {val_dir}")

    print(f"Organization complete! Total images organized: {train_count + val_count}")

# Run the train/val copy only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    organize_images()

Organizing training images...
Copied 9955 images to C:\Users\Access\Documents\data\data\train
Organizing validation images...
Copied 4247 images to C:\Users\Access\Documents\data\data\val
Organization complete! Total images organized: 14202


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the label table and split it 50/50 into validation and test halves,
# stratified on class_name so both halves keep the same class balance.
df=pd.read_csv(r"C:\Users\Access\PythonImp\project_DEPI\val_label_sorted.csv")
print(df.head())
df1=df[["filename"]]
y=df[["class_name"]]
# A fixed random_state makes the partition reproducible. Without it every
# re-run of this cell yields a different split, and the cell that copies the
# images into the val/test folders would then mix files from different
# partitions in the same destination folders.
x_val,x_test,y_val,y_test=train_test_split(df1,y,test_size=0.5,stratify=y,random_state=42)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)
print(y_test.shape)

           filename  class_id class_name  \
0  3_55_F_ain_0.txt         0        ain   
1  3_55_F_ain_1.txt         0        ain   
2  3_55_F_ain_2.txt         0        ain   
3  3_55_F_ain_3.txt         0        ain   
4  3_55_F_ain_4.txt         0        ain   

                                        file_content  
0  0 0.45072115384615385 0.44711538461538464 0.89...  
1  0 0.3954326923076923 0.4807692307692308 0.6610...  
2  0 0.5108173076923077 0.3713942307692308 0.7475...  
3  0 0.4735576923076923 0.328125 0.50480769230769...  
4  0 0.4891826923076923 0.3125 0.4639423076923077...  
(2124, 1)
(2124, 1)
(2124, 1)
(2124, 1)


In [3]:
# Show (rows, columns) of the label table `df` loaded in the previous cell.
print(df.shape)

(4248, 4)


In [4]:
import os
import shutil
# Folder locations: images are read from source_folder and copied into the
# validation / test destination folders.
source_folder = r"C:\Users\Access\Documents\data\data\val"
val_dest_folder = r"C:\Users\Access\Documents\data\val_final"
test_dest_folder = r"C:\Users\Access\Documents\data\test_final"

# Make sure both destination folders exist before anything is copied
for _dest_folder in (val_dest_folder, test_dest_folder):
    os.makedirs(_dest_folder, exist_ok=True)

# Function to convert .txt filename to image filename
def convert_to_image_filename(txt_filename, search_dir=None):
    """Convert filename like '3_55_F_ain_1.txt' to '3_55_F_ain_1.jpg'.

    Only the trailing extension is swapped, so a '.txt' that appears earlier
    in the name is left alone. Each candidate extension is tried in order and
    the first one for which a file exists in the search folder is used.

    Args:
        txt_filename: Label file name (no directory part).
        search_dir: Folder to look in. Defaults to the module-level
            ``source_folder`` at call time; note that later cells rebind that
            global, so pass the folder explicitly when in doubt.

    Returns:
        The matching image file name, or None if no candidate exists.
    """
    if search_dir is None:
        search_dir = source_folder

    stem = os.path.splitext(txt_filename)[0]

    # Try common image extensions
    for ext in ['.jpg', '.jpeg', '.png', '.bmp', '.JPG', '.JPEG', '.PNG']:
        image_filename = stem + ext
        if os.path.isfile(os.path.join(search_dir, image_filename)):
            return image_filename

    # If no image file found, return None
    return None

# Function to extract sort key from filename
def get_sort_key(filename):
    """
    Build the ordering key (first_number, class_name, last_number) for a file.

    The extension is dropped and the remainder is split on '_'; fields 0, 3
    and 4 supply the key, with fields 0 and 4 converted to int.
    Example: "11_16_M_ain_0.jpg" -> (11, "ain", 0)

    Names with fewer than five '_'-separated fields, or whose numeric fields
    are not integers, get the neutral key (0, "", 0).
    """
    fallback = (0, "", 0)

    stem, _ext = os.path.splitext(filename)
    fields = stem.split('_')
    if len(fields) < 5:
        return fallback

    try:
        return (int(fields[0]), fields[3], int(fields[4]))
    except ValueError:
        return fallback

def _copy_split(txt_filenames, dest_folder):
    """Copy the images that belong to one split into ``dest_folder``.

    Each label file name is resolved to an image name with
    convert_to_image_filename(), the resolved names are ordered with
    get_sort_key(), and every image is copied from the module-level
    ``source_folder`` with shutil.copy2.

    Returns:
        (sorted_image_names, copied_count)
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')

    resolved = []
    for txt_filename in txt_filenames:
        image_filename = convert_to_image_filename(txt_filename)
        if image_filename:
            resolved.append(image_filename)
        else:
            print(f"Warning: No image file found for {txt_filename}")

    resolved.sort(key=get_sort_key)

    # Copies never remove anything, so images left behind by an earlier run
    # with a different split would silently stay in this folder (including
    # inside class subfolders). Report them instead of letting val and test
    # overlap unnoticed.
    expected = set(resolved)
    leftovers = sorted(
        name
        for _root, _dirs, names in os.walk(dest_folder)
        for name in names
        if name.lower().endswith(image_suffixes) and name not in expected
    )
    if leftovers:
        print(f"Warning: {dest_folder} already holds {len(leftovers)} image(s) "
              f"that are not part of this split (e.g. {leftovers[0]})")

    copied = 0
    for image_filename in resolved:
        src_path = os.path.join(source_folder, image_filename)
        dst_path = os.path.join(dest_folder, image_filename)
        # Re-checked because the file may have vanished since it was resolved
        if os.path.exists(src_path):
            shutil.copy2(src_path, dst_path)
            copied += 1
        else:
            print(f"Warning: File not found: {src_path}")

    return resolved, copied


# Process validation files
print("\nProcessing validation files...")
val_filenames_sorted, val_count = _copy_split(x_val['filename'].tolist(), val_dest_folder)

# Process test files
print("\nProcessing test files...")
test_filenames_sorted, test_count = _copy_split(x_test['filename'].tolist(), test_dest_folder)

# The same image must not end up in both splits (possible if the label table
# lists a file name more than once).
split_overlap = set(val_filenames_sorted) & set(test_filenames_sorted)
if split_overlap:
    print(f"Warning: {len(split_overlap)} image(s) appear in both validation and test")

print("\n" + "="*50)
print("PROCESSING COMPLETE!")
print("="*50)
print(f"Validation folder: {val_count} images")
print(f"Test folder: {test_count} images")
print(f"Total processed: {val_count + test_count}")

# Show what extensions were actually found
print("\nChecking file extensions in source folder...")
image_extensions = {
    os.path.splitext(file)[1].lower()
    for file in os.listdir(source_folder)
    if os.path.isfile(os.path.join(source_folder, file))
}

print(f"Found these extensions: {image_extensions}")

# Show some successfully copied files
if val_count > 0:
    print("\nFirst 10 files in validation folder:")
    val_files = sorted(os.listdir(val_dest_folder), key=get_sort_key)[:10]
    for i, file in enumerate(val_files):
        print(f"  {i+1}. {file}")

if test_count > 0:
    print("\nFirst 10 files in test folder:")
    test_files = sorted(os.listdir(test_dest_folder), key=get_sort_key)[:10]
    for i, file in enumerate(test_files):
        print(f"  {i+1}. {file}")


Processing validation files...

Processing test files...

PROCESSING COMPLETE!
Validation folder: 2124 images
Test folder: 2123 images
Total processed: 4247

Checking file extensions in source folder...
Found these extensions: {'.jpg'}

First 10 files in validation folder:
  1. 3_55_F_ain_1.jpg
  2. 3_55_F_ain_2.jpg
  3. 3_55_F_ain_4.jpg
  4. 3_55_F_ain_6.jpg
  5. 3_55_F_ain_7.jpg
  6. 3_55_F_ain_8.jpg
  7. 3_55_F_al_2.jpg
  8. 3_55_F_al_3.jpg
  9. 3_55_F_al_4.jpg
  10. 3_55_F_al_5.jpg

First 10 files in test folder:
  1. 3_55_F_ain_0.jpg
  2. 3_55_F_ain_3.jpg
  3. 3_55_F_ain_5.jpg
  4. 3_55_F_al_0.jpg
  5. 3_55_F_al_1.jpg
  6. 3_55_F_aleff_2.jpg
  7. 3_55_F_aleff_3.jpg
  8. 3_55_F_aleff_4.jpg
  9. 3_55_F_aleff_6.jpg
  10. 3_55_F_aleff_7.jpg


In [5]:
# Preview the first ten entries of each sorted list next to the key that ordered them
for heading, names in (
    ("Sorted validation filenames (first 10):", val_filenames_sorted),
    ("\nSorted test filenames (first 10):", test_filenames_sorted),
):
    print(heading)
    for position, name in enumerate(names[:10], start=1):
        print(f"  {position}. {name} -> {get_sort_key(name)}")

Sorted validation filenames (first 10):
  1. 3_55_F_ain_1.jpg -> (3, 'ain', 1)
  2. 3_55_F_ain_2.jpg -> (3, 'ain', 2)
  3. 3_55_F_ain_4.jpg -> (3, 'ain', 4)
  4. 3_55_F_ain_6.jpg -> (3, 'ain', 6)
  5. 3_55_F_ain_7.jpg -> (3, 'ain', 7)
  6. 3_55_F_ain_8.jpg -> (3, 'ain', 8)
  7. 3_55_F_al_2.jpg -> (3, 'al', 2)
  8. 3_55_F_al_3.jpg -> (3, 'al', 3)
  9. 3_55_F_al_4.jpg -> (3, 'al', 4)
  10. 3_55_F_al_5.jpg -> (3, 'al', 5)

Sorted test filenames (first 10):
  1. 3_55_F_ain_0.jpg -> (3, 'ain', 0)
  2. 3_55_F_ain_3.jpg -> (3, 'ain', 3)
  3. 3_55_F_ain_5.jpg -> (3, 'ain', 5)
  4. 3_55_F_al_0.jpg -> (3, 'al', 0)
  5. 3_55_F_al_1.jpg -> (3, 'al', 1)
  6. 3_55_F_aleff_2.jpg -> (3, 'aleff', 2)
  7. 3_55_F_aleff_3.jpg -> (3, 'aleff', 3)
  8. 3_55_F_aleff_4.jpg -> (3, 'aleff', 4)
  9. 3_55_F_aleff_6.jpg -> (3, 'aleff', 6)
  10. 3_55_F_aleff_7.jpg -> (3, 'aleff', 7)


In [None]:
import os
import shutil
from tqdm import tqdm  # for progress bar

# Define paths
# NOTE: this rebinds the module-level `source_folder`. convert_to_image_filename()
# from the earlier split cell reads that global when it is called, so after this
# cell runs it would search the train folder instead of the val folder.
source_folder = r"C:\Users\Access\Documents\data\data\train"
# Class labels: each one becomes a subfolder name and is matched against the
# lower-cased image filenames by categorize_images().
class_names = ['ain', 'al', 'aleff', 'bb', 'dal', 'dha', 'dhad', 'fa', 'gaaf', 'ghain', 
               'ha', 'haa', 'jeem', 'kaaf', 'khaa', 'la', 'laam', 'meem', 'nun', 'ra', 
               'saad', 'seen', 'sheen', 'ta', 'taa', 'thaa', 'thal', 'toot', 'waw', 'ya', 'yaa', 'zay']

def categorize_images(source_dir, class_list):
    """
    Categorize images into subfolders based on class names found in filenames.

    Every image file sitting directly in ``source_dir`` is matched against
    ``class_list`` and moved into ``source_dir/<class_name>``. A file belongs
    to a class when its lower-cased name contains ``_<class_name>_`` or when
    its name without the extension ends with ``_<class_name>``. Classes are
    tried in list order and the first match wins.

    Args:
        source_dir: Folder holding the images; class subfolders are created here.
        class_list: Class names, compared against the lower-cased filename.

    Returns:
        None. A summary is printed, and when some files match no class their
        names are written to ``uncategorized_files.txt`` inside ``source_dir``.
    """
    # NOTE(review): this notebook defines categorize_images in several cells;
    # whichever cell ran last supplies the definition in use.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')

    # Create class subfolders if they don't exist
    for class_name in class_list:
        os.makedirs(os.path.join(source_dir, class_name), exist_ok=True)

    # Get all image files (top level only; isfile() skips the class folders).
    # Sorted so the traversal and the reported lists are deterministic.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
        and f.lower().endswith(image_suffixes)
    )

    print(f"Found {len(image_files)} images to categorize")

    # Counters
    categorized_count = 0
    uncategorized_files = []

    # Process each image
    for filename in tqdm(image_files, desc="Categorizing images"):
        source_path = os.path.join(source_dir, filename)
        lowered = filename.lower()
        # Extension-free name, so the suffix test works for every accepted
        # image type instead of only '.jpg'
        stem = os.path.splitext(lowered)[0]

        # Check which class this image belongs to
        found_class = None
        for class_name in class_list:
            if f"_{class_name}_" in lowered or stem.endswith(f"_{class_name}"):
                found_class = class_name
                break

        if found_class:
            # Move file to the appropriate class folder
            dest_path = os.path.join(source_dir, found_class, filename)

            # A same-named file already in the class folder is left untouched;
            # the source copy then stays where it is but is still counted.
            if not os.path.exists(dest_path):
                shutil.move(source_path, dest_path)
            categorized_count += 1
        else:
            uncategorized_files.append(filename)

    uncategorized_count = len(uncategorized_files)

    # Print results
    print(f"\n=== CATEGORIZATION RESULTS ===")
    print(f"Successfully categorized: {categorized_count} images")
    print(f"Uncategorized: {uncategorized_count} images")

    if uncategorized_files:
        print(f"\nUncategorized files (first 10):")
        for file in uncategorized_files[:10]:
            print(f"  - {file}")

        # Save list of uncategorized files (explicit UTF-8 so non-ASCII names
        # do not depend on the platform default encoding)
        report_path = os.path.join(source_dir, "uncategorized_files.txt")
        with open(report_path, "w", encoding="utf-8") as f:
            for file in uncategorized_files:
                f.write(f"{file}\n")
        print(f"\nFull list saved to: {report_path}")

    # Show count per class
    print(f"\n=== IMAGES PER CLASS ===")
    for class_name in class_list:
        class_folder = os.path.join(source_dir, class_name)
        if os.path.exists(class_folder):
            count = len([f for f in os.listdir(class_folder)
                         if os.path.isfile(os.path.join(class_folder, f))])
            print(f"{class_name}: {count} images")

# Sort the images under source_folder into their class subfolders
categorize_images(source_folder, class_names)

# Report how many files each class folder holds afterwards
print("\n=== FINAL FOLDER STRUCTURE ===")
for class_name in class_names:
    class_path = os.path.join(source_folder, class_name)
    if not os.path.exists(class_path):
        continue
    image_count = sum(
        1 for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    print(f"{class_path} - {image_count} images")

Found 9955 images to categorize


Categorizing images:  99%|███████████████████████████████████████████████████████▎| 9836/9955 [00:08<00:00, 706.86it/s]

In [6]:
import os
import shutil
from tqdm import tqdm  # for progress bar

# Define paths
# NOTE: this rebinds the module-level `source_folder` that convert_to_image_filename()
# from the earlier split cell reads at call time.
# NOTE(review): the split cell writes validation images to ...\data\val_final, while
# this path ends in "val_finalll" — confirm this is the intended folder.
source_folder = r"C:\Users\Access\Documents\data\val_finalll"
# Class labels: each one becomes a subfolder name and is matched against the
# lower-cased image filenames by categorize_images().
class_names = ['ain', 'al', 'aleff', 'bb', 'dal', 'dha', 'dhad', 'fa', 'gaaf', 'ghain', 
               'ha', 'haa', 'jeem', 'kaaf', 'khaa', 'la', 'laam', 'meem', 'nun', 'ra', 
               'saad', 'seen', 'sheen', 'ta', 'taa', 'thaa', 'thal', 'toot', 'waw', 'ya', 'yaa', 'zay']

def categorize_images(source_dir, class_list):
    """
    Categorize images into subfolders based on class names found in filenames.

    Every image file sitting directly in ``source_dir`` is matched against
    ``class_list`` and moved into ``source_dir/<class_name>``. A file belongs
    to a class when its lower-cased name contains ``_<class_name>_`` or when
    its name without the extension ends with ``_<class_name>``. Classes are
    tried in list order and the first match wins.

    Args:
        source_dir: Folder holding the images; class subfolders are created here.
        class_list: Class names, compared against the lower-cased filename.

    Returns:
        None. A summary is printed, and when some files match no class their
        names are written to ``uncategorized_files.txt`` inside ``source_dir``.
    """
    # NOTE(review): this notebook defines categorize_images in several cells;
    # whichever cell ran last supplies the definition in use.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')

    # Create class subfolders if they don't exist
    for class_name in class_list:
        os.makedirs(os.path.join(source_dir, class_name), exist_ok=True)

    # Get all image files (top level only; isfile() skips the class folders).
    # Sorted so the traversal and the reported lists are deterministic.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
        and f.lower().endswith(image_suffixes)
    )

    print(f"Found {len(image_files)} images to categorize")

    # Counters
    categorized_count = 0
    uncategorized_files = []

    # Process each image
    for filename in tqdm(image_files, desc="Categorizing images"):
        source_path = os.path.join(source_dir, filename)
        lowered = filename.lower()
        # Extension-free name, so the suffix test works for every accepted
        # image type instead of only '.jpg'
        stem = os.path.splitext(lowered)[0]

        # Check which class this image belongs to
        found_class = None
        for class_name in class_list:
            if f"_{class_name}_" in lowered or stem.endswith(f"_{class_name}"):
                found_class = class_name
                break

        if found_class:
            # Move file to the appropriate class folder
            dest_path = os.path.join(source_dir, found_class, filename)

            # A same-named file already in the class folder is left untouched;
            # the source copy then stays where it is but is still counted.
            if not os.path.exists(dest_path):
                shutil.move(source_path, dest_path)
            categorized_count += 1
        else:
            uncategorized_files.append(filename)

    uncategorized_count = len(uncategorized_files)

    # Print results
    print(f"\n=== CATEGORIZATION RESULTS ===")
    print(f"Successfully categorized: {categorized_count} images")
    print(f"Uncategorized: {uncategorized_count} images")

    if uncategorized_files:
        print(f"\nUncategorized files (first 10):")
        for file in uncategorized_files[:10]:
            print(f"  - {file}")

        # Save list of uncategorized files (explicit UTF-8 so non-ASCII names
        # do not depend on the platform default encoding)
        report_path = os.path.join(source_dir, "uncategorized_files.txt")
        with open(report_path, "w", encoding="utf-8") as f:
            for file in uncategorized_files:
                f.write(f"{file}\n")
        print(f"\nFull list saved to: {report_path}")

    # Show count per class
    print(f"\n=== IMAGES PER CLASS ===")
    for class_name in class_list:
        class_folder = os.path.join(source_dir, class_name)
        if os.path.exists(class_folder):
            count = len([f for f in os.listdir(class_folder)
                         if os.path.isfile(os.path.join(class_folder, f))])
            print(f"{class_name}: {count} images")

# Sort the images under source_folder into their class subfolders
categorize_images(source_folder, class_names)

# Report how many files each class folder holds afterwards
print("\n=== FINAL FOLDER STRUCTURE ===")
for class_name in class_names:
    class_path = os.path.join(source_folder, class_name)
    if not os.path.exists(class_path):
        continue
    image_count = sum(
        1 for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    print(f"{class_path} - {image_count} images")

Found 2123 images to categorize


Categorizing images: 100%|███████████████████████████████████████████████████████| 2123/2123 [00:01<00:00, 1330.64it/s]



=== CATEGORIZATION RESULTS ===
Successfully categorized: 2123 images
Uncategorized: 0 images

=== IMAGES PER CLASS ===
ain: 66 images
al: 67 images
aleff: 67 images
bb: 68 images
dal: 55 images
dha: 68 images
dhad: 65 images
fa: 68 images
gaaf: 67 images
ghain: 67 images
ha: 68 images
haa: 68 images
jeem: 67 images
kaaf: 68 images
khaa: 67 images
la: 67 images
laam: 66 images
meem: 68 images
nun: 61 images
ra: 62 images
saad: 68 images
seen: 68 images
sheen: 67 images
ta: 67 images
taa: 67 images
thaa: 69 images
thal: 68 images
toot: 67 images
waw: 61 images
ya: 67 images
yaa: 67 images
zay: 67 images

=== FINAL FOLDER STRUCTURE ===
C:\Users\Access\Documents\data\test_finalll\ain - 66 images
C:\Users\Access\Documents\data\test_finalll\al - 67 images
C:\Users\Access\Documents\data\test_finalll\aleff - 67 images
C:\Users\Access\Documents\data\test_finalll\bb - 68 images
C:\Users\Access\Documents\data\test_finalll\dal - 55 images
C:\Users\Access\Documents\data\test_finalll\dha - 68 image

In [8]:
import os
import shutil
from tqdm import tqdm  # for progress bar

# Define paths
# NOTE: this rebinds the module-level `source_folder` that convert_to_image_filename()
# from the earlier split cell reads at call time. The path is the same string the
# split cell assigns to `test_dest_folder`.
source_folder = r"C:\Users\Access\Documents\data\test_final"
# Class labels: each one becomes a subfolder name and is matched against the
# lower-cased image filenames by categorize_images().
class_names = ['ain', 'al', 'aleff', 'bb', 'dal', 'dha', 'dhad', 'fa', 'gaaf', 'ghain', 
               'ha', 'haa', 'jeem', 'kaaf', 'khaa', 'la', 'laam', 'meem', 'nun', 'ra', 
               'saad', 'seen', 'sheen', 'ta', 'taa', 'thaa', 'thal', 'toot', 'waw', 'ya', 'yaa', 'zay']

def categorize_images(source_dir, class_list):
    """
    Categorize images into subfolders based on class names found in filenames.

    Every image file sitting directly in ``source_dir`` is matched against
    ``class_list`` and moved into ``source_dir/<class_name>``. A file belongs
    to a class when its lower-cased name contains ``_<class_name>_`` or when
    its name without the extension ends with ``_<class_name>``. Classes are
    tried in list order and the first match wins.

    Args:
        source_dir: Folder holding the images; class subfolders are created here.
        class_list: Class names, compared against the lower-cased filename.

    Returns:
        None. A summary is printed, and when some files match no class their
        names are written to ``uncategorized_files.txt`` inside ``source_dir``.
    """
    # NOTE(review): this notebook defines categorize_images in several cells;
    # whichever cell ran last supplies the definition in use.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')

    # Create class subfolders if they don't exist
    for class_name in class_list:
        os.makedirs(os.path.join(source_dir, class_name), exist_ok=True)

    # Get all image files (top level only; isfile() skips the class folders).
    # Sorted so the traversal and the reported lists are deterministic.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
        and f.lower().endswith(image_suffixes)
    )

    print(f"Found {len(image_files)} images to categorize")

    # Counters
    categorized_count = 0
    uncategorized_files = []

    # Process each image
    for filename in tqdm(image_files, desc="Categorizing images"):
        source_path = os.path.join(source_dir, filename)
        lowered = filename.lower()
        # Extension-free name, so the suffix test works for every accepted
        # image type instead of only '.jpg'
        stem = os.path.splitext(lowered)[0]

        # Check which class this image belongs to
        found_class = None
        for class_name in class_list:
            if f"_{class_name}_" in lowered or stem.endswith(f"_{class_name}"):
                found_class = class_name
                break

        if found_class:
            # Move file to the appropriate class folder
            dest_path = os.path.join(source_dir, found_class, filename)

            # A same-named file already in the class folder is left untouched;
            # the source copy then stays where it is but is still counted.
            if not os.path.exists(dest_path):
                shutil.move(source_path, dest_path)
            categorized_count += 1
        else:
            uncategorized_files.append(filename)

    uncategorized_count = len(uncategorized_files)

    # Print results
    print(f"\n=== CATEGORIZATION RESULTS ===")
    print(f"Successfully categorized: {categorized_count} images")
    print(f"Uncategorized: {uncategorized_count} images")

    if uncategorized_files:
        print(f"\nUncategorized files (first 10):")
        for file in uncategorized_files[:10]:
            print(f"  - {file}")

        # Save list of uncategorized files (explicit UTF-8 so non-ASCII names
        # do not depend on the platform default encoding)
        report_path = os.path.join(source_dir, "uncategorized_files.txt")
        with open(report_path, "w", encoding="utf-8") as f:
            for file in uncategorized_files:
                f.write(f"{file}\n")
        print(f"\nFull list saved to: {report_path}")

    # Show count per class
    print(f"\n=== IMAGES PER CLASS ===")
    for class_name in class_list:
        class_folder = os.path.join(source_dir, class_name)
        if os.path.exists(class_folder):
            count = len([f for f in os.listdir(class_folder)
                         if os.path.isfile(os.path.join(class_folder, f))])
            print(f"{class_name}: {count} images")

# Sort the images under source_folder into their class subfolders
categorize_images(source_folder, class_names)

# Report how many files each class folder holds afterwards
print("\n=== FINAL FOLDER STRUCTURE ===")
for class_name in class_names:
    class_path = os.path.join(source_folder, class_name)
    if not os.path.exists(class_path):
        continue
    image_count = sum(
        1 for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    print(f"{class_path} - {image_count} images")

Found 2124 images to categorize


Categorizing images: 100%|███████████████████████████████████████████████████████| 2124/2124 [00:01<00:00, 1638.14it/s]



=== CATEGORIZATION RESULTS ===
Successfully categorized: 2124 images
Uncategorized: 0 images

=== IMAGES PER CLASS ===
ain: 67 images
al: 68 images
aleff: 67 images
bb: 67 images
dal: 56 images
dha: 67 images
dhad: 65 images
fa: 67 images
gaaf: 67 images
ghain: 68 images
ha: 67 images
haa: 67 images
jeem: 68 images
kaaf: 67 images
khaa: 68 images
la: 68 images
laam: 66 images
meem: 67 images
nun: 62 images
ra: 61 images
saad: 67 images
seen: 67 images
sheen: 68 images
ta: 68 images
taa: 68 images
thaa: 68 images
thal: 67 images
toot: 68 images
waw: 60 images
ya: 67 images
yaa: 68 images
zay: 68 images

=== FINAL FOLDER STRUCTURE ===
C:\Users\Access\Documents\data\val_finalll\ain - 67 images
C:\Users\Access\Documents\data\val_finalll\al - 68 images
C:\Users\Access\Documents\data\val_finalll\aleff - 67 images
C:\Users\Access\Documents\data\val_finalll\bb - 67 images
C:\Users\Access\Documents\data\val_finalll\dal - 56 images
C:\Users\Access\Documents\data\val_finalll\dha - 67 images
C:\U