In [1]:
import os
import shutil
import random

In [2]:
# Seed the global `random` generator so that code relying on it draws the
# same sequence on every fresh run of the notebook.
SEED = 42
random.seed(SEED)

In [5]:
# Root folder of the original per-letter image folders, and where the split is written.
SRC_DIR = '../Dataset/ArASL_Database_54K_Final'   # original image folders
DEST_DIR = './dataset_split'                      # root of the generated split
VALID_EXT = {'.jpeg', '.jpg', '.png'}             # allowed file extensions

# Split ratios.
train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1

# Compared with a tolerance rather than `== 1.0` because the ratios are floats.
ratio_sum = train_ratio + val_ratio + test_ratio
assert abs(ratio_sum - 1.0) < 1e-6, "The ratios must add up to 1"


In [6]:

# Make sure the source folder is there before doing any work.
# os.path.isdir() is already False for a path that does not exist.
assert os.path.isdir(SRC_DIR), f"The folder does not exist: {os.path.abspath(SRC_DIR)}"


In [7]:

# Collect the classes: every non-hidden sub-directory of SRC_DIR, sorted by name.
classes = []
for entry in os.listdir(SRC_DIR):
    if entry.startswith('.'):
        continue  # hidden entry
    if os.path.isdir(os.path.join(SRC_DIR, entry)):
        classes.append(entry)
classes.sort()

assert classes, "There are no categories in the Dataset"


In [8]:

# Seed for the split. Every class gets its own generator derived from this
# value and the class name, so the assignment depends neither on the state of
# the global `random` generator nor on how often this cell has been executed.
SPLIT_SEED = 42
SPLIT_NAMES = ('train', 'val', 'test')


def _split_counts(n_total):
    """Return ``(n_train, n_val)`` for a class holding ``n_total`` images.

    Whatever is left after the train and validation slices goes to test.
    Classes with at least 3 images get at least one validation and one test
    image, taken from the training share; smaller classes keep the plain
    truncated ratios.
    """
    n_train = int(train_ratio * n_total)
    n_val = int(val_ratio * n_total)
    n_test = n_total - n_train - n_val

    # Support for small classes (when possible).
    if n_total >= 3:
        n_val = max(n_val, 1)
        n_test = max(n_test, 1)
        # Take from train whatever the minimums above added to val/test.
        n_train = n_total - n_val - n_test

    return n_train, n_val


def _copy_missing(names, src_dir, dst_dir):
    """Copy every file in ``names`` from ``src_dir`` to ``dst_dir``, skipping existing targets."""
    for name in names:
        dst = os.path.join(dst_dir, name)
        if not os.path.exists(dst):
            shutil.copy2(os.path.join(src_dir, name), dst)


def _count_in_other_splits(names, cls, split):
    """Count how many of ``names`` already exist under ``cls`` in a split other than ``split``."""
    other_dirs = [
        os.path.join(DEST_DIR, other, cls)
        for other in SPLIT_NAMES if other != split
    ]
    return sum(
        1
        for name in names
        if any(os.path.exists(os.path.join(d, name)) for d in other_dirs)
    )


# Create the split folders.
for split in SPLIT_NAMES:
    for cls in classes:
        os.makedirs(os.path.join(DEST_DIR, split, cls), exist_ok=True)

summary_train = summary_val = summary_test = summary_total = 0

# Start the split.
for cls in classes:
    src_path = os.path.join(SRC_DIR, cls)

    # Images only. Sorted because os.listdir() returns entries in arbitrary
    # order; without a fixed starting order a seeded shuffle is not repeatable.
    images = sorted(
        img for img in os.listdir(src_path)
        if os.path.isfile(os.path.join(src_path, img))
        and os.path.splitext(img)[1].lower() in VALID_EXT
    )

    n_total = len(images)
    if n_total == 0:
        print(f"[warning] {cls} Contains no images — skipped")
        continue

    # Per-class generator: re-running the cell reproduces the same assignment,
    # so an image is never copied into a second split on a later run.
    random.Random(f"{SPLIT_SEED}:{cls}").shuffle(images)

    n_train, n_val = _split_counts(n_total)
    train_imgs = images[:n_train]
    val_imgs = images[n_train:n_train + n_val]
    test_imgs = images[n_train + n_val:]

    # Copy, skipping files that already exist.
    for split, names in zip(SPLIT_NAMES, (train_imgs, val_imgs, test_imgs)):
        # Leftovers from a run with a different assignment would leak images
        # between splits; existing files are never deleted here, only reported.
        stale = _count_in_other_splits(names, cls, split)
        if stale:
            print(f"[warning] {cls}: {stale} {split} image(s) already exist in another split under {DEST_DIR}")
        _copy_missing(names, src_path, os.path.join(DEST_DIR, split, cls))

    print(f"Letter {cls:10}: Train={len(train_imgs)}, Validation={len(val_imgs)}, Test={len(test_imgs)}")

    summary_train += len(train_imgs)
    summary_val   += len(val_imgs)
    summary_test  += len(test_imgs)
    summary_total += n_total



Letter ain       : Train=1691, Validation=211, Test=212
Letter al        : Train=1074, Validation=134, Test=135
Letter aleff     : Train=1337, Validation=167, Test=168
Letter bb        : Train=1432, Validation=179, Test=180
Letter dal       : Train=1307, Validation=163, Test=164
Letter dha       : Train=1378, Validation=172, Test=173
Letter dhad      : Train=1336, Validation=167, Test=167
Letter fa        : Train=1564, Validation=195, Test=196
Letter gaaf      : Train=1364, Validation=170, Test=171
Letter ghain     : Train=1581, Validation=197, Test=199
Letter ha        : Train=1273, Validation=159, Test=160
Letter haa       : Train=1220, Validation=152, Test=154
Letter jeem      : Train=1241, Validation=155, Test=156
Letter kaaf      : Train=1419, Validation=177, Test=178
Letter khaa      : Train=1285, Validation=160, Test=162
Letter la        : Train=1396, Validation=174, Test=176
Letter laam      : Train=1465, Validation=183, Test=184
Letter meem      : Train=1412, Validation=176, T

In [9]:
# Report the totals accumulated over all classes by the split loop.
report_lines = [
    "\n=== SUMMARY ===",
    f"Train: {summary_train} | Val: {summary_val} | Test: {summary_test} | TOTAL: {summary_total}",
    "Partitioning is complete! ",
]
for line in report_lines:
    print(line)


=== SUMMARY ===
Train: 43225 | Val: 5391 | Test: 5433 | TOTAL: 54049
Partitioning is complete! 
