In [1]:
import os
import shutil
import random
import re
from PIL import Image
from collections import defaultdict

# CAUCAFALL

In [10]:
# Paths
img_dir = 'dataset/CAUCAFall/all_imgs'
label_dir = 'dataset/CAUCAFall/all_labels'

# Output directories
output_base = 'dataset/CAUCAFall/CAUCAFall_split_subjects_v2'
splits = ['train', 'valid', 'test']

# Crear carpetas de salida
for split in splits:
    os.makedirs(os.path.join(output_base, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_base, split, 'labels'), exist_ok=True)

# Expresión regular para extraer SUBJECT del nombre
pattern = re.compile(r'.*?(\d{1,2})[-]?\d{5}\.(jpg|png)$', re.IGNORECASE)

# Agrupar imágenes por SUBJECT
subject_to_files = {}

for filename in os.listdir(img_dir):
    match = pattern.match(filename)
    if match:
        subject = int(match.group(1))
        subject_to_files.setdefault(subject, []).append(filename)

# Verifica que haya exactamente 10 SUBJECTS
subjects = sorted(subject_to_files.keys())
if len(subjects) != 10:
    raise ValueError(f"Se esperaban 10 SUBJECTS, pero se encontraron {len(subjects)}: {subjects}")

# Mezclar aleatoriamente los SUBJECTS
#random.seed(42)
#random.shuffle(subjects)
train_subjects = subjects[:7]
valid_subjects = subjects[7:9]
test_subject = subjects[9]

split_map = {subj: 'train' for subj in train_subjects}
split_map.update({subj: 'valid' for subj in valid_subjects})
split_map[test_subject] = 'test'

# Mover archivos
for subject, files in subject_to_files.items():
    split = split_map[subject]
    for img_file in files:
        label_file = os.path.splitext(img_file)[0] + '.txt'

        src_img = os.path.join(img_dir, img_file)
        src_lbl = os.path.join(label_dir, label_file)

        dst_img = os.path.join(output_base, split, 'images', img_file)
        dst_lbl = os.path.join(output_base, split, 'labels', label_file)

        shutil.copy2(src_img, dst_img)
        if os.path.exists(src_lbl):
            shutil.copy2(src_lbl, dst_lbl)
        else:
            print(f"[ADVERTENCIA] No se encontró etiqueta para {img_file}")

print("✅ División completada.")
print(f"Train SUBJECTS: {train_subjects}")
print(f"Valid SUBJECT: {valid_subjects}")

✅ División completada.
Train SUBJECTS: [1, 2, 3, 4, 5, 6, 7]
Valid SUBJECT: [8, 9]
Test SUBJECT: 10


In [13]:
# Directorio base
base_dir = output_base
splits = ['train', 'valid', 'test']
class_names = {0: 'ADL', 1: 'FALL'}

# Inicializar contadores
distribution = {split: {0: 0, 1: 0} for split in splits}

for split in splits:
    label_path = os.path.join(base_dir, split, 'labels')
    for filename in os.listdir(label_path):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(label_path, filename), 'r') as f:
            for line in f:
                class_id = int(line.strip().split()[0])
                if class_id in [0, 1]:
                    distribution[split][class_id] += 1

# Mostrar resultados
print("📊 Distribución de clases:")
for split in splits:
    print(f"\n{split.upper()}:")
    for class_id, count in distribution[split].items():
        print(f"  {class_names[class_id]} (clase {class_id}): {count}")

📊 Distribución de clases:

TRAIN:
  ADL (clase 0): 9101
  FALL (clase 1): 8284

VALID:
  ADL (clase 0): 2911
  FALL (clase 1): 1353

TEST:
  ADL (clase 0): 1595
  FALL (clase 1): 893


In [12]:
# Directorio base del dataset
base_dir = output_base
#splits_v1 = ['train', 'valid', 'test']
splits = ['train']
class_names = {0: 'ADL', 1: 'FALL'}

def count_labels(label_file):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    fall = sum(1 for l in lines if l.startswith('1 '))
    adl = sum(1 for l in lines if l.startswith('0 '))
    return adl, fall

for split in splits:
    img_dir = os.path.join(base_dir, split, 'images')
    lbl_dir = os.path.join(base_dir, split, 'labels')

    adl_files = []
    fall_files = []

    for label_file in os.listdir(lbl_dir):
        if not label_file.endswith('.txt'):
            continue
        full_path = os.path.join(lbl_dir, label_file)
        adl_count, fall_count = count_labels(full_path)

        base_name = os.path.splitext(label_file)[0]
        if fall_count > 0:
            fall_files.append(base_name)
        elif adl_count > 0:
            adl_files.append(base_name)

    print(f"\n📂 {split.upper()} - FALL: {len(fall_files)}, ADL: {len(adl_files)}")

    # 1. REFLEJAR FALL
    for base_name in fall_files:
        img_path = os.path.join(img_dir, base_name + '.jpg')
        label_path = os.path.join(lbl_dir, base_name + '.txt')

        if not os.path.exists(img_path):
            img_path = os.path.join(img_dir, base_name + '.png')
            if not os.path.exists(img_path):
                continue

        # Reflejar imagen horizontalmente
        img = Image.open(img_path)
        flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
        if flipped_img.mode == 'RGBA':
            flipped_img = flipped_img.convert('RGB')
        # Guardar imagen reflejada
        new_img_name = base_name + '_flip.jpg'
        flipped_img.save(os.path.join(img_dir, new_img_name))

        # Reflejar label
        with open(label_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.strip().split()
            cls, x_center, y_center, width, height = parts
            x_center = str(1.0 - float(x_center))  # invertir horizontalmente
            new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

        with open(os.path.join(lbl_dir, base_name + '_flip.txt'), 'w') as f:
            f.writelines(new_lines)

    # 2. DUPLICAR ADL HASTA BALANCEAR
    new_fall_total = len(fall_files) * 2
    adl_needed = new_fall_total - len(adl_files)
    print(f"🔁 Se necesitan {adl_needed} duplicaciones de ADL para balancear")

    if adl_needed > 0:
        sampled = random.choices(adl_files, k=adl_needed)
        for i, base_name in enumerate(sampled):
            src_img_path = os.path.join(img_dir, base_name + '.jpg')
            if not os.path.exists(src_img_path):
                src_img_path = os.path.join(img_dir, base_name + '.png')
                if not os.path.exists(src_img_path):
                    continue

            src_lbl_path = os.path.join(lbl_dir, base_name + '.txt')
            if not os.path.exists(src_lbl_path):
                continue

            # Reflejar imagen
            img = Image.open(src_img_path)
            flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
            if flipped_img.mode == 'RGBA':
                flipped_img = flipped_img.convert('RGB')
            new_base = f"{base_name}_flipdup{i}"
            flipped_img.save(os.path.join(img_dir, new_base + '.jpg'))

            # Reflejar label
            with open(src_lbl_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            for line in lines:
                parts = line.strip().split()
                cls, x_center, y_center, width, height = parts
                x_center = str(1.0 - float(x_center))
                new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

            with open(os.path.join(lbl_dir, new_base + '.txt'), 'w') as f:
                f.writelines(new_lines)

print("\n✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.")


📂 TRAIN - FALL: 4142, ADL: 9101
🔁 Se necesitan -817 duplicaciones de ADL para balancear

✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.


# MULTIPLE CAM

In [14]:
# Rutas
base_dir = "dataset/Multiple_Cameras"
img_dir = os.path.join(base_dir, "all_imgs")
label_dir = os.path.join(base_dir, "all_labels")

# Carpeta de salida
output_dir = os.path.join(base_dir, "Multiple_Camera_split_chutes_v2")
splits = ['train', 'valid', 'test']
for split in splits:
    os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, split, 'labels'), exist_ok=True)

# Agrupar archivos por chute
chute_dict = defaultdict(list)

for filename in os.listdir(img_dir):
    if not (filename.endswith(".jpg") or filename.endswith(".png")):
        continue
    chute_id = filename.split("_")[0]
    base_name = os.path.splitext(filename)[0]
    chute_dict[chute_id].append(base_name)

# Obtener lista única de chutes
all_chutes = list(chute_dict.keys())
all_chutes.sort()
random.seed(42)
random.shuffle(all_chutes)

# Dividir chutes
train_chutes = all_chutes[:15]
valid_chutes = all_chutes[15:19]
test_chutes = all_chutes[19:24]

split_map = {
    'train': train_chutes,
    'valid': valid_chutes,
    'test': test_chutes
}

# Copiar archivos según el split asignado
for split, chutes in split_map.items():
    for chute in chutes:
        for base_name in chute_dict[chute]:
            img_src = os.path.join(img_dir, base_name + ".jpg")
            if not os.path.exists(img_src):  # prueba con png si no existe jpg
                img_src = os.path.join(img_dir, base_name + ".png")
                if not os.path.exists(img_src):
                    continue
            label_src = os.path.join(label_dir, base_name + ".txt")
            if not os.path.exists(label_src):
                continue

            img_dst = os.path.join(output_dir, split, 'images', os.path.basename(img_src))
            label_dst = os.path.join(output_dir, split, 'labels', os.path.basename(label_src))

            shutil.copy2(img_src, img_dst)
            shutil.copy2(label_src, label_dst)

print("✅ Dataset separado por CHUTE en train (16), valid (4) y test (4)")

✅ Dataset separado por CHUTE en train (16), valid (4) y test (4)


In [15]:
print("TRAIN")
print(train_chutes)
print("VALID")
print(valid_chutes)
print("TEST")
print(test_chutes)

TRAIN
['17', '16', '03', '15', '06', '14', '18', '13', '23', '07', '10', '02', '20', '12', '11']
VALID
['22', '05', '19', '08']
TEST
['09', '01', '04', '21']


In [20]:
# Directorio base
base_dir = "dataset/Multiple_Cameras/Multiple_Camera_split_chutes_v2"
splits = ['train', 'valid', 'test']
class_names = {0: 'ADL', 1: 'FALL'}

# Inicializar contadores
distribution = {split: {0: 0, 1: 0} for split in splits}

for split in splits:
    label_path = os.path.join(base_dir, split, 'labels')
    for filename in os.listdir(label_path):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(label_path, filename), 'r') as f:
            for line in f:
                class_id = int(line.strip().split()[0])
                if class_id in [0, 1]:
                    distribution[split][class_id] += 1

# Mostrar resultados
print("📊 Distribución de clases:")
for split in splits:
    print(f"\n{split.upper()}:")
    for class_id, count in distribution[split].items():
        print(f"  {class_names[class_id]} (clase {class_id}): {count}")

📊 Distribución de clases:

TRAIN:
  ADL (clase 0): 6866
  FALL (clase 1): 6866

VALID:
  ADL (clase 0): 779
  FALL (clase 1): 784

TEST:
  ADL (clase 0): 737
  FALL (clase 1): 471


In [19]:
# Directorio base del dataset
base_dir = "dataset/Multiple_Cameras/Multiple_Camera_split_chutes_v2"
#splits_v1 = ['train', 'valid', 'test']
splits = ['train']
class_names = {0: 'ADL', 1: 'FALL'}

def count_labels(label_file):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    fall = sum(1 for l in lines if l.startswith('1 '))
    adl = sum(1 for l in lines if l.startswith('0 '))
    return adl, fall

for split in splits:
    img_dir = os.path.join(base_dir, split, 'images')
    lbl_dir = os.path.join(base_dir, split, 'labels')

    adl_files = []
    fall_files = []

    for label_file in os.listdir(lbl_dir):
        if not label_file.endswith('.txt'):
            continue
        full_path = os.path.join(lbl_dir, label_file)
        adl_count, fall_count = count_labels(full_path)

        base_name = os.path.splitext(label_file)[0]
        if fall_count > 0:
            fall_files.append(base_name)
        elif adl_count > 0:
            adl_files.append(base_name)

    print(f"\n📂 {split.upper()} - FALL: {len(fall_files)}, ADL: {len(adl_files)}")

    # 1. REFLEJAR FALL
    for base_name in fall_files:
        img_path = os.path.join(img_dir, base_name + '.jpg')
        label_path = os.path.join(lbl_dir, base_name + '.txt')

        if not os.path.exists(img_path):
            img_path = os.path.join(img_dir, base_name + '.png')
            if not os.path.exists(img_path):
                continue

        # Reflejar imagen horizontalmente
        img = Image.open(img_path)
        flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
        if flipped_img.mode == 'RGBA':
            flipped_img = flipped_img.convert('RGB')
        # Guardar imagen reflejada
        new_img_name = base_name + '_flip.jpg'
        flipped_img.save(os.path.join(img_dir, new_img_name))

        # Reflejar label
        with open(label_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.strip().split()
            cls, x_center, y_center, width, height = parts
            x_center = str(1.0 - float(x_center))  # invertir horizontalmente
            new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

        with open(os.path.join(lbl_dir, base_name + '_flip.txt'), 'w') as f:
            f.writelines(new_lines)

    # 2. DUPLICAR ADL HASTA BALANCEAR
    new_fall_total = len(fall_files) * 2
    adl_needed = new_fall_total - len(adl_files)
    print(f"🔁 Se necesitan {adl_needed} duplicaciones de ADL para balancear")

    if adl_needed > 0:
        sampled = random.choices(adl_files, k=adl_needed)
        for i, base_name in enumerate(sampled):
            src_img_path = os.path.join(img_dir, base_name + '.jpg')
            if not os.path.exists(src_img_path):
                src_img_path = os.path.join(img_dir, base_name + '.png')
                if not os.path.exists(src_img_path):
                    continue

            src_lbl_path = os.path.join(lbl_dir, base_name + '.txt')
            if not os.path.exists(src_lbl_path):
                continue

            # Reflejar imagen
            img = Image.open(src_img_path)
            flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
            if flipped_img.mode == 'RGBA':
                flipped_img = flipped_img.convert('RGB')
            new_base = f"{base_name}_flipdup{i}"
            flipped_img.save(os.path.join(img_dir, new_base + '.jpg'))

            # Reflejar label
            with open(src_lbl_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            for line in lines:
                parts = line.strip().split()
                cls, x_center, y_center, width, height = parts
                x_center = str(1.0 - float(x_center))
                new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

            with open(os.path.join(lbl_dir, new_base + '.txt'), 'w') as f:
                f.writelines(new_lines)
    
print("\n✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.")


📂 TRAIN - FALL: 3433, ADL: 3695
🔁 Se necesitan 3171 duplicaciones de ADL para balancear

✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.


# UR FALL

In [33]:
img_dir = 'dataset/UR-Fall/all_imgs'
label_dir = 'dataset/UR-Fall/all_labels'

# Directorios de salida
output_base = 'dataset/UR-Fall/UR_Fall_split_escenarios_v2'
splits = ['train', 'valid', 'test']

# Crear estructura de carpetas
for split in splits:
    os.makedirs(os.path.join(output_base, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_base, split, 'labels'), exist_ok=True)

# Regex para extraer escenario
def extract_scenario(filename):
    match = re.match(r'(fall|adl)-(\d+)', filename.lower())
    if match:
        scenario_type = match.group(1).upper()
        scenario_id = int(match.group(2))
        return f"{scenario_type}-{scenario_id}"
    return None

# Agrupar imágenes por escenario
scenarios = defaultdict(list)

for img_file in os.listdir(img_dir):
    if not img_file.lower().endswith(('.jpg', '.png')):
        continue
    base_name = os.path.splitext(img_file)[0]
    scenario = extract_scenario(base_name)
    if scenario:
        scenarios[scenario].append(base_name)

# Separar escenarios en FALL y ADL
fall_scenarios = sorted([s for s in scenarios if s.startswith('FALL')])
adl_scenarios = sorted([s for s in scenarios if s.startswith('ADL')])

print(f"Total escenarios FALL: {len(fall_scenarios)}")
print(f"Total escenarios ADL: {len(adl_scenarios)}")

# Dividir escenarios
def split_list(scenario_list):
    total = len(scenario_list)
    random.seed(97)
    random.shuffle(scenario_list)
    n_train = int(round(total * 0.8))
    n_valid = int(round(total * 0.1))
    n_test = total - n_train - n_valid
    return (
        scenario_list[:n_train],
        scenario_list[n_train:n_train+n_valid],
        scenario_list[n_train+n_valid:]
    )

fall_train, fall_valid, fall_test = split_list(fall_scenarios)
adl_train, adl_valid, adl_test = split_list(adl_scenarios)

split_map = {
    'train': fall_train + adl_train,
    'valid': fall_valid + adl_valid,
    'test': fall_test + adl_test,
}

# Copiar archivos a carpetas correspondientes
for split, scenario_list in split_map.items():
    for scenario in scenario_list:
        for base_name in scenarios[scenario]:
            img_ext = '.jpg' if os.path.exists(os.path.join(img_dir, base_name + '.jpg')) else '.png'
            src_img = os.path.join(img_dir, base_name + img_ext)
            src_lbl = os.path.join(label_dir, base_name + '.txt')

            dst_img = os.path.join(output_base, split, 'images', base_name + img_ext)
            dst_lbl = os.path.join(output_base, split, 'labels', base_name + '.txt')

            if os.path.exists(src_img):
                shutil.copy(src_img, dst_img)
            if os.path.exists(src_lbl):
                shutil.copy(src_lbl, dst_lbl)

print("\n✅ División de dataset completada correctamente.")
len_train = len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios/train/images'))
len_valid =len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios/valid/images'))
len_test = len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios/test/images'))
len_tot = len_train + len_valid + len_test
print(f"TRAIN: {len_train}, ratio: {(len_train/len_tot)*100}")
print(f"VALID: {len_valid}, ratio: {(len_valid/len_tot)*100}")
print(f"TEST: {len_test}, ratio: {(len_test/len_tot)*100}")
print(f"TOTAL: {len_tot}")

Total escenarios FALL: 30
Total escenarios ADL: 40

✅ División de dataset completada correctamente.
TRAIN: 2305, ratio: 67.8939617083947
VALID: 586, ratio: 17.260677466863033
TEST: 504, ratio: 14.845360824742269
TOTAL: 3395


In [34]:
print("TRAIN")
print(fall_train)
print(adl_train)
print("VALID")
print(fall_valid)
print(adl_valid)
print("TEST")
print(fall_test)
print(adl_test)

TRAIN
['FALL-20', 'FALL-18', 'FALL-24', 'FALL-9', 'FALL-30', 'FALL-23', 'FALL-29', 'FALL-13', 'FALL-12', 'FALL-14', 'FALL-28', 'FALL-22', 'FALL-8', 'FALL-16', 'FALL-19', 'FALL-11', 'FALL-27', 'FALL-6', 'FALL-17', 'FALL-3', 'FALL-4', 'FALL-25', 'FALL-1', 'FALL-26']
['ADL-38', 'ADL-33', 'ADL-9', 'ADL-3', 'ADL-13', 'ADL-24', 'ADL-37', 'ADL-2', 'ADL-8', 'ADL-36', 'ADL-27', 'ADL-32', 'ADL-31', 'ADL-4', 'ADL-26', 'ADL-35', 'ADL-15', 'ADL-39', 'ADL-21', 'ADL-17', 'ADL-14', 'ADL-18', 'ADL-16', 'ADL-19', 'ADL-6', 'ADL-29', 'ADL-23', 'ADL-22', 'ADL-28', 'ADL-40', 'ADL-1', 'ADL-7']
VALID
['FALL-10', 'FALL-7', 'FALL-2']
['ADL-25', 'ADL-5', 'ADL-11', 'ADL-10']
TEST
['FALL-5', 'FALL-21', 'FALL-15']
['ADL-12', 'ADL-30', 'ADL-34', 'ADL-20']


In [35]:
# Directorio base del dataset
base_dir = "dataset/UR-Fall/UR_Fall_split_escenarios_v2"
#splits_v1 = ['train', 'valid', 'test']
splits = ['train']
class_names = {0: 'ADL', 1: 'FALL'}

def count_labels(label_file):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    fall = sum(1 for l in lines if l.startswith('1 '))
    adl = sum(1 for l in lines if l.startswith('0 '))
    return adl, fall

for split in splits:
    img_dir = os.path.join(base_dir, split, 'images')
    lbl_dir = os.path.join(base_dir, split, 'labels')

    adl_files = []
    fall_files = []

    for label_file in os.listdir(lbl_dir):
        if not label_file.endswith('.txt'):
            continue
        full_path = os.path.join(lbl_dir, label_file)
        adl_count, fall_count = count_labels(full_path)

        base_name = os.path.splitext(label_file)[0]
        if fall_count > 0:
            fall_files.append(base_name)
        elif adl_count > 0:
            adl_files.append(base_name)

    print(f"\n📂 {split.upper()} - FALL: {len(fall_files)}, ADL: {len(adl_files)}")

    # 1. REFLEJAR FALL
    for base_name in fall_files:
        img_path = os.path.join(img_dir, base_name + '.jpg')
        label_path = os.path.join(lbl_dir, base_name + '.txt')

        if not os.path.exists(img_path):
            img_path = os.path.join(img_dir, base_name + '.png')
            if not os.path.exists(img_path):
                continue

        # Reflejar imagen horizontalmente
        img = Image.open(img_path)
        flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
        if flipped_img.mode == 'RGBA':
            flipped_img = flipped_img.convert('RGB')
        # Guardar imagen reflejada
        new_img_name = base_name + '_flip.jpg'
        flipped_img.save(os.path.join(img_dir, new_img_name))

        # Reflejar label
        with open(label_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.strip().split()
            cls, x_center, y_center, width, height = parts
            x_center = str(1.0 - float(x_center))  # invertir horizontalmente
            new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

        with open(os.path.join(lbl_dir, base_name + '_flip.txt'), 'w') as f:
            f.writelines(new_lines)

    # 2. DUPLICAR ADL HASTA BALANCEAR
    new_fall_total = len(fall_files) * 2
    adl_needed = new_fall_total - len(adl_files)
    print(f"🔁 Se necesitan {adl_needed} duplicaciones de ADL para balancear")

    if adl_needed > 0:
        sampled = random.choices(adl_files, k=adl_needed)
        for i, base_name in enumerate(sampled):
            src_img_path = os.path.join(img_dir, base_name + '.jpg')
            if not os.path.exists(src_img_path):
                src_img_path = os.path.join(img_dir, base_name + '.png')
                if not os.path.exists(src_img_path):
                    continue

            src_lbl_path = os.path.join(lbl_dir, base_name + '.txt')
            if not os.path.exists(src_lbl_path):
                continue

            # Reflejar imagen
            img = Image.open(src_img_path)
            flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
            if flipped_img.mode == 'RGBA':
                flipped_img = flipped_img.convert('RGB')
            new_base = f"{base_name}_flipdup{i}"
            flipped_img.save(os.path.join(img_dir, new_base + '.jpg'))

            # Reflejar label
            with open(src_lbl_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            for line in lines:
                parts = line.strip().split()
                cls, x_center, y_center, width, height = parts
                x_center = str(1.0 - float(x_center))
                new_lines.append(f"{cls} {x_center} {y_center} {width} {height}\n")

            with open(os.path.join(lbl_dir, new_base + '.txt'), 'w') as f:
                f.writelines(new_lines)

print("\n✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.")


📂 TRAIN - FALL: 481, ADL: 1343
🔁 Se necesitan -381 duplicaciones de ADL para balancear

✅ Dataset balanceado con reflejo de FALL y duplicación de ADL.


In [36]:
# Directorio base
base_dir = "dataset/UR-Fall/UR_Fall_split_escenarios_v2"
splits = ['train', 'valid', 'test']
class_names = {0: 'ADL', 1: 'FALL'}

# Inicializar contadores
distribution = {split: {0: 0, 1: 0} for split in splits}

for split in splits:
    label_path = os.path.join(base_dir, split, 'labels')
    for filename in os.listdir(label_path):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(label_path, filename), 'r') as f:
            for line in f:
                class_id = int(line.strip().split()[0])
                if class_id in [0, 1]:
                    distribution[split][class_id] += 1

# Mostrar resultados
print("📊 Distribución de clases:")
for split in splits:
    print(f"\n{split.upper()}:")
    for class_id, count in distribution[split].items():
        print(f"  {class_names[class_id]} (clase {class_id}): {count}")

📊 Distribución de clases:

TRAIN:
  ADL (clase 0): 1356
  FALL (clase 1): 962

VALID:
  ADL (clase 0): 336
  FALL (clase 1): 125

TEST:
  ADL (clase 0): 132
  FALL (clase 1): 126


In [37]:
len_train = len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios_v2/train/images'))
len_valid =len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios_v2/valid/images'))
len_test = len(os.listdir('dataset/UR-Fall/UR_Fall_split_escenarios_v2/test/images'))
len_tot = len_train + len_valid + len_test
print(f"TRAIN: {len_train}, ratio: {(len_train/len_tot)*100}")
print(f"VALID: {len_valid}, ratio: {(len_valid/len_tot)*100}")
print(f"TEST: {len_test}, ratio: {(len_test/len_tot)*100}")
print(f"TOTAL: {len_tot}")

TRAIN: 2305, ratio: 76.22354497354497
VALID: 461, ratio: 15.244708994708994
TEST: 258, ratio: 8.531746031746032
TOTAL: 3024
