In [10]:
import os
import h5py

def list_h5_contents_with_details(file_path):
    """Summarise every dataset stored in an HDF5 file.

    The file is opened read-only and traversed with ``visititems``; groups
    are ignored and only ``h5py.Dataset`` objects are reported.

    Args:
        file_path: Path of the HDF5 file to inspect.

    Returns:
        A list of strings, one per dataset, in traversal order, formatted as
        ``"<name>: <shape>, dtype=<dtype>"``.
    """
    summaries = []

    def _describe_if_dataset(path_in_file, node):
        # Returning None (implicitly or via the bare return) tells
        # visititems to keep walking the rest of the file.
        if not isinstance(node, h5py.Dataset):
            return
        summaries.append(f"{path_in_file}: {node.shape}, dtype={node.dtype}")

    with h5py.File(file_path, 'r') as handle:
        handle.visititems(_describe_if_dataset)

    return summaries

def display_directory_structure_with_h5_info(base_dir, indent=0):
    """Print a tree view of ``base_dir`` with details for each ``.h5`` file.

    Entries are visited in sorted name order. Directories are printed and
    then descended into recursively; files whose name ends in ``.h5`` are
    printed followed by one line per dataset they contain; every other file
    is skipped without output.

    Args:
        base_dir: Directory whose contents are listed.
        indent: Nesting depth of ``base_dir``; controls how many ``'│   '``
            prefixes are printed before each entry.
    """
    entry_prefix = '│   ' * indent
    detail_prefix = '│   ' * (indent + 1)

    for entry in sorted(os.listdir(base_dir)):
        entry_path = os.path.join(base_dir, entry)
        is_directory = os.path.isdir(entry_path)

        # Only directories and .h5 files appear in the tree.
        if not is_directory and not entry.endswith('.h5'):
            continue

        print(entry_prefix + '├── ' + entry)

        if is_directory:
            display_directory_structure_with_h5_info(entry_path, indent + 1)
            continue

        for detail in list_h5_contents_with_details(entry_path):
            print(detail_prefix + '├── ' + detail)

# Example usage: print the directory tree of the processed dataset together
# with the name, shape and dtype of every dataset inside each .h5 file.
# NOTE(review): this absolute path is the same directory the processing cell
# writes to (its new_base_path), so that cell has to have been run first.
base_dir = '/home/ismael/TFM/rfchallenge/dataset_processed'
print("Directory structure with H5 file details:")
display_directory_structure_with_h5_info(base_dir)


Directory structure with H5 file details:
├── interferenceset_frame
│   ├── CommSignal2_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── CommSignal3_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── CommSignal5G1_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── EMISignal1_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
├── testset1_frame
│   ├── CommSignal2_test1_raw_data.h5
│   │   ├── dataset: (700, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── CommSignal3_test1_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── CommSignal5G1_test1_raw_data.h5
│   │   ├── dataset: (1000, 3000, 2), dtype=float64
│   │   ├── sig_type: (), dtype=object
│   ├── EMISignal1_test1_raw_dat

In [7]:
import os
import h5py
import numpy as np

# Create the new directory structure
def create_new_directory_structure(base_path, new_base_path):
    """Mirror the directory tree found under ``base_path`` below ``new_base_path``.

    Only directories are created; no files are copied. Directories that
    already exist are left untouched. If ``base_path`` does not exist,
    ``os.walk`` yields nothing and nothing is created.

    The destination of each directory is derived from its path relative to
    ``base_path`` rather than by string substitution, so a trailing path
    separator on ``base_path`` cannot corrupt nested destinations. The
    destination root itself is created even when ``base_path`` contains no
    subdirectories, so files sitting directly in ``base_path`` have somewhere
    to be written.

    Args:
        base_path: Root of the existing directory tree.
        new_base_path: Root under which the mirrored tree is created.
    """
    for root, subdirs, _files in os.walk(base_path):
        relative_root = os.path.relpath(root, base_path)
        # normpath collapses the "." produced for the top-level directory.
        target_root = os.path.normpath(os.path.join(new_base_path, relative_root))
        os.makedirs(target_root, exist_ok=True)

        # Also create every listed subdirectory explicitly: os.walk reports
        # symlinked or unreadable directories here without ever visiting them
        # as a root of their own.
        for subdir in subdirs:
            os.makedirs(os.path.join(target_root, subdir), exist_ok=True)

def _split_into_segments(signals, trim_length, num_samples):
    """Cut each row of a 2-D array into consecutive fixed-length segments.

    Rows are processed in order and each row is cut left to right; any
    remainder shorter than ``trim_length`` at the end of a row is discarded.

    Args:
        signals: 2-D array of shape ``(rows, row_length)``.
        trim_length: Positive length of every segment.
        num_samples: Maximum number of segments to return.

    Returns:
        Array of shape ``(k, trim_length)`` with ``k <= num_samples``.

    Raises:
        ValueError: If ``signals`` is not two-dimensional.
    """
    if signals.ndim != 2:
        raise ValueError(
            f"expected a 2-D dataset (samples, length), got shape {signals.shape}"
        )

    num_rows, row_length = signals.shape
    segments_per_row = row_length // trim_length

    # Drop the tail that does not fill a whole segment, then let reshape lay
    # the segments out row by row (row 0 segments first, then row 1, ...).
    usable = signals[:, :segments_per_row * trim_length]
    segments = usable.reshape(num_rows * segments_per_row, trim_length)

    # Clamp so a negative limit yields no segments instead of slicing from the end.
    return segments[:max(num_samples, 0)]


# Process a single h5 file
def process_h5_file(file_path, new_file_path, trim_length, num_samples):
    """Re-segment the signals of one HDF5 file and write them to a new file.

    Reads ``'dataset'`` and ``'sig_type'`` from ``file_path``. Every row of
    the dataset is cut into consecutive ``trim_length``-long segments until
    ``num_samples`` segments have been collected. The real and imaginary
    parts of the segments are then stacked along a new last axis and written,
    as float64, to ``'dataset'`` in ``new_file_path``; ``'sig_type'`` is
    copied unchanged.

    Args:
        file_path: Source HDF5 file. Its ``'dataset'`` must be 2-D
            (presumably complex-valued; for real input the imaginary channel
            is all zeros).
        new_file_path: Destination HDF5 file; overwritten if it exists. Its
            parent directory must already exist.
        trim_length: Length of each output segment; must be at least 1.
        num_samples: Maximum number of segments written. Fewer are written
            when the source does not contain enough data.

    Raises:
        ValueError: If ``trim_length`` is smaller than 1 or the source
            dataset is not two-dimensional.
    """
    if trim_length < 1:
        raise ValueError(f"trim_length must be at least 1, got {trim_length}")

    with h5py.File(file_path, 'r') as f:
        dataset = f['dataset'][:]
        sig_type = f['sig_type'][()]

    # Build the new samples by splitting the existing ones
    new_samples = _split_into_segments(dataset, trim_length, num_samples)

    # Add a trailing dimension holding the real and imaginary parts
    dataset_expanded = np.stack((new_samples.real, new_samples.imag), axis=-1)

    with h5py.File(new_file_path, 'w') as f:
        f.create_dataset('dataset', data=dataset_expanded, dtype=np.float64)
        f.create_dataset('sig_type', data=sig_type)

# Main entry point: walk the directory tree and process every h5 file
def main(base_path, new_base_path, trim_length, num_samples):
    """Process every ``.h5`` file under ``base_path`` into ``new_base_path``.

    The directory layout of ``base_path`` is mirrored first. Each file whose
    name ends in ``.h5`` is then passed to ``process_h5_file`` and written to
    the same relative location under ``new_base_path``. Progress is printed
    before and after each file.

    The destination is derived from the path relative to ``base_path``
    rather than by string substitution, so a trailing separator on
    ``base_path`` cannot corrupt nested destinations. Directories and files
    are visited in sorted order so the progress log is reproducible.

    Args:
        base_path: Root directory containing the source files.
        new_base_path: Root directory that receives the processed files.
        trim_length: Segment length forwarded to ``process_h5_file``.
        num_samples: Maximum segment count forwarded to ``process_h5_file``.
    """
    create_new_directory_structure(base_path, new_base_path)

    for root, dirs, files in os.walk(base_path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()

        h5_names = sorted(name for name in files if name.endswith('.h5'))
        if not h5_names:
            continue

        relative_root = os.path.relpath(root, base_path)
        # normpath collapses the "." produced for the top-level directory.
        new_root = os.path.normpath(os.path.join(new_base_path, relative_root))
        # Make sure the destination exists even for files sitting directly
        # in base_path.
        os.makedirs(new_root, exist_ok=True)

        for name in h5_names:
            file_path = os.path.join(root, name)
            new_file_path = os.path.join(new_root, name)
            print(f"Processing {file_path}...")
            process_h5_file(file_path, new_file_path, trim_length, num_samples)
            print(f"Finished processing {file_path}")

# Driver: re-segment every .h5 file of the raw dataset into the processed
# dataset directory.
# NOTE(review): both paths are absolute and machine-specific; adjust them
# before running this on another machine.
if __name__ == "__main__":
    # Root of the source tree that is walked for .h5 files.
    base_path = '/home/ismael/TFM/rfchallenge/dataset'
    # Root under which the mirrored tree and the processed files are written.
    new_base_path = '/home/ismael/TFM/rfchallenge/dataset_processed'
    # Length of each segment cut from the original signals.
    trim_length = 3000
    # Upper bound on the number of segments written per file.
    num_samples = 1000
    main(base_path, new_base_path, trim_length, num_samples)


Processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal2_raw_data.h5...
Finished processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal2_raw_data.h5
Processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal3_raw_data.h5...
Finished processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal3_raw_data.h5
Processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/EMISignal1_raw_data.h5...
Finished processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/EMISignal1_raw_data.h5
Processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal5G1_raw_data.h5...
Finished processing /home/ismael/TFM/rfchallenge/dataset/interferenceset_frame/CommSignal5G1_raw_data.h5
Processing /home/ismael/TFM/rfchallenge/dataset/testset1_frame/EMISignal1_test1_raw_data.h5...
Finished processing /home/ismael/TFM/rfchallenge/dataset/testset1_frame/EMISignal1_test1_raw_data.h5
Pr