In [1]:
#Importar las librerias
import os
import pandas as pd
from core.SimularDataset import SimuladorDataset
import matplotlib.pyplot as plt

In [2]:
# Función que crea la carpeta data_simulada si no existe.
# Por cada paciente y fase, genere el dataset. Además, guarda un CSV por paciente y fase en una subcarpeta
# data_simulada/paciente_id/.
# Genera un CSV combinado con todos los pacientes y fases

def generar_y_guardar_datasets(pacientes, fases, duracion_por_fase=600, carpeta_base="data_simulada"):
    os.makedirs(carpeta_base, exist_ok=True)
    dfs_combinados = []

    for paciente in pacientes:
        carpeta_paciente = os.path.join(carpeta_base, paciente)
        os.makedirs(carpeta_paciente, exist_ok=True)

        for fase in fases:
            simulador = SimuladorDataset(paciente_id=paciente, fase_ela=fase, duracion=duracion_por_fase)
            df = simulador.generar()

            # Añadir columnas para identificar paciente y fase si no están explícitas
            if 'paciente_id' not in df.columns:
                df['paciente_id'] = paciente
            if 'fase_ela' not in df.columns:
                df['fase_ela'] = fase

            dfs_combinados.append(df)

            # Guardar CSV individual por paciente y fase
            nombre_archivo = f"{paciente}_fase{fase}.csv"
            ruta_archivo = os.path.join(carpeta_paciente, nombre_archivo)
            df.to_csv(ruta_archivo, index=False)
            print(f"Guardado: {ruta_archivo}")

    # Guardar un CSV combinado con todos los datos
    df_todos = pd.concat(dfs_combinados, ignore_index=True)
    ruta_combinado = os.path.join(carpeta_base, "dataset_sintetico_completo.csv")
    df_todos.to_csv(ruta_combinado, index=False)
    print(f"Guardado dataset combinado: {ruta_combinado}")

    return df_todos


In [3]:
pacientes = ["PAC_001", "PAC_002", "PAC_003", "PAC_004"]
fases = [1, 2, 3]
df_final = generar_y_guardar_datasets(pacientes, fases)

Guardado: data_simulada\PAC_001\PAC_001_fase1.csv
Guardado: data_simulada\PAC_001\PAC_001_fase2.csv
Guardado: data_simulada\PAC_001\PAC_001_fase3.csv
Guardado: data_simulada\PAC_002\PAC_002_fase1.csv
Guardado: data_simulada\PAC_002\PAC_002_fase2.csv
Guardado: data_simulada\PAC_002\PAC_002_fase3.csv
Guardado: data_simulada\PAC_003\PAC_003_fase1.csv
Guardado: data_simulada\PAC_003\PAC_003_fase2.csv
Guardado: data_simulada\PAC_003\PAC_003_fase3.csv
Guardado: data_simulada\PAC_004\PAC_004_fase1.csv
Guardado: data_simulada\PAC_004\PAC_004_fase2.csv
Guardado: data_simulada\PAC_004\PAC_004_fase3.csv
Guardado dataset combinado: data_simulada\dataset_sintetico_completo.csv


In [4]:
df = pd.read_csv("data_simulada/dataset_sintetico_completo.csv")

In [5]:
df.head()

Unnamed: 0,timestamp,paciente_id,fase_ela,aceleracion_x,aceleracion_y,aceleracion_z,magnitud_movimiento,actividad_estimada,evento_inmovilidad,estado,...,hipovent_sostenido,senal_sueno,fase_sueno,evento_fragmentacion,frag_sueno_sostenido,inmovilidad_sostenida_etiqueta,hipoxia_sostenida_etiqueta,hipovent_sostenida,frag_sueno_sostenido_etiqueta,empeoramiento
0,2025-07-17 20:18:08.597041,PAC_001,1,-0.028677,0.054443,0.985007,0.986927,0.038601,0,reposo,...,0.0,-0.035001,LIGHT,0.0,0.0,0,0,0,0,0
1,2025-07-17 20:18:08.697041,PAC_001,1,0.070169,0.048663,1.021967,1.025529,0.026384,0,reposo,...,0.0,-0.035001,LIGHT,0.0,0.0,0,0,0,0,0
2,2025-07-17 20:18:08.797041,PAC_001,1,0.003036,0.026635,0.933774,0.934159,0.023509,0,reposo,...,0.0,-0.035001,LIGHT,0.0,0.0,0,0,0,0,0
3,2025-07-17 20:18:08.897041,PAC_001,1,0.020881,0.129828,0.969634,0.97851,0.032391,0,reposo,...,0.0,-0.035001,LIGHT,0.0,0.0,0,0,0,0,0
4,2025-07-17 20:18:08.997041,PAC_001,1,0.007007,0.013257,0.998828,0.998941,0.037896,0,reposo,...,0.0,-0.035001,LIGHT,0.0,0.0,0,0,0,0,0


In [6]:
df.shape

(72000, 32)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72000 entries, 0 to 71999
Data columns (total 32 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   timestamp                       72000 non-null  object 
 1   paciente_id                     72000 non-null  object 
 2   fase_ela                        72000 non-null  int64  
 3   aceleracion_x                   72000 non-null  float64
 4   aceleracion_y                   72000 non-null  float64
 5   aceleracion_z                   72000 non-null  float64
 6   magnitud_movimiento             72000 non-null  float64
 7   actividad_estimada              72000 non-null  float64
 8   evento_inmovilidad              72000 non-null  int64  
 9   estado                          72000 non-null  object 
 10  inmovilidad_sostenida           72000 non-null  int64  
 11  spo2                            71940 non-null  float64
 12  frecuencia_cardiaca             

In [8]:
df.describe()

Unnamed: 0,fase_ela,aceleracion_x,aceleracion_y,aceleracion_z,magnitud_movimiento,actividad_estimada,evento_inmovilidad,inmovilidad_sostenida,spo2,frecuencia_cardiaca,...,evento_brp,hipovent_sostenido,senal_sueno,evento_fragmentacion,frag_sueno_sostenido,inmovilidad_sostenida_etiqueta,hipoxia_sostenida_etiqueta,hipovent_sostenida,frag_sueno_sostenido_etiqueta,empeoramiento
count,72000.0,72000.0,72000.0,72000.0,72000.0,72000.0,72000.0,72000.0,71940.0,71940.0,...,71947.0,71947.0,71952.0,71952.0,71952.0,72000.0,72000.0,72000.0,72000.0,72000.0
mean,2.0,0.000114,0.000327,0.999688,1.016191,0.03356738,0.319208,0.157111,92.332515,81.100988,...,0.147942,0.308838,0.002118,0.022793,0.577051,0.157111,0.217778,0.308847,0.577333,0.787236
std,0.816502,0.062249,0.174003,0.062152,0.068893,0.03335319,0.466173,0.363908,3.835872,10.343772,...,0.355045,0.462018,0.301513,0.149244,0.494031,0.363908,0.412738,0.462021,0.493987,0.409265
min,1.0,-0.389794,-0.820581,0.609362,0.650849,1.279498e-07,0.0,0.0,82.984905,59.433778,...,0.0,0.0,-0.937606,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,-0.029579,-0.051592,0.970545,0.983827,0.007131594,0.0,0.0,90.092656,72.755905,...,0.0,0.0,-0.201395,0.0,0.0,0.0,0.0,0.0,0.0,1.0
50%,2.0,3.2e-05,8.4e-05,1.000113,1.006184,0.02306747,0.0,0.0,92.937748,80.81466,...,0.0,0.0,0.001389,0.0,1.0,0.0,0.0,0.0,1.0,1.0
75%,3.0,0.029888,0.052356,1.029472,1.052793,0.05063751,1.0,0.0,95.450053,89.533622,...,0.0,1.0,0.20813,0.0,1.0,0.0,0.0,1.0,1.0,1.0
max,3.0,0.356798,0.755711,1.428216,1.441011,0.2939815,1.0,1.0,99.537796,104.097303,...,1.0,1.0,0.979251,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [9]:
print("Eventos de hipoxia:", df['hipoxia_sostenida'].sum())
print("Eventos de hipoventilación:", df['hipovent_sostenido'].sum())
print("Eventos de inmovilidad:", df['inmovilidad_sostenida'].sum())
print("Eventos de fragmentación de sueño:", df['frag_sueno_sostenido'].sum())
print("Eventos de empeoramiento global:", df['empeoramiento'].sum())


Eventos de hipoxia: 15660.0
Eventos de hipoventilación: 22220.0
Eventos de inmovilidad: 11312
Eventos de fragmentación de sueño: 41520.0
Eventos de empeoramiento global: 56681


In [13]:
if 'timestamp' not in df.columns:
    df['timestamp'] = pd.to_datetime(df.index, unit='s')

# Configuración del gráfico
plt.figure(figsize=(15, 6))


# Graficar cada evento como área binaria
plt.fill_between(df['timestamp'], df['evento_hipoxia'], step='pre', alpha=0.4, label='Hipoxia')
plt.fill_between(df['timestamp'], df['evento_hipoventilacion'], step='pre', alpha=0.4, label='Hipoventilación')
plt.fill_between(df['timestamp'], df['evento_inmovilidad'], step='pre', alpha=0.4, label='Inmovilidad')
plt.fill_between(df['timestamp'], df['evento_fragmentacion'], step='pre', alpha=0.4, label='Fragmentación de sueño')
plt.fill_between(df['timestamp'], df['empeoramiento'], step='pre', alpha=0.6, color='black', label='Empeoramiento global')

# Detalles de la gráfica
plt.title('Distribución temporal de eventos clínicos simulados en pacientes con ELA', fontsize=14)
plt.xlabel('Tiempo', fontsize=12)
plt.ylabel('Presencia del evento (1 = sí)', fontsize=12)
plt.legend(loc='upper right', fontsize=10)
plt.tight_layout()
plt.show()


KeyboardInterrupt: 