Generación de datos fisiológicos sintéticos que respeten patrones estables, como ritmo circadiano y estados de sueño.

In [1]:
import numpy as np
import pandas as pd
import random
import os
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Tuple, List
from textwrap import fill

## User profile

In [2]:
def get_demographic_distributions():
    """Define distribuciones demográficas base"""
    return {
        'age_groups': ['young', 'middle_aged', 'senior'],
        'age_probabilities': [0.30, 0.45, 0.25],
        
        'genders': ['male', 'female'],
        'gender_probabilities': [0.49, 0.51],
        
        'activity_levels': ['sedentary', 'light', 'moderate', 'high'],
        'activity_probabilities': [0.30, 0.35, 0.25, 0.10],
        
        # Probabilidades de tabaquismo por grupo de edad
        'smoking_by_age': {
            'young': {'non_smoker': 0.85, 'occasional': 0.06, 'regular': 0.06, 'heavy': 0.03},
            'middle_aged': {'non_smoker': 0.80, 'occasional': 0.08, 'regular': 0.08, 'heavy': 0.04},
            'senior': {'non_smoker': 0.90, 'occasional': 0.04, 'regular': 0.04, 'heavy': 0.02}
        },
        
        # Probabilidades de consumo de alcohol por grupo de edad
        'alcohol_by_age': {
            'young': {'none': 0.25, 'light': 0.40, 'moderate': 0.25, 'heavy': 0.10},
            'middle_aged': {'none': 0.35, 'light': 0.35, 'moderate': 0.20, 'heavy': 0.10},
            'senior': {'none': 0.50, 'light': 0.30, 'moderate': 0.15, 'heavy': 0.05}
        },
        
        # Probabilidades de diabetes por grupo de edad
        'diabetes_by_age': {
            'young': {'no': 0.95, 'yes': 0.05},
            'middle_aged': {'no': 0.85, 'yes': 0.15},
            'senior': {'no': 0.70, 'yes': 0.30}
        },
        
        # Probabilidades de hipertensión por grupo de edad
        'hypertension_by_age': {
            'young': {'no': 0.90, 'yes': 0.10},
            'middle_aged': {'no': 0.65, 'yes': 0.35},
            'senior': {'no': 0.40, 'yes': 0.60}
        }
    }

In [3]:
def generate_demographic_profile():
    """Genera un perfil demográfico aleatorio"""
    demo = get_demographic_distributions()
    
    # Seleccionar edad
    age_group = np.random.choice(demo['age_groups'], p=demo['age_probabilities'])
    
    # Seleccionar género
    gender = np.random.choice(demo['genders'], p=demo['gender_probabilities'])
    
    # Seleccionar nivel de actividad
    activity_level = np.random.choice(demo['activity_levels'], p=demo['activity_probabilities'])
    
    # Seleccionar tabaquismo basado en edad
    smoking_dist = demo['smoking_by_age'][age_group]
    smoking_status = np.random.choice(list(smoking_dist.keys()), p=list(smoking_dist.values()))
    
    # Seleccionar alcohol basado en edad
    alcohol_dist = demo['alcohol_by_age'][age_group]
    alcohol_consumption = np.random.choice(list(alcohol_dist.keys()), p=list(alcohol_dist.values()))
    
    # Seleccionar diabetes basado en edad
    diabetes_dist = demo['diabetes_by_age'][age_group]
    diabetes = np.random.choice(list(diabetes_dist.keys()), p=list(diabetes_dist.values()))
    
    # Seleccionar hipertensión basado en edad
    hyper_dist = demo['hypertension_by_age'][age_group]
    hypertension = np.random.choice(list(hyper_dist.keys()), p=list(hyper_dist.values()))
    
    return {
        'age_group': age_group,
        'gender': gender,
        'physical_activity_level': activity_level,
        'smoking_status': smoking_status,
        'alcohol_consumption': alcohol_consumption,
        'diabetes': diabetes,
        'hypertension': hypertension
    }

In [4]:
def generate_user_profiles_dataset(num_users: int, filename: str = "user_profiles.csv", 
                                 overwrite: bool = True) -> pd.DataFrame:
    """
    Genera un dataset con perfiles demográficos de usuarios
    """
    
    # Verificar si el archivo existe
    if os.path.exists(filename):
        if not overwrite:
            response = input(f"El archivo '{filename}' ya existe. ¿Desea reemplazarlo? (s/n): ")
            if response.lower() not in ['s', 'si', 'sí', 'y', 'yes']:
                print("Operación cancelada.")
                return None
        
        print(f"Reemplazando archivo existente: {filename}")
    
    print(f"Generando {num_users} perfiles de usuario...")
    
    # Generar perfiles
    profiles = []
    for user_id in range(1, num_users + 1):
        profile = generate_demographic_profile()
        profile['user_id'] = user_id
        profiles.append(profile)
        
        # Mostrar progreso cada 1000 usuarios
        if user_id % 1000 == 0 or user_id == num_users:
            print(f"Progreso: {user_id}/{num_users} usuarios generados")
    
    # Crear DataFrame
    df = pd.DataFrame(profiles)
    
    # Reordenar columnas para que user_id sea la primera
    columns_order = ['user_id', 'age_group', 'gender', 'physical_activity_level', 
                    'smoking_status', 'alcohol_consumption', 'diabetes', 'hypertension']
    df = df[columns_order]
    
    # Guardar en CSV
    try:
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"Dataset guardado exitosamente en: {filename}")
    except Exception as e:
        print(f"Error al guardar el archivo: {e}")
        return df
    
    return df

In [5]:
df_test = generate_user_profiles_dataset(
        num_users=100, 
        filename="test_profiles.csv",
        overwrite=True
    )
df_test.head(20)

Reemplazando archivo existente: test_profiles.csv
Generando 100 perfiles de usuario...
Progreso: 100/100 usuarios generados
Dataset guardado exitosamente en: test_profiles.csv


Unnamed: 0,user_id,age_group,gender,physical_activity_level,smoking_status,alcohol_consumption,diabetes,hypertension
0,1,senior,female,moderate,non_smoker,none,yes,yes
1,2,middle_aged,male,sedentary,non_smoker,none,no,no
2,3,middle_aged,female,light,regular,none,no,no
3,4,senior,male,high,non_smoker,none,no,no
4,5,middle_aged,male,moderate,non_smoker,heavy,no,yes
5,6,middle_aged,male,moderate,non_smoker,none,no,yes
6,7,young,male,light,non_smoker,light,no,no
7,8,young,female,sedentary,non_smoker,heavy,no,no
8,9,senior,male,sedentary,non_smoker,moderate,no,yes
9,10,middle_aged,female,light,non_smoker,light,yes,no


================================================================================================================

## Heart Rate Simulation

In [6]:
plot_transition_heatmap_with_profile(
    user_id=random.choice(df_test.user_id.tolist()),
    matrices_by_user=matrices,
    df_profiles=df_test,
    id_col="user_id",
    states=states_day_exercise,
    hour=14,
    time_in_state=10,
    figsize=(12, 8)
)

NameError: name 'plot_transition_heatmap_with_profile' is not defined

================================================================================================================

In [6]:
calibration_params = {
    'population_stats': {
        'smoking_rates_by_age': {
            'young': 0.15,
            'middle_aged': 0.20,
            'senior': 0.10
        },
        'hypertension_by_age': {
            'young': 0.10,
            'middle_aged': 0.35,
            'senior': 0.60
        }
    },
    'validation_targets': {
        'avg_sleep_duration': (7, 9),
        'rem_percentage': (20, 25),
        'deep_sleep_percentage': (15, 20),
        'awakenings_per_night': (2, 5)
    }
}


In [9]:
physiological_constraints = {
    'min_deep_sleep_duration': 2,
    'max_consecutive_awake': 18,
    'rem_cycle_timing': 90,
    'exercise_recovery_time': 2
}

## Actividad Física
Aguda (post-ejercicio):
- ↑ temperatura corporal → retraso inicio sueño si es tardío
- ↑ adenosina muscular → ↑ presión sueño profundo
- ↑ hormona crecimiento → ↑ sueño N3

Crónica (entrenamiento regular):
- ↑ eficiencia sueño, ↑ sueño profundo
- ↓ latencia sueño, ↓ despertares nocturnos
- Mejor termorregulación nocturna

## Ciclos de Sueño Normal
- Duración del ciclo: 90-120 minutos
- Progresión típica: Vigilia → N1 (sueño ligero) → N2 → N3 (sueño profundo) → REM
- Distribución nocturna:
    - Primera mitad: predomina sueño profundo
    - Segunda mitad: predomina sueño REM
    - REM aumenta hacia la madrugada (pico de cortisol y temperatura corporal mínima)

In [None]:
# Matriz de transición fisiológicamente correcta

sleep_transitions = {
    'awake': {
        'awake': 0.70, 
        'light': 0.30,
        'deep': 0.00,
        'rem': 0.00
    },
    'light': {
        'awake': 0.20, 
        'light': 0.35, 
        'deep': 0.35,
        'rem': 0.10
    },
    'deep': {
        'awake': 0.10,
        'light': 0.85,
        'deep': 0.05, 
        'rem': 0.00
    },
    'rem': {
        'awake': 0.25,
        'light': 0.70,
        'deep': 0.00,
        'rem': 0.05
    }
}

Cambios arquitectura sueño:
- ↓ sueño profundo: 2% por década después 30 años
- ↑ despertares nocturnos
- ↓ eficiencia sueño
- Adelanto fase circadiana (↑ matutinidad)

In [None]:
transition_modifiers = {
    'heart_rate_influence': {
        'high_hr_threshold': 100,
        'very_high_hr_threshold': 130,
        'high_hr_effects': {
            'awake': {'awake': 0.1, 'light': -0.05},
            'light': {'awake': 0.1, 'light': 0.05, 'deep': -0.1, 'rem': -0.05},
            'deep': {'awake': 0.1, 'light': 0.1, 'deep': -0.2},
            'rem': {'awake': 0.15, 'light': 0.1, 'rem': -0.25}
        }
    },
    
    'static_influence': {
        'smoking_regular_heavy': {
            'deep': {'deep': -0.1, 'light': 0.05, 'awake': 0.05}
        },
        'alcohol_moderate_heavy': {
            'rem': {'rem': -0.1, 'light': 0.1}
        },
        'hypertension': {
            'all_states': {'awake': 0.05}
        }
    },
    
    'circadian_influence': {
        'night_hours': (22, 6),
        'day_hours': (6, 22),
        'night_sleep_boost': 1.5,
        'day_wake_boost': 1.3
    }
}

In [19]:
def generate_age_adjusted_probabilities(age_group: str) -> Dict:
        """Genera probabilidades ajustadas por edad para tabaquismo e hipertensión"""
        # Tabaquismo ajustado por edad
        smoking_base_rate = calibration_params['population_stats']['smoking_rates_by_age'][age_group]
        smoking_probs = [
            1 - smoking_base_rate,  # non_smoker
            smoking_base_rate * 0.4,  # occasional
            smoking_base_rate * 0.4,  # regular
            smoking_base_rate * 0.2   # heavy
        ]
        
        # Hipertensión ajustada por edad
        hypertension_rate = calibration_params['population_stats']['hypertension_by_age'][age_group]
        hypertension_probs = [1 - hypertension_rate, hypertension_rate]  # no, yes
        
        return {
            'smoking_status': {
                'states': ['non_smoker', 'occasional', 'regular', 'heavy'],
                'probabilities': smoking_probs
            },
            'hypertension': {
                'states': ['no', 'yes'],
                'probabilities': hypertension_probs
            }
        }
    

In [20]:
def generate_static_profile() -> Dict:
        """Genera perfil estático de un usuario con dependencias por edad"""
        profile = {}
        
        # Primero generar edad (independiente)
        profile['age_group'] = np.random.choice(
            static_variables_base['age_group']['states'],
            p=static_variables_base['age_group']['probabilities']
        )
        
        # Generar variables independientes de la edad
        for var_name in ['gender', 'physical_activity_level', 'alcohol_consumption']:
            config = static_variables_base[var_name]
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        # Generar variables dependientes de la edad
        age_adjusted = generate_age_adjusted_probabilities(profile['age_group'])
        
        for var_name, config in age_adjusted.items():
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        return profile

In [21]:
def calculate_hr_modifier(profile: Dict, sleep_state: str = 'rest') -> float:
        """Calcula modificador de FC basado en perfil estático"""
        modifier = 0
        
        for var_name, value in profile.items():
            if var_name in heart_rate_config['static_modifiers']:
                var_modifiers = heart_rate_config['static_modifiers'][var_name]
                if value in var_modifiers:
                    if sleep_state in ['light', 'deep', 'rem']:
                        modifier += var_modifiers[value].get('sleep', 0)
                    else:
                        modifier += var_modifiers[value].get('rest', 0)
        
        return modifier

In [22]:
def get_circadian_modifier(hour: int) -> Dict:
        """Obtiene modificadores circadianos según la hora"""
        night_start, night_end = transition_modifiers['circadian_influence']['night_hours']
        
        if night_start <= hour or hour <= night_end:  # Horario nocturno
            return {
                'sleep_propensity': transition_modifiers['circadian_influence']['night_sleep_boost'],
                'exercise_probability': 0.01,
                'hr_reduction': 0.9
            }
        else:  # Horario diurno
            return {
                'sleep_propensity': 1.0 / transition_modifiers['circadian_influence']['day_wake_boost'],
                'exercise_probability': 0.08,
                'hr_reduction': 1.0
            }

In [23]:
def modify_transition_matrix(base_matrix: Dict, profile: Dict, hour: int, current_hr: float) -> Dict:
        """Modifica matriz de transición basada en contexto"""
        modified = {}
        
        # Copiar matriz base
        for from_state, transitions in base_matrix.items():
            modified[from_state] = transitions.copy()
        
        # Aplicar modificadores de FC
        hr_threshold = transition_modifiers['heart_rate_influence']['high_hr_threshold']
        if current_hr > hr_threshold:
            hr_effects = transition_modifiers['heart_rate_influence']['high_hr_effects']
            for from_state in modified:
                if from_state in hr_effects:
                    for to_state, effect in hr_effects[from_state].items():
                        if to_state in modified[from_state]:
                            modified[from_state][to_state] += effect
        
        # Aplicar modificadores circadianos
        circadian = get_circadian_modifier(hour)
        night_start, night_end = transition_modifiers['circadian_influence']['night_hours']
        
        if night_start <= hour or hour <= night_end:  # Noche
            for from_state in modified:
                if from_state == 'awake':
                    modified[from_state]['light'] *= circadian['sleep_propensity']
                elif from_state in ['light', 'deep', 'rem']:
                    modified[from_state]['awake'] *= (1.0 / circadian['sleep_propensity'])
        
        # Aplicar modificadores por perfil estático
        static_effects = transition_modifiers['static_influence']
        
        # Tabaquismo
        if profile['smoking_status'] in ['regular', 'heavy']:
            if 'deep' in modified:
                for to_state, effect in static_effects['smoking_regular_heavy']['deep'].items():
                    if to_state in modified['deep']:
                        modified['deep'][to_state] += effect
        
        # Alcohol
        if profile['alcohol_consumption'] in ['moderate', 'heavy']:
            if 'rem' in modified:
                for to_state, effect in static_effects['alcohol_moderate_heavy']['rem'].items():
                    if to_state in modified['rem']:
                        modified['rem'][to_state] += effect
        
        # Hipertensión
        if profile['hypertension'] == 'yes':
            awake_boost = static_effects['hypertension']['all_states']['awake']
            for from_state in modified:
                modified[from_state]['awake'] += awake_boost
        
        # Normalizar probabilidades
        for from_state in modified:
            # Asegurar valores no negativos
            for to_state in modified[from_state]:
                modified[from_state][to_state] = max(0, modified[from_state][to_state])
            
            # Normalizar para que sumen 1
            total = sum(modified[from_state].values())
            if total > 0:
                for to_state in modified[from_state]:
                    modified[from_state][to_state] /= total
            else:
                # Si todas son 0, usar distribución uniforme
                n_states = len(modified[from_state])
                for to_state in modified[from_state]:
                    modified[from_state][to_state] = 1.0 / n_states
        
        return modified

In [24]:
def generate_hr_value(sleep_state: str, profile: Dict, hour: int, 
                         is_exercise: bool = False, sensor_failure: bool = False) -> float:
        """Genera valor de FC basado en estado y contexto"""
        if sensor_failure:
            return np.nan
        
        # Determinar rango base según estado
        if sleep_state == 'awake':
            if is_exercise:
                base_range = heart_rate_config['base_ranges']['awake_high_activity']
            else:
                base_range = heart_rate_config['base_ranges']['awake_rest']
        elif sleep_state == 'light':
            base_range = heart_rate_config['base_ranges']['light_sleep']
        elif sleep_state == 'deep':
            base_range = heart_rate_config['base_ranges']['deep_sleep']
        else:  # rem
            base_range = heart_rate_config['base_ranges']['rem_sleep']
        
        # Valor base
        base_hr = np.random.normal(
            (base_range[0] + base_range[1]) / 2,
            (base_range[1] - base_range[0]) / 4
        )
        
        # Aplicar modificadores estáticos
        static_modifier = calculate_hr_modifier(profile, sleep_state)
        
        # Aplicar modificadores circadianos
        circadian = get_circadian_modifier(hour)
        
        # Calcular FC final
        final_hr = (base_hr + static_modifier) * circadian['hr_reduction']
        
        # Añadir ruido
        final_hr += np.random.normal(0, 2)
        
        # Límites fisiológicos
        final_hr = max(35, min(220, final_hr))
        
        return round(final_hr)

In [25]:
def simulate_user(user_id: int, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula un usuario completo"""
        # Generar perfil estático
        profile = generate_static_profile()
        
        # Inicializar series temporales
        timestamps = []
        sleep_states = []
        heart_rates = []
        
        # Estado inicial
        current_sleep_state = 'awake'
        start_time = datetime(2024, 1, 1, 8, 0)
        
        # Variables para eventos especiales
        exercise_remaining = 0
        sensor_failure_remaining = 0
        
        for hour in range(hours):
            current_time = start_time + timedelta(hours=hour)
            timestamps.append(current_time)
            
            # Determinar eventos especiales
            circadian = get_circadian_modifier(current_time.hour)
            
            # Ejercicio
            is_exercise = False
            if exercise_remaining > 0:
                is_exercise = True
                exercise_remaining -= 1
            elif np.random.random() < circadian['exercise_probability']:
                duration_range = heart_rate_config['anomalies']['exercise_duration']
                exercise_duration = np.random.uniform(duration_range[0], duration_range[1])
                exercise_remaining = max(0, int(exercise_duration) - 1)
                is_exercise = True
            
            # Fallo del sensor
            sensor_failure = False
            if sensor_failure_remaining > 0:
                sensor_failure = True
                sensor_failure_remaining -= 1
            elif np.random.random() < heart_rate_config['anomalies']['sensor_failure']:
                duration_range = heart_rate_config['anomalies']['duration_range']
                failure_duration = np.random.randint(duration_range[0], duration_range[1] + 1)
                sensor_failure_remaining = failure_duration - 1
                sensor_failure = True
            
            # Generar FC
            recent_hr = 70  # Valor por defecto
            if len(heart_rates) > 0 and not np.isnan(heart_rates[-1]):
                recent_hr = heart_rates[-1]
            
            hr_value = generate_hr_value(
                current_sleep_state, profile, current_time.hour, 
                is_exercise, sensor_failure
            )
            heart_rates.append(hr_value)
            
            # Transición de estado de sueño
            modified_transitions = modify_transition_matrix(
                sleep_transitions, profile, current_time.hour, 
                hr_value if not np.isnan(hr_value) else recent_hr
            )
            
            # Ejecutar transición
            current_transitions = modified_transitions[current_sleep_state]
            next_state = np.random.choice(
                list(current_transitions.keys()),
                p=list(current_transitions.values())
            )
            
            sleep_states.append(current_sleep_state)
            current_sleep_state = next_state
        
        # Crear DataFrames
        profile_df = pd.DataFrame([profile])
        profile_df['user_id'] = user_id
        
        timeseries_df = pd.DataFrame({
            'user_id': user_id,
            'timestamp': timestamps,
            'sleep_state': sleep_states,
            'heart_rate': heart_rates
        })
        
        return profile_df, timeseries_df

In [26]:
def simulate_multiple_users(n_users: int = 5, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula múltiples usuarios"""
        all_profiles = []
        all_timeseries = []
        
        for user_id in range(1, n_users + 1):
            print(f"Simulando usuario {user_id}...")
            profile_df, timeseries_df = simulate_user(user_id, hours)
            all_profiles.append(profile_df)
            all_timeseries.append(timeseries_df)
        
        profiles_combined = pd.concat(all_profiles, ignore_index=True)
        timeseries_combined = pd.concat(all_timeseries, ignore_index=True)
        
        return profiles_combined, timeseries_combined

In [27]:
def validate_simulation(timeseries_df: pd.DataFrame) -> Dict:
        """Valida que la simulación cumpla con los targets fisiológicos"""
        validation_results = {}
        
        for user_id in timeseries_df['user_id'].unique():
            user_data = timeseries_df[timeseries_df['user_id'] == user_id]
            
            # Calcular métricas de sueño por día
            daily_metrics = []
            for day in range(7):  # 7 días
                day_start = day * 24
                day_end = (day + 1) * 24
                day_data = user_data.iloc[day_start:day_end]
                
                sleep_data = day_data[day_data['sleep_state'] != 'awake']
                if len(sleep_data) > 0:
                    sleep_duration = len(sleep_data)
                    rem_hours = len(day_data[day_data['sleep_state'] == 'rem'])
                    deep_hours = len(day_data[day_data['sleep_state'] == 'deep'])
                    
                    daily_metrics.append({
                        'sleep_duration': sleep_duration,
                        'rem_percentage': (rem_hours / sleep_duration * 100) if sleep_duration > 0 else 0,
                        'deep_percentage': (deep_hours / sleep_duration * 100) if sleep_duration > 0 else 0
                    })
            
            if daily_metrics:
                avg_sleep = np.mean([d['sleep_duration'] for d in daily_metrics])
                avg_rem = np.mean([d['rem_percentage'] for d in daily_metrics])
                avg_deep = np.mean([d['deep_percentage'] for d in daily_metrics])
                
                validation_results[f'user_{user_id}'] = {
                    'avg_sleep_duration': avg_sleep,
                    'rem_percentage': avg_rem,
                    'deep_percentage': avg_deep,
                    'meets_sleep_target': (
                        calibration_params['validation_targets']['avg_sleep_duration'][0] <= 
                        avg_sleep <= 
                        calibration_params['validation_targets']['avg_sleep_duration'][1]
                    ),
                    'meets_rem_target': (
                        calibration_params['validation_targets']['rem_percentage'][0] <= 
                        avg_rem <= 
                        calibration_params['validation_targets']['rem_percentage'][1]
                    ),
                    'meets_deep_target': (
                        calibration_params['validation_targets']['deep_sleep_percentage'][0] <= 
                        avg_deep <= 
                        calibration_params['validation_targets']['deep_sleep_percentage'][1]
                    )
                }
        
        return validation_results

In [None]:
# Generar datos para 5 usuarios durante 7 días
print("Generando datos sintéticos fisiológicamente precisos...")
profiles_df, timeseries_df = simulate_multiple_users(n_users=5, hours=168)

# Mostrar perfiles estáticos
print("\n=== PERFILES ESTÁTICOS ===")
print(profiles_df.to_string(index=False))

# Validar simulación
print("\n=== VALIDACIÓN FISIOLÓGICA ===")
validation_results = validate_simulation(timeseries_df)

for user, metrics in validation_results.items():
    print(f"\n{user.upper()}:")
    print(f"  Duración sueño promedio: {metrics['avg_sleep_duration']:.1f}h ✓" if metrics['meets_sleep_target'] else f"  Duración sueño promedio: {metrics['avg_sleep_duration']:.1f}h ✗")
    print(f"  % REM: {metrics['rem_percentage']:.1f}% ✓" if metrics['meets_rem_target'] else f"  % REM: {metrics['rem_percentage']:.1f}% ✗")
    print(f"  % Profundo: {metrics['deep_percentage']:.1f}% ✓" if metrics['meets_deep_target'] else f"  % Profundo: {metrics['deep_percentage']:.1f}% ✗")

# Mostrar estadísticas de FC
print("\n=== ESTADÍSTICAS FRECUENCIA CARDÍACA ===")
for user_id in range(1, 6):
    user_data = timeseries_df[timeseries_df['user_id'] == user_id]
    hr_stats = user_data['heart_rate'].describe()
    
    print(f"\nUsuario {user_id}:")
    print(f"  FC promedio: {hr_stats['mean']:.1f} ± {hr_stats['std']:.1f} lpm")
    print(f"  FC rango: {hr_stats['min']:.0f} - {hr_stats['max']:.0f} lpm")
    print(f"  Valores perdidos: {user_data['heart_rate'].isna().sum()}")

# Guardar datos
profiles_df.to_csv('physiological_user_profiles.csv', index=False)
timeseries_df.to_csv('physiological_timeseries_data.csv', index=False)
print(f"Datos guardados exitosamente")

Generando datos sintéticos fisiológicamente precisos...
Simulando usuario 1...


KeyError: 'circadian_influence'

In [29]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Tuple, List

class PhysiologicalSleepHRSimulator:
    def __init__(self):
        # Parámetros de calibración corregidos
        self.calibration_params = {
            'population_stats': {
                'smoking_rates_by_age': {
                    'young': 0.15,
                    'middle_aged': 0.20,
                    'senior': 0.10
                },
                'hypertension_by_age': {
                    'young': 0.10,
                    'middle_aged': 0.35,
                    'senior': 0.60
                }
            },
            'validation_targets': {
                'avg_sleep_duration': (7, 9),
                'rem_percentage': (20, 25),
                'deep_sleep_percentage': (15, 20),
                'awakenings_per_night': (2, 5)
            }
        }
        
        # Variables estáticas base (se ajustarán por edad)
        self.static_variables_base = {
            'gender': {
                'states': ['male', 'female'],
                'probabilities': [0.49, 0.51]
            },
            'physical_activity_level': {
                'states': ['sedentary', 'light', 'moderate', 'high'],
                'probabilities': [0.30, 0.35, 0.25, 0.10]
            },
            'alcohol_consumption': {
                'states': ['none', 'light', 'moderate', 'heavy'],
                'probabilities': [0.25, 0.45, 0.25, 0.05]
            },
            'age_group': {
                'states': ['young', 'middle_aged', 'senior'],
                'probabilities': [0.30, 0.45, 0.25]
            }
        }
        
        # Configuración de FC corregida
        self.heart_rate_config = {
            'base_ranges': {
                'awake_rest': (60, 80),
                'awake_light_activity': (80, 120),
                'awake_moderate_activity': (120, 150),
                'awake_high_activity': (150, 180),
                'light_sleep': (50, 70),
                'deep_sleep': (45, 60),
                'rem_sleep': (55, 75)
            },
            
            'static_modifiers': {
                'smoking_status': {
                    'regular': {'rest': +5, 'activity': +10, 'sleep': +3},
                    'heavy': {'rest': +10, 'activity': +15, 'sleep': +5},
                    'occasional': {'rest': +2, 'activity': +3, 'sleep': +1},
                    'non_smoker': {'rest': 0, 'activity': 0, 'sleep': 0}
                },
                'physical_activity_level': {
                    'high': {'rest': -5, 'recovery': 0.8, 'sleep': -3},
                    'moderate': {'rest': -2, 'recovery': 0.9, 'sleep': -1},
                    'light': {'rest': 0, 'recovery': 1.0, 'sleep': 0},
                    'sedentary': {'rest': +3, 'recovery': 1.2, 'sleep': +2}
                },
                'hypertension': {
                    'yes': {'rest': +8, 'activity': +12, 'sleep': +5},
                    'no': {'rest': 0, 'activity': 0, 'sleep': 0}
                },
                'age_group': {
                    'young': {'rest': -3, 'sleep': -2},
                    'middle_aged': {'rest': 0, 'sleep': 0},
                    'senior': {'rest': +5, 'sleep': +3}
                }
            },
            
            'anomalies': {
                'sensor_failure': 0.02,
                'duration_range': (1, 4),
                'exercise_bout': 0.05,
                'exercise_duration': (0.5, 2.0)
            }
        }
        
        # Matriz de transición fisiológicamente correcta
        self.sleep_transitions = {
            'awake': {
                'awake': 0.70, 
                'light': 0.30,
                'deep': 0.00,
                'rem': 0.00
            },
            'light': {
                'awake': 0.20, 
                'light': 0.35, 
                'deep': 0.35,
                'rem': 0.10
            },
            'deep': {
                'awake': 0.10,
                'light': 0.85,
                'deep': 0.05, 
                'rem': 0.00
            },
            'rem': {
                'awake': 0.25,
                'light': 0.70,
                'deep': 0.00,
                'rem': 0.05
            }
        }
        
        # Modificadores de transición
        self.transition_modifiers = {
            'heart_rate_influence': {
                'high_hr_threshold': 100,
                'very_high_hr_threshold': 130,
                'high_hr_effects': {
                    'awake': {'awake': 0.1, 'light': -0.05},
                    'light': {'awake': 0.1, 'light': 0.05, 'deep': -0.1, 'rem': -0.05},
                    'deep': {'awake': 0.1, 'light': 0.1, 'deep': -0.2},
                    'rem': {'awake': 0.15, 'light': 0.1, 'rem': -0.25}
                }
            },
            
            'static_influence': {
                'smoking_regular_heavy': {
                    'deep': {'deep': -0.1, 'light': 0.05, 'awake': 0.05}
                },
                'alcohol_moderate_heavy': {
                    'rem': {'rem': -0.1, 'light': 0.1}
                },
                'hypertension': {
                    'all_states': {'awake': 0.05}
                }
            },
            
            'circadian_influence': {
                'night_hours': (22, 6),
                'day_hours': (6, 22),
                'night_sleep_boost': 1.5,
                'day_wake_boost': 1.3
            }
        }
        
        # Restricciones fisiológicas
        self.physiological_constraints = {
            'min_deep_sleep_duration': 2,
            'max_consecutive_awake': 18,
            'rem_cycle_timing': 90,
            'exercise_recovery_time': 2
        }
    
    def generate_age_adjusted_probabilities(self, age_group: str) -> Dict:
        """Genera probabilidades ajustadas por edad para tabaquismo e hipertensión"""
        # Tabaquismo ajustado por edad
        smoking_base_rate = self.calibration_params['population_stats']['smoking_rates_by_age'][age_group]
        smoking_probs = [
            1 - smoking_base_rate,  # non_smoker
            smoking_base_rate * 0.4,  # occasional
            smoking_base_rate * 0.4,  # regular
            smoking_base_rate * 0.2   # heavy
        ]
        
        # Hipertensión ajustada por edad
        hypertension_rate = self.calibration_params['population_stats']['hypertension_by_age'][age_group]
        hypertension_probs = [1 - hypertension_rate, hypertension_rate]  # no, yes
        
        return {
            'smoking_status': {
                'states': ['non_smoker', 'occasional', 'regular', 'heavy'],
                'probabilities': smoking_probs
            },
            'hypertension': {
                'states': ['no', 'yes'],
                'probabilities': hypertension_probs
            }
        }
    
    def generate_static_profile(self) -> Dict:
        """Genera perfil estático de un usuario con dependencias por edad"""
        profile = {}
        
        # Primero generar edad (independiente)
        profile['age_group'] = np.random.choice(
            self.static_variables_base['age_group']['states'],
            p=self.static_variables_base['age_group']['probabilities']
        )
        
        # Generar variables independientes de la edad
        for var_name in ['gender', 'physical_activity_level', 'alcohol_consumption']:
            config = self.static_variables_base[var_name]
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        # Generar variables dependientes de la edad
        age_adjusted = self.generate_age_adjusted_probabilities(profile['age_group'])
        
        for var_name, config in age_adjusted.items():
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        return profile
    
    def calculate_hr_modifier(self, profile: Dict, sleep_state: str = 'rest') -> float:
        """Calcula modificador de FC basado en perfil estático"""
        modifier = 0
        
        for var_name, value in profile.items():
            if var_name in self.heart_rate_config['static_modifiers']:
                var_modifiers = self.heart_rate_config['static_modifiers'][var_name]
                if value in var_modifiers:
                    if sleep_state in ['light', 'deep', 'rem']:
                        modifier += var_modifiers[value].get('sleep', 0)
                    else:
                        modifier += var_modifiers[value].get('rest', 0)
        
        return modifier
    
    def get_circadian_modifier(self, hour: int) -> Dict:
        """Obtiene modificadores circadianos según la hora"""
        night_start, night_end = self.transition_modifiers['circadian_influence']['night_hours']
        
        if night_start <= hour or hour <= night_end:  # Horario nocturno
            return {
                'sleep_propensity': self.transition_modifiers['circadian_influence']['night_sleep_boost'],
                'exercise_probability': 0.01,
                'hr_reduction': 0.9
            }
        else:  # Horario diurno
            return {
                'sleep_propensity': 1.0 / self.transition_modifiers['circadian_influence']['day_wake_boost'],
                'exercise_probability': 0.08,
                'hr_reduction': 1.0
            }
    
    def modify_transition_matrix(self, base_matrix: Dict, profile: Dict, hour: int, current_hr: float) -> Dict:
        """Modifica matriz de transición basada en contexto"""
        modified = {}
        
        # Copiar matriz base
        for from_state, transitions in base_matrix.items():
            modified[from_state] = transitions.copy()
        
        # Aplicar modificadores de FC
        hr_threshold = self.transition_modifiers['heart_rate_influence']['high_hr_threshold']
        if current_hr > hr_threshold:
            hr_effects = self.transition_modifiers['heart_rate_influence']['high_hr_effects']
            for from_state in modified:
                if from_state in hr_effects:
                    for to_state, effect in hr_effects[from_state].items():
                        if to_state in modified[from_state]:
                            modified[from_state][to_state] += effect
        
        # Aplicar modificadores circadianos
        circadian = self.get_circadian_modifier(hour)
        night_start, night_end = self.transition_modifiers['circadian_influence']['night_hours']
        
        if night_start <= hour or hour <= night_end:  # Noche
            for from_state in modified:
                if from_state == 'awake':
                    modified[from_state]['light'] *= circadian['sleep_propensity']
                elif from_state in ['light', 'deep', 'rem']:
                    modified[from_state]['awake'] *= (1.0 / circadian['sleep_propensity'])
        
        # Aplicar modificadores por perfil estático
        static_effects = self.transition_modifiers['static_influence']
        
        # Tabaquismo
        if profile['smoking_status'] in ['regular', 'heavy']:
            if 'deep' in modified:
                for to_state, effect in static_effects['smoking_regular_heavy']['deep'].items():
                    if to_state in modified['deep']:
                        modified['deep'][to_state] += effect
        
        # Alcohol
        if profile['alcohol_consumption'] in ['moderate', 'heavy']:
            if 'rem' in modified:
                for to_state, effect in static_effects['alcohol_moderate_heavy']['rem'].items():
                    if to_state in modified['rem']:
                        modified['rem'][to_state] += effect
        
        # Hipertensión
        if profile['hypertension'] == 'yes':
            awake_boost = static_effects['hypertension']['all_states']['awake']
            for from_state in modified:
                modified[from_state]['awake'] += awake_boost
        
        # Normalizar probabilidades
        for from_state in modified:
            # Asegurar valores no negativos
            for to_state in modified[from_state]:
                modified[from_state][to_state] = max(0, modified[from_state][to_state])
            
            # Normalizar para que sumen 1
            total = sum(modified[from_state].values())
            if total > 0:
                for to_state in modified[from_state]:
                    modified[from_state][to_state] /= total
            else:
                # Si todas son 0, usar distribución uniforme
                n_states = len(modified[from_state])
                for to_state in modified[from_state]:
                    modified[from_state][to_state] = 1.0 / n_states
        
        return modified
    
    def generate_hr_value(self, sleep_state: str, profile: Dict, hour: int, 
                         is_exercise: bool = False, sensor_failure: bool = False) -> float:
        """Genera valor de FC basado en estado y contexto"""
        if sensor_failure:
            return np.nan
        
        # Determinar rango base según estado
        if sleep_state == 'awake':
            if is_exercise:
                base_range = self.heart_rate_config['base_ranges']['awake_high_activity']
            else:
                base_range = self.heart_rate_config['base_ranges']['awake_rest']
        elif sleep_state == 'light':
            base_range = self.heart_rate_config['base_ranges']['light_sleep']
        elif sleep_state == 'deep':
            base_range = self.heart_rate_config['base_ranges']['deep_sleep']
        else:  # rem
            base_range = self.heart_rate_config['base_ranges']['rem_sleep']
        
        # Valor base
        base_hr = np.random.normal(
            (base_range[0] + base_range[1]) / 2,
            (base_range[1] - base_range[0]) / 4
        )
        
        # Aplicar modificadores estáticos
        static_modifier = self.calculate_hr_modifier(profile, sleep_state)
        
        # Aplicar modificadores circadianos
        circadian = self.get_circadian_modifier(hour)
        
        # Calcular FC final
        final_hr = (base_hr + static_modifier) * circadian['hr_reduction']
        
        # Añadir ruido
        final_hr += np.random.normal(0, 2)
        
        # Límites fisiológicos
        final_hr = max(35, min(220, final_hr))
        
        return round(final_hr)
    
    def simulate_user(self, user_id: int, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula un usuario completo"""
        # Generar perfil estático
        profile = self.generate_static_profile()
        
        # Inicializar series temporales
        timestamps = []
        sleep_states = []
        heart_rates = []
        
        # Estado inicial
        current_sleep_state = 'awake'
        start_time = datetime(2024, 1, 1, 8, 0)
        
        # Variables para eventos especiales
        exercise_remaining = 0
        sensor_failure_remaining = 0
        
        for hour in range(hours):
            current_time = start_time + timedelta(hours=hour)
            timestamps.append(current_time)
            
            # Determinar eventos especiales
            circadian = self.get_circadian_modifier(current_time.hour)
            
            # Ejercicio
            is_exercise = False
            if exercise_remaining > 0:
                is_exercise = True
                exercise_remaining -= 1
            elif np.random.random() < circadian['exercise_probability']:
                duration_range = self.heart_rate_config['anomalies']['exercise_duration']
                exercise_duration = np.random.uniform(duration_range[0], duration_range[1])
                exercise_remaining = max(0, int(exercise_duration) - 1)
                is_exercise = True
            
            # Fallo del sensor
            sensor_failure = False
            if sensor_failure_remaining > 0:
                sensor_failure = True
                sensor_failure_remaining -= 1
            elif np.random.random() < self.heart_rate_config['anomalies']['sensor_failure']:
                duration_range = self.heart_rate_config['anomalies']['duration_range']
                failure_duration = np.random.randint(duration_range[0], duration_range[1] + 1)
                sensor_failure_remaining = failure_duration - 1
                sensor_failure = True
            
            # Generar FC
            recent_hr = 70  # Valor por defecto
            if len(heart_rates) > 0 and not np.isnan(heart_rates[-1]):
                recent_hr = heart_rates[-1]
            
            hr_value = self.generate_hr_value(
                current_sleep_state, profile, current_time.hour, 
                is_exercise, sensor_failure
            )
            heart_rates.append(hr_value)
            
            # Transición de estado de sueño
            modified_transitions = self.modify_transition_matrix(
                self.sleep_transitions, profile, current_time.hour, 
                hr_value if not np.isnan(hr_value) else recent_hr
            )
            
            # Ejecutar transición
            current_transitions = modified_transitions[current_sleep_state]
            next_state = np.random.choice(
                list(current_transitions.keys()),
                p=list(current_transitions.values())
            )
            
            sleep_states.append(current_sleep_state)
            current_sleep_state = next_state
        
        # Crear DataFrames
        profile_df = pd.DataFrame([profile])
        profile_df['user_id'] = user_id
        
        timeseries_df = pd.DataFrame({
            'user_id': user_id,
            'timestamp': timestamps,
            'sleep_state': sleep_states,
            'heart_rate': heart_rates
        })
        
        return profile_df, timeseries_df
    
    def simulate_multiple_users(self, n_users: int = 5, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula múltiples usuarios"""
        all_profiles = []
        all_timeseries = []
        
        for user_id in range(1, n_users + 1):
            print(f"Simulando usuario {user_id}...")
            profile_df, timeseries_df = self.simulate_user(user_id, hours)
            all_profiles.append(profile_df)
            all_timeseries.append(timeseries_df)
        
        profiles_combined = pd.concat(all_profiles, ignore_index=True)
        timeseries_combined = pd.concat(all_timeseries, ignore_index=True)
        
        return profiles_combined, timeseries_combined
    
    def validate_simulation(self, timeseries_df: pd.DataFrame) -> Dict:
        """Valida que la simulación cumpla con los targets fisiológicos"""
        validation_results = {}
        
        for user_id in timeseries_df['user_id'].unique():
            user_data = timeseries_df[timeseries_df['user_id'] == user_id]
            
            # Calcular métricas de sueño por día
            daily_metrics = []
            for day in range(7):  # 7 días
                day_start = day * 24
                day_end = (day + 1) * 24
                day_data = user_data.iloc[day_start:day_end]
                
                sleep_data = day_data[day_data['sleep_state'] != 'awake']
                if len(sleep_data) > 0:
                    sleep_duration = len(sleep_data)
                    rem_hours = len(day_data[day_data['sleep_state'] == 'rem'])
                    deep_hours = len(day_data[day_data['sleep_state'] == 'deep'])
                    
                    daily_metrics.append({
                        'sleep_duration': sleep_duration,
                        'rem_percentage': (rem_hours / sleep_duration * 100) if sleep_duration > 0 else 0,
                        'deep_percentage': (deep_hours / sleep_duration * 100) if sleep_duration > 0 else 0
                    })
            
            if daily_metrics:
                avg_sleep = np.mean([d['sleep_duration'] for d in daily_metrics])
                avg_rem = np.mean([d['rem_percentage'] for d in daily_metrics])
                avg_deep = np.mean([d['deep_percentage'] for d in daily_metrics])
                
                validation_results[f'user_{user_id}'] = {
                    'avg_sleep_duration': avg_sleep,
                    'rem_percentage': avg_rem,
                    'deep_percentage': avg_deep,
                    'meets_sleep_target': (
                        self.calibration_params['validation_targets']['avg_sleep_duration'][0] <= 
                        avg_sleep <= 
                        self.calibration_params['validation_targets']['avg_sleep_duration'][1]
                    ),
                    'meets_rem_target': (
                        self.calibration_params['validation_targets']['rem_percentage'][0] <= 
                        avg_rem <= 
                        self.calibration_params['validation_targets']['rem_percentage'][1]
                    ),
                    'meets_deep_target': (
                        self.calibration_params['validation_targets']['deep_sleep_percentage'][0] <= 
                        avg_deep <= 
                        self.calibration_params['validation_targets']['deep_sleep_percentage'][1]
                    )
                }
        
        return validation_results

# Ejecutar simulación
if __name__ == "__main__":
    # Crear simulador
    simulator = PhysiologicalSleepHRSimulator()
    
    # Generar datos para 5 usuarios durante 7 días
    print("Generando datos sintéticos fisiológicamente precisos...")
    profiles_df, timeseries_df = simulator.simulate_multiple_users(n_users=5, hours=168)
    
    # Mostrar perfiles estáticos
    print("\n=== PERFILES ESTÁTICOS ===")
    print(profiles_df.to_string(index=False))
    
    # Validar simulación
    print("\n=== VALIDACIÓN FISIOLÓGICA ===")
    validation_results = simulator.validate_simulation(timeseries_df)
    
    for user, metrics in validation_results.items():
        print(f"\n{user.upper()}:")
        print(f"  Duración sueño promedio: {metrics['avg_sleep_duration']:.1f}h ✓" if metrics['meets_sleep_target'] else f"  Duración sueño promedio: {metrics['avg_sleep_duration']:.1f}h ✗")
        print(f"  % REM: {metrics['rem_percentage']:.1f}% ✓" if metrics['meets_rem_target'] else f"  % REM: {metrics['rem_percentage']:.1f}% ✗")
        print(f"  % Profundo: {metrics['deep_percentage']:.1f}% ✓" if metrics['meets_deep_target'] else f"  % Profundo: {metrics['deep_percentage']:.1f}% ✗")
    
    # Mostrar estadísticas de FC
    print("\n=== ESTADÍSTICAS FRECUENCIA CARDÍACA ===")
    for user_id in range(1, 6):
        user_data = timeseries_df[timeseries_df['user_id'] == user_id]
        hr_stats = user_data['heart_rate'].describe()
        
        print(f"\nUsuario {user_id}:")
        print(f"  FC promedio: {hr_stats['mean']:.1f} ± {hr_stats['std']:.1f} lpm")
        print(f"  FC rango: {hr_stats['min']:.0f} - {hr_stats['max']:.0f} lpm")
        print(f"  Valores perdidos: {user_data['heart_rate'].isna().sum()}")
    
    # Guardar datos
    profiles_df.to_csv('physiological_user_profiles.csv', index=False)
    timeseries_df.to_csv('physiological_timeseries_data.csv', index=False)
    print(f"\nDatos guardados exitosamente")


Generando datos sintéticos fisiológicamente precisos...
Simulando usuario 1...
Simulando usuario 2...
Simulando usuario 3...
Simulando usuario 4...
Simulando usuario 5...

=== PERFILES ESTÁTICOS ===
  age_group gender physical_activity_level alcohol_consumption smoking_status hypertension  user_id
middle_aged female               sedentary               light        regular           no        1
      young   male               sedentary            moderate     non_smoker           no        2
     senior female               sedentary                none     non_smoker           no        3
middle_aged female               sedentary               light        regular           no        4
     senior female                    high               light     non_smoker          yes        5

=== VALIDACIÓN FISIOLÓGICA ===

USER_1:
  Duración sueño promedio: 12.3h ✗
  % REM: 9.1% ✗
  % Profundo: 21.0% ✗

USER_2:
  Duración sueño promedio: 15.9h ✗
  % REM: 6.6% ✗
  % Profundo: 21.1% ✗

USER

In [30]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta
from typing import Dict, Tuple, List
import matplotlib.pyplot as plt
import seaborn as sns

class PhysiologicalSleepHRSimulatorWithDurations:
    def __init__(self):
        # Parámetros de calibración
        self.calibration_params = {
            'population_stats': {
                'smoking_rates_by_age': {
                    'young': 0.15,
                    'middle_aged': 0.20,
                    'senior': 0.10
                },
                'hypertension_by_age': {
                    'young': 0.10,
                    'middle_aged': 0.35,
                    'senior': 0.60
                }
            },
            'validation_targets': {
                'avg_sleep_duration': (7, 9),
                'rem_percentage': (20, 25),
                'deep_sleep_percentage': (15, 20),
                'awakenings_per_night': (2, 5)
            }
        }
        
        # **NUEVO: Duraciones fisiológicas por fase de sueño (en minutos)**
        self.sleep_stage_durations = {
            'awake': {
                'min_duration': 1,
                'max_duration': 480,  # Hasta 8 horas despierto
                'typical_duration': (15, 60),  # Despertares típicos
                'circadian_modifier': {
                    'day': 1.0,
                    'night': 0.3  # Despertares nocturnos más cortos
                }
            },
            'light': {
                'min_duration': 2,
                'max_duration': 45,
                'typical_duration': (5, 20),  # N1 + N2
                'cycle_position_modifier': {
                    'early_night': 1.2,  # Más sueño ligero al inicio
                    'late_night': 0.8
                }
            },
            'deep': {
                'min_duration': 5,
                'max_duration': 60,
                'typical_duration': (10, 40),  # N3 - Sueño de ondas lentas
                'cycle_position_modifier': {
                    'first_cycle': 1.5,    # Más sueño profundo en primer ciclo
                    'later_cycles': 0.6
                },
                'age_modifier': {
                    'young': 1.3,
                    'middle_aged': 1.0,
                    'senior': 0.7  # Menos sueño profundo en mayores
                }
            },
            'rem': {
                'min_duration': 3,
                'max_duration': 45,
                'typical_duration': (8, 25),
                'cycle_position_modifier': {
                    'first_cycle': 0.5,    # REM corto al inicio
                    'later_cycles': 1.4    # REM más largo hacia el amanecer
                },
                'circadian_modifier': {
                    'early_morning': 1.6,  # Más REM en madrugada
                    'evening': 0.8
                }
            }
        }
        
        # Configuración de ciclos de sueño
        self.sleep_cycle_config = {
            'cycle_length': {
                'mean': 90,      # 90 minutos promedio
                'std': 15,       # Variabilidad individual
                'min': 70,       # Mínimo 70 minutos
                'max': 110       # Máximo 110 minutos
            },
            'cycles_per_night': {
                'typical': (4, 6),
                'age_modifier': {
                    'young': 1.1,
                    'middle_aged': 1.0,
                    'senior': 0.9
                }
            }
        }
        
        # Variables estáticas (simplificado para el ejemplo)
        self.static_variables_base = {
            'gender': {
                'states': ['male', 'female'],
                'probabilities': [0.49, 0.51]
            },
            'physical_activity_level': {
                'states': ['sedentary', 'light', 'moderate', 'high'],
                'probabilities': [0.30, 0.35, 0.25, 0.10]
            },
            'age_group': {
                'states': ['young', 'middle_aged', 'senior'],
                'probabilities': [0.30, 0.45, 0.25]
            }
        }
        
        # Configuración de FC
        self.heart_rate_config = {
            'base_ranges': {
                'awake_rest': (60, 80),
                'awake_activity': (80, 120),
                'light_sleep': (50, 70),
                'deep_sleep': (45, 60),
                'rem_sleep': (55, 75)
            }
        }
    
    def generate_age_adjusted_probabilities(self, age_group: str) -> Dict:
        """Genera probabilidades ajustadas por edad"""
        smoking_base_rate = self.calibration_params['population_stats']['smoking_rates_by_age'][age_group]
        smoking_probs = [
            1 - smoking_base_rate,
            smoking_base_rate * 0.4,
            smoking_base_rate * 0.4,
            smoking_base_rate * 0.2
        ]
        
        hypertension_rate = self.calibration_params['population_stats']['hypertension_by_age'][age_group]
        hypertension_probs = [1 - hypertension_rate, hypertension_rate]
        
        return {
            'smoking_status': {
                'states': ['non_smoker', 'occasional', 'regular', 'heavy'],
                'probabilities': smoking_probs
            },
            'hypertension': {
                'states': ['no', 'yes'],
                'probabilities': hypertension_probs
            }
        }
    
    def generate_static_profile(self) -> Dict:
        """Genera perfil estático de un usuario"""
        profile = {}
        
        # Generar edad primero
        profile['age_group'] = np.random.choice(
            self.static_variables_base['age_group']['states'],
            p=self.static_variables_base['age_group']['probabilities']
        )
        
        # Variables independientes
        for var_name in ['gender', 'physical_activity_level']:
            config = self.static_variables_base[var_name]
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        # Variables dependientes de edad
        age_adjusted = self.generate_age_adjusted_probabilities(profile['age_group'])
        for var_name, config in age_adjusted.items():
            profile[var_name] = np.random.choice(config['states'], p=config['probabilities'])
        
        return profile
    
    def calculate_stage_duration(self, stage: str, profile: Dict, hour: int, 
                               cycle_number: int = 1, total_cycles: int = 5) -> int:
        """
        Calcula duración realista para una fase de sueño específica
        Basado en investigación fisiológica
        """
        stage_config = self.sleep_stage_durations[stage]
        
        # Duración base
        min_dur, max_dur = stage_config['typical_duration']
        base_duration = np.random.uniform(min_dur, max_dur)
        
        # Modificadores circadianos
        if 'circadian_modifier' in stage_config:
            if 22 <= hour or hour <= 6:  # Noche
                if 'night' in stage_config['circadian_modifier']:
                    base_duration *= stage_config['circadian_modifier']['night']
                elif 'early_morning' in stage_config['circadian_modifier'] and 4 <= hour <= 6:
                    base_duration *= stage_config['circadian_modifier']['early_morning']
            else:  # Día
                if 'day' in stage_config['circadian_modifier']:
                    base_duration *= stage_config['circadian_modifier']['day']
        
        # Modificadores por posición en ciclo
        if 'cycle_position_modifier' in stage_config:
            if cycle_number == 1 and 'first_cycle' in stage_config['cycle_position_modifier']:
                base_duration *= stage_config['cycle_position_modifier']['first_cycle']
            elif cycle_number > 2 and 'later_cycles' in stage_config['cycle_position_modifier']:
                base_duration *= stage_config['cycle_position_modifier']['later_cycles']
        
        # Modificadores por edad
        if 'age_modifier' in stage_config and profile['age_group'] in stage_config['age_modifier']:
            base_duration *= stage_config['age_modifier'][profile['age_group']]
        
        # Aplicar límites fisiológicos
        final_duration = max(
            stage_config['min_duration'],
            min(stage_config['max_duration'], int(base_duration))
        )
        
        return final_duration
    
    def generate_hr_for_stage(self, stage: str, profile: Dict) -> float:
        """Genera FC apropiada para la fase de sueño"""
        if stage == 'awake':
            base_range = self.heart_rate_config['base_ranges']['awake_rest']
        else:
            base_range = self.heart_rate_config['base_ranges'][f'{stage}_sleep']
        
        # Valor base con distribución normal
        mean_hr = (base_range[0] + base_range[1]) / 2
        std_hr = (base_range[1] - base_range[0]) / 4
        
        hr = np.random.normal(mean_hr, std_hr)
        
        # Modificadores por perfil (simplificado)
        if profile['age_group'] == 'senior':
            hr += 3
        elif profile['age_group'] == 'young':
            hr -= 2
            
        if profile['physical_activity_level'] == 'high':
            hr -= 5
        elif profile['physical_activity_level'] == 'sedentary':
            hr += 3
        
        # Límites fisiológicos
        return max(40, min(120, round(hr)))
    
    def simulate_sleep_architecture(self, profile: Dict, total_hours: int = 168) -> List[Dict]:
        """
        Simula arquitectura de sueño con duraciones realistas
        Basado en ciclos de sueño fisiológicos
        """
        events = []
        current_time = datetime(2024, 1, 1, 8, 0)  # Inicio a las 8 AM
        
        day = 0
        while day < (total_hours // 24):
            # **PERÍODO DIURNO (8 AM - 10 PM)**
            wake_start = current_time.replace(hour=8, minute=0)
            wake_end = current_time.replace(hour=22, minute=0)
            
            # Vigilia diurna con despertares ocasionales
            current_time = wake_start
            while current_time < wake_end:
                # Duración de vigilia (1-4 horas)
                wake_duration = self.calculate_stage_duration('awake', profile, current_time.hour)
                wake_duration = min(wake_duration, 240)  # Máximo 4 horas
                
                # Generar eventos para este período de vigilia
                for minute in range(wake_duration):
                    events.append({
                        'timestamp': current_time + timedelta(minutes=minute),
                        'sleep_state': 'awake',
                        'heart_rate': self.generate_hr_for_stage('awake', profile),
                        'stage_duration_minutes': wake_duration,
                        'cycle_number': 0,  # Vigilia diurna
                        'hour': (current_time + timedelta(minutes=minute)).hour
                    })
                
                current_time += timedelta(minutes=wake_duration)
                
                # Pequeña siesta ocasional (5% probabilidad)
                if np.random.random() < 0.05 and current_time.hour >= 13:
                    nap_duration = np.random.randint(15, 45)  # 15-45 min
                    for minute in range(nap_duration):
                        events.append({
                            'timestamp': current_time + timedelta(minutes=minute),
                            'sleep_state': 'light',
                            'heart_rate': self.generate_hr_for_stage('light', profile),
                            'stage_duration_minutes': nap_duration,
                            'cycle_number': 0,  # Siesta
                            'hour': (current_time + timedelta(minutes=minute)).hour
                        })
                    current_time += timedelta(minutes=nap_duration)
            
            # **PERÍODO NOCTURNO (10 PM - 8 AM)**
            sleep_start = current_time.replace(hour=22, minute=0)
            sleep_end = current_time.replace(hour=8, minute=0) + timedelta(days=1)
            
            current_time = sleep_start
            cycle_number = 1
            
            # Generar ciclos de sueño nocturno
            while current_time < sleep_end and cycle_number <= 6:
                cycle_start_time = current_time
                
                # **FASE 1: Transición a sueño ligero**
                light_duration = self.calculate_stage_duration('light', profile, current_time.hour, cycle_number)
                for minute in range(light_duration):
                    events.append({
                        'timestamp': current_time + timedelta(minutes=minute),
                        'sleep_state': 'light',
                        'heart_rate': self.generate_hr_for_stage('light', profile),
                        'stage_duration_minutes': light_duration,
                        'cycle_number': cycle_number,
                        'hour': (current_time + timedelta(minutes=minute)).hour
                    })
                current_time += timedelta(minutes=light_duration)
                
                # **FASE 2: Sueño profundo (más en primeros ciclos)**
                if cycle_number <= 3:  # Más sueño profundo en primeros ciclos
                    deep_duration = self.calculate_stage_duration('deep', profile, current_time.hour, cycle_number)
                    for minute in range(deep_duration):
                        events.append({
                            'timestamp': current_time + timedelta(minutes=minute),
                            'sleep_state': 'deep',
                            'heart_rate': self.generate_hr_for_stage('deep', profile),
                            'stage_duration_minutes': deep_duration,
                            'cycle_number': cycle_number,
                            'hour': (current_time + timedelta(minutes=minute)).hour
                        })
                    current_time += timedelta(minutes=deep_duration)
                
                # **FASE 3: Vuelta a sueño ligero**
                light_duration_2 = self.calculate_stage_duration('light', profile, current_time.hour, cycle_number) // 2
                for minute in range(light_duration_2):
                    events.append({
                        'timestamp': current_time + timedelta(minutes=minute),
                        'sleep_state': 'light',
                        'heart_rate': self.generate_hr_for_stage('light', profile),
                        'stage_duration_minutes': light_duration_2,
                        'cycle_number': cycle_number,
                        'hour': (current_time + timedelta(minutes=minute)).hour
                    })
                current_time += timedelta(minutes=light_duration_2)
                
                # **FASE 4: REM (más en ciclos tardíos)**
                rem_duration = self.calculate_stage_duration('rem', profile, current_time.hour, cycle_number)
                for minute in range(rem_duration):
                    events.append({
                        'timestamp': current_time + timedelta(minutes=minute),
                        'sleep_state': 'rem',
                        'heart_rate': self.generate_hr_for_stage('rem', profile),
                        'stage_duration_minutes': rem_duration,
                        'cycle_number': cycle_number,
                        'hour': (current_time + timedelta(minutes=minute)).hour
                    })
                current_time += timedelta(minutes=rem_duration)
                
                # **Despertar breve ocasional entre ciclos (30% probabilidad)**
                if np.random.random() < 0.3 and cycle_number < 5:
                    wake_duration = np.random.randint(1, 5)  # 1-5 minutos
                    for minute in range(wake_duration):
                        events.append({
                            'timestamp': current_time + timedelta(minutes=minute),
                            'sleep_state': 'awake',
                            'heart_rate': self.generate_hr_for_stage('awake', profile),
                            'stage_duration_minutes': wake_duration,
                            'cycle_number': cycle_number,
                            'hour': (current_time + timedelta(minutes=minute)).hour
                        })
                    current_time += timedelta(minutes=wake_duration)
                
                cycle_number += 1
                
                # Verificar si hemos completado un ciclo típico (70-110 min)
                cycle_duration = (current_time - cycle_start_time).total_seconds() / 60
                if cycle_duration < 70:
                    # Añadir más sueño ligero para completar ciclo
                    additional_light = 70 - int(cycle_duration)
                    for minute in range(additional_light):
                        events.append({
                            'timestamp': current_time + timedelta(minutes=minute),
                            'sleep_state': 'light',
                            'heart_rate': self.generate_hr_for_stage('light', profile),
                            'stage_duration_minutes': additional_light,
                            'cycle_number': cycle_number - 1,
                            'hour': (current_time + timedelta(minutes=minute)).hour
                        })
                    current_time += timedelta(minutes=additional_light)
            
            # Avanzar al siguiente día
            current_time = current_time.replace(hour=8, minute=0)
            day += 1
        
        return events
    
    def simulate_user_with_durations(self, user_id: int, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula un usuario con duraciones realistas por fase"""
        # Generar perfil
        profile = self.generate_static_profile()
        
        # Simular arquitectura de sueño
        events = self.simulate_sleep_architecture(profile, hours)
        
        # Crear DataFrames
        profile_df = pd.DataFrame([profile])
        profile_df['user_id'] = user_id
        
        timeseries_df = pd.DataFrame(events)
        timeseries_df['user_id'] = user_id
        
        return profile_df, timeseries_df
    
    def simulate_multiple_users_with_durations(self, n_users: int = 5, hours: int = 168) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Simula múltiples usuarios con duraciones por fase"""
        all_profiles = []
        all_timeseries = []
        
        for user_id in range(1, n_users + 1):
            print(f"Simulando usuario {user_id} con duraciones fisiológicas...")
            profile_df, timeseries_df = self.simulate_user_with_durations(user_id, hours)
            all_profiles.append(profile_df)
            all_timeseries.append(timeseries_df)
        
        profiles_combined = pd.concat(all_profiles, ignore_index=True)
        timeseries_combined = pd.concat(all_timeseries, ignore_index=True)
        
        return profiles_combined, timeseries_combined
    
    def analyze_sleep_architecture(self, timeseries_df: pd.DataFrame) -> Dict:
        """Analiza la arquitectura de sueño generada"""
        analysis = {}
        
        for user_id in timeseries_df['user_id'].unique():
            user_data = timeseries_df[timeseries_df['user_id'] == user_id]
            
            # Análisis por día
            daily_analysis = []
            for day in range(7):
                day_data = user_data[
                    (user_data['timestamp'].dt.day == (day + 1))
                ]
                
                if len(day_data) > 0:
                    # Calcular duraciones por fase
                    stage_durations = {}
                    for stage in ['awake', 'light', 'deep', 'rem']:
                        stage_data = day_data[day_data['sleep_state'] == stage]
                        stage_durations[f'{stage}_minutes'] = len(stage_data)
                        stage_durations[f'{stage}_percentage'] = (len(stage_data) / len(day_data)) * 100
                    
                    # Análisis de ciclos nocturnos
                    night_data = day_data[
                        (day_data['hour'] >= 22) | (day_data['hour'] <= 8)
                    ]
                    
                    if len(night_data) > 0:
                        cycles = night_data['cycle_number'].unique()
                        cycles = cycles[cycles > 0]  # Excluir vigilia diurna
                        
                        stage_durations['total_sleep_cycles'] = len(cycles)
                        stage_durations['total_sleep_minutes'] = len(night_data[night_data['sleep_state'] != 'awake'])
                        stage_durations['sleep_efficiency'] = (
                            stage_durations['total_sleep_minutes'] / len(night_data) * 100
                        )
                    
                    daily_analysis.append(stage_durations)
            
            if daily_analysis:
                # Promedios semanales
                avg_analysis = {}
                for key in daily_analysis[0].keys():
                    values = [day.get(key, 0) for day in daily_analysis if key in day]
                    if values:
                        avg_analysis[f'avg_{key}'] = np.mean(values)
                        avg_analysis[f'std_{key}'] = np.std(values)
                
                analysis[f'user_{user_id}'] = avg_analysis
        
        return analysis

# Ejecutar simulación con duraciones
if __name__ == "__main__":
    # Crear simulador con duraciones
    simulator = PhysiologicalSleepHRSimulatorWithDurations()
    
    # Generar datos para 5 usuarios durante 7 días
    print("Generando datos con duraciones realistas por fase de sueño...")
    profiles_df, timeseries_df = simulator.simulate_multiple_users_with_durations(n_users=5, hours=168)
    
    # Mostrar perfiles
    print("\n=== PERFILES ESTÁTICOS ===")
    print(profiles_df.to_string(index=False))
    
    # Analizar arquitectura de sueño
    print("\n=== ANÁLISIS DE ARQUITECTURA DE SUEÑO ===")
    sleep_analysis = simulator.analyze_sleep_architecture(timeseries_df)
    
    for user, metrics in sleep_analysis.items():
        print(f"\n{user.upper()}:")
        print(f"  Sueño total promedio: {metrics.get('avg_total_sleep_minutes', 0):.1f} min/noche")
        print(f"  Eficiencia del sueño: {metrics.get('avg_sleep_efficiency', 0):.1f}%")
        print(f"  Ciclos por noche: {metrics.get('avg_total_sleep_cycles', 0):.1f}")
        print(f"  Sueño ligero: {metrics.get('avg_light_percentage', 0):.1f}%")
        print(f"  Sueño profundo: {metrics.get('avg_deep_percentage', 0):.1f}%")
        print(f"  Sueño REM: {metrics.get('avg_rem_percentage', 0):.1f}%")
    
    # Mostrar ejemplo de duraciones por fase
    print("\n=== EJEMPLO DE DURACIONES POR FASE (Usuario 1, Día 1) ===")
    user1_day1 = timeseries_df[
        (timeseries_df['user_id'] == 1) & 
        (timeseries_df['timestamp'].dt.day == 1) &
        (timeseries_df['cycle_number'] > 0)  # Solo ciclos nocturnos
    ].head(20)
    
    print(user1_day1[['timestamp', 'sleep_state', 'stage_duration_minutes', 'cycle_number', 'heart_rate']].to_string(index=False))
    
    # Guardar datos
    profiles_df.to_csv('sleep_profiles_with_durations.csv', index=False)
    timeseries_df.to_csv('sleep_timeseries_with_durations.csv', index=False)
    print(f"\n✅ Datos con duraciones realistas guardados exitosamente")
    
    # Estadísticas de validación
    print(f"\n=== ESTADÍSTICAS DE VALIDACIÓN ===")
    print(f"Total de registros generados: {len(timeseries_df):,}")
    print(f"Rango de fechas: {timeseries_df['timestamp'].min()} a {timeseries_df['timestamp'].max()}")
    print(f"Distribución de fases de sueño:")
    stage_counts = timeseries_df['sleep_state'].value_counts()
    for stage, count in stage_counts.items():
        percentage = (count / len(timeseries_df)) * 100
        print(f"  {stage}: {count:,} ({percentage:.1f}%)")


Generando datos con duraciones realistas por fase de sueño...
Simulando usuario 1 con duraciones fisiológicas...
Simulando usuario 2 con duraciones fisiológicas...
Simulando usuario 3 con duraciones fisiológicas...
Simulando usuario 4 con duraciones fisiológicas...
Simulando usuario 5 con duraciones fisiológicas...

=== PERFILES ESTÁTICOS ===
  age_group gender physical_activity_level smoking_status hypertension  user_id
      young female               sedentary     occasional           no        1
     senior female                   light     non_smoker          yes        2
middle_aged   male                   light     non_smoker           no        3
middle_aged   male                   light     non_smoker           no        4
      young   male                    high     non_smoker           no        5

=== ANÁLISIS DE ARQUITECTURA DE SUEÑO ===

USER_1:
  Sueño total promedio: 388.1 min/noche
  Eficiencia del sueño: 81.2%
  Ciclos por noche: 5.4
  Sueño ligero: 17.4%
  Sueño