In [2]:
# ============================================================
# CÉLULA 1 — Leitura do arquivo RAW
# ============================================================
import numpy as np
import os

# Location of the raw dataset. The original absolute path stays as the default so
# existing runs behave exactly as before; set the MHEALTH24_DATA_PATH environment
# variable to run the notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "MHEALTH24_DATA_PATH",
    "/Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimation/data/raw/mhealth24_data_public.npy",
)

# Fail early with an explicit message instead of letting np.load raise.
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Arquivo n√£o encontrado: {DATA_PATH}")

print(f"üì• Carregando arquivo RAW: {DATA_PATH}")
# SECURITY NOTE: allow_pickle=True makes np.load unpickle Python objects, which can
# execute arbitrary code. Only load files that come from a trusted source.
# .item() unwraps the single object stored in the loaded array; the lines below
# treat it as a mapping (they call .keys() on it).
data = np.load(DATA_PATH, allow_pickle=True).item()

print("‚úÖ Base carregada com sucesso!")
print("üìå Fases detectadas:", list(data.keys()))

üì• Carregando arquivo RAW: /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimation/data/raw/mhealth24_data_public.npy
‚úÖ Base carregada com sucesso!
üìå Fases detectadas: ['phase 0', 'phase 1', 'phase 2', 'phase 3', 'phase 4', 'phase 5']


In [5]:
# ============================================================
# NOTEBOOK: Assessment — Auditoria Estrutural (CORRIGIDO)
# ============================================================

import numpy as np
import pandas as pd
import os

# ------------------------------------------------------------
# CONFIGURAÇÕES
# ------------------------------------------------------------
FS = 128               # Hz: samples per second, used to convert sample counts to seconds
HR_WINDOW_S = 30.0     # seconds of signal that each HR value is expected to cover
SAVE_CSV = True        # when True, the audit table is written to CSV_OUT
CSV_OUT = "assessment/eda_structural.csv"
# The original absolute path stays as the default so existing runs are unchanged;
# set the MHEALTH24_DATA_PATH environment variable to use a different location.
DATA_PATH = os.environ.get(
    "MHEALTH24_DATA_PATH",
    "/Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimation/data/raw/mhealth24_data_public.npy",
)

# Create the directory CSV_OUT actually points into, rather than a second
# hard-coded name that would drift if CSV_OUT is edited. A bare filename has an
# empty dirname, and os.makedirs("") raises, so that case is skipped.
_csv_out_dir = os.path.dirname(CSV_OUT)
if _csv_out_dir:
    os.makedirs(_csv_out_dir, exist_ok=True)

# ------------------------------------------------------------
# FUNÇÕES AUXILIARES
# ------------------------------------------------------------
def find_key(d, contains):
    """Return the first key of ``d`` whose name contains every search token.

    Matching is case-insensitive on both sides: each key is lower-cased and
    stripped of surrounding whitespace, and each token is lower-cased, before
    the substring test. Keys that are not strings are skipped.

    Args:
        d: Mapping whose keys are searched, in iteration order.
        contains: A single substring, or an iterable of substrings that must
            all occur in the key.

    Returns:
        The original (un-normalized) key of the first match, or ``None`` when
        no key matches.
    """
    tokens = [contains] if isinstance(contains, str) else list(contains)
    # Lower-case the tokens too; otherwise a token such as "PPG" could never
    # match, because keys are always lower-cased before comparison.
    tokens = [t.lower() for t in tokens]
    for k in d.keys():
        if not isinstance(k, str):
            # Non-string keys have no .lower(); they cannot match a substring.
            continue
        kl = k.lower().strip()
        if all(t in kl for t in tokens):
            return k
    return None


def find_all(d, contains):
    """Return every key of ``d`` whose name contains all search tokens.

    Matching is case-insensitive on both sides: each key is lower-cased and
    stripped of surrounding whitespace, and each token is lower-cased, before
    the substring test. Keys that are not strings are skipped.

    Args:
        d: Mapping whose keys are searched.
        contains: A single substring, or an iterable of substrings that must
            all occur in the key.

    Returns:
        List of the original (un-normalized) matching keys, in the mapping's
        iteration order; empty when nothing matches.
    """
    tokens = [contains] if isinstance(contains, str) else list(contains)
    # Lower-case the tokens so that mixed-case tokens can match the
    # lower-cased keys.
    tokens = [t.lower() for t in tokens]
    matches = []
    for k in d.keys():
        if not isinstance(k, str):
            # Non-string keys have no .lower(); they cannot match a substring.
            continue
        kl = k.lower().strip()  # normalize once per key, not once per token
        if all(t in kl for t in tokens):
            matches.append(k)
    return matches


def arr_stats(name, arr):
    """Compute NaN-aware summary statistics for an array.

    The input is converted with ``np.asarray`` and cast to float. Percentiles,
    mean and standard deviation ignore NaN entries. When there is no usable
    value at all (empty input, or every entry is NaN) those statistics are
    reported as NaN instead of raising or emitting NumPy runtime warnings.

    Args:
        name: Prefix used for every key of the returned dict.
        arr: Array-like of numeric values.

    Returns:
        Dict with the keys ``{name}_len``, ``{name}_dtype``, ``{name}_min``,
        ``{name}_p01``, ``{name}_p25``, ``{name}_p50``, ``{name}_p75``,
        ``{name}_p99``, ``{name}_max``, ``{name}_mean``, ``{name}_std``,
        ``{name}_nan_count`` and ``{name}_nan_ratio``. ``_len`` is the total
        element count and ``_dtype`` is the dtype before the float cast.
    """
    percentiles = [0, 1, 25, 50, 75, 99, 100]

    arr = np.asarray(arr)
    n = arr.size
    arr_f = arr.astype(float)
    nan_mask = np.isnan(arr_f)  # computed once, reused for count and ratio

    if nan_mask.all():
        # .all() is also True for an empty array. With no valid sample the
        # NaN-aware reducers have nothing to work on, so report NaN directly.
        quant = [np.nan] * len(percentiles)
        mean = np.nan
        std = np.nan
    else:
        quant = np.nanpercentile(arr_f, percentiles)
        mean = np.nanmean(arr_f)
        std = np.nanstd(arr_f)

    # The ratio is undefined for an empty array; avoid the mean-of-empty warning.
    nan_ratio = nan_mask.mean() if n else np.nan

    return {
        f"{name}_len": n,
        f"{name}_dtype": str(arr.dtype),
        f"{name}_min": quant[0],
        f"{name}_p01": quant[1],
        f"{name}_p25": quant[2],
        f"{name}_p50": quant[3],
        f"{name}_p75": quant[4],
        f"{name}_p99": quant[5],
        f"{name}_max": quant[6],
        f"{name}_mean": mean,
        f"{name}_std": std,
        f"{name}_nan_count": int(nan_mask.sum()),
        f"{name}_nan_ratio": nan_ratio,
    }


def check_lengths_equal(**arrays):
    """Report whether all provided arrays have the same number of elements.

    Each keyword argument is either ``None`` (treated as absent) or an object
    exposing a ``.size`` attribute.

    Returns:
        A ``(consistent, sizes)`` tuple. ``sizes`` maps each keyword name to
        its ``.size`` (or ``None`` for absent arrays). ``consistent`` is True
        only when the non-``None`` sizes take exactly one distinct value, so
        it is False when no array is present at all.
    """
    sizes = {}
    distinct_sizes = set()
    for label, array in arrays.items():
        size = None if array is None else array.size
        sizes[label] = size
        if size is not None:
            distinct_sizes.add(size)
    return len(distinct_sizes) == 1, sizes


def imu_energy(x, y, z):
    """Return the NaN-ignoring mean Euclidean magnitude of a 3-axis signal.

    Despite the name, the value is the mean of ``sqrt(x**2 + y**2 + z**2)``,
    not a sum of squares.

    Args:
        x, y, z: Array-likes of equal (or broadcastable) shape, one per axis.

    Returns:
        The mean magnitude as a Python ``float``.
    """
    # Promote to float before squaring. Squaring a fixed-width integer array
    # wraps around silently on overflow (e.g. 300**2 does not fit in int16),
    # which corrupts the sum and can make it negative, turning sqrt into NaN.
    # NOTE(review): the notebook's captured output shows a warning raised on
    # the sqrt line, presumably from this; confirm the IMU dtype in the data.
    x, y, z = (np.asarray(axis, dtype=float) for axis in (x, y, z))
    mag = np.sqrt(x**2 + y**2 + z**2)
    return float(np.nanmean(mag))


# ============================================================
# 1) CARREGAR O ARQUIVO BRUTO (data_raw)
# ============================================================
# Load the raw dataset into `data_raw`, failing early when the file is missing.
if os.path.exists(DATA_PATH):
    print(f"üì• Carregando arquivo RAW: {DATA_PATH}")
    # NOTE: allow_pickle=True unpickles Python objects, which can execute
    # arbitrary code; only use it on files from a trusted source.
    # .item() unwraps the single object held by the loaded array; it is used
    # below as a mapping (.keys()).
    raw_array = np.load(DATA_PATH, allow_pickle=True)
    data_raw = raw_array.item()
    print("‚úÖ Base carregada com sucesso!")
    phase_names = list(data_raw.keys())
    print("üìå Fases detectadas:", phase_names)
else:
    raise FileNotFoundError(f"Arquivo n√£o encontrado: {DATA_PATH}")


# ============================================================
# 2) AUDITORIA ESTRUTURAL (com base SOMENTE no data_raw)
# ============================================================
print("\n===================== AUDITORIA ESTRUTURAL =====================")

# One dict of audit results per phase; turned into a DataFrame afterwards.
rows = []

# Number of samples covered by one HR window. It does not depend on the phase,
# so it is computed once instead of on every iteration.
window_size = int(FS * HR_WINDOW_S)

for phase_name, d in data_raw.items():

    print("\n" + "="*70)
    print(f"üîç ANALISANDO {phase_name.upper()}")
    print("Chaves encontradas:", list(d.keys()))

    # Locate the sensor entries by substring of their key names.
    ppg_key = find_key(d, "ppg")
    # Sorting is what assigns axes below: index 0/1/2 are taken as X/Y/Z.
    # NOTE(review): this relies on the key names sorting in X, Y, Z order.
    imu_keys = sorted(find_all(d, "imu"))
    hr_key = find_key(d, ["ground", "hr"])

    # d.get(None) yields None, so a key that was not found becomes a None array.
    ppg = d.get(ppg_key)
    imu_x = d.get(imu_keys[0]) if len(imu_keys) >= 1 else None
    imu_y = d.get(imu_keys[1]) if len(imu_keys) >= 2 else None
    imu_z = d.get(imu_keys[2]) if len(imu_keys) >= 3 else None
    hr = d.get(hr_key)

    n_hr = hr.size if isinstance(hr, np.ndarray) else 0

    if ppg is None:
        # Duration and windowing are derived from the PPG length, so nothing
        # further can be computed. Record what is known and move on instead of
        # crashing on `ppg.size`.
        print(f"{phase_name}: Nenhum PPG encontrado; auditoria de sinal ignorada")
        rows.append({
            "phase": phase_name,
            "ppg_key": ppg_key,
            "imu_keys": ", ".join(imu_keys),
            "hr_key": hr_key,
            "has_hr": hr is not None,
            "n_hr": n_hr,
        })
        continue

    ok_len, lens = check_lengths_equal(ppg=ppg, imu_x=imu_x, imu_y=imu_y, imu_z=imu_z)

    duration_sec = ppg.size / FS
    expected_hr_windows = int(round(duration_sec / HR_WINDOW_S))
    # A difference of at most one window is tolerated.
    hr_ok = abs(n_hr - expected_hr_windows) <= 1

    stats = {
        "phase": phase_name,
        "ppg_key": ppg_key,
        "imu_keys": ", ".join(imu_keys),
        "hr_key": hr_key,
        "duration_sec": duration_sec,
        "expected_hr_windows": expected_hr_windows,
        "has_hr": hr is not None,
        "n_hr": n_hr,
        "hr_windows_match": hr_ok,
        "len_consistent": ok_len,
        "len_detail": str(lens),
    }

    # PPG statistics (PPG is guaranteed to be present at this point).
    stats.update(arr_stats("ppg", ppg))

    # IMU statistics: all three axes are required, because every axis is
    # summarized and combined; a partial set would fail on the missing one.
    if all(axis is not None for axis in (imu_x, imu_y, imu_z)):
        stats.update(arr_stats("acc_x", imu_x))
        stats.update(arr_stats("acc_y", imu_y))
        stats.update(arr_stats("acc_z", imu_z))
        stats["acc_energy"] = imu_energy(imu_x, imu_y, imu_z)

    # HR statistics
    if hr is not None:
        stats.update(arr_stats("hr", hr))

    # Windowing integrity: does the signal split into whole HR windows?
    samples = ppg.size
    stats["window_size"] = window_size
    stats["expected_windows_signal"] = samples // window_size
    stats["remainder_signal"] = samples % window_size
    stats["windowing_ok"] = (samples % window_size == 0)

    rows.append(stats)

    print(f"‚Ä¢ Dura√ß√£o: {duration_sec:.1f}s | HR esperada: {expected_hr_windows}")
    print(f"‚Ä¢ HR presente? {hr is not None} | HR len: {n_hr} | Confere? {hr_ok}")
    print(f"‚Ä¢ Tamanhos consistentes PPG/IMU? {ok_len} ‚Üí {lens}")
    print(f"‚Ä¢ Windowing: {stats['expected_windows_signal']} janelas | Resto={stats['remainder_signal']}")


# ============================================================
# 3) RESULTADO FINAL DA AUDITORIA
# ============================================================
# Assemble the audit table: one row per audited phase, ordered by the "phase"
# column and re-indexed from zero.
audit_unsorted = pd.DataFrame(rows)
audit_sorted = audit_unsorted.sort_values("phase")
df = audit_sorted.reset_index(drop=True)

separator = "=" * 70
print("\n" + separator)
print("üìä RESUMO FINAL DA AUDITORIA")
display(df)

# Optionally persist the table (without the index column) to CSV_OUT.
if SAVE_CSV:
    df.to_csv(CSV_OUT, index=False)
    print(f"üíæ Arquivo salvo em: {CSV_OUT}")


# ============================================================
# 4) INSPEÇÃO REAL DO PPG DO ARQUIVO BRUTO
# ============================================================
print("\n===============================================")
print("üìå INSPE√á√ÉO REAL DO PPG DO ARQUIVO BRUTO")
print("===============================================\n")

# For every phase, print shape, dtype, basic statistics and the first and last
# ten samples of each entry whose key contains "ppg" (case-insensitive).
for phase, d in data_raw.items():

    ppg_keys = [k for k in d.keys() if "ppg" in k.lower()]

    if ppg_keys:
        for key in ppg_keys:
            ppg = np.asarray(d[key])
            # Extremes are computed once and reused for the flatness check.
            ppg_min = ppg.min()
            ppg_max = ppg.max()
            print(f"\nüìç {phase} ‚Äî chave: '{key}'")
            print(f"  ‚Ä¢ shape: {ppg.shape}")
            print(f"  ‚Ä¢ dtype: {ppg.dtype}")
            print(f"  ‚Ä¢ min: {ppg_min}")
            print(f"  ‚Ä¢ max: {ppg_max}")
            print(f"  ‚Ä¢ mean: {ppg.mean()}")
            print(f"  ‚Ä¢ std: {ppg.std()}")
            # A signal whose minimum equals its maximum is constant.
            print(f"  ‚Ä¢ flat? {ppg_min == ppg_max}")
            print(f"  ‚Üí primeiros 10 valores: {ppg[:10]}")
            print(f"  ‚Üí √∫ltimos 10 valores:   {ppg[-10:]}")
    else:
        print(f"{phase}: ‚ùå Nenhum PPG encontrado")

üì• Carregando arquivo RAW: /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimation/data/raw/mhealth24_data_public.npy
‚úÖ Base carregada com sucesso!
üìå Fases detectadas: ['phase 0', 'phase 1', 'phase 2', 'phase 3', 'phase 4', 'phase 5']


üîç ANALISANDO PHASE 0
Chaves encontradas: ['ground truth HR', 'PPG wrist', 'IMU X wrist', 'IMU Y wrist', 'IMU Z wrist']
‚Ä¢ Dura√ß√£o: 11880.0s | HR esperada: 396
‚Ä¢ HR presente? True | HR len: 396 | Confere? True
‚Ä¢ Tamanhos consistentes PPG/IMU? True ‚Üí {'ppg': 1520640, 'imu_x': 1520640, 'imu_y': 1520640, 'imu_z': 1520640}
‚Ä¢ Windowing: 396 janelas | Resto=0

üîç ANALISANDO PHASE 1
Chaves encontradas: ['PPG head', 'IMU X head', 'IMU Y head', 'IMU Z head']
‚Ä¢ Dura√ß√£o: 11880.0s | HR esperada: 396
‚Ä¢ HR presente? False | HR len: 0 | Confere? False
‚Ä¢ Tamanhos consistentes PPG/IMU? True ‚Üí {'ppg': 1520640, 'imu_x': 1520640, 'imu_y': 1520640, 'imu_z': 1520640}
‚Ä¢ Windowing: 396 janelas | Resto=0

üîç ANALISANDO PHASE 2
Chaves

  mag = np.sqrt(x**2 + y**2 + z**2)


‚Ä¢ Dura√ß√£o: 11880.0s | HR esperada: 396
‚Ä¢ HR presente? True | HR len: 396 | Confere? True
‚Ä¢ Tamanhos consistentes PPG/IMU? True ‚Üí {'ppg': 1520640, 'imu_x': 1520640, 'imu_y': 1520640, 'imu_z': 1520640}
‚Ä¢ Windowing: 396 janelas | Resto=0

üîç ANALISANDO PHASE 3
Chaves encontradas: ['PPG wrist', 'IMU X wrist', 'IMU Y wrist', 'IMU Z wrist']
‚Ä¢ Dura√ß√£o: 11880.0s | HR esperada: 396
‚Ä¢ HR presente? False | HR len: 0 | Confere? False
‚Ä¢ Tamanhos consistentes PPG/IMU? True ‚Üí {'ppg': 1520640, 'imu_x': 1520640, 'imu_y': 1520640, 'imu_z': 1520640}
‚Ä¢ Windowing: 396 janelas | Resto=0

üîç ANALISANDO PHASE 4
Chaves encontradas: ['ground truth HR', 'PPG head', 'IMU X head', 'IMU Y head', 'IMU Z head']
‚Ä¢ Dura√ß√£o: 1710.0s | HR esperada: 57
‚Ä¢ HR presente? True | HR len: 57 | Confere? True
‚Ä¢ Tamanhos consistentes PPG/IMU? True ‚Üí {'ppg': 218880, 'imu_x': 218880, 'imu_y': 218880, 'imu_z': 218880}
‚Ä¢ Windowing: 57 janelas | Resto=0

üîç ANALISANDO PHASE 5
Chaves encontradas:

Unnamed: 0,phase,ppg_key,imu_keys,hr_key,duration_sec,expected_hr_windows,has_hr,n_hr,hr_windows_match,len_consistent,...,hr_p99,hr_max,hr_mean,hr_std,hr_nan_count,hr_nan_ratio,window_size,expected_windows_signal,remainder_signal,windowing_ok
0,phase 0,PPG wrist,"IMU X wrist, IMU Y wrist, IMU Z wrist",ground truth HR,11880.0,396,True,396,True,True,...,109.091103,122.190923,76.711321,9.36306,0.0,0.0,3840,396,0,True
1,phase 1,PPG head,"IMU X head, IMU Y head, IMU Z head",,11880.0,396,False,0,False,True,...,,,,,,,3840,396,0,True
2,phase 2,PPG head,"IMU X head, IMU Y head, IMU Z head",ground truth HR,11880.0,396,True,396,True,True,...,117.017878,126.740717,83.69928,9.684084,0.0,0.0,3840,396,0,True
3,phase 3,PPG wrist,"IMU X wrist, IMU Y wrist, IMU Z wrist",,11880.0,396,False,0,False,True,...,,,,,,,3840,396,0,True
4,phase 4,PPG head,"IMU X head, IMU Y head, IMU Z head",ground truth HR,1710.0,57,True,57,True,True,...,173.986973,174.26821,153.45101,12.132161,0.0,0.0,3840,57,0,True
5,phase 5,PPG head,"IMU X head, IMU Y head, IMU Z head",,1710.0,57,False,0,False,True,...,,,,,,,3840,57,0,True


üíæ Arquivo salvo em: assessment/eda_structural.csv

üìå INSPE√á√ÉO REAL DO PPG DO ARQUIVO BRUTO


üìç phase 0 ‚Äî chave: 'PPG wrist'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 68896
  ‚Ä¢ max: 425393
  ‚Ä¢ mean: 186564.55591724536
  ‚Ä¢ std: 29521.75674657334
  ‚Ä¢ flat? False
  ‚Üí primeiros 10 valores: [78769 78824 78734 78746 78783 78699 78829 78725 78803 78731]
  ‚Üí √∫ltimos 10 valores:   [201629 201621 201542 201463 201780 201814 201610 201529 201433 201529]

üìç phase 1 ‚Äî chave: 'PPG head'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 164229
  ‚Ä¢ max: 411056
  ‚Ä¢ mean: 248826.41387310607
  ‚Ä¢ std: 18644.1891688982
  ‚Ä¢ flat? False
  ‚Üí primeiros 10 valores: [248148 248599 248404 248548 248845 248504 248846 248600 248640 248619]
  ‚Üí √∫ltimos 10 valores:   [214144 214370 214377 214571 214587 214850 214549 214514 214665 214902]

üìç phase 2 ‚Äî chave: 'PPG head'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 144466
  ‚Ä¢ max: 339767
  ‚Ä¢ me

In [4]:
import numpy as np

# Reload the raw dataset. DATA_PATH must already be defined by an earlier cell.
# NOTE: allow_pickle=True unpickles Python objects, which can execute arbitrary
# code; only use it on files from a trusted source.
raw_array = np.load(DATA_PATH, allow_pickle=True)
data_raw = raw_array.item()

print("Fases detectadas:", list(data_raw.keys()))
print("\n==================== PPG ‚Äî INSPE√á√ÉO COMPLETA ====================\n")

for phase, d in data_raw.items():
    # Collect every key that contains "ppg", ignoring case.
    ppg_keys = [k for k in d.keys() if "ppg" in k.lower()]

    if ppg_keys:
        for key in ppg_keys:
            ppg = np.asarray(d[key])
            # Extremes are computed once and reused for the flatness check.
            ppg_min = ppg.min()
            ppg_max = ppg.max()

            print(f"\nüìç {phase} ‚Äî chave: '{key}'")
            print(f"  ‚Ä¢ shape: {ppg.shape}")
            print(f"  ‚Ä¢ dtype: {ppg.dtype}")

            # Main statistics.
            print(f"  ‚Ä¢ min: {ppg_min}")
            print(f"  ‚Ä¢ max: {ppg_max}")
            print(f"  ‚Ä¢ mean: {ppg.mean()}")
            print(f"  ‚Ä¢ std: {ppg.std()}")

            # Flatness check: a constant signal has min equal to max.
            is_flat = (ppg_min == ppg_max)
            print(f"  ‚Ä¢ flat (min == max)? {is_flat}")

            # Show the first and last ten samples.
            print("  ‚Ä¢ primeiros 10 valores:", ppg[:10])
            print("  ‚Ä¢ √∫ltimos 10 valores:  ", ppg[-10:])
    else:
        print(f"{phase}: ‚ùå Nenhum PPG encontrado")

Fases detectadas: ['phase 0', 'phase 1', 'phase 2', 'phase 3', 'phase 4', 'phase 5']



üìç phase 0 ‚Äî chave: 'PPG wrist'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 68896
  ‚Ä¢ max: 425393
  ‚Ä¢ mean: 186564.55591724536
  ‚Ä¢ std: 29521.75674657334
  ‚Ä¢ flat (min == max)? False
  ‚Ä¢ primeiros 10 valores: [78769 78824 78734 78746 78783 78699 78829 78725 78803 78731]
  ‚Ä¢ √∫ltimos 10 valores:   [201629 201621 201542 201463 201780 201814 201610 201529 201433 201529]

üìç phase 1 ‚Äî chave: 'PPG head'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 164229
  ‚Ä¢ max: 411056
  ‚Ä¢ mean: 248826.41387310607
  ‚Ä¢ std: 18644.1891688982
  ‚Ä¢ flat (min == max)? False
  ‚Ä¢ primeiros 10 valores: [248148 248599 248404 248548 248845 248504 248846 248600 248640 248619]
  ‚Ä¢ √∫ltimos 10 valores:   [214144 214370 214377 214571 214587 214850 214549 214514 214665 214902]

üìç phase 2 ‚Äî chave: 'PPG head'
  ‚Ä¢ shape: (1520640,)
  ‚Ä¢ dtype: int32
  ‚Ä¢ min: 144466
  ‚Ä¢ max: 33