In [1]:
import os
import io
import sys
import zipfile
import tempfile
import subprocess

import librosa
import parselmouth

import math
import numpy as np
import scipy.signal
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

from typing import List, Dict, Tuple, Callable, Any

# Entendimento das bibliotecas

### Tentativa 1

In [2]:
# PRAAT
def extract_features_praat(audio_path):
    """First-attempt extractor, kept for comparison with the corrected version.

    Despite the name, the F0 statistics come from ``librosa.yin``; only jitter,
    shimmer and HNR are requested from Praat through ``parselmouth.praat.call``.

    Args:
        audio_path: Path of the audio file, passed to both ``librosa.load``
            and ``parselmouth.Sound``.

    Returns:
        ``np.ndarray`` of 8 ``float64`` values in the order
        ``[f0_mean, f0_max, f0_min, f0_std, jitter_local, shimmer_local,
        hnr_value, nhr]``. The F0 entries are NaN when no estimate lies
        strictly between 50 and 500 Hz; the four Praat entries are NaN when
        any of the Praat calls raises.
    """
    y, sr = librosa.load(audio_path, sr=None)

    f0 = librosa.yin(y, fmin=50, fmax=800, sr=sr)
    # NOTE(review): this mask assumes unvoiced frames are reported as NaN --
    # confirm that librosa.yin (as opposed to pyin) actually does that.
    f0_voiced = f0[~np.isnan(f0)]

    # The search goes up to 800 Hz but only estimates inside (50, 500) Hz are kept.
    f0_voiced_filtered = f0_voiced[(f0_voiced > 50) & (f0_voiced < 500)]

    if len(f0_voiced_filtered) == 0:
        f0_mean = f0_max = f0_min = f0_std = np.nan
    else:
        f0_mean = np.mean(f0_voiced_filtered)
        f0_max  = np.max(f0_voiced_filtered)
        f0_min  = np.min(f0_voiced_filtered)
        f0_std  = np.std(f0_voiced_filtered)

    snd = parselmouth.Sound(audio_path)
    try:
        # The jitter/shimmer argument lists below are intentionally left as in
        # the first attempt: they are shorter than the ones used by
        # extract_features_praat_corrected, and the recorded run of this
        # notebook shows all four Praat values as N/A.
        pointProcess = parselmouth.praat.call(snd, "To PointProcess (periodic, cc)", 75, 500)
        jitter_local = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 1.3)
        shimmer_local = parselmouth.praat.call([snd, pointProcess], "Get shimmer (local)", 0, 0, 1.3, 1.6)
        hnr = parselmouth.praat.call(snd, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr_value = parselmouth.praat.call(hnr, "Get mean", 0, 0)
        # NHR is derived from the mean HNR in dB; non-positive HNR maps to 0 here.
        nhr = 10 ** (-hnr_value / 10) if hnr_value > 0 else 0
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still stop a long-running extraction.
        jitter_local = shimmer_local = hnr_value = nhr = np.nan

    features = [
        f0_mean,
        f0_max,
        f0_min,
        f0_std,
        jitter_local,
        shimmer_local,
        hnr_value,
        nhr
    ]
    return np.array(features, dtype=np.float64)

# LIBROSA
def extract_features_librosa(audio_path):
    """First-attempt extractor that approximates all eight features with librosa/numpy.

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load`` at
            its native sampling rate (``sr=None``).

    Returns:
        ``np.ndarray`` of 8 ``float64`` values in the order
        ``[f0_mean, f0_max, f0_min, f0_std, jitter_local, shimmer_local,
        hnr_db, nhr]``. Any feature that cannot be computed (no F0 estimate
        inside (50, 500) Hz, fewer than two cycles, zero signal energy) is NaN
        instead of raising.
    """
    y, sr = librosa.load(audio_path, sr=None)

    f0 = librosa.yin(y, fmin=50, fmax=500, sr=sr)
    f0_voiced = f0[~np.isnan(f0)]

    # Bound unconditionally: the jitter and shimmer sections below read it even
    # when there is no voiced frame (previously a NameError on that path).
    f0_voiced_filtered = f0_voiced[(f0_voiced > 50) & (f0_voiced < 500)]

    # Test the filtered array, not the raw one: an empty selection would make
    # np.max / np.min raise ValueError.
    if len(f0_voiced_filtered) == 0:
        f0_mean = f0_max = f0_min = f0_std = np.nan
    else:
        f0_mean = np.mean(f0_voiced_filtered)
        f0_max  = np.max(f0_voiced_filtered)
        f0_min  = np.min(f0_voiced_filtered)
        f0_std  = np.std(f0_voiced_filtered)

    # Jitter: mean absolute difference of consecutive periods over the mean period, in %.
    if len(f0_voiced_filtered) > 1:
        periods = 1 / f0_voiced_filtered
        jitter_local = np.mean(np.abs(np.diff(periods))) / np.mean(periods) * 100
    else:
        jitter_local = np.nan

    # Shimmer: peak-to-peak amplitude of windows whose bounds are derived from
    # the frame index and that frame's F0 (i * sr / f); a rough approximation.
    shimmer_vals = []
    for i, f in enumerate(f0_voiced_filtered[:-1]):
        start = int(i * sr / f)
        end   = int((i+1) * sr / f)
        if end > len(y):
            break
        cycle_amp = np.max(y[start:end]) - np.min(y[start:end])
        shimmer_vals.append(cycle_amp)
    shimmer_vals = np.array(shimmer_vals)
    if len(shimmer_vals) > 1:
        shimmer_local = np.mean(np.abs(np.diff(shimmer_vals))) / np.mean(shimmer_vals) * 100
    else:
        shimmer_local = np.nan

    # HNR: 50 ms frames with a 25 ms hop; "harmonic energy" is the maximum of
    # the non-negative-lag autocorrelation of each frame.
    # NOTE(review): that maximum presumably sits at lag 0 (the frame energy
    # itself), which would explain the ~113 dB recorded for this function --
    # confirm before relying on this value.
    frame_len = int(0.05 * sr)
    hop_len   = int(0.025 * sr)
    frame_peaks = []
    frame_energies = []
    for i in range(0, len(y)-frame_len, hop_len):
        frame = y[i:i+frame_len]
        frame_energies.append(np.sum(frame**2))
        autocorr = np.correlate(frame, frame, mode='full')
        mid = len(autocorr)//2
        frame_peaks.append(np.max(autocorr[mid:]))
    total_energy = np.sum(frame_energies)
    harmonic_energy = np.sum(frame_peaks)
    if total_energy > 0:
        hnr_db = 10 * np.log10(harmonic_energy / (total_energy - harmonic_energy + 1e-8))
        nhr = 10 ** (-hnr_db / 10)
    else:
        hnr_db = np.nan
        nhr = np.nan

    return np.array([
        f0_mean, f0_max, f0_min, f0_std,
        jitter_local, shimmer_local, hnr_db, nhr
    ], dtype=np.float64)

### Teste 1:

In [37]:
# Single recording used to compare the two first-attempt extractors side by side.
file_path = r'C:\Users\joaov_zm1q2wh\python\icassp_challenge\joao\data\ID023_phonationA.wav'

# Row labels, in the same order as the arrays returned by the extractors.
features_names = [
    "Fao(Hz)", "Fhi(Hz)", "Flo(Hz)", "sigma Fo(Hz)",
    "Jitter(%)", "Shimmer(%)", "HNR(dB)", "NHR"
]

try:
    features_librosa = extract_features_librosa(file_path)
    features_praat   = extract_features_praat(file_path)

    print(f"{'Feature':<15} {'Librosa':>12} {'Praat':>12}")
    print("-" * 40)

    for label, lib_value, praat_value in zip(features_names, features_librosa, features_praat):
        # NaN marks a feature that could not be computed; show it as "N/A".
        lib_cell, praat_cell = (
            "N/A" if np.isnan(value) else f"{value:.5f}"
            for value in (lib_value, praat_value)
        )
        print(f"{label:<15} {lib_cell:>12} {praat_cell:>12}")

except Exception as e:
    print(f"Erro: {e}")

Feature              Librosa        Praat
----------------------------------------
Fao(Hz)            181.41720    181.41720
Fhi(Hz)            243.76916    243.76916
Flo(Hz)             63.44466     63.44466
sigma Fo(Hz)        33.77668     33.77668
Jitter(%)            4.09915          N/A
Shimmer(%)          54.73909          N/A
HNR(dB)            113.74284          N/A
NHR                  0.00000          N/A


Peguei alguns valores próximos na base do problema de Parkinson que achei úteis para servir como referência:

| Type           | Value
| -------------- | ---------
| Fo(Hz)         | 119.992
| Fhi(Hz)        | 157.302
| Flo(Hz)        | 74.997
| Jitter(%)      | 0.00784
| Jitter:DDP     | 0.01109
| Shimmer(%)     | 0.04374
| Shimmer(dB)    | 0.426
| NHR            | 0.02211
| HNR            | 21.033

Já deu para perceber que metade nem rodou e a outra metade está só a desgraça.

"Ah João, tenha paciência." Paciência o quê, rapaz, te orienta, cabra safado, isso aqui é só a peste, castigo divino.

Depois de tanto ler a API do Praat e a documentação da librosa, percebi que o problema com os cálculos originais residia no seguinte:
    
| Feature | Problema Original | Solução Aplicada |
| :--- | :--- | :--- |
| **F0 (Praat)** | Acessar valores de Pitch incorretamente. | Uso de `pitch.selected_array['frequency']` após `snd.to_pitch()` para extrair os valores de F0. |
| **Jitter (Praat)** | Chamada incompleta da função `parselmouth.praat.call` para `Get jitter (local)`. | Uso da chamada completa com os 5 argumentos esperados pelo Praat: `0, 0, 0.0001, 0.02, 1.3`. |
| **Shimmer (Praat)** | Chamada incompleta da função `parselmouth.praat.call` para `Get shimmer (local)`. | Uso da chamada completa com os 6 argumentos esperados pelo Praat: `0, 0, 0.0001, 0.02, 1.3, 1.6`. |
| **HNR (Praat)** | Chamada incorreta da função `to_harmonicity()`. | Uso da chamada `parselmouth.praat.call(snd, "To Harmonicity (cc)", 0.01, 75.0, 0.1, 1.0)` com os 4 argumentos esperados. |
| **F0 (Librosa)** | Inconsistência nos limites de `fmax` (800 vs 500 Hz). | Ajuste para `fmax=600` para consistência com o Praat e manutenção da filtragem original (50-500 Hz). |
| **Jitter/Shimmer/HNR (Librosa)** | As implementações manuais de Shimmer e HNR são aproximações grosseiras e incorretas em relação às definições do Praat. | Mantidas as implementações originais para fins de comparação, mas com a ressalva de que **não fornecem a exatidão** desejada. O Praat é o padrão de referência para estas features. |

### Tentativa 2

In [3]:
### PRAAT
def extract_features_praat_corrected(audio_path):
    """Extract 8 voice features from one recording using Praat (via parselmouth) only.

    Args:
        audio_path: Path of the audio file, opened with ``parselmouth.Sound``.

    Returns:
        ``np.ndarray`` of 8 ``float64`` values in the order
        ``[f0_mean, f0_max, f0_min, f0_std, jitter_local, shimmer_local,
        hnr, nhr]``. All eight are NaN when the pitch analysis raises; the
        last four are NaN when any jitter/shimmer/harmonicity call raises.
        Both failures are reported on stderr.
    """
    sound = parselmouth.Sound(audio_path)

    try:
        pitch_track = sound.to_pitch(pitch_floor=75.0, pitch_ceiling=600.0)
    except Exception as e:
        print(f"Erro ao calcular Pitch (to_pitch): {e}", file=sys.stderr)
        return np.full(8, np.nan, dtype=np.float64)

    # Frames with frequency 0 are dropped, then only (50, 500) Hz is kept.
    voiced_f0 = np.array(pitch_track.selected_array['frequency'], dtype=np.float64)
    voiced_f0 = voiced_f0[voiced_f0 > 0]
    in_band_f0 = voiced_f0[(voiced_f0 > 50) & (voiced_f0 < 500)]

    if len(in_band_f0) > 0:
        f0_stats = [
            np.mean(in_band_f0),
            np.max(in_band_f0),
            np.min(in_band_f0),
            np.std(in_band_f0),
        ]
    else:
        f0_stats = [np.nan, np.nan, np.nan, np.nan]

    try:
        praat_call = parselmouth.praat.call

        pulses = praat_call(sound, "To PointProcess (periodic, cc)", 75.0, 600.0)
        jitter = praat_call(pulses, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
        shimmer = praat_call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        harmonicity = praat_call(sound, "To Harmonicity (cc)", 0.01, 75.0, 0.1, 1.0)
        mean_hnr = praat_call(harmonicity, "Get mean", 0, 0)

        # NHR is only derived from a strictly positive mean HNR (dB).
        if mean_hnr > 0:
            noise_ratio = 10 ** (-mean_hnr / 10)
        else:
            noise_ratio = np.nan

        perturbation = [jitter, shimmer, mean_hnr, noise_ratio]
    except Exception as e:
        # One failing Praat call invalidates the whole perturbation group.
        print(f"Erro na extraÃ§Ã£o Praat (Jitter/Shimmer/HNR): {e}", file=sys.stderr)
        perturbation = [np.nan, np.nan, np.nan, np.nan]

    return np.array(f0_stats + perturbation, dtype=np.float64)

# LIBROSA
def extract_features_librosa_corrected(audio_path):
    """Second-attempt librosa/numpy extractor, kept for comparison with Praat.

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load`` at
            its native sampling rate.

    Returns:
        ``np.ndarray`` of 8 ``float64`` values in the order
        ``[f0_mean, f0_max, f0_min, f0_std, jitter_local, shimmer_local,
        hnr_db, nhr]``; features that cannot be computed are NaN.
    """
    y, sr = librosa.load(audio_path, sr=None)

    pitch_track = librosa.yin(y, fmin=75, fmax=600, sr=sr)
    voiced = pitch_track[~np.isnan(pitch_track)]
    in_band = voiced[(voiced > 50) & (voiced < 500)]
    n_in_band = len(in_band)

    if n_in_band == 0:
        f0_mean = f0_max = f0_min = f0_std = np.nan
    else:
        f0_mean = np.mean(in_band)
        f0_max  = np.max(in_band)
        f0_min  = np.min(in_band)
        f0_std  = np.std(in_band)

    # Jitter: mean absolute period-to-period difference over the mean period, in %.
    jitter_local = np.nan
    if n_in_band > 1:
        periods = 1 / in_band
        jitter_local = np.mean(np.abs(np.diff(periods))) / np.mean(periods) * 100

    # Shimmer: relative variation of the peak-to-peak amplitude of consecutive
    # windows; stays NaN unless at least two windows fit inside the signal.
    shimmer_local = np.nan
    try:
        if n_in_band > 1:
            amplitudes = []
            for idx, freq in enumerate(in_band[:-1]):
                lo = int(idx * sr / freq)
                hi = int((idx + 1) * sr / freq)
                if hi > len(y):
                    break
                window = y[lo:hi]
                amplitudes.append(np.max(window) - np.min(window))

            amplitudes = np.array(amplitudes)
            if len(amplitudes) > 1:
                shimmer_local = np.mean(np.abs(np.diff(amplitudes))) / np.mean(amplitudes) * 100
    except Exception:
        shimmer_local = np.nan

    # HNR from 50 ms frames (25 ms hop); only reported when the summed
    # autocorrelation peaks are strictly below the summed frame energy.
    hnr_db = np.nan
    nhr = np.nan
    try:
        frame_len = int(0.05 * sr)
        hop_len   = int(0.025 * sr)
        frame_energies = []
        frame_peaks = []
        for offset in range(0, len(y) - frame_len, hop_len):
            frame = y[offset:offset + frame_len]
            frame_energies.append(np.sum(frame**2))
            autocorr = np.correlate(frame, frame, mode='full')
            frame_peaks.append(np.max(autocorr[len(autocorr)//2:]))

        total_energy_sum = np.sum(frame_energies)
        harmonic_energy_sum = np.sum(frame_peaks)

        if total_energy_sum > 0 and harmonic_energy_sum < total_energy_sum:
            hnr_db = 10 * np.log10(harmonic_energy_sum / (total_energy_sum - harmonic_energy_sum + 1e-8))
            nhr = 10 ** (-hnr_db / 10)
    except Exception:
        hnr_db = np.nan
        nhr = np.nan

    return np.array(
        [f0_mean, f0_max, f0_min, f0_std, jitter_local, shimmer_local, hnr_db, nhr],
        dtype=np.float64,
    )

### TESTE 2

In [49]:
# Same recording as the first test, now run through the corrected extractors.
audio_path = r'C:\Users\joaov_zm1q2wh\python\icassp_challenge\joao\data\ID023_phonationA.wav'

# Row labels, in the same order as the arrays returned by the extractors.
features_names = [
    "Fao(Hz)", "Fhi(Hz)", "Flo(Hz)", "sigma Fo(Hz)",
    "Jitter(%)", "Shimmer(%)", "HNR(dB)", "NHR"
]

try:
    features_librosa_corr = extract_features_librosa_corrected(audio_path)
    features_praat_corr   = extract_features_praat_corrected(audio_path)

    print(f"{'Feature':<15} {'Librosa':>18} {'Praat':>15}")
    print("-" * 50)

    for label, lib_value, praat_value in zip(features_names, features_librosa_corr, features_praat_corr):
        # NaN marks a feature that could not be computed; show it as "N/A".
        lib_cell, praat_cell = (
            "N/A" if np.isnan(value) else f"{value:.5f}"
            for value in (lib_value, praat_value)
        )
        print(f"{label:<15} {lib_cell:>18} {praat_cell:>15}")

except Exception as e:
    print(f"Erro na execuÃ§Ã£o do teste corrigido: {e}", file=sys.stderr)

Feature                    Librosa           Praat
--------------------------------------------------
Fao(Hz)                  182.14816       192.03097
Fhi(Hz)                  243.76916       217.10861
Flo(Hz)                   79.62720       149.49022
sigma Fo(Hz)              32.59501         4.79336
Jitter(%)                  2.09470         0.00368
Shimmer(%)                28.54700         0.02479
HNR(dB)                        N/A        22.78959
NHR                            N/A         0.00526


A comparação demonstra que a função corrigida do Praat fornece valores que PARECEM ser coerentes para todas as features, enquanto a implementação do Librosa falha em calcular `HNR` e fornece valores de `Jitter` e `Shimmer` significativamente diferentes, confirmando a necessidade de avaliar melhor os dados que esperamos de fato. Seguem novamente os valores usados como base:

| Type           | Value
| -------------- | ---------
| Fo(Hz)         | 119.992
| Fhi(Hz)        | 157.302
| Flo(Hz)        | 74.997
| Jitter(%)      | 0.00784
| Jitter:DDP     | 0.01109
| Shimmer(%)     | 0.04374
| Shimmer(dB)    | 0.426
| NHR            | 0.02211
| HNR            | 21.033

### Extração

Com base na avaliação anterior, vou seguir com a função do Praat e ver o que conseguimos...

In [None]:
# Extract the Praat features of every training recording of one task folder
# straight from the challenge ZIP and save them, joined with the metadata, as CSV.
zip_path = r"C:\Users\joaov_zm1q2wh\python\icassp_challenge\joao\data\SAND_Challenge_task1_dataset.zip"
target_folder = "task1/training/rhythmTA"
metadata_path = "task1/sand_task_1.xlsx"

# Column names for the 8 values returned by extract_features_praat_corrected.
features_names = [
    "F0_mean_Hz", 
    "F0_max_Hz", 
    "F0_min_Hz", 
    "F0_std_Hz",
    "Jitter_percent", 
    "Shimmer_percent", 
    "HNR_dB", 
    "NHR"
]

data = []

with zipfile.ZipFile(zip_path, 'r') as zipf:
    with zipf.open(metadata_path) as meta_file:
        metadata_df = pd.read_excel(meta_file)

    for file in zipf.namelist():
        if not (file.startswith(target_folder) and file.endswith(".wav")):
            continue

        tmp_path = None
        try:
            # parselmouth needs a real file path, so the archive member is
            # copied to a temporary WAV first.
            with zipf.open(file) as audio_file:
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                    tmp_path = tmp.name
                    tmp.write(audio_file.read())

            features = extract_features_praat_corrected(tmp_path)

            # File names look like "<ID>_<task>.wav"; the part before the first
            # underscore is matched against the metadata "ID" column.
            filename = os.path.basename(file)
            sample_id = filename.split("_")[0]

            meta_row = metadata_df.loc[metadata_df["ID"] == sample_id]
            if not meta_row.empty:
                age  = int(meta_row["Age"].values[0])
                sex  = meta_row["Sex"].values[0]
                clas = int(meta_row["Class"].values[0])
            else:
                age = sex = clas = np.nan

            row = [sample_id, age, sex, clas] + list(features)
            data.append(row)

        except Exception as e:
            print(f"Erro no arquivo {file}: {e}", file=sys.stderr)

        finally:
            # Always remove the temporary WAV, including when extraction fails
            # (it used to be left behind on any exception).
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError as cleanup_error:
                    print(f"Erro ao remover {tmp_path}: {cleanup_error}", file=sys.stderr)


columns = ["ID", "Age", "Sex", "Class"] + features_names
df = pd.DataFrame(data, columns=columns)

output_csv = r"C:\Users\joaov_zm1q2wh\python\icassp_challenge\joao\data\features_praat_dataset.csv"
df.to_csv(output_csv, index=False, encoding="utf-8-sig")

# The file must have 272 rows, not counting the header.
print(f"\nâœ… ExtraÃ§Ã£o concluÃ­da! CSV salvo em:\n{output_csv}")
print(df.head())


âœ… ExtraÃ§Ã£o concluÃ­da! CSV salvo em:
C:\Users\joaov_zm1q2wh\python\icassp_challenge\joao\data\features_praat_dataset.csv
      ID  Age Sex  Class  F0_mean_Hz   F0_max_Hz  F0_min_Hz  F0_std_Hz  \
0  ID302   76   F      5  203.650991  257.013782  91.565732  11.491251   
1  ID275   41   F      5  158.636831  223.278832  76.377658  24.308702   
2  ID227   81   F      3  174.898673  317.496039  74.809947  63.765307   
3  ID092   65   M      3  113.833209  333.168834  81.697761  17.878683   
4  ID038   69   F      4   91.013141  127.638661  74.771950  13.778429   

   Jitter_percent  Shimmer_percent     HNR_dB       NHR  
0        0.008911         0.062493  15.608192  0.027490  
1        0.008113         0.086885  12.516582  0.056020  
2        0.023304         0.093255  10.397849  0.091246  
3        0.024890         0.098045   9.616296  0.109237  
4        0.075140         0.220215   0.203258  0.954277  


# Uso oficial

In [23]:
# Archive with the test recordings, opened by run_pipeline with zipfile.ZipFile.
ZIP_PATH = r'C:\Users\joaov_zm1q2wh\python\icassp_challenge\data\SAND_Challenge_task1_test_dataset.zip'
# Spreadsheet read by run_pipeline with pd.read_excel (columns ID, Age, Sex, Class are used).
METADATA_PATH = r'C:\Users\joaov_zm1q2wh\python\icassp_challenge\data\task1\sand_task1_test.xlsx'
# Directory where run_pipeline writes its CSV files.
OUTPUT_DIR = r'C:\Users\joaov_zm1q2wh\python\icassp_challenge\data'
# Prefix inside the archive; each entry of TARGET_FOLDERS is appended to it.
BASE_FOLDER = 'task1/test/'

# One sub-folder per vocal task; the folder name is also used as the column suffix.
TARGET_FOLDERS = [
    "phonationA", "phonationE", "phonationI", "phonationO", "phonationU",
    "rhythmKA", "rhythmPA", "rhythmTA"
]

# Names of the 23 values returned by extract_features_base, in the same order.
# RPDE, DFA and D2 are never computed there (always NaN); spread1/spread2 hold
# the mean spectral centroid and mean spectral bandwidth.
FEATURES_BASE_NAMES = [
    "Fo_mean_Hz", "Fhi_max_Hz", "Flo_min_Hz", "F0_std_Hz",
    "Jitter_percent", "Jitter_Abs", "RAP", "PPQ", "DDP",
    "Shimmer_local", "Shimmer_dB", "Shimmer_APQ3", "Shimmer_APQ5", "Shimmer_APQ11", "Shimmer_DDA",
    "NHR", "HNR",
    "RPDE", "DFA", "spread1", "spread2", "D2", "PPE"
]

# Leading (non-feature) columns of every output table.
METADATA_COLUMNS = ['ID', 'Age', 'Sex', 'Class']

# ==============================================================================
# FUNÃ‡Ã•ES DE EXTRAÃ‡ÃƒO
# ==============================================================================

def create_sound_from_path(audio_path: str, normalize_signal_minus_one_to_one: bool = False) -> parselmouth.Sound:
    """
    Build a parselmouth.Sound for the given audio file.

    With the flag off, the file is opened directly by parselmouth. With the
    flag on, the samples are loaded with librosa at the native sampling rate
    and divided by their peak absolute value, so the signal spans [-1, 1]; an
    all-zero signal stays all zeros.
    """
    if not normalize_signal_minus_one_to_one:
        return parselmouth.Sound(audio_path)

    samples, sample_rate = librosa.load(audio_path, sr=None)
    peak = np.max(np.abs(samples))

    # Guard against dividing by zero on a silent recording.
    scaled = samples / peak if peak != 0 else np.zeros_like(samples)

    return parselmouth.Sound(scaled, sampling_frequency=sample_rate)

def extract_features_base(audio_path: str, normalize_signal: bool = False) -> np.ndarray:
    """
    Extract the 23 voice features listed in FEATURES_BASE_NAMES from one recording.

    Args:
        audio_path: Path of the audio file.
        normalize_signal: When True, the signal is peak-normalised to [-1, 1]
            before both the Praat and the librosa based measurements.

    Returns:
        float64 array with 23 values in the order of FEATURES_BASE_NAMES.
        Extraction is best effort: each of the three stages (pitch, Praat
        perturbation measures, spectral/PPE) that fails leaves its features as
        NaN and reports the failure on stderr instead of raising. RPDE, DFA
        and D2 are not implemented and are always NaN.
    """
    try:
        snd = create_sound_from_path(audio_path, normalize_signal)
    except Exception as e:
        print(f"Erro ao criar Sound em {audio_path}: {e}", file=sys.stderr)
        return np.array([np.nan] * len(FEATURES_BASE_NAMES), dtype=np.float64)

    # Every feature starts as NaN so that a failing stage simply leaves a gap.
    Fo = Fhi = Flo = F0_std = np.nan
    jitter_percent = jitter_abs = rap = ppq = ddp = np.nan
    shimmer_local = shimmer_db = apq3 = apq5 = apq11 = dda = np.nan
    HNR = NHR = np.nan
    rpde = dfa = spread1 = spread2 = d2 = ppe = np.nan

    # Voiced F0 values; stays empty when the pitch stage fails, which the PPE
    # computation below relies on.
    f0_values = np.array([], dtype=np.float64)

    # --- Stage 1: F0 statistics from the Praat pitch track -----------------
    try:
        pitch = snd.to_pitch(pitch_floor=75.0, pitch_ceiling=600.0)
        voiced = np.array(pitch.selected_array['frequency'], dtype=np.float64)
        f0_values = voiced[voiced > 0]  # frequency 0 marks frames without pitch

        if len(f0_values) > 0:
            Fo = np.mean(f0_values)
            Fhi = np.max(f0_values)
            Flo = np.min(f0_values)
            F0_std = np.std(f0_values)
    except Exception as e:
        print(f"[AVISO] Falha ao extrair F0 de {audio_path}: {e}", file=sys.stderr)

    # --- Stage 2: jitter, shimmer and harmonicity from Praat ---------------
    # A failing call aborts the stage: values assigned before it are kept, the
    # remaining ones stay NaN.
    try:
        pointProcess = parselmouth.praat.call(snd, "To PointProcess (periodic, cc)", 75.0, 600.0)

        # Only the local jitter is scaled by 100 (it is named "percent").
        jitter_percent = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3) * 100
        jitter_abs = parselmouth.praat.call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
        rap = parselmouth.praat.call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
        ppq = parselmouth.praat.call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
        ddp = parselmouth.praat.call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)

        shimmer_local = parselmouth.praat.call([snd, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
        shimmer_db = parselmouth.praat.call([snd, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
        apq3 = parselmouth.praat.call([snd, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
        apq5 = parselmouth.praat.call([snd, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
        apq11 = parselmouth.praat.call([snd, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
        dda = parselmouth.praat.call([snd, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        harmonicity = parselmouth.praat.call(snd, "To Harmonicity (cc)", 0.01, 75.0, 0.1, 1.0)
        HNR = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)
        # NHR is derived from the mean HNR (dB) and only for a positive HNR.
        NHR = 10 ** (-HNR / 10) if HNR > 0 else np.nan

    except Exception as e:
        print(f"[AVISO] Falha nas medidas do Praat (jitter/shimmer/HNR) em {audio_path}: {e}", file=sys.stderr)

    # --- Stage 3: spectral descriptors (librosa) and pitch-period entropy --
    try:
        y, sr = librosa.load(audio_path, sr=None)
        if normalize_signal:
            abs_max = np.max(np.abs(y))
            if abs_max != 0:
                y = y / abs_max
            else:
                y = np.zeros_like(y)

        # spread1 / spread2 are the mean spectral centroid / bandwidth of the STFT magnitude.
        S = np.abs(librosa.stft(y))
        centroid = librosa.feature.spectral_centroid(S=S)[0]
        bandwidth = librosa.feature.spectral_bandwidth(S=S)[0]
        spread1 = np.mean(centroid)
        spread2 = np.mean(bandwidth)

        if len(f0_values) > 0:
            pitch_periods = 1.0 / f0_values
            # NOTE(review): density=True yields densities, not probabilities
            # that sum to 1, so this is not a Shannon entropy in the strict
            # sense -- kept as is so existing feature files stay comparable.
            prob, _ = np.histogram(pitch_periods, bins='auto', density=True)
            prob = prob[prob > 0]
            ppe = -np.sum(prob * np.log2(prob)) if len(prob) > 0 else np.nan
        else:
            ppe = np.nan

    except Exception as e:
        print(f"[AVISO] Falha nas features espectrais/PPE em {audio_path}: {e}", file=sys.stderr)

    features = [
        Fo, Fhi, Flo, F0_std,
        jitter_percent, jitter_abs, rap, ppq, ddp,
        shimmer_local, shimmer_db, apq3, apq5, apq11, dda,
        NHR, HNR,
        rpde, dfa, spread1, spread2, d2, ppe
    ]

    return np.array(features, dtype=np.float64)

def extract_features_original_signal(audio_path: str) -> np.ndarray:
    """Run extract_features_base on the recording as stored (no signal normalisation)."""
    features = extract_features_base(audio_path, normalize_signal=False)
    return features

def extract_features_signal_normalized(audio_path: str) -> np.ndarray:
    """Run extract_features_base with the signal normalisation flag switched on."""
    features = extract_features_base(audio_path, normalize_signal=True)
    return features

def normalize_features(df: pd.DataFrame, method: str, feature_cols: List[str]) -> pd.DataFrame:
    """
    Return a copy of ``df`` with the given feature columns normalised column by column.

    Args:
        df: Input table; it is never modified.
        method: 'z_score' ((x - mean) / std, using pandas' sample std) or
            'min_max_0_1' ((x - min) / (max - min)). A falsy value (None, '')
            means "no normalisation" and returns a plain copy.
        feature_cols: Columns to normalise; all other columns are left as is.

    Returns:
        The normalised copy. Missing values stay NaN. A column without spread
        (constant, or with a single valid value so that its std is undefined)
        becomes 0.0 wherever it has a value.

    Raises:
        ValueError: if ``method`` is a non-empty value other than the two
            supported names (previously the data was returned unnormalised
            without any warning).
    """
    df_normalized = df.copy()

    if not method:
        return df_normalized

    if method not in ('z_score', 'min_max_0_1'):
        raise ValueError(
            f"Valor de 'method' desconhecido: {method!r} (use 'z_score' ou 'min_max_0_1')"
        )

    for column in feature_cols:
        x = df_normalized[column]

        # Only the statistics the chosen method needs are computed.
        if method == 'z_score':
            offset = x.mean()
            scale = x.std()
        else:
            offset = x.min()
            scale = x.max() - offset

        # Degenerate column: dividing would give NaN/inf, so map values to 0.0
        # while keeping the missing entries missing.
        if pd.isna(scale) or scale == 0:
            df_normalized[column] = np.where(pd.notna(x), 0.0, np.nan)
            continue

        df_normalized[column] = (x - offset) / scale

    return df_normalized

# ==============================================================================
# PIPELINE (COM CORREÃ‡Ã•ES)
# ==============================================================================

def run_pipeline(
    target_folders: List[str],
    extract_func: Callable[[str], np.ndarray],
    feature_norm_method: str,
    output_suffix: str
) -> pd.DataFrame:
    """
    Extract features for every task folder, merge them per subject, normalise and save as CSV.

    Args:
        target_folders: Sub-folders of BASE_FOLDER inside the ZIP; each name is
            also appended to the feature names to build the column names.
        extract_func: Called with the path of a temporary WAV file; must return
            one value per entry of FEATURES_BASE_NAMES.
        feature_norm_method: Passed to normalize_features; a falsy value skips
            the normalisation.
        output_suffix: Used to build the output file name inside OUTPUT_DIR.

    Returns:
        The (optionally normalised) table with one row per subject ID and the
        columns METADATA_COLUMNS followed by every "<feature>_<folder>" column.
        An empty table with those columns is returned when the ZIP or the
        metadata cannot be read; in that case nothing is written to disk.
    """
    data_by_id: Dict[int, Dict[str, Any]] = {}

    # Column names are built once per folder and reused in the loops below.
    names_by_folder = {
        folder: [f"{name}_{folder}" for name in FEATURES_BASE_NAMES]
        for folder in target_folders
    }
    all_features_names = [
        name for folder in target_folders for name in names_by_folder[folder]
    ]

    final_columns = METADATA_COLUMNS + all_features_names

    try:
        metadata_df = pd.read_excel(METADATA_PATH)

        # Metadata IDs look like 'ID004': keep only the digits and compare as int.
        metadata_df["ID"] = metadata_df["ID"].astype(str).str.extract(r'(\d+)', expand=False).astype(int)

        with zipfile.ZipFile(ZIP_PATH, 'r') as zipf:
            # The member list does not change, so it is read once.
            archive_members = zipf.namelist()

            for folder in target_folders:
                current_target_folder = BASE_FOLDER + folder
                prefixed_feature_names = names_by_folder[folder]

                print(f" -> Processando pasta: {current_target_folder}")

                matches = [f for f in archive_members if f.startswith(current_target_folder) and f.endswith('.wav')]
                print(f" Â  [DEBUG] {len(matches)} arquivos encontrados.")
                if matches:
                    print(f" Â  Exemplo: {matches[:3]}")

                for file in matches:
                    filename = os.path.basename(file)
                    sample_id = filename.split("_")[0]

                    # Reset per file so the error handler never sees the ID of
                    # the previous file (or an unbound name on the first one).
                    sample_id_int = None
                    tmp_path = None

                    try:
                        # Keep only the digits of the file-name prefix ('ID004' -> 4).
                        sample_id_clean = ''.join(c for c in sample_id if c.isdigit())

                        if not sample_id_clean:
                            print(f" Â  [AVISO] ID invÃ¡lido em {filename}")
                            continue
                        sample_id_int = int(sample_id_clean)

                        # The extractors need a real file path, so the archive
                        # member is copied to a temporary WAV.
                        with zipf.open(file) as audio_file:
                            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
                                tmp_path = tmp.name
                                tmp.write(audio_file.read())

                        features = extract_func(tmp_path)

                        # Metadata is looked up only the first time an ID is seen.
                        if sample_id_int not in data_by_id:
                            meta_row = metadata_df.loc[metadata_df["ID"] == sample_id_int]
                            if not meta_row.empty:
                                age = int(meta_row["Age"].values[0])
                                sex = str(meta_row["Sex"].values[0])
                                # 'Class' may be empty (NaN) in the test metadata.
                                class_value = meta_row.loc[meta_row.index[0], "Class"]
                                clas = int(class_value) if pd.notna(class_value) else np.nan
                                data_by_id[sample_id_int] = {'ID': sample_id_int, 'Age': age, 'Sex': sex, 'Class': clas}
                            else:
                                # Recordings without a metadata row are skipped.
                                print(f" Â  [AVISO] ID {sample_id_int} nÃ£o encontrado no metadata.")
                                continue

                        if len(features) != len(prefixed_feature_names):
                            raise ValueError(f"Contagem de features incorreta para {file}. Esperado {len(prefixed_feature_names)}, Recebido {len(features)}.")

                        for name, value in zip(prefixed_feature_names, features):
                            data_by_id[sample_id_int][name] = value

                    except Exception as e:
                        # A failure in one file must not stop the whole run.
                        print(f"[ERRO] Falha ao processar {file}: {e}", file=sys.stderr)
                        # If the subject is already registered, make sure this
                        # folder's columns exist (as NaN) for it.
                        if sample_id_int is not None and sample_id_int in data_by_id:
                            for name in prefixed_feature_names:
                                data_by_id[sample_id_int].setdefault(name, np.nan)

                    finally:
                        # Always remove the temporary WAV, also when extraction
                        # raised or the file was skipped.
                        if tmp_path is not None:
                            try:
                                os.remove(tmp_path)
                            except OSError:
                                # Best-effort cleanup: a leftover temp file must
                                # not discard features that were extracted.
                                pass

    except zipfile.BadZipFile:
        print(f"\nERRO: O arquivo ZIP nÃ£o foi encontrado ou estÃ¡ corrompido em {ZIP_PATH}", file=sys.stderr)
        return pd.DataFrame(columns=final_columns)
    except Exception as e:
        # More serious errors, e.g. I/O failures or unreadable metadata.
        print(f"\nERRO FATAL DURANTE O PROCESSAMENTO: {e}", file=sys.stderr)
        return pd.DataFrame(columns=final_columns)

    # Build the final table; reindex fixes the column order and adds any
    # feature column that no recording produced.
    df = pd.DataFrame(list(data_by_id.values()))
    df = df.reindex(columns=final_columns)

    print(f"\nâœ… ExtraÃ§Ã£o concluÃ­da. Total de amostras: {len(df)}")

    # NOTE(review): the normalisation statistics come from this same table;
    # confirm that this matches how the training features were normalised.
    if feature_norm_method:
        df_normalized = normalize_features(df, feature_norm_method, all_features_names)
    else:
        df_normalized = df.copy()

    # Output file name: runs on the [-1, 1] normalised signal get their own prefix.
    if 'signal_norm' in output_suffix:
        output_filename = f"features_signal_norm_-1_1_{output_suffix.replace('signal_norm_', '')}.csv"
    else:
        output_filename = f"features_{output_suffix}.csv"

    output_filepath = os.path.join(OUTPUT_DIR, output_filename)
    df_normalized.to_csv(output_filepath, index=False, encoding="utf-8-sig")
    print(f"ðŸ’¾ Resultado salvo em: {output_filepath}\n")

    return df_normalized

# ==============================================================================
# EXECUÃ‡ÃƒO DA PIPELINE
# ==============================================================================

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    print(f"DiretÃ³rio de saÃ­da criado: {OUTPUT_DIR}")

# One entry per output file: (banner, extractor, feature normalisation, file suffix).
pipeline_runs = [
    ("\nFeatures Brutas",
     extract_features_original_signal, None, 'raw'),
    ("\nOriginal Signal -> Z-Score Normalization (MÃ©dia/Desvio PadrÃ£o)",
     extract_features_original_signal, 'z_score', 'z_score'),
    ("\nOriginal Signal -> Min-Max [0, 1] Normalization",
     extract_features_original_signal, 'min_max_0_1', 'min_max_0_1'),
    ("\nSignal Normalized [-1, 1] -> Z-Score Normalization (MÃ©dia/Desvio PadrÃ£o)",
     extract_features_signal_normalized, 'z_score', 'signal_norm_z_score'),
    ("\nSignal Normalized [-1, 1] -> Min-Max [0, 1] Normalization",
     extract_features_signal_normalized, 'min_max_0_1', 'signal_norm_min_max_0_1'),
]

for banner, extractor, norm_method, suffix in pipeline_runs:
    print(banner)
    last_pipeline_result = run_pipeline(
        target_folders=TARGET_FOLDERS,
        extract_func=extractor,
        feature_norm_method=norm_method,
        output_suffix=suffix
    )

# Keep the table of the last run as the cell's displayed value.
last_pipeline_result


Features Brutas
 -> Processando pasta: task1/test/phonationA
 Â  [DEBUG] 67 arquivos encontrados.
 Â  Exemplo: ['task1/test/phonationA/ID161_phonationA.wav', 'task1/test/phonationA/ID062_phonationA.wav', 'task1/test/phonationA/ID334_phonationA.wav']
 -> Processando pasta: task1/test/phonationE
 Â  [DEBUG] 67 arquivos encontrados.
 Â  Exemplo: ['task1/test/phonationE/ID089_phonationE.wav', 'task1/test/phonationE/ID236_phonationE.wav', 'task1/test/phonationE/ID071_phonationE.wav']
 -> Processando pasta: task1/test/phonationI
 Â  [DEBUG] 67 arquivos encontrados.
 Â  Exemplo: ['task1/test/phonationI/ID309_phonationI.wav', 'task1/test/phonationI/ID125_phonationI.wav', 'task1/test/phonationI/ID045_phonationI.wav']
 -> Processando pasta: task1/test/phonationO
 Â  [DEBUG] 67 arquivos encontrados.
 Â  Exemplo: ['task1/test/phonationO/ID307_phonationO.wav', 'task1/test/phonationO/ID211_phonationO.wav', 'task1/test/phonationO/ID044_phonationO.wav']
 -> Processando pasta: task1/test/phonationU
 Â

Unnamed: 0,ID,Age,Sex,Class,Fo_mean_Hz_phonationA,Fhi_max_Hz_phonationA,Flo_min_Hz_phonationA,F0_std_Hz_phonationA,Jitter_percent_phonationA,Jitter_Abs_phonationA,...,Shimmer_APQ11_rhythmTA,Shimmer_DDA_rhythmTA,NHR_rhythmTA,HNR_rhythmTA,RPDE_rhythmTA,DFA_rhythmTA,spread1_rhythmTA,spread2_rhythmTA,D2_rhythmTA,PPE_rhythmTA
0,161,69,M,,0.339580,0.119616,0.180196,0.014765,0.039905,0.041691,...,0.316861,0.125251,0.129854,0.576453,,,0.595530,0.863219,,0.780583
1,62,73,F,,0.511371,0.200818,0.050891,0.118295,0.084787,0.062018,...,0.015248,0.000000,0.000000,1.000000,,,0.576569,0.616716,,0.701009
2,334,57,M,,0.288534,0.104377,0.209982,0.014593,0.569477,0.477239,...,0.094755,0.136614,0.145613,0.548902,,,0.456904,0.364233,,0.759052
3,321,70,M,,0.355199,0.165500,0.048612,0.051382,0.436573,0.342646,...,0.380078,0.431434,0.145692,0.548770,,,0.383323,0.506717,,0.958155
4,196,75,M,,0.242119,0.338939,0.022112,0.091063,0.158671,0.150291,...,0.286091,0.301065,0.098167,0.640095,,,0.305071,0.427129,,0.948020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,32,59,F,,0.716109,0.268739,0.012970,0.150981,0.071919,0.042384,...,0.005824,0.087087,0.018645,0.894236,,,0.380704,0.426162,,0.907730
63,43,48,M,,0.141766,0.056195,0.129043,0.012681,0.113736,0.129121,...,0.059316,0.225356,0.101367,0.633072,,,0.275423,0.616047,,0.827616
64,285,66,M,,0.005246,0.129047,0.007842,0.079333,0.692558,0.843799,...,0.355410,0.410839,0.595581,0.158594,,,0.424756,0.729585,,0.971019
65,298,60,F,,0.785957,0.404597,0.340861,0.135450,0.183314,0.098602,...,0.241234,0.413126,0.051108,0.765967,,,0.405564,0.237607,,0.930883
