In [1]:
# Prerequisite: the antropy package must be installed (e.g. run `%pip install antropy` once).

In [2]:
import parselmouth
import numpy as np
import librosa
import nolds
import pandas as pd
import joblib
import antropy as ant
from scipy.stats import variation
import os

In [3]:
def extract_features(audio_path):
    """Extract ten acoustic voice features from a single audio recording.

    The recording is read twice: once through parselmouth for the pitch and
    harmonicity tracks, and once through librosa for the raw waveform used by
    the DFA and spectral-bandwidth measures.

    Args:
        audio_path: Path to an audio file readable by both parselmouth and
            librosa.

    Returns:
        dict | None: Mapping from feature name ("MDVP:Fo(Hz)", "MDVP:Fhi(Hz)",
        "MDVP:Flo(Hz)", "NHR", "HNR", "RPDE", "DFA", "Spread2", "D2", "PPE")
        to a scalar value. ``None`` is returned when the pitch track contains
        no voiced frame, because none of the pitch-based features is defined
        in that case (previously this situation raised ``ValueError`` from
        ``np.min`` on an empty array).
    """
    sound = parselmouth.Sound(audio_path)

    # 1. MDVP:Fo(Hz), MDVP:Fhi(Hz), MDVP:Flo(Hz)
    pitch = sound.to_pitch()
    fo = pitch.selected_array['frequency']
    # Only strictly positive entries are treated as voiced frames.
    voiced = fo[fo > 0]
    if voiced.size == 0:
        # Nothing to measure; let the caller decide how to report the file.
        return None
    MDVP_Fo = np.mean(voiced)   # Mean fundamental frequency
    MDVP_Fhi = np.max(voiced)   # Max fundamental frequency
    MDVP_Flo = np.min(voiced)   # Min fundamental frequency

    # Waveform for the time-domain / spectral measures. No `sr` is passed, so
    # librosa applies its own default sample rate.
    y, sr = librosa.load(audio_path)

    # 2. NHR and HNR
    harmonicity = sound.to_harmonicity()
    # Frames equal to -200 are excluded from the average
    # (presumably Praat's marker for frames without a defined value — TODO confirm).
    hnr_frames = harmonicity.values[harmonicity.values != -200]
    # Avoid the "mean of empty slice" warning; the result is NaN either way.
    HNR = hnr_frames.mean() if hnr_frames.size else np.nan
    # NOTE(review): NHR is taken as the plain reciprocal of the mean HNR. If
    # HNR is expressed in dB this is only a proxy for a true noise-to-harmonics
    # ratio — confirm against the reference feature definition.
    NHR = 1 / HNR if HNR != 0 else 0

    # 3. RPDE (Recurrence Period Density Entropy)
    # Computed here as the normalised permutation entropy of the full pitch
    # track (`fo`, not just the `voiced` slice).
    RPDE = ant.perm_entropy(fo, normalize=True)    # or use a different signal for better accuracy

    # 4. DFA (Detrended Fluctuation Analysis) on the raw waveform
    DFA = nolds.dfa(y)

    # 5. Spread2: mean spectral bandwidth, scaled by the Nyquist frequency
    spread = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    Spread2 = spread.mean() / (sr / 2)

    # 6. D2 (Correlation Dimension) of the full pitch track
    D2 = nolds.corr_dim(fo, emb_dim=25)  # Adjust emb_dim for accuracy, perhaps check paper

    # 7. PPE (Pitch Period Entropy)
    # Computed here as the approximate entropy of the full pitch track.
    PPE = ant.app_entropy(fo)

    # Consolidate into dictionary (key order matches the CSV column order
    # used by process_directory).
    features = {
        "MDVP:Fo(Hz)": MDVP_Fo,
        "MDVP:Fhi(Hz)": MDVP_Fhi,
        "MDVP:Flo(Hz)": MDVP_Flo,
        "NHR": NHR,
        "HNR": HNR,
        "RPDE": RPDE,
        "DFA": DFA,
        "Spread2": Spread2,
        "D2": D2,
        "PPE": PPE
    }

    return features

In [4]:
def process_directory(directory, output_csv):
    """Extract features from every WAV file in a directory into one CSV.

    Files are processed in sorted filename order so the row order is
    reproducible. Each row is flushed to disk as soon as it is computed, so
    rows already written survive an interrupted run. A file whose extraction
    raises or returns ``None`` is reported and skipped instead of aborting
    the whole batch.

    Args:
        directory: Directory to scan (non-recursively) for files whose name
            ends in ".wav", compared case-insensitively.
        output_csv: Path of the CSV file to create; an existing file is
            overwritten.

    Returns:
        None. Progress is reported on stdout.
    """
    # CSV column order; every row is aligned to this list explicitly.
    headers = ["MDVP:Fo(Hz)",
        "MDVP:Fhi(Hz)",
        "MDVP:Flo(Hz)",
        "NHR",
        "HNR",
        "RPDE",
        "DFA",
        "Spread2",
        "D2",
        "PPE"]

    # List the inputs first: a missing directory then fails before the output
    # file is truncated. os.listdir order is arbitrary, hence the sort.
    wav_files = sorted(
        name for name in os.listdir(directory) if name.lower().endswith(".wav")
    )

    # A single handle is kept open for the header and all rows.
    with open(output_csv, mode='w', newline='') as f:
        pd.DataFrame(columns=headers).to_csv(f, index=False)
        f.flush()

        for filename in wav_files:
            filepath = os.path.join(directory, filename)
            print(f"Processing {filename}...")
            try:
                features = extract_features(filepath)
            except Exception as exc:
                # One unreadable or degenerate recording must not lose the batch.
                print(f"Failed to extract features for {filename}: {exc}")
                continue
            if features is None:
                print(f"Failed to extract features for {filename}.")
                continue

            # columns=headers pins the column order regardless of dict order.
            pd.DataFrame([features], columns=headers).to_csv(f, index=False, header=False)
            f.flush()
            print(f"Features appended for {filename}.")

    print(f"All features saved to {output_csv}.")

In [5]:
# Input folder holding the .wav recordings and the CSV that will receive one
# feature row per recording. Both paths are relative, so they resolve against
# the notebook's current working directory.
directory = '../data/raw/HC_AH/HC_AH'
output_csv = 'extracted_features.csv'

# Run the batch extraction; progress is printed below the cell.
process_directory(directory=directory, output_csv=output_csv)

Processing AH_064F_7AB034C9-72E4-438B-A9B3-AD7FDA1596C5.wav...
Features appended for AH_064F_7AB034C9-72E4-438B-A9B3-AD7FDA1596C5.wav.
Processing AH_114S_A89F3548-0B61-4770-B800-2E26AB3908B6.wav...




Features appended for AH_114S_A89F3548-0B61-4770-B800-2E26AB3908B6.wav.
Processing AH_121A_BD5BA248-E807-4CB9-8B53-47E7FFE5F8E2.wav...
Features appended for AH_121A_BD5BA248-E807-4CB9-8B53-47E7FFE5F8E2.wav.
Processing AH_123G_559F0706-2238-447C-BA39-DB5933BA619D.wav...
Features appended for AH_123G_559F0706-2238-447C-BA39-DB5933BA619D.wav.
Processing AH_195B_39DA6A45-F4CC-492A-80D4-FB79049ACC22.wav...
Features appended for AH_195B_39DA6A45-F4CC-492A-80D4-FB79049ACC22.wav.
Processing AH_197T_7552379A-2310-46E1-9466-9D8045C990B8.wav...
Features appended for AH_197T_7552379A-2310-46E1-9466-9D8045C990B8.wav.
Processing AH_222K_FC9D2763-1836-460B-954F-37F23D6CD81D.wav...




Features appended for AH_222K_FC9D2763-1836-460B-954F-37F23D6CD81D.wav.
Processing AH_264Z_593C20CD-0A54-4177-B031-26EE147080A3.wav...




Features appended for AH_264Z_593C20CD-0A54-4177-B031-26EE147080A3.wav.
Processing AH_292J_201CB911-31C1-4CD0-BD73-4FBA4A16C21F.wav...
Features appended for AH_292J_201CB911-31C1-4CD0-BD73-4FBA4A16C21F.wav.
Processing AH_322A_C3BF5535-A11E-498E-94EB-BE7E74099FFB.wav...




Features appended for AH_322A_C3BF5535-A11E-498E-94EB-BE7E74099FFB.wav.
Processing AH_325A_3EB21DC7-C340-4D0E-AC9E-0EABF217BBEE.wav...
Features appended for AH_325A_3EB21DC7-C340-4D0E-AC9E-0EABF217BBEE.wav.
Processing AH_325J_7F5F27AA-5A93-43CF-AB17-FC53940BF4B0.wav...
Features appended for AH_325J_7F5F27AA-5A93-43CF-AB17-FC53940BF4B0.wav.
Processing AH_333L_6C551A6E-CC47-410E-AA49-2DC0A86E6489.wav...
Features appended for AH_333L_6C551A6E-CC47-410E-AA49-2DC0A86E6489.wav.
Processing AH_378G_3C2A05CE-36E4-4956-8FC2-0494B27D3EA8.wav...
Features appended for AH_378G_3C2A05CE-36E4-4956-8FC2-0494B27D3EA8.wav.
Processing AH_420J_07C96C2C-6E96-4A2F-BEC9-5CB71DB309B6.wav...
Features appended for AH_420J_07C96C2C-6E96-4A2F-BEC9-5CB71DB309B6.wav.
Processing AH_444B_E1586F09-1BF5-408D-A55E-96D9E8B76A43.wav...
Features appended for AH_444B_E1586F09-1BF5-408D-A55E-96D9E8B76A43.wav.
Processing AH_456K_CBF60DD0-82AA-430E-A5E9-E1D3AE175CCB.wav...
Features appended for AH_456K_CBF60DD0-82AA-430E-A5E9-E



Features appended for AH_511K_DDC6D065-56B3-436B-9D08-73326C791B69.wav.
Processing AH_523T_66147C3C-938A-4CF9-913E-5D49D72BD8B6.wav...
Features appended for AH_523T_66147C3C-938A-4CF9-913E-5D49D72BD8B6.wav.
Processing AH_528T_6A746E6E-FB60-4363-842F-A7368A1E5B2C.wav...
Features appended for AH_528T_6A746E6E-FB60-4363-842F-A7368A1E5B2C.wav.
Processing AH_538M_AE709CB7-1123-47F8-8BD2-000158BDBC01.wav...
Features appended for AH_538M_AE709CB7-1123-47F8-8BD2-000158BDBC01.wav.
Processing AH_562E_151814F5-BB0F-44EF-9A22-FE2862FC3411.wav...
Features appended for AH_562E_151814F5-BB0F-44EF-9A22-FE2862FC3411.wav.
Processing AH_569E_B26CCA1E-29AD-48DD-9947-48DB8A56CA31.wav...
Features appended for AH_569E_B26CCA1E-29AD-48DD-9947-48DB8A56CA31.wav.
Processing AH_596S_BBE9779F-C440-42D3-9C96-4CD6121D1F7E.wav...
Features appended for AH_596S_BBE9779F-C440-42D3-9C96-4CD6121D1F7E.wav.
Processing AH_619B_5CF9C4CA-31AA-4F22-8E57-8E53618CC224.wav...




Features appended for AH_619B_5CF9C4CA-31AA-4F22-8E57-8E53618CC224.wav.
Processing AH_621N_204CF3E2-1DA0-4908-A47F-78997B1BAFC2.wav...
Features appended for AH_621N_204CF3E2-1DA0-4908-A47F-78997B1BAFC2.wav.
Processing AH_667J_605FB4D5-E0DB-4B9B-8F58-784561C51693.wav...
Features appended for AH_667J_605FB4D5-E0DB-4B9B-8F58-784561C51693.wav.
Processing AH_678A_2E7AFA48-34C1-4DAD-A73C-95F7ABF6B138.wav...
Features appended for AH_678A_2E7AFA48-34C1-4DAD-A73C-95F7ABF6B138.wav.
Processing AH_743R_66BD23F9-D685-4315-86F8-7697B5084F7B.wav...
Features appended for AH_743R_66BD23F9-D685-4315-86F8-7697B5084F7B.wav.
Processing AH_753G_073DCC32-4397-4719-A019-DDD41F30F5F1.wav...
Features appended for AH_753G_073DCC32-4397-4719-A019-DDD41F30F5F1.wav.
Processing AH_777G_4C8ACC89-7FE2-4174-AE3A-B21B39A0C869.wav...
Features appended for AH_777G_4C8ACC89-7FE2-4174-AE3A-B21B39A0C869.wav.
Processing AH_777R_A36CF7FA-37FD-483E-98FE-040942B1DF49.wav...
Features appended for AH_777R_A36CF7FA-37FD-483E-98FE-0



Features appended for AH_789Y_20CB672C-5F66-425E-8707-BE5B7FF807E2.wav.
Processing AH_803T_66094C40-AE64-4AD3-AA97-B052C69DA3EF.wav...
Features appended for AH_803T_66094C40-AE64-4AD3-AA97-B052C69DA3EF.wav.
Processing AH_821C_8F9D5EF0-18B2-4967-B36D-82E014792BC3.wav...
Features appended for AH_821C_8F9D5EF0-18B2-4967-B36D-82E014792BC3.wav.
Processing AH_888A_7F1444B0-B12C-4B55-AF2A-463395DCAF3C.wav...
Features appended for AH_888A_7F1444B0-B12C-4B55-AF2A-463395DCAF3C.wav.
Processing AH_904H_85B22FC1-BA09-4A17-A374-B00B2445CD27.wav...
Features appended for AH_904H_85B22FC1-BA09-4A17-A374-B00B2445CD27.wav.
Processing AH_942A_3F7867F3-1AE2-4BE6-B5EC-AC3157D310CF.wav...
Features appended for AH_942A_3F7867F3-1AE2-4BE6-B5EC-AC3157D310CF.wav.
All features saved to extracted_features.csv.


