In [1]:
# One-time setup (uncomment and run if antropy is not installed): %pip install antropy

In [2]:
import parselmouth
import numpy as np
import librosa
import nolds
import pandas as pd
import joblib
import antropy as ant
from scipy.stats import variation
import os

In [3]:
def extract_features(audio_path):
    """Extract ten voice features from a single audio recording.

    The recording is analysed with Praat (via parselmouth) for pitch and
    harmonicity, and with librosa/nolds/antropy for the remaining measures.

    NOTE(review): several features are stand-ins that only share a name with
    the classical dysphonia measure: "RPDE" is the normalized permutation
    entropy of the pitch contour, "PPE" is its approximate entropy, "NHR" is
    the reciprocal of the mean HNR, and "Spread2" is the mean spectral
    bandwidth divided by the Nyquist frequency. Confirm these match whatever
    data the downstream model was trained on.

    Parameters
    ----------
    audio_path : str
        Path to the audio file; passed to both ``parselmouth.Sound`` and
        ``librosa.load``.

    Returns
    -------
    dict or None
        Mapping of feature name to value with the keys "MDVP:Fo(Hz)",
        "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "NHR", "HNR", "RPDE", "DFA",
        "Spread2", "D2" and "PPE". ``None`` is returned when the pitch
        tracker finds no voiced frame (no frame with frequency > 0), because
        the pitch statistics are undefined for such a recording.
    """
    # Load audio: Praat object for pitch/harmonicity, raw waveform for DFA
    # and the spectral bandwidth.
    sound = parselmouth.Sound(audio_path)
    y, sr = librosa.load(audio_path)

    # 1. MDVP:Fo(Hz), MDVP:Fhi(Hz), MDVP:Flo(Hz)
    pitch = sound.to_pitch()
    fo = pitch.selected_array['frequency']
    voiced = fo[fo > 0]  # frames with a detected pitch
    if voiced.size == 0:
        # np.min() on an empty array raises ValueError and np.mean() yields
        # NaN; report "no features" instead so batch callers can skip the
        # file rather than crash.
        return None
    MDVP_Fo = np.mean(voiced)   # Mean fundamental frequency
    MDVP_Fhi = np.max(fo)       # Max fundamental frequency
    MDVP_Flo = np.min(voiced)   # Min fundamental frequency

    # 2. NHR and HNR
    harmonicity = sound.to_harmonicity()
    # -200 is excluded from the average; presumably Praat's placeholder for
    # frames without a harmonicity estimate -- confirm against the Praat docs.
    HNR = harmonicity.values[harmonicity.values != -200].mean()
    # Reciprocal of the mean HNR, guarded against division by zero.
    NHR = 1 / HNR if HNR != 0 else 0

    # 3. RPDE (Recurrence Period Density Entropy)
    # Computed on the full pitch contour, unvoiced (zero) frames included.
    RPDE = ant.perm_entropy(fo, normalize=True)    # or use a different signal for better accuracy

    # 4. DFA (Detrended Fluctuation Analysis) on the raw waveform
    DFA = nolds.dfa(y)

    # 5. Spread2: mean spectral bandwidth normalized by the Nyquist frequency
    spread = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    Spread2 = spread.mean() / (sr / 2)

    # 6. D2 (Correlation Dimension)
    D2 = nolds.corr_dim(fo, emb_dim=25)  # Adjust emb_dim for accuracy, perhaps check paper

    # 7. PPE (Pitch Period Entropy)
    PPE = ant.app_entropy(fo)

    # Consolidate into dictionary
    features = {
        "MDVP:Fo(Hz)": MDVP_Fo,
        "MDVP:Fhi(Hz)": MDVP_Fhi,
        "MDVP:Flo(Hz)": MDVP_Flo,
        "NHR": NHR,
        "HNR": HNR,
        "RPDE": RPDE,
        "DFA": DFA,
        "Spread2": Spread2,
        "D2": D2,
        "PPE": PPE
    }

    return features

In [4]:
def process_directory(directory, output_csv):
    """Extract features from every ``.wav`` file in a folder into one CSV.

    ``output_csv`` is overwritten with a header row, then one row is appended
    per successfully processed file, so partial results survive an
    interrupted run. Files are visited in sorted name order so that the row
    order is reproducible (the CSV carries no filename column).

    A file whose extraction raises an exception, or for which
    ``extract_features`` returns ``None``, is reported and skipped instead of
    aborting the whole batch.

    Parameters
    ----------
    directory : str
        Folder to scan (non-recursively) for files ending in ".wav".
    output_csv : str
        Path of the CSV file to create.
    """
    # Define CSV headers
    headers = ["MDVP:Fo(Hz)",
        "MDVP:Fhi(Hz)",
        "MDVP:Flo(Hz)",
        "NHR",
        "HNR",
        "RPDE",
        "DFA",
        "Spread2",
        "D2",
        "PPE"]

    # Initialize CSV file with headers (replaces any existing file)
    pd.DataFrame(columns=headers).to_csv(output_csv, index=False)

    # Process each .wav file and append results directly to the CSV
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".wav"):
            continue

        filepath = os.path.join(directory, filename)
        print(f"Processing {filename}...")
        try:
            features = extract_features(filepath)
        except Exception as exc:
            # One unreadable or degenerate recording must not discard the
            # remainder of the batch; report it and move on.
            print(f"Failed to extract features for {filename}: {exc}")
            continue

        if features is None:
            print(f"Failed to extract features for {filename}.")
            continue

        # Rows are written without a header, so pin the column order to
        # `headers` rather than relying on the dict's insertion order.
        row = pd.DataFrame([features], columns=headers)
        row.to_csv(output_csv, mode='a', index=False, header=False)
        print(f"Features appended for {filename}.")

    print(f"All features saved to {output_csv}.")

In [5]:
# Folder of .wav recordings to process and the CSV file that receives the
# extracted features. Both paths are relative to the notebook's working
# directory; process_directory overwrites output_csv if it already exists.
directory = '../data/raw/PD_AH/PD_AH'
output_csv = 'extracted_featuresPD.csv'
process_directory(directory, output_csv)

Processing AH_545616858-3A749CBC-3FEB-4D35-820E-E45C3E5B9B6A.wav...
Features appended for AH_545616858-3A749CBC-3FEB-4D35-820E-E45C3E5B9B6A.wav.
Processing AH_545622717-461DFFFE-54AF-42AF-BA78-528BD505D624.wav...
Features appended for AH_545622717-461DFFFE-54AF-42AF-BA78-528BD505D624.wav.
Processing AH_545622718-C052AD58-5E6B-4ADC-855C-F76B66BAFA6E.wav...
Features appended for AH_545622718-C052AD58-5E6B-4ADC-855C-F76B66BAFA6E.wav.
Processing AH_545622719-52C23861-6E0D-41E0-A3D8-9358C28C019B.wav...
Features appended for AH_545622719-52C23861-6E0D-41E0-A3D8-9358C28C019B.wav.
Processing AH_545622720-E1486AF6-8C95-47EB-829B-4D62698C987A.wav...
Features appended for AH_545622720-E1486AF6-8C95-47EB-829B-4D62698C987A.wav.
Processing AH_545622722-3C79DA68-36BB-43A2-B29C-61AEF480E07E.wav...
Features appended for AH_545622722-3C79DA68-36BB-43A2-B29C-61AEF480E07E.wav.
Processing AH_545629296-C2C009C6-8C17-42EA-B6BE-362942FC4692.wav...
Features appended for AH_545629296-C2C009C6-8C17-42EA-B6BE-362



Features appended for AH_545648867-CB17D873-1CEA-492A-B5B0-93C7463F516C.wav.
Processing AH_545692309-EA8C4DC0-9B2A-4CC7-A490-851A2129A733.wav...
Features appended for AH_545692309-EA8C4DC0-9B2A-4CC7-A490-851A2129A733.wav.
Processing AH_545692315-C2972597-9AEC-4060-A186-F1F59340640C.wav...




Features appended for AH_545692315-C2972597-9AEC-4060-A186-F1F59340640C.wav.
Processing AH_545713221-1E77C030-4558-4A88-B1A2-6AB777ACAE61.wav...
Features appended for AH_545713221-1E77C030-4558-4A88-B1A2-6AB777ACAE61.wav.
Processing AH_545713222-DA13DC3A-F24B-454E-984F-19DF19328D39.wav...
Features appended for AH_545713222-DA13DC3A-F24B-454E-984F-19DF19328D39.wav.
Processing AH_545713223-E6D59EE5-4C3F-4B40-AE8F-0657EF94DB66.wav...
Features appended for AH_545713223-E6D59EE5-4C3F-4B40-AE8F-0657EF94DB66.wav.
Processing AH_545713224-1B3708B0-8792-4FEE-B03B-C7CB9CB03D58.wav...
Features appended for AH_545713224-1B3708B0-8792-4FEE-B03B-C7CB9CB03D58.wav.
Processing AH_545743929-E2EAE1A3-7E46-4DCF-8DB7-37A5CA47DB9D.wav...
Features appended for AH_545743929-E2EAE1A3-7E46-4DCF-8DB7-37A5CA47DB9D.wav.
Processing AH_545753013-FCFF8F46-08FF-4C87-B443-D2039E5DA945.wav...
Features appended for AH_545753013-FCFF8F46-08FF-4C87-B443-D2039E5DA945.wav.
Processing AH_545753014-C68926CC-AB91-49AF-90A6-BB5C4



Features appended for AH_545753014-C68926CC-AB91-49AF-90A6-BB5C434283DB.wav.
Processing AH_545753015-58CAA743-BA9A-47E0-B9EF-CC35E9EFB839.wav...
Features appended for AH_545753015-58CAA743-BA9A-47E0-B9EF-CC35E9EFB839.wav.
Processing AH_545789668-A4F6069C-5E1A-49F5-9EDC-59C6EB833E42.wav...
Features appended for AH_545789668-A4F6069C-5E1A-49F5-9EDC-59C6EB833E42.wav.
Processing AH_545789670-C297FD53-BF71-4183-86A0-58E5E1EB0DF8.wav...
Features appended for AH_545789670-C297FD53-BF71-4183-86A0-58E5E1EB0DF8.wav.
Processing AH_545789671-794D2256-DDFF-4009-8BA8-8A306C8FA14F.wav...
Features appended for AH_545789671-794D2256-DDFF-4009-8BA8-8A306C8FA14F.wav.
Processing AH_545789674-53885025-35F1-48C1-9826-BAAEB8BEAF58.wav...




Features appended for AH_545789674-53885025-35F1-48C1-9826-BAAEB8BEAF58.wav.
Processing AH_545789675-243F18DB-4432-4C87-B12C-6EEC2D2D30D6.wav...
Features appended for AH_545789675-243F18DB-4432-4C87-B12C-6EEC2D2D30D6.wav.
Processing AH_545789677-D381D801-B073-4945-BE0D-E250126EA6B1.wav...
Features appended for AH_545789677-D381D801-B073-4945-BE0D-E250126EA6B1.wav.
Processing AH_545789680-7FF9D4F1-DDCC-4CB6-8668-76530D670FA5.wav...




Features appended for AH_545789680-7FF9D4F1-DDCC-4CB6-8668-76530D670FA5.wav.
Processing AH_545789682-7554E0C7-4E25-49C3-9E6C-04D525455E28.wav...
Features appended for AH_545789682-7554E0C7-4E25-49C3-9E6C-04D525455E28.wav.
Processing AH_545789690-DA26461A-AF40-4A43-9662-3A93EE872359.wav...
Features appended for AH_545789690-DA26461A-AF40-4A43-9662-3A93EE872359.wav.
Processing AH_545806325-8A17002B-CFD3-4DCF-8854-04F0F2BFF21B.wav...
Features appended for AH_545806325-8A17002B-CFD3-4DCF-8854-04F0F2BFF21B.wav.
Processing AH_545806326-BD0FE665-1AD5-4F55-8342-0FAB8B15680B.wav...
Features appended for AH_545806326-BD0FE665-1AD5-4F55-8342-0FAB8B15680B.wav.
Processing AH_545812844-DFBCDA22-CADB-444A-9623-16A39D45E9E7.wav...
Features appended for AH_545812844-DFBCDA22-CADB-444A-9623-16A39D45E9E7.wav.
Processing AH_545812846-0C14B32A-6C50-4B62-BC89-0A815C2DEEFA.wav...
Features appended for AH_545812846-0C14B32A-6C50-4B62-BC89-0A815C2DEEFA.wav.
Processing AH_545834603-857E007F-1CCF-4249-8160-3A0F3



Features appended for AH_545834603-857E007F-1CCF-4249-8160-3A0F3F5AB58D.wav.
Processing AH_545841221-6FC57E6E-65B6-4859-A15A-55856D7E75C0.wav...
Features appended for AH_545841221-6FC57E6E-65B6-4859-A15A-55856D7E75C0.wav.
Processing AH_545841222-DE5AEF27-7F4E-45A4-BF7D-9E87E7A786AE.wav...
Features appended for AH_545841222-DE5AEF27-7F4E-45A4-BF7D-9E87E7A786AE.wav.
Processing AH_545841223-24FB0419-5BAE-4F9C-8EBC-CD62DA6590D2.wav...
Features appended for AH_545841223-24FB0419-5BAE-4F9C-8EBC-CD62DA6590D2.wav.
Processing AH_545841226-C699FC9E-1E0C-474D-A12A-936DD92B8980.wav...
Features appended for AH_545841226-C699FC9E-1E0C-474D-A12A-936DD92B8980.wav.
Processing AH_545841227-5C77713A-66F1-49D0-BC8A-702C152E668D.wav...
Features appended for AH_545841227-5C77713A-66F1-49D0-BC8A-702C152E668D.wav.
Processing AH_545847410-D1BA3BB4-1F61-44CA-ACDE-455A8E97E04B.wav...
Features appended for AH_545847410-D1BA3BB4-1F61-44CA-ACDE-455A8E97E04B.wav.
Processing AH_545880204-EE87D3E2-0D4C-4EAA-ACD7-C3F17