In [40]:
# Import the two helper modules as module objects so they can be reloaded in
# place; reloading picks up edits to their source without a kernel restart.
import importlib
import extraction_functions_praat
import functions
# The reloads run before the `from ... import ...` lines of the next cell, so
# the names imported there are bound to the reloaded definitions.
importlib.reload(extraction_functions_praat)
importlib.reload(functions)

<module 'functions' from '/home/dene/rp2/functions.py'>

In [41]:
import os
import pandas as pd
import parselmouth
from parselmouth.praat import call
from extraction_functions_praat import PP_f0_mean_murton, PP_f0_sd_murton, PP_jitter, PP_f0_mean, PP_f0_sd
from functions import load_best_segment

In [42]:
# Pitch search range handed to the extractors as f0_min / f0_max
# (presumably in Hz -- confirm against extraction_functions_praat).
f0_min, f0_max = 60, 300

# Jitter variant to extract: one of 'local', 'abs', 'rap', 'ppq5', 'ddp',
# or 'all' to extract all five.
jitter_type = 'all'

In [43]:
# Directory that is scanned for audio files, and root directory under which the
# per-patient CSV files are written.
audio_files_dir = 'selected_audio_files/'
extracted_acoustic_feature_storage_path = 'extracted_acoustic_features/'
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check (and without the race between check and creation).
os.makedirs(extracted_acoustic_feature_storage_path, exist_ok=True)

# Feature codes known to this notebook.
AVAILABLE_FEATURES = [
    'PP_F0', 'PP_F0_SD',
    'PP_F0_M', 'PP_F0_SD_M',
    'PP_DUR',
    'PP_JIT',
] ## add more

# Feature code -> enabled flag.
selected_features_dict = {
    'PP_F0': True, 'PP_F0_SD': True,
    'PP_F0_M': True, 'PP_F0_SD_M': True,
    'PP_DUR': False,
    'PP_JIT': True,
} ## enable the ones you want

# Tag used in the output CSV file names: "all" when every flag is set,
# otherwise the enabled feature codes joined by "-".
if all(selected_features_dict.values()):
    selected_features_abrv = "all"
else:
    selected_features_abrv = "-".join(
        [feature for feature, enabled in selected_features_dict.items() if enabled]
    )

In [44]:
def extract_features_vowels(audio_file, selected_features):
    """Extract the enabled acoustic features from a vowel ('VOW') recording.

    Parameters
    ----------
    audio_file
        Audio file handed unchanged to the PP_* extractor functions.
    selected_features
        Either a collection of feature codes (list, tuple, set) or a mapping
        ``{feature code: enabled flag}``. For a mapping only the codes with a
        truthy flag are extracted; a plain ``in`` test on the mapping would
        also match codes that are switched off, because it only sees the keys.

    Returns
    -------
    dict
        Feature name -> extracted value. Jitter is stored as
        ``PP_JIT_<variant>``; with the module-level ``jitter_type == 'all'``
        one entry per variant is stored.
    """
    if isinstance(selected_features, dict):
        selected_features = {name for name, enabled in selected_features.items() if enabled}

    extracted_features = {}

    # Order matters: it fixes the key order of the result (and thereby the
    # column order of a DataFrame built from it).
    pitch_extractors = (
        ('PP_F0', PP_f0_mean),
        ('PP_F0_SD', PP_f0_sd),
        ('PP_F0_M', PP_f0_mean_murton),
        ('PP_F0_SD_M', PP_f0_sd_murton),
    )
    for feature, extractor in pitch_extractors:
        if feature in selected_features:
            extracted_features[feature] = extractor(audio_file, f0_min=f0_min, f0_max=f0_max)

    if 'PP_JIT' in selected_features:
        jitter_values = PP_jitter(audio_file, f0_min=f0_min, f0_max=f0_max, type=jitter_type)
        if jitter_type == 'all':
            # With type='all' the result is read positionally in this order.
            for position, variant in enumerate(('local', 'abs', 'rap', 'ppq5', 'ddp')):
                extracted_features[f'PP_JIT_{variant}'] = jitter_values[position]
        else:
            extracted_features[f'PP_JIT_{jitter_type}'] = jitter_values

    return extracted_features


def extract_features_mpt(audio_file, selected_features):
    """Extract the enabled acoustic features from an 'MPT' recording.

    Parameters
    ----------
    audio_file
        Audio file handed unchanged to the PP_* extractor functions.
    selected_features
        Either a collection of feature codes or a mapping
        ``{feature code: enabled flag}``; for a mapping only codes with a
        truthy flag count as selected.

    Returns
    -------
    dict
        Feature name -> extracted value, containing 'PP_F0_SD' and/or
        'PP_F0_SD_M' when those codes are selected.
    """
    if isinstance(selected_features, dict):
        selected_features = {name for name, enabled in selected_features.items() if enabled}

    extracted_features = {}

    # Each feature is gated on its own code, so switching 'PP_F0_SD' on or off
    # controls exactly the value that is stored under 'PP_F0_SD'.
    if 'PP_F0_SD' in selected_features:
        extracted_features['PP_F0_SD'] = PP_f0_sd(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_SD_M' in selected_features:
        extracted_features['PP_F0_SD_M'] = PP_f0_sd_murton(audio_file, f0_min=f0_min, f0_max=f0_max)
    # TODO: 'PP_DUR' is not extracted yet; the intended call was
    # PP_duration(audio_file), but no such helper is imported by this notebook.

    return extracted_features


def extract_features_sentences(audio_file, selected_features):
    """Extract the enabled acoustic features from an 'SNT' recording.

    Parameters
    ----------
    audio_file
        Audio file handed unchanged to the PP_* extractor functions.
    selected_features
        Either a collection of feature codes or a mapping
        ``{feature code: enabled flag}``; for a mapping only codes with a
        truthy flag count as selected.

    Returns
    -------
    dict
        ``{'PP_F0_SD': value}`` when 'PP_F0_SD' is selected, otherwise ``{}``.
    """
    if isinstance(selected_features, dict):
        selected_features = {name for name, enabled in selected_features.items() if enabled}

    extracted_features = {}

    # The value comes from PP_f0_sd, so it is stored (and gated) under the
    # 'PP_F0_SD' code rather than under the mean's 'PP_F0' label.
    # NOTE(review): if the mean was intended for sentences, call PP_f0_mean and
    # use the 'PP_F0' code instead -- confirm with the analysis plan.
    if 'PP_F0_SD' in selected_features:
        extracted_features['PP_F0_SD'] = PP_f0_sd(audio_file, f0_min=f0_min, f0_max=f0_max)

    return extracted_features

In [None]:
def process_audio_files(audio_files_dir, selected_features, analyze_segments=False):
    """Extract features from the pre-emphasised recordings and write per-patient CSVs.

    Only files named ``<patient_id>_<day>_<exercise>_<take>pre.wav`` are
    processed, where ``<exercise>`` is 'VOW', 'MPT' or 'SNT' and ``<day>`` is an
    integer. Files without the ``pre.wav`` suffix and ``*sgmntpre.wav`` segment
    files are skipped silently; files with the suffix but an unexpected name
    are reported and skipped.

    Parameters
    ----------
    audio_files_dir
        Directory that is scanned (non-recursively) for audio files.
    selected_features
        Either a collection of feature codes or a mapping
        ``{feature code: enabled flag}``; for a mapping only the codes with a
        truthy flag are forwarded to the extractors.
    analyze_segments
        When true, 'VOW' recordings are first passed to ``load_best_segment``
        and the features are extracted from the resulting ``*sgmntpre.wav``
        file instead of the full recording.

    Returns
    -------
    None
        One CSV per patient and exercise is written to
        ``<extracted_acoustic_feature_storage_path>_<patient_id>/`` as
        ``<exercise>_<selected_features_abrv>.csv``; each row holds the day
        plus the extracted features of one recording.
    """
    if isinstance(selected_features, dict):
        # A flag dict contains disabled codes as keys too; keep the enabled ones.
        selected_features = [name for name, enabled in selected_features.items() if enabled]

    input_suffix = "pre.wav"
    segment_suffix = "sgmntpre.wav"
    extractors = {
        'VOW': extract_features_vowels,
        'MPT': extract_features_mpt,
        'SNT': extract_features_sentences,
    }

    patient_df = {}

    # Sorted so the row order of the CSVs does not depend on the arbitrary
    # order in which os.listdir returns the directory entries.
    for file in sorted(os.listdir(audio_files_dir)):
        # Segment files are written next to their source recording, so they
        # would otherwise be picked up as new input on the next run. Files
        # without the suffix are not inputs of this step, and for them the
        # derived segment path would be the input path itself.
        if file.endswith(segment_suffix) or not file.endswith(input_suffix):
            continue

        parts = file[:-len(input_suffix)].split("_")
        if len(parts) != 4:
            print(f"Unexpected named audio file: {file}")
            continue

        patient_id, day, exercise, _take = parts
        try:
            day_number = int(day)
        except ValueError:
            day_number = None
        if day_number is None or exercise not in extractors:
            # Skip instead of failing (or reusing the previous file's
            # features): one odd file should not abort or corrupt the run.
            print(f"Unexpected named audio file: {file}")
            continue

        file_path = os.path.join(audio_files_dir, file)
        feature_source = file_path

        if exercise == 'VOW' and analyze_segments:
            # Presumably load_best_segment writes the chosen segment to
            # output_path -- confirm in functions.py.
            feature_source = os.path.join(
                audio_files_dir, file[:-len(input_suffix)] + segment_suffix
            )
            load_best_segment(audio_file=file_path, output_path=feature_source, f0_min=f0_min, f0_max=f0_max)

        features = extractors[exercise](feature_source, selected_features)
        df_entry = {'day': day_number, **features}
        patient_df.setdefault(patient_id, {}).setdefault(exercise, []).append(df_entry)

    if not patient_df:
        print(f"No matching '*{input_suffix}' audio files found in {audio_files_dir}")

    for patient_id, exercises in patient_df.items():
        # NOTE(review): when the storage path ends with '/', this yields a
        # sub-directory named "_<patient_id>" -- confirm the underscore is intended.
        output_dir = f"{extracted_acoustic_feature_storage_path}_{patient_id}/"
        os.makedirs(output_dir, exist_ok=True)

        for exercise, data in exercises.items():
            df = pd.DataFrame(data)
            df.to_csv(os.path.join(output_dir, f"{exercise}_{selected_features_abrv}.csv"), index=False)

In [None]:
# Hand over only the enabled feature codes: the extractors test
# `code in selected_features`, and on a flag dict that test looks at the keys
# only, so it is also true for codes whose flag is False.
process_audio_files(
    audio_files_dir,
    selected_features=[feature for feature, enabled in selected_features_dict.items() if enabled],
    analyze_segments=True,
)

Processing 1234567_0_VOW_1.wav...
Processing 1234568_0_VOW_1.wav...
