In [25]:
import importlib
import functions_praat
import features
importlib.reload(functions_praat)
importlib.reload(features)

<module 'features' from '/home/dene/rp2/features.py'>

In [26]:
from paths import *
from features import *
from functions_praat import *
from functions_disvoice import *
import pandas as pd

In [27]:
def extract_features_VOW(audio_file, selected_features, segment_length, f0_min, f0_max, point_step, time_step, max_frequency, num_formants):
    extracted_features = {}
    if 'PP_F0' in selected_features:
        extracted_features['PP_F0'] = PP_f0_mean(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_M' in selected_features:
        extracted_features['PP_F0_M'] = PP_f0_mean_murton(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_SD' in selected_features:
        extracted_features['PP_F0_SD'] = PP_f0_sd(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_SD_M' in selected_features:
        extracted_features['PP_F0_SD_M'] = PP_f0_sd_murton(audio_file, f0_min=f0_min, f0_max=f0_max)

    if 'PP_LHR' in selected_features:
        extracted_features['PP_LHR'] = PP_lh_ratio(audio_file)
    if 'PP_LHR_M' in selected_features:
        extracted_features['PP_LHR_M'] = PP_LH_ratio_murton(audio_file, segment_length=segment_length)

    if 'PP_CPP_M' in selected_features:
        extracted_features['PP_CPP_M'] = PP_CPP_mean_murton(audio_file)
    if 'PP_CPP_M2' in selected_features:
        extracted_features['PP_CPP_M2'] = PP_CPP_median_murton(audio_file)
    if 'PP_CPP_SD_M' in selected_features:
        extracted_features['PP_CPP_SD_M'] = PP_CPP_sd_murton(audio_file)

    if 'PP_HNR' in selected_features:
        extracted_features['PP_HNR'] = PP_harmonics_to_noise(audio_file)
    if 'PP_HNR_M' in selected_features:
        extracted_features['PP_HNR_M'] = PP_harmonics_to_noise_murton(audio_file, segment_length=segment_length)

    if 'PP_JIT' in selected_features:
        jitter_values = PP_jitter(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_{feature}'] = jitter_values[feature_idx]  
    if 'PP_JIT_M' in selected_features:
        jitter_values = PP_jitter_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_M_{feature}'] = jitter_values[feature_idx]    

    if 'PP_SHI' in selected_features:
        shimmer_values = PP_shimmer(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_{feature}'] = shimmer_values[feature_idx]
    if 'PP_SHI_M' in selected_features:
        shimmer_values = PP_shimmer_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_M_{feature}'] = shimmer_values[feature_idx]
    
    if 'PP_GF_MEA' in selected_features:
        mean_glottal_formant_values = PP_glottal_formants_mean(audio_file, f0_min=f0_min, f0_max=f0_max, point_step=point_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(mean_glottal_formant_values)): 
            extracted_features[f'PP_GF{i+1}_MEA'] = mean_glottal_formant_values[i]
    if 'PP_GF_MED' in selected_features:
        median_glottal_formant_values = PP_glottal_formants_median(audio_file, f0_min=f0_min, f0_max=f0_max, point_step=point_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(median_glottal_formant_values)): 
            extracted_features[f'PP_GF{i+1}_MED'] = median_glottal_formant_values[i]            
    if 'PP_GF_SD' in selected_features:
        sd_glottal_formant_values = PP_glottal_formants_sd(audio_file, f0_min=f0_min, f0_max=f0_max, point_step=point_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(sd_glottal_formant_values)): 
            extracted_features[f'PP_GF{i+1}_SD'] = sd_glottal_formant_values[i]        

    if 'PP_F_MEA' in selected_features:
        mean_formant_values = PP_formants_mean(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(mean_formant_values)): 
            extracted_features[f'PP_F{i+1}_MEA'] = mean_formant_values[i]
    if 'PP_F_MED' in selected_features:
        median_formant_values = PP_formants_median(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(median_formant_values)): 
            extracted_features[f'PP_F{i+1}_MED'] = median_formant_values[i]            
    if 'PP_F_SD' in selected_features:
        sd_formant_values = PP_formants_sd(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(sd_formant_values)): 
            extracted_features[f'PP_F{i+1}_SD'] = sd_formant_values[i]
    
    if 'DV_PHO' in selected_features: 
        phonation_features = DV_phonation(audio_file)
        for feature in phonation_features.columns:
            extracted_features[f'DV_PHO_{feature}'] = phonation_features[feature].values[0]     
    if 'DV_GLO' in selected_features:
        glottal_features = DV_glottal(audio_file)
        for feature in glottal_features.columns:
            extracted_features[f'DV_GLO_{feature}'] = glottal_features[feature].values[0]        
            
    return extracted_features

In [28]:
def extract_features_SEN(audio_file, selected_features, f0_min, f0_max, silence_threshold, min_silence_duration, segment_length, num_coefficients, time_step, max_frequency, num_formants):
    extracted_features = {}
    
    if 'PP_F0' in selected_features:
        extracted_features['PP_F0'] = PP_f0_median(audio_file, f0_min=f0_min, f0_max=f0_max)    
    if 'PP_F0_M' in selected_features:
        extracted_features['PP_F0_M'] = PP_f0_median_murton(audio_file, f0_min=f0_min, f0_max=f0_max)  
    if 'PP_F0_SD' in selected_features:
        extracted_features['PP_F0_SD'] = PP_f0_sd(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_SD_M' in selected_features:
        extracted_features['PP_F0_SD_M'] = PP_f0_sd_murton(audio_file, f0_min=f0_min, f0_max=f0_max)
        
    if 'PP_CPP_M' in selected_features:
        extracted_features['PP_CPP_M'] = PP_CPP_mean_murton(audio_file)
    if 'PP_CPP_M2' in selected_features:
        extracted_features['PP_CPP_M2'] = PP_CPP_median_murton(audio_file)
    if 'PP_CPP_SD_M' in selected_features:
        extracted_features['PP_CPP_SD_M'] = PP_CPP_sd_murton(audio_file)
        
    if 'PP_DUR_WP' in selected_features:
        extracted_features['PP_DUR_WP'] = PP_duration_with_pauses(audio_file, silence_threshold)
    if 'PP_DUR_WOP' in selected_features:
        extracted_features['PP_DUR_WOP'] = PP_duration_without_pauses(audio_file, silence_threshold, min_silence_duration)

    if 'PP_JIT' in selected_features:
        jitter_values = PP_jitter(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_{feature}'] = jitter_values[feature_idx]  
    if 'PP_JIT_M' in selected_features:
        jitter_values = PP_jitter_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_M_{feature}'] = jitter_values[feature_idx]    
                    
    if 'PP_SHI' in selected_features:
        shimmer_values = PP_shimmer(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_{feature}'] = shimmer_values[feature_idx]
    if 'PP_SHI_M' in selected_features:
        shimmer_values = PP_shimmer_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_M_{feature}'] = shimmer_values[feature_idx]

    if 'PP_MFC' in selected_features:
        mfc_values = PP_MFCC(audio_file)
        for i in range(num_coefficients):
            for feature in mfc_feature_selection:
                if feature in mfc_feature_indices:
                    feature_idx = mfc_feature_indices[feature]
                    extracted_features[f'PP_MFC_{i+1}_{feature}'] = mfc_values[i+(num_coefficients*feature_idx)]

    if 'PP_F_MEA' in selected_features:
        mean_formant_values = PP_formants_mean(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(mean_formant_values)): 
            extracted_features[f'PP_F{i+1}_MEA'] = mean_formant_values[i]
    if 'PP_F_MED' in selected_features:
        median_formant_values = PP_formants_median(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(median_formant_values)): 
            extracted_features[f'PP_F{i+1}_MED'] = median_formant_values[i]            
    if 'PP_F_SD' in selected_features:
        sd_formant_values = PP_formants_sd(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(sd_formant_values)): 
            extracted_features[f'PP_F{i+1}_SD'] = sd_formant_values[i]  

    if 'DV_PRO' in selected_features:
        prosody_features = DV_prosody(audio_file)
        for feature in prosody_features.columns:
            extracted_features[f'DV_PRO_{feature}'] = prosody_features[feature].values[0]
    if 'DV_PHO' in selected_features: 
        phonation_features = DV_phonation(audio_file)
        for feature in phonation_features.columns:
            extracted_features[f'DV_PHO_{feature}'] = phonation_features[feature].values[0]     

    return extracted_features

In [29]:
def extract_features_SPN(audio_file, selected_features, f0_min, f0_max, silence_threshold, min_silence_duration, segment_length, num_coefficients, time_step, max_frequency, num_formants):
    extracted_features = {}
    
    if 'PP_F0' in selected_features:
        extracted_features['PP_F0'] = PP_f0_median(audio_file, f0_min=f0_min, f0_max=f0_max)    
    if 'PP_F0_M' in selected_features:
        extracted_features['PP_F0_M'] = PP_f0_median_murton(audio_file, f0_min=f0_min, f0_max=f0_max)  
    if 'PP_F0_SD' in selected_features:
        extracted_features['PP_F0_SD'] = PP_f0_sd(audio_file, f0_min=f0_min, f0_max=f0_max)
    if 'PP_F0_SD_M' in selected_features:
        extracted_features['PP_F0_SD_M'] = PP_f0_sd_murton(audio_file, f0_min=f0_min, f0_max=f0_max)
        
    if 'PP_CPP_M' in selected_features:
        extracted_features['PP_CPP_M'] = PP_CPP_mean_murton(audio_file)
    if 'PP_CPP_M2' in selected_features:
        extracted_features['PP_CPP_M2'] = PP_CPP_median_murton(audio_file)
    if 'PP_CPP_SD_M' in selected_features:
        extracted_features['PP_CPP_SD_M'] = PP_CPP_sd_murton(audio_file)
        
    if 'PP_DUR_WP' in selected_features:
        extracted_features['PP_DUR_WP'] = PP_duration_with_pauses(audio_file, silence_threshold)
    if 'PP_DUR_WOP' in selected_features:
        extracted_features['PP_DUR_WOP'] = PP_duration_without_pauses(audio_file, silence_threshold, min_silence_duration)

    if 'PP_JIT' in selected_features:
        jitter_values = PP_jitter(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_{feature}'] = jitter_values[feature_idx]  
    if 'PP_JIT_M' in selected_features:
        jitter_values = PP_jitter_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in jitter_feature_selection:
            if feature in jitter_feature_indices:
                feature_idx = jitter_feature_indices[feature]            
                extracted_features[f'PP_JIT_M_{feature}'] = jitter_values[feature_idx]    
                    
    if 'PP_SHI' in selected_features:
        shimmer_values = PP_shimmer(audio_file, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_{feature}'] = shimmer_values[feature_idx]
    if 'PP_SHI_M' in selected_features:
        shimmer_values = PP_shimmer_murton(audio_file, segment_length=segment_length, f0_min=f0_min, f0_max=f0_max) 
        for feature in shimmer_feature_selection:
            if feature in shimmer_feature_indices:
                feature_idx = shimmer_feature_indices[feature]            
                extracted_features[f'PP_SHI_M_{feature}'] = shimmer_values[feature_idx]

    if 'PP_MFC' in selected_features:
        mfc_values = PP_MFCC(audio_file)
        for i in range(num_coefficients):
            for feature in mfc_feature_selection:
                if feature in mfc_feature_indices:
                    feature_idx = mfc_feature_indices[feature]
                    extracted_features[f'PP_MFC_{i+1}_{feature}'] = mfc_values[i+(num_coefficients*feature_idx)]

    if 'PP_F_MEA' in selected_features:
        mean_formant_values = PP_formants_mean(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(mean_formant_values)): 
            extracted_features[f'PP_F{i+1}_MEA'] = mean_formant_values[i]
    if 'PP_F_MED' in selected_features:
        median_formant_values = PP_formants_median(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(median_formant_values)): 
            extracted_features[f'PP_F{i+1}_MED'] = median_formant_values[i]            
    if 'PP_F_SD' in selected_features:
        sd_formant_values = PP_formants_sd(audio_file, time_step=time_step, max_frequency=max_frequency, num_formants=num_formants)
        for i in range(len(sd_formant_values)): 
            extracted_features[f'PP_F{i+1}_SD'] = sd_formant_values[i]  

    if 'DV_PHO' in selected_features: 
        phonation_features = DV_phonation(audio_file)
        for feature in phonation_features.columns:
            extracted_features[f'DV_PHO_{feature}'] = phonation_features[feature].values[0]
    if 'DV_PRO' in selected_features:
        prosody_features = DV_prosody(audio_file)
        for feature in prosody_features.columns:
            extracted_features[f'DV_PRO_{feature}'] = prosody_features[feature].values[0]
    if 'DV_GLO' in selected_features:
        glottal_features = DV_glottal(audio_file)
        for feature in glottal_features.columns:
            extracted_features[f'DV_GLO_{feature}'] = glottal_features[feature].values[0]     
            
    return extracted_features

In [30]:
def extract_features_MPT(audio_file, selected_features, silence_threshold, min_silence_duration):
    extracted_features = {}
    
    if 'PP_DUR_WP' in selected_features:
        extracted_features['PP_DUR_WP'] = PP_duration_with_pauses(audio_file, silence_threshold)
    if 'PP_DUR_WOP' in selected_features:
        extracted_features['PP_DUR_WOP'] = PP_duration_without_pauses(audio_file, silence_threshold, min_silence_duration)
            
    return extracted_features

In [31]:
def process_audio_files_VOW(paths, selected_features, segment_length, f0_min, f0_max, point_step, time_step, max_frequency, num_formants):
    patient_dfs = {}

    files = [file for file in paths if file.is_file() and file.suffix == '.wav']

    for file in files:
        filename = file.stem.replace("_pre", "")
        parts = filename.split("_")
        if len(parts) != 4:
            print(f"Unexpected named audio file: {file}")
            continue

        patient_id, day, exercise, take_letter = parts
        print("Now processing", parts, "for VOW")

        features = {}
        file_path = str(file)

        if exercise == 'VOW':
            features = extract_features_VOW(
                file_path, selected_features, 
                segment_length, 
                f0_min, f0_max,
                point_step, time_step, max_frequency, num_formants)
        
        if features:    
            df_entry = {'day': int(day), **features}       
            df_key = (patient_id, take_letter, exercise)
            if df_key not in patient_dfs:
                patient_dfs[df_key] = pd.DataFrame(columns=['day'] + list(features.keys()))
            patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
        
        for key, df in patient_dfs.items():
            patient_dfs[key] = df.sort_values(by='day', ascending=True).reset_index(drop=True)

        for (patient_id, take_letter, exercise), df in patient_dfs.items():
            file_name = f"{patient_id}_{exercise}_{take_letter}.csv"
            file_path = features_dir / exercise / patient_id / file_name

            df.to_csv(file_path, index=False)


def process_audio_files_SEN(paths, selected_features, f0_min, f0_max, silence_threshold, min_silence_duration, segment_length, num_coefficients, time_step, max_frequency, num_formants):
    patient_dfs = {}

    files = [file for file in paths if file.is_file() and file.suffix == '.wav']

    for file in files:
        filename = file.stem.replace("_pre", "")
        parts = filename.split("_")
        if len(parts) != 4:
            print(f"Unexpected named audio file: {file}")
            continue

        patient_id, day, exercise, take_letter = parts
        print("Now processing", parts, "for SEN")
        
        features = {}
        file_path = str(file)
        
        if exercise == 'SEN':
            features = extract_features_SEN(
                file_path, selected_features, 
                f0_min, f0_max, 
                silence_threshold, min_silence_duration, 
                segment_length,
                num_coefficients,
                time_step, max_frequency, num_formants)
            
        if features:
            df_entry = {'day': int(day), **features}    
            df_key = (patient_id, take_letter, exercise)
            if df_key not in patient_dfs:
                patient_dfs[df_key] = pd.DataFrame(columns=['day'] + list(features.keys()))
            patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
        
        for key, df in patient_dfs.items():
            patient_dfs[key] = df.sort_values(by='day', ascending=True).reset_index(drop=True)
            
        for (patient_id, take_letter, exercise), df in patient_dfs.items():
            file_name = f"{patient_id}_{exercise}_{take_letter}.csv"
            file_path = features_dir / exercise / patient_id / file_name

            df.to_csv(file_path, index=False)
  

def process_audio_files_SPN(paths, selected_features, f0_min, f0_max, silence_threshold, min_silence_duration, segment_length, num_coefficients, time_step, max_frequency, num_formants):
    patient_dfs = {}

    files = [file for file in paths if file.is_file() and file.suffix == '.wav']

    for file in files:
        filename = file.stem.replace("_pre", "")
        parts = filename.split("_")
        if len(parts) != 4:
            print(f"Unexpected named audio file: {file}")
            continue

        patient_id, day, exercise, take_letter = parts
        print("Now processing", parts, "for SPN")
        
        features = {}
        file_path = str(file)
        
        if exercise == 'SPN':
            features = extract_features_SPN(
                file_path, selected_features, 
                f0_min, f0_max, 
                silence_threshold, min_silence_duration, 
                segment_length,
                num_coefficients,
                time_step, max_frequency, num_formants)
            
        if features:
            df_entry = {'day': int(day), **features}    
            df_key = (patient_id, take_letter, exercise)
            if df_key not in patient_dfs:
                patient_dfs[df_key] = pd.DataFrame(columns=['day'] + list(features.keys()))
            patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
        
        for key, df in patient_dfs.items():
            patient_dfs[key] = df.sort_values(by='day', ascending=True).reset_index(drop=True)
            
        for (patient_id, take_letter, exercise), df in patient_dfs.items():
            file_name = f"{patient_id}_{exercise}_{take_letter}.csv"
            file_path = features_dir / exercise / patient_id / file_name

            df.to_csv(file_path, index=False)
          

def process_audio_files_MPT(paths, selected_features, silence_threshold, min_silence_duration):
    patient_dfs = {}

    files = [file for file in paths if file.is_file() and file.suffix == '.wav']
    
    for file in files:
        filename = file.stem.replace("_pre", "")
        parts = filename.split("_")
        if len(parts) != 4:
            print(f"Unexpected named audio file: {file}")
            continue

        patient_id, day, exercise, take_letter = parts
        print("Now processing", parts, "for MPT")

        features = {}
        file_path = str(file)
        
        if exercise == 'MPT':
            features = extract_features_MPT(
                file_path, selected_features, 
                silence_threshold, min_silence_duration)
            
        if features:            
            df_entry = {'day': int(day), **features}    
            df_key = (patient_id, take_letter, exercise)
            if df_key not in patient_dfs:
                patient_dfs[df_key] = pd.DataFrame(columns=['day'] + list(features.keys()))
            patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
            
        for key, df in patient_dfs.items():
            patient_dfs[key] = df.sort_values(by='day', ascending=True).reset_index(drop=True)

        for (patient_id, take_letter, exercise), df in patient_dfs.items():
            file_name = f"{patient_id}_{exercise}_{take_letter}.csv"
            file_path = features_dir / exercise / patient_id / file_name

            df.to_csv(file_path, index=False)

In [32]:
### processes only files that have not been processed before
### this means that once one file, disregarding which 'day' it was recorded on, has been processed
### no new files of this same type (id, exercise, mic, vowel) will be processed
all_files = [file for folder in ['SPN', 'SEN', 'MPT'] for file in (processed_dir / folder).rglob('*') if file.is_file()]
processed_files = [file for folder in ['SPN', 'SEN', 'MPT'] for file in (features_dir / folder).rglob('*') if file.is_file()]
processed_keys = {(file.parts[-2], file.stem[-6:]) for file in processed_files}
unprocessed_files = [file for file in all_files if (file.parts[-2], file.stem[-10:-4]) not in processed_keys]

### this achieves the same but specifically for VOW exercise recordings
all_files_VOW = [file for file in (segments_dir / 'VOW').rglob('*') if file.is_file()]
processed_files_VOW = [file for file in (features_dir / 'VOW').rglob('*') if file.is_file()]
processed_keys_VOW = {(file.parts[-2], file.stem[-6:]) for file in processed_files_VOW}
unprocessed_files_VOW = [file for file in all_files_VOW if (file.parts[-2], file.stem[-10:-4]) not in processed_keys_VOW]

# process_audio_files_VOW(
#     paths=unprocessed_files_VOW,
#     selected_features=selected_features_dict_VOW,
#     segment_length=1.0,
#     f0_min=60, f0_max=300,
#     point_step=0.0025, time_step=0.01, max_frequency=5000, num_formants=5)

process_audio_files_SEN(
    paths=unprocessed_files,
    selected_features=selected_features_dict_SEN,
    f0_min=60, f0_max=300,
    silence_threshold=50, min_silence_duration=0.5,
    num_coefficients=12,
    segment_length=1.0,
    time_step=0.01, max_frequency=5000, num_formants=5)

process_audio_files_SPN(
    paths=unprocessed_files,
    selected_features=selected_features_dict_SPN,
    f0_min=60, f0_max=300,
    silence_threshold=50, min_silence_duration=0.5,
    num_coefficients=12,
    segment_length=1.0,
    time_step=0.01, max_frequency=5000, num_formants=5)

process_audio_files_MPT(
    paths=unprocessed_files,
    selected_features=selected_features_dict_MPT,
    silence_threshold=50, min_silence_duration=0.5)

All files [PosixPath('audio_files_pre/SPN/1234572/1234572_0_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234572/1234572_0_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234572/1234572_2_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234572/1234572_1_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234572/1234572_1_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234572/1234572_2_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_3_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_1_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_2_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_3_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_1_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_0_SPN_2_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_0_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234571/1234571_2_SPN_1_pre.wav'), PosixPath('audio_files_pre/SPN/1234573/1234573_0_SPN_1_pre.wav'),

  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234572', '2', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234572', '1', 'SEN', '2'] for SEN
Now processing ['1234572', '0', 'SEN', '2'] for SEN
Now processing ['1234572', '0', 'SEN', '1'] for SEN
Now processing ['1234572', '2', 'SEN', '1'] for SEN
Now processing ['1234571', '0', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234571', '3', 'SEN', '1'] for SEN
Now processing ['1234571', '2', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234571', '3', 'SEN', '2'] for SEN
Now processing ['1234571', '1', 'SEN', '2'] for SEN
Now processing ['1234571', '2', 'SEN', '1'] for SEN
Now processing ['1234571', '1', 'SEN', '1'] for SEN
Now processing ['1234571', '0', 'SEN', '2'] for SEN
Now processing ['1234573', '0', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234573', '2', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234573', '1', 'SEN', '1'] for SEN
Now processing ['1234573', '2', 'SEN', '1'] for SEN
Now processing ['1234573', '1', 'SEN', '2'] for SEN
Now processing ['1234573', '0', 'SEN', '2'] for SEN
Now processing ['1234570', '2', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234570', '1', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234570', '0', 'SEN', '2'] for SEN
Now processing ['1234570', '1', 'SEN', '1'] for SEN
Now processing ['1234570', '2', 'SEN', '2'] for SEN
Now processing ['1234570', '0', 'SEN', '1'] for SEN
Now processing ['1234567', '3', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '3', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '1', 'SEN', '1'] for SEN
Now processing ['1234567', '4', 'SEN', '2'] for SEN
Now processing ['1234567', '2', 'SEN', '1'] for SEN
Now processing ['1234567', '4', 'SEN', '1'] for SEN
Now processing ['1234567', '2', 'SEN', '2'] for SEN
Now processing ['1234567', '0', 'SEN', '2'] for SEN
Now processing ['1234567', '0', 'SEN', '1'] for SEN
Now processing ['1234567', '1', 'SEN', '2'] for SEN
Now processing ['1234568', '3', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234568', '0', 'SEN', '2'] for SEN
Now processing ['1234568', '1', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234568', '4', 'SEN', '2'] for SEN
Now processing ['1234568', '2', 'SEN', '2'] for SEN
Now processing ['1234568', '1', 'SEN', '2'] for SEN
Now processing ['1234568', '2', 'SEN', '1'] for SEN
Now processing ['1234568', '3', 'SEN', '1'] for SEN
Now processing ['1234568', '4', 'SEN', '1'] for SEN
Now processing ['1234568', '0', 'SEN', '1'] for SEN
Now processing ['1234569', '1', 'SEN', '1'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234569', '2', 'SEN', '1'] for SEN
Now processing ['1234569', '0', 'SEN', '2'] for SEN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234569', '1', 'SEN', '2'] for SEN
Now processing ['1234569', '2', 'SEN', '2'] for SEN
Now processing ['1234569', '0', 'SEN', '1'] for SEN
Now processing ['1234572', '0', 'MPT', '1'] for SEN
Now processing ['1234572', '2', 'MPT', '1'] for SEN
Now processing ['1234572', '0', 'MPT', '2'] for SEN
Now processing ['1234572', '1', 'MPT', '1'] for SEN
Now processing ['1234572', '2', 'MPT', '2'] for SEN
Now processing ['1234572', '1', 'MPT', '2'] for SEN
Now processing ['1234571', '0', 'MPT', '1'] for SEN
Now processing ['1234571', '0', 'MPT', '2'] for SEN
Now processing ['1234571', '1', 'MPT', '2'] for SEN
Now processing ['1234571', '1', 'MPT', '1'] for SEN
Now processing ['1234571', '3', 'MPT', '1'] for SEN
Now processing ['1234571', '2', 'MPT', '2'] for SEN
Now processing ['1234571', '2', 'MPT', '1'] for SEN
Now processing ['1234571', '3', 'MPT', '2'] for SEN
Now processing ['1234573', '1', 'MPT', '1'] for SEN
Now processing ['1234573', '0', 'MPT', '1'] for SEN
Now processi

  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234572', '0', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234572', '2', 'SPN', '1'] for SPN
Now processing ['1234572', '1', 'SPN', '2'] for SPN
Now processing ['1234572', '1', 'SPN', '1'] for SPN
Now processing ['1234572', '2', 'SPN', '2'] for SPN
Now processing ['1234571', '3', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234571', '1', 'SPN', '1'] for SPN
Now processing ['1234571', '2', 'SPN', '2'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234571', '3', 'SPN', '2'] for SPN
Now processing ['1234571', '1', 'SPN', '2'] for SPN
Now processing ['1234571', '0', 'SPN', '2'] for SPN
Now processing ['1234571', '0', 'SPN', '1'] for SPN
Now processing ['1234571', '2', 'SPN', '1'] for SPN
Now processing ['1234573', '0', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234573', '1', 'SPN', '1'] for SPN
Now processing ['1234573', '2', 'SPN', '1'] for SPN
Now processing ['1234573', '0', 'SPN', '2'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234573', '2', 'SPN', '2'] for SPN
Now processing ['1234573', '1', 'SPN', '2'] for SPN
Now processing ['1234570', '2', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234570', '0', 'SPN', '2'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234570', '0', 'SPN', '1'] for SPN
Now processing ['1234570', '2', 'SPN', '2'] for SPN
Now processing ['1234570', '1', 'SPN', '1'] for SPN
Now processing ['1234570', '1', 'SPN', '2'] for SPN
Now processing ['1234567', '2', 'SPN', '2'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '0', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '3', 'SPN', '2'] for SPN
Now processing ['1234567', '2', 'SPN', '1'] for SPN
Now processing ['1234567', '4', 'SPN', '1'] for SPN
Now processing ['1234567', '3', 'SPN', '1'] for SPN
Now processing ['1234567', '1', 'SPN', '1'] for SPN
Now processing ['1234567', '0', 'SPN', '2'] for SPN
Now processing ['1234567', '1', 'SPN', '2'] for SPN
Now processing ['1234568', '2', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234568', '0', 'SPN', '1'] for SPN
Now processing ['1234568', '1', 'SPN', '1'] for SPN
Now processing ['1234568', '4', 'SPN', '1'] for SPN
Now processing ['1234568', '3', 'SPN', '1'] for SPN
Now processing ['1234569', '2', 'SPN', '2'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234569', '0', 'SPN', '1'] for SPN


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234569', '1', 'SPN', '2'] for SPN
Now processing ['1234569', '0', 'SPN', '2'] for SPN
Now processing ['1234569', '1', 'SPN', '1'] for SPN
Now processing ['1234569', '2', 'SPN', '1'] for SPN
Now processing ['1234572', '1', 'SEN', '1'] for SPN
Now processing ['1234572', '2', 'SEN', '2'] for SPN
Now processing ['1234572', '1', 'SEN', '2'] for SPN
Now processing ['1234572', '0', 'SEN', '2'] for SPN
Now processing ['1234572', '0', 'SEN', '1'] for SPN
Now processing ['1234572', '2', 'SEN', '1'] for SPN
Now processing ['1234571', '0', 'SEN', '1'] for SPN
Now processing ['1234571', '3', 'SEN', '1'] for SPN
Now processing ['1234571', '2', 'SEN', '2'] for SPN
Now processing ['1234571', '3', 'SEN', '2'] for SPN
Now processing ['1234571', '1', 'SEN', '2'] for SPN
Now processing ['1234571', '2', 'SEN', '1'] for SPN
Now processing ['1234571', '1', 'SEN', '1'] for SPN
Now processing ['1234571', '0', 'SEN', '2'] for SPN
Now processing ['1234573', '0', 'SEN', '1'] for SPN
Now processi

  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234572', '2', 'MPT', '1'] for MPT
Now processing ['1234572', '0', 'MPT', '2'] for MPT
Now processing ['1234572', '1', 'MPT', '1'] for MPT
Now processing ['1234572', '2', 'MPT', '2'] for MPT
Now processing ['1234572', '1', 'MPT', '2'] for MPT
Now processing ['1234571', '0', 'MPT', '1'] for MPT
Now processing ['1234571', '0', 'MPT', '2'] for MPT
Now processing ['1234571', '1', 'MPT', '2'] for MPT
Now processing ['1234571', '1', 'MPT', '1'] for MPT
Now processing ['1234571', '3', 'MPT', '1'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234571', '2', 'MPT', '2'] for MPT
Now processing ['1234571', '2', 'MPT', '1'] for MPT
Now processing ['1234571', '3', 'MPT', '2'] for MPT
Now processing ['1234573', '1', 'MPT', '1'] for MPT
Now processing ['1234573', '0', 'MPT', '1'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234573', '2', 'MPT', '2'] for MPT
Now processing ['1234573', '1', 'MPT', '2'] for MPT
Now processing ['1234573', '0', 'MPT', '2'] for MPT
Now processing ['1234573', '2', 'MPT', '1'] for MPT
Now processing ['1234570', '2', 'MPT', '2'] for MPT
Now processing ['1234570', '1', 'MPT', '1'] for MPT
Now processing ['1234570', '1', 'MPT', '2'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234570', '0', 'MPT', '1'] for MPT
Now processing ['1234570', '0', 'MPT', '2'] for MPT
Now processing ['1234570', '2', 'MPT', '1'] for MPT
Now processing ['1234567', '1', 'MPT', '1'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '2', 'MPT', '1'] for MPT
Now processing ['1234567', '4', 'MPT', '1'] for MPT
Now processing ['1234567', '0', 'MPT', '2'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234567', '3', 'MPT', '2'] for MPT
Now processing ['1234567', '1', 'MPT', '2'] for MPT
Now processing ['1234567', '0', 'MPT', '1'] for MPT
Now processing ['1234567', '2', 'MPT', '2'] for MPT
Now processing ['1234567', '4', 'MPT', '2'] for MPT
Now processing ['1234567', '3', 'MPT', '1'] for MPT
Now processing ['1234568', '2', 'MPT', '2'] for MPT
Now processing ['1234568', '0', 'MPT', '1'] for MPT
Now processing ['1234568', '4', 'MPT', '1'] for MPT
Now processing ['1234568', '3', 'MPT', '1'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234568', '2', 'MPT', '1'] for MPT
Now processing ['1234568', '4', 'MPT', '2'] for MPT
Now processing ['1234568', '3', 'MPT', '2'] for MPT
Now processing ['1234568', '1', 'MPT', '2'] for MPT
Now processing ['1234568', '0', 'MPT', '2'] for MPT
Now processing ['1234568', '1', 'MPT', '1'] for MPT
Now processing ['1234569', '1', 'MPT', '2'] for MPT


  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)
  patient_dfs[df_key] = pd.concat([patient_dfs[df_key], pd.DataFrame([df_entry])], ignore_index=True)


Now processing ['1234569', '0', 'MPT', '1'] for MPT
Now processing ['1234569', '0', 'MPT', '2'] for MPT
Now processing ['1234569', '1', 'MPT', '1'] for MPT
Now processing ['1234569', '2', 'MPT', '2'] for MPT
Now processing ['1234569', '2', 'MPT', '1'] for MPT
