In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import opensmile
import IPython
import os
import librosa
import glob
import parselmouth 
import statistics
import scipy
import math

from sklearn.preprocessing import minmax_scale
from parselmouth.praat import call
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [2]:
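# Measure source acoustics (pitch, HNR, jitter, shimmer, pitch dynamics). The F0 search range comes from the caller (f0min/f0max); only the octave-jump correction uses fixed 75-300 Hz bounds.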

def _max_min_sd_mean(values):
    """Return (max, min, population SD, mean) of a non-empty 1-D sequence."""
    return max(values), min(values), statistics.pstdev(values), statistics.mean(values)


def measurePitch(voiceID, f0min, f0max, unit):
    """Extract pitch, harmonicity, jitter, shimmer and pitch-dynamics features.

    Parameters
    ----------
    voiceID : passed straight to ``parselmouth.Sound`` (e.g. a wav file path).
    f0min, f0max : pitch floor and ceiling handed to Praat's pitch,
        harmonicity (floor only) and point-process analyses.
    unit : unit string forwarded to Praat's pitch queries (e.g. "Hertz").

    Returns
    -------
    tuple of 35 values, in this order:
        duration, meanF0, minF0, maxF0, stdevF0, hnr,
        5 jitter measures (local, local absolute, rap, ppq5, ddp),
        6 shimmer measures (local, local dB, apq3, apq5, apq11, dda),
        (max, min, sd, mean) of the 3-frame pitch gradient divided by meanF0,
        (max, min, sd, mean) of (gradient - meanF0) / stdevF0,
        (max, min, sd, mean) of the 3-frame pitch step divided by meanF0,
        (max, min, sd, mean) of (step - meanF0) / stdevF0,
        mean and max of the raw 3-frame pitch step.

    Raises
    ------
    statistics.StatisticsError
        If fewer than four voiced frames are found, because no 3-frame step
        can then be formed.
    """
    sound = parselmouth.Sound(voiceID) # read the sound
    duration = call(sound, "Get total duration") # duration
    pitch = call(sound, "To Pitch (cc)", 0, f0min, 15, 'no', 0.03, 0.45, -0.29, 3.5, 0.14, f0max)
    meanF0 = call(pitch, "Get mean", 0, 0, unit) # mean pitch
    minF0 = call(pitch, "Get minimum", 0, 0, unit, "parabolic") # minimum pitch
    maxF0 = call(pitch, "Get maximum", 0, 0, unit, "parabolic") # maximum pitch
    stdevF0 = call(pitch, "Get standard deviation", 0 ,0, unit) # standard deviation of pitch
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
    rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

    # Keep voiced frames only (Praat reports unvoiced frames as 0). Boolean
    # indexing returns a copy, so the in-place edits below stay local.
    pitch_values = pitch.selected_array['frequency']
    pitch_values = pitch_values[pitch_values != 0]

    # --- Octave-jump correction -------------------------------------------
    # Each frame is compared with the median of up to `half_window` frames on
    # either side. Earlier frames may already have been corrected, so the
    # loop must run sequentially, front to back.
    # NOTE(review): the 300 / 75 bounds are fixed and do not follow
    # f0max / f0min - confirm that this is intended.
    half_window = 15
    for i in range(len(pitch_values)):
        neighbourhood = pitch_values[max(0, i - half_window):i + half_window]
        window_median = np.median(np.absolute(neighbourhood))

        if i == 0:
            # The first frame has no left context, so it is additionally
            # checked against the median of the whole (still unmodified) track.
            median_buffer = np.median(pitch_values)
            if pitch_values[i] <= median_buffer-.4*median_buffer:
                pitch_values[i] = pitch_values[i]*2

            if pitch_values[i] >= median_buffer+.8*median_buffer:
                pitch_values[i] = pitch_values[i]/2

        if pitch_values[i] <= window_median-0.4*window_median and pitch_values[i]*2 < 300:
            pitch_values[i] = pitch_values[i]*2  # octave drop: double it
        if pitch_values[i] >= window_median+0.8*window_median and pitch_values[i]/2 > 75:
            pitch_values[i] = pitch_values[i]/2  # octave jump: halve it

    if len(pitch_values) < 4:
        raise statistics.StatisticsError(
            "measurePitch needs at least four voiced frames to compute pitch steps")

    # --- Pitch step / gradient between consecutive 3-frame groups ----------
    # NOTE(review): 0.033 s is used as the per-frame duration; this is not
    # derived from the Pitch object's actual time step - confirm.
    frame_step = 0.033
    pitch_range_three_list = []
    pitch_grad_three_list = []
    last_avg = statistics.mean(pitch_values[0:3])
    for start in range(3, len(pitch_values), 3):
        group = pitch_values[start:start + 3]  # the last group may be shorter
        group_avg = statistics.mean(np.absolute(group))
        step = np.absolute(group_avg - last_avg)
        pitch_range_three_list.append(step)
        pitch_grad_three_list.append(step / (len(group) * frame_step))
        last_avg = group_avg

    F0_dev_step_mean = statistics.mean(pitch_range_three_list)
    F0_dev_step_max = max(pitch_range_three_list)

    steps = np.asarray(pitch_range_three_list)
    grads = np.asarray(pitch_grad_three_list)

    # Two normalisations of each series: divided by mean F0, and
    # (value - mean F0) / SD of F0.
    range_d_mean = _max_min_sd_mean(steps / meanF0)
    range_m_mean_d_sd = _max_min_sd_mean((steps - meanF0) / stdevF0)
    grad_d_mean = _max_min_sd_mean(grads / meanF0)
    grad_m_mean_d_sd = _max_min_sd_mean((grads - meanF0) / stdevF0)

    return (
        (duration, meanF0, minF0, maxF0, stdevF0, hnr,
         localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter,
         localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer)
        + grad_d_mean
        + grad_m_mean_d_sd
        + range_d_mean
        + range_m_mean_d_sd
        + (F0_dev_step_mean, F0_dev_step_max)
    )

# Scratch / debug version of the octave-jump correction used in measurePitch,
# run at cell level with a 20-frame half-window and progress prints.
# NOTE(review): `pitch_values_in` is not defined anywhere in the visible part
# of the notebook, so this cell fails on a fresh kernel unless it is created
# elsewhere - confirm or remove.
pitch_values = np.copy(pitch_values_in)  # work on a copy; the input array is left untouched
time_step_buff_upper = 20  # number of frames looked at from the current frame onwards
time_step_buff_low = 20    # number of frames looked at before the current frame
next_count = 0   # not used below
mean_count=0     # not used below
for i in range(len(pitch_values)):
    # Neighbourhood around frame i: up to 20 earlier frames plus frame i and
    # up to 19 later ones (slices are clipped at the array boundaries).
    pitches_upper = np.absolute(pitch_values[i:(i+time_step_buff_upper)])
    if i >= time_step_buff_low:
        pitches_lower = np.absolute(pitch_values[(i-time_step_buff_low):(i)])
    if i< time_step_buff_low:
        pitches_lower = np.absolute(pitch_values[0:i])

    # Median of the whole track (recomputed every iteration, only used for
    # i == 0 and by the bare `median_buffer` expression further down).
    median_buffer = np.median(pitch_values)
    # Local reference; earlier frames in the window may already be corrected.
    window_median = np.median(np.concatenate([pitches_lower, pitches_upper]))
    
    if i == 0:
        # The first frame has no left context, so it is also checked against
        # the global median: doubled if <= 60 % of it, halved if >= 180 %.
        print("first element was: " + str(pitch_values[i]))
        if pitch_values[i] <= median_buffer-.4*median_buffer:
            pitch_values[i] = pitch_values[i]*2
            
        if pitch_values[i] >= median_buffer+.8*median_buffer:
            pitch_values[i] = pitch_values[i]/2
        print("and is now: " +str(pitch_values[i]))
        
    # Octave drop: value <= 60 % of the window median -> double it, but only
    # if the doubled value stays below 300.
    if pitch_values[i] <= window_median-0.4*window_median and pitch_values[i]*2 < 300:
        pitch_values[i] = pitch_values[i]*2
        print("Drop detected now: " +str(pitch_values[i])+ " at count: " + str(i))
    # Octave jump: value >= 180 % of the window median -> halve it, but only
    # if the halved value stays above 75.
    if pitch_values[i] >= window_median+0.8*window_median and pitch_values[i]/2 > 75:
        pitch_values[i] = pitch_values[i]/2
        print("Increase detected now: " +str(pitch_values[i])+ " at count: " + str(i))


 


# Count how many frames of `pitch_values` fall into each pitch band.
# Fix: the third counter was initialised as `dcount3`, so the first frame in
# the 150-200 band raised NameError on `count3 = count3+1`.
# The bands are mutually exclusive; values below 74.935 (and NaN) are not
# counted in any band.
count1 = 0  # [74.935, 75)
count2 = 0  # [75, 150)
count3 = 0  # [150, 200)
count4 = 0  # [200, 250)
count5 = 0  # [250, 300]
count6 = 0  # (300, inf)

for value in pitch_values:
    if 74.935 <= value < 75:
        count1 += 1
    elif 75 <= value < 150:
        count2 += 1
    elif 150 <= value < 200:
        count3 += 1
    elif 200 <= value < 250:
        count4 += 1
    elif 250 <= value <= 300:
        count5 += 1
    elif value > 300:
        count6 += 1

print(count1)
print(count2)
print(count3)
print(count4)
print(count5)
print(count6)

# Last expression of the cell: shows the whole-track median left over from
# the octave-correction loop above.
median_buffer

In [3]:
def spec_centoid(wave_file, meanF0, stdevF0):
    """Summarise frame-to-frame movement of the spectral centroid relative to F0.

    The audio is loaded with ``librosa.load`` and its spectral-centroid track
    is computed (n_fft=2048, hop_length=512, win_length=1024, Hann window).
    The track is cut into consecutive groups of three frames; the absolute
    difference between successive group means forms a "step" series. That
    series is normalised in two ways - ``step / meanF0`` and
    ``(step - meanF0) / stdevF0`` - and each is summarised.

    Parameters
    ----------
    wave_file : audio file passed to ``librosa.load``.
    meanF0, stdevF0 : mean and standard deviation of F0 used for normalising.

    Returns
    -------
    tuple
        (mean, population SD, median) of ``step / meanF0`` followed by
        (mean, population SD, median) of ``(step - meanF0) / stdevF0``.
    """
    samples, rate = librosa.load(wave_file)
    centroid_track = librosa.feature.spectral_centroid(
        y=samples, sr=rate, S=None, n_fft=2048, hop_length=512,
        freq=None, win_length=1024, window='hann', center=True,
        pad_mode='constant',
    )[0]

    # Absolute change of the 3-frame mean from one group to the next.
    previous_mean = statistics.mean(centroid_track[0:3])
    step_sizes = []
    for start in range(3, len(centroid_track), 3):
        group_mean = statistics.mean(np.absolute(centroid_track[start:start + 3]))
        step_sizes.append(np.absolute(group_mean - previous_mean))
        previous_mean = group_mean

    steps = np.asarray(step_sizes)
    steps_over_mean = steps / meanF0
    steps_standardised = (steps - meanF0) / stdevF0

    return (
        statistics.mean(steps_over_mean),
        statistics.pstdev(steps_over_mean),
        statistics.median(steps_over_mean),
        statistics.mean(steps_standardised),
        statistics.pstdev(steps_standardised),
        statistics.median(steps_standardised),
    )
        

In [4]:
# Measure formants F1-F4 at each glottal pulse and summarise them (mean, median, population SD); the formant-position formula itself is not computed here.
def measureFormants(sound, wave_file, f0min,f0max):
    """Measure formants F1-F4 at every glottal pulse and summarise them.

    Parameters
    ----------
    sound : passed to ``parselmouth.Sound`` (e.g. a wav file path).
    wave_file : not used by this function; kept so existing calls still work.
    f0min, f0max : pitch floor and ceiling for the periodic point process
        that locates the glottal pulses.

    Returns
    -------
    tuple of 12 floats
        f1..f4 mean, f1..f4 median, f1..f4 population standard deviation.

    Raises
    ------
    statistics.StatisticsError
        If a formant has no defined value at any pulse.
    """
    sound = parselmouth.Sound(sound) # read the sound
    # The Pitch object the original built here was never used, so that
    # analysis pass has been dropped.
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    formant_numbers = (1, 2, 3, 4)
    tracks = {number: [] for number in formant_numbers}

    # Measure formants only at glottal pulses (Praat indices are 1-based).
    for point in range(1, numPoints + 1):
        t = call(pointProcess, "Get time from index", point)
        for number in formant_numbers:
            value = call(formants, "Get value at time", number, t, 'Hertz', 'Linear')
            # Praat reports an undefined formant as NaN; leave those out.
            if not math.isnan(value):
                tracks[number].append(value)

    means = [statistics.mean(tracks[number]) for number in formant_numbers]
    medians = [statistics.median(tracks[number]) for number in formant_numbers]
    sds = [statistics.pstdev(tracks[number]) for number in formant_numbers]

    return tuple(means + medians + sds)


In [5]:
def runPCA(df):
    """Project the jitter and shimmer measurements onto two principal components.

    The eleven jitter/shimmer columns of ``df`` are standardised with
    ``StandardScaler`` and reduced with a 2-component ``PCA``.

    Returns a new DataFrame (fresh default index) with columns 'JitterPCA'
    and 'ShimmerPCA', holding the first and second component respectively.
    NOTE(review): the components are fitted on jitter and shimmer jointly,
    so the column names are labels rather than a jitter/shimmer split -
    confirm the intended interpretation.
    """
    voice_quality_columns = [
        'localJitter', 'localabsoluteJitter', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
        'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 'apq11Shimmer', 'ddaShimmer',
    ]
    standardised = StandardScaler().fit_transform(df.loc[:, voice_quality_columns].values)
    components = PCA(n_components=2).fit_transform(standardised)
    return pd.DataFrame(data=components, columns=['JitterPCA', 'ShimmerPCA'])

In [6]:
def demoTableCreate(data_dir="C:/Users/admul/Desktop/School/Thesis/DAICWOZ Data/"):
    """Load the three demographic split files into one table.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the split CSV files. Defaults to the location
        that was previously hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns Participant_ID, PHQ8_Binary, PHQ8_Score and Gender (rows with
        a missing value in any of them are dropped) plus ``data_set``, a
        string giving the source file: "0" for the train split, "1" for the
        dev split and "2" for the test split. Each file's original row index
        is kept, so index labels can repeat.
    """
    demographic_files = ["train_split_Depression_AVEC2017.csv", "dev_split_Depression_AVEC2017.csv", "full_test_split.csv"]
    wanted_columns = ["Participant_ID", "PHQ8_Binary", "PHQ8_Score", "Gender"]

    splits = []
    for k, file_name in enumerate(demographic_files):
        split = pd.read_csv(
            os.path.join(data_dir, file_name),
            usecols=wanted_columns,
            skip_blank_lines=True,
            na_filter=True,
        ).dropna()
        split["data_set"] = str(k)
        splits.append(split)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(splits)


In [7]:
def demographicInfo(patient_id):
    """Look up one participant in the global ``dataDemographic`` table.

    ``patient_id`` is converted with ``float()`` and compared against the
    ``Participant_ID`` column; the first matching row is used.

    Returns the tuple (data_set, PHQ8_Binary, PHQ8_Score, Gender).
    Raises IndexError when no row matches.
    """
    matches = dataDemographic.loc[dataDemographic['Participant_ID'] == float(patient_id)]

    def first_value(column):
        # Read per column so each value keeps its own column dtype.
        return matches[column].iat[0]

    PHQ8_Binary1 = first_value('PHQ8_Binary')
    PHQ8_Score1 = first_value('PHQ8_Score')
    Gender1 = first_value('Gender')
    Data_set1 = first_value('data_set')

    return Data_set1, PHQ8_Binary1, PHQ8_Score1, Gender1
    

In [8]:
def COVAREP(patient_id, path_prefix="C:/insert directory"):
    """Summarise one participant's COVAREP feature file over voiced frames.

    The file ``path_prefix + patient_id + ".csv"`` is read as a header-less
    CSV with 74 columns (F0, VoiceMarker, NAQ, QoQ, H1H2, PSP, MDQ,
    peakSlope, RdShape, Rd_Conf, Creak, MCEP_0..24, HMPDM_0..24,
    HMPDD_0..12). Only rows with ``VoiceMarker == 1`` are summarised.

    Parameters
    ----------
    patient_id : str
        Participant identifier; used in the file name and stored as a float.
    path_prefix : str, optional
        Text placed directly in front of ``patient_id`` to build the path
        (no separator is inserted). Defaults to the previously hard-coded
        placeholder, so existing calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        A single row with ``patient_id`` followed by ``<name>_mean`` for every
        feature, then ``<name>_median``, then ``<name>_sd`` (population
        standard deviation). F0 is reported as ``F0_C_*``; VoiceMarker itself
        is not summarised. NaN input values propagate to the result.

    Raises
    ------
    statistics.StatisticsError
        If the file contains no voiced frames.
    """
    column_names = (
        ['F0', 'VoiceMarker', 'NAQ', 'QoQ', 'H1H2', 'PSP', 'MDQ', 'peakSlope',
         'RdShape', 'Rd_Conf', 'Creak']
        + ['MCEP_%d' % k for k in range(25)]
        + ['HMPDM_%d' % k for k in range(25)]
        + ['HMPDD_%d' % k for k in range(13)]
    )

    dataCOVAREP = pd.read_csv(path_prefix + patient_id + ".csv", sep=',', header=None)
    dataCOVAREP.columns = column_names

    feature_names = [name for name in column_names if name != 'VoiceMarker']
    voiced = dataCOVAREP.loc[dataCOVAREP['VoiceMarker'] == 1, feature_names]
    if voiced.empty:
        # Same exception type the statistics-based implementation raised.
        raise statistics.StatisticsError(
            "no voiced frames (VoiceMarker == 1) for patient " + str(patient_id))

    # One vectorised pass per statistic instead of ~220 pure-Python passes.
    # ddof=0 gives the population standard deviation (statistics.pstdev).
    summaries = (
        ('mean', voiced.mean(skipna=False)),
        ('median', voiced.median(skipna=False)),
        ('sd', voiced.std(ddof=0, skipna=False)),
    )

    row = {'patient_id': float(patient_id)}
    for suffix, per_feature in summaries:
        for name in feature_names:
            label = 'F0_C' if name == 'F0' else name
            row[label + '_' + suffix] = per_feature[name]

    # Building the row first avoids inserting columns one at a time.
    return pd.DataFrame([row])


In [9]:
def band_power_spec(wavefile):
    """Return the share of spectral power found in four 500 Hz wide bands.

    The recording is read with ``librosa.load`` (default arguments), a Welch
    power spectral density is estimated from it, and the mean density inside
    each band (0-500, 500-1000, 1000-1500 and 1500-2000 Hz, both edges
    inclusive) is divided by the sum of the four band means.

    Parameters
    ----------
    wavefile : path of the audio file to analyse.

    Returns
    -------
    tuple
        Four values (band 1 to band 4, in that order) that add up to 1.
    """
    # Local import: the notebook only does `import scipy`, which does not
    # guarantee that the `signal` submodule has been loaded.
    import scipy.signal

    freq_bands = {"band1": [0, 500],
                  "band2": [500, 1000],
                  "band3": [1000, 1500],
                  "band4": [1500, 2000]}

    # Load the file that was passed in. The previous version read the global
    # `wave_file` instead, silently ignoring the argument.
    data, sample_rate = librosa.load(wavefile)

    freq, spectrum = scipy.signal.welch(data, fs=sample_rate, window='hamming', nperseg=100,
                                        noverlap=0, nfft=1024, detrend='constant',
                                        return_onesided=True, scaling='density', axis=-1,
                                        average='mean')

    # Mean spectral density of the bins that fall inside each band.
    mean_power_per_band = [
        np.mean(spectrum[np.logical_and(freq >= low, freq <= high)])
        for low, high in freq_bands.values()
    ]

    # Normalise by the total once instead of recomputing the sum per band.
    total_power = sum(mean_power_per_band)
    (band_1_perct_spec_energy, band_2_perct_spec_energy,
     band_3_perct_spec_energy, band_4_perct_spec_energy) = (
        band_power / total_power for band_power in mean_power_per_band)

    return band_1_perct_spec_energy, band_2_perct_spec_energy, band_3_perct_spec_energy, band_4_perct_spec_energy


#### CODE 

In [10]:
# create lists to put the results
# Each name below is bound to its own, separate empty list.

# file and patient bookkeeping
file_list, patient_id_list = ([] for _ in range(2))

# demographic data
PHQ8_Binary_list, PHQ8_Score_list, Gender_list, data_set_list = ([] for _ in range(4))

# duration, F0 statistics and HNR
(duration_list, mean_F0_list, sd_F0_list,
 min_F0_list, max_F0_list, hnr_list) = ([] for _ in range(6))

# jitter measures
(localJitter_list, localabsoluteJitter_list, rapJitter_list,
 ppq5Jitter_list, ddpJitter_list) = ([] for _ in range(5))

# shimmer measures
(localShimmer_list, localdbShimmer_list, apq3Shimmer_list,
 aqpq5Shimmer_list, apq11Shimmer_list, ddaShimmer_list) = ([] for _ in range(6))

# formant mean / median / sd for f1-f4
f1_mean_list, f2_mean_list, f3_mean_list, f4_mean_list = ([] for _ in range(4))
f1_median_list, f2_median_list, f3_median_list, f4_median_list = ([] for _ in range(4))
f1_sd_list, f2_sd_list, f3_sd_list, f4_sd_list = ([] for _ in range(4))

# spectral centroid measures
(spec_cen_list,
 spec_cen_relF0_mean_mean_list, spec_cen_relF0_mean_sd_list, spec_cen_relF0_mean_median_list,
 spec_cen_m_mean_d_sd_mean_list, spec_cen_m_mean_d_sd_sd_list,
 spec_cen_m_mean_d_sd_median_list) = ([] for _ in range(7))

# relative F0 range measures
(max_rel_F0range_d_mean_list, min_rel_F0range_d_mean_list,
 sd_rel_F0range_d_mean_list, mean_rel_F0range_d_mean_list) = ([] for _ in range(4))
(max_rel_F0range_m_mean_d_sd_list, min_rel_F0range_m_mean_d_sd_list,
 sd_rel_F0range_m_mean_d_sd_list, mean_rel_F0range_m_mean_d_sd_list) = ([] for _ in range(4))

# relative F0 gradient measures
(max_rel_F0grad_d_mean_list, min_rel_F0grad_d_mean_list,
 sd_rel_F0grad_d_mean_list, mean_rel_F0grad_d_mean_list) = ([] for _ in range(4))
(max_rel_F0grad_m_mean_d_sd_list, min_rel_F0grad_m_mean_d_sd_list,
 sd_rel_F0grad_m_mean_d_sd_list, mean_rel_F0grad_m_mean_d_sd_list) = ([] for _ in range(4))

# F0 step deviation
F0_dev_step_mean_list, F0_dev_step_max_list = ([] for _ in range(2))

# spectral energy share per frequency band
(band1_spec_energy_perct_list, band2_spec_energy_perct_list,
 band3_spec_energy_perct_list, band4_spec_energy_perct_list) = ([] for _ in range(4))

In [12]:
# Walk through every recording matched by fileGlob, extract its acoustic
# measures and append them to the accumulator lists (one entry per file).
count = 0

for wave_file in glob.glob(fileGlob):
    print(f"Started File {count}")
    sound = parselmouth.Sound(wave_file)
    # The three characters in front of the last four characters of the path
    # are taken as the patient id.
    patient_id = wave_file[-7:-4]

    # NOTE(review): the table is rebuilt for every file and is not referenced
    # again in this cell - presumably demographicInfo reads it; confirm.
    dataDemographic = demoTableCreate()
    Data_set, PHQ8_Binary, PHQ8_Score, Gender = demographicInfo(patient_id)

    # Source measures, computed with f0min=75 and f0max=300 in Hertz.
    (
        duration, meanF0, minF0, maxF0, stdevF0, hnr,
        localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter,
        localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer,
        max_rel_F0grad_d_mean, min_rel_F0grad_d_mean,
        sd_rel_F0grad_d_mean, mean_rel_F0grad_d_mean,
        max_rel_F0grad_m_mean_d_sd, min_rel_F0grad_m_mean_d_sd,
        sd_rel_F0grad_m_mean_d_sd, mean_rel_F0grad_m_mean_d_sd,
        max_rel_F0range_d_mean, min_rel_F0range_d_mean,
        sd_rel_F0range_d_mean, mean_rel_F0range_d_mean,
        max_rel_F0range_m_mean_d_sd, min_rel_F0range_m_mean_d_sd,
        sd_rel_F0range_m_mean_d_sd, mean_rel_F0range_m_mean_d_sd,
        F0_dev_step_mean, F0_dev_step_max,
    ) = measurePitch(sound, 75, 300, "Hertz")

    # Formant statistics.
    (
        f1_mean, f2_mean, f3_mean, f4_mean,
        f1_median, f2_median, f3_median, f4_median,
        f1_sd, f2_sd, f3_sd, f4_sd,
    ) = measureFormants(sound, wave_file, 75, 300)

    # Spectral centroid statistics; the file's mean F0 and F0 sd are passed in.
    (
        spec_cen_relF0_mean_mean, spec_cen_relF0_mean_sd, spec_cen_relF0_mean_median,
        spec_cen_m_mean_d_sd_mean, spec_cen_m_mean_d_sd_sd, spec_cen_m_mean_d_sd_median,
    ) = spec_centoid(wave_file, meanF0, stdevF0)

    # Share of spectral energy per frequency band.
    (
        band_1_perct_spec_energy, band_2_perct_spec_energy,
        band_3_perct_spec_energy, band_4_perct_spec_energy,
    ) = band_power_spec(wave_file)

    # Each pair is (accumulator list, value measured for this file).
    for accumulator, value in (
        # data pitch based
        (duration_list, duration),
        (mean_F0_list, meanF0),
        (min_F0_list, minF0),
        (max_F0_list, maxF0),
        (sd_F0_list, stdevF0),
        (hnr_list, hnr),
        # data relatives of F0 abs gradient
        (max_rel_F0grad_d_mean_list, max_rel_F0grad_d_mean),
        (min_rel_F0grad_d_mean_list, min_rel_F0grad_d_mean),
        (sd_rel_F0grad_d_mean_list, sd_rel_F0grad_d_mean),
        (mean_rel_F0grad_d_mean_list, mean_rel_F0grad_d_mean),
        (max_rel_F0grad_m_mean_d_sd_list, max_rel_F0grad_m_mean_d_sd),
        (min_rel_F0grad_m_mean_d_sd_list, min_rel_F0grad_m_mean_d_sd),
        (sd_rel_F0grad_m_mean_d_sd_list, sd_rel_F0grad_m_mean_d_sd),
        (mean_rel_F0grad_m_mean_d_sd_list, mean_rel_F0grad_m_mean_d_sd),
        # data relatives of F0 abs range
        (max_rel_F0range_d_mean_list, max_rel_F0range_d_mean),
        (min_rel_F0range_d_mean_list, min_rel_F0range_d_mean),
        (sd_rel_F0range_d_mean_list, sd_rel_F0range_d_mean),
        (mean_rel_F0range_d_mean_list, mean_rel_F0range_d_mean),
        (max_rel_F0range_m_mean_d_sd_list, max_rel_F0range_m_mean_d_sd),
        (min_rel_F0range_m_mean_d_sd_list, min_rel_F0range_m_mean_d_sd),
        (sd_rel_F0range_m_mean_d_sd_list, sd_rel_F0range_m_mean_d_sd),
        (mean_rel_F0range_m_mean_d_sd_list, mean_rel_F0range_m_mean_d_sd),
        (F0_dev_step_mean_list, F0_dev_step_mean),
        (F0_dev_step_max_list, F0_dev_step_max),
        # raw jitter and shimmer measures
        (localJitter_list, localJitter),
        (localabsoluteJitter_list, localabsoluteJitter),
        (rapJitter_list, rapJitter),
        (ppq5Jitter_list, ppq5Jitter),
        (ddpJitter_list, ddpJitter),
        (localShimmer_list, localShimmer),
        (localdbShimmer_list, localdbShimmer),
        (apq3Shimmer_list, apq3Shimmer),
        (aqpq5Shimmer_list, aqpq5Shimmer),
        (apq11Shimmer_list, apq11Shimmer),
        (ddaShimmer_list, ddaShimmer),
        # formant data
        (f1_mean_list, f1_mean),
        (f2_mean_list, f2_mean),
        (f3_mean_list, f3_mean),
        (f4_mean_list, f4_mean),
        (f1_median_list, f1_median),
        (f2_median_list, f2_median),
        (f3_median_list, f3_median),
        (f4_median_list, f4_median),
        (f1_sd_list, f1_sd),
        (f2_sd_list, f2_sd),
        (f3_sd_list, f3_sd),
        (f4_sd_list, f4_sd),
        # spec_cen data
        (spec_cen_relF0_mean_mean_list, spec_cen_relF0_mean_mean),
        (spec_cen_relF0_mean_sd_list, spec_cen_relF0_mean_sd),
        (spec_cen_relF0_mean_median_list, spec_cen_relF0_mean_median),
        (spec_cen_m_mean_d_sd_mean_list, spec_cen_m_mean_d_sd_mean),
        (spec_cen_m_mean_d_sd_sd_list, spec_cen_m_mean_d_sd_sd),
        (spec_cen_m_mean_d_sd_median_list, spec_cen_m_mean_d_sd_median),
        # bandwidth energy percentages
        (band1_spec_energy_perct_list, band_1_perct_spec_energy),
        (band2_spec_energy_perct_list, band_2_perct_spec_energy),
        (band3_spec_energy_perct_list, band_3_perct_spec_energy),
        (band4_spec_energy_perct_list, band_4_perct_spec_energy),
        # patient number data
        (patient_id_list, patient_id),
        # demographic data
        (PHQ8_Binary_list, PHQ8_Binary),
        (PHQ8_Score_list, PHQ8_Score),
        (Gender_list, Gender),
        (data_set_list, Data_set),
    ):
        accumulator.append(value)

    print(f"Finished File {count}")
    count += 1

Started File 0
Finished File 0
Started File 1
Finished File 1
Started File 2
Finished File 2
Started File 3
Finished File 3
Started File 4
Finished File 4
Started File 5
Finished File 5
Started File 6
Finished File 6
Started File 7
Finished File 7
Started File 8
Finished File 8
Started File 9
Finished File 9
Started File 10
Finished File 10
Started File 11
Finished File 11
Started File 12
Finished File 12
Started File 13
Finished File 13
Started File 14
Finished File 14
Started File 15
Finished File 15
Started File 16
Finished File 16
Started File 17
Finished File 17
Started File 18
Finished File 18
Started File 19
Finished File 19
Started File 20
Finished File 20
Started File 21
Finished File 21
Started File 22
Finished File 22
Started File 23
Finished File 23
Started File 24
Finished File 24
Started File 25
Finished File 25
Started File 26
Finished File 26
Started File 27
Finished File 27
Started File 28
Finished File 28
Started File 29
Finished File 29
Started File 30
Finished File 

In [13]:
# Add the data to Pandas



# Each entry pairs an output column name with the accumulator list that fills
# it, so names and data cannot drift out of step.
feature_columns = [
    ('patient_id', patient_id_list),
    ('data_set', data_set_list),
    ('PHQ8_Binary', PHQ8_Binary_list),
    ('PHQ8_Score', PHQ8_Score_list),
    ('Gender', Gender_list),
    ('duration', duration_list),
    ('meanF0Hz', mean_F0_list),
    ('stdevF0Hz', sd_F0_list),
    ('F0_min', min_F0_list),
    ('F0_max', max_F0_list),
    ('Band1_spec_eng_perct', band1_spec_energy_perct_list),
    ('Band2_spec_eng_perct', band2_spec_energy_perct_list),
    ('Band3_spec_eng_perct', band3_spec_energy_perct_list),
    ('Band4_spec_eng_perct', band4_spec_energy_perct_list),
    ('HNR', hnr_list),
    ('localJitter', localJitter_list),
    ('localabsoluteJitter', localabsoluteJitter_list),
    ('rapJitter', rapJitter_list),
    ('ppq5Jitter', ppq5Jitter_list),
    ('ddpJitter', ddpJitter_list),
    ('localShimmer', localShimmer_list),
    ('localdbShimmer', localdbShimmer_list),
    ('apq3Shimmer', apq3Shimmer_list),
    ('apq5Shimmer', aqpq5Shimmer_list),
    ('apq11Shimmer', apq11Shimmer_list),
    ('ddaShimmer', ddaShimmer_list),
    ('f1_mean', f1_mean_list),
    ('f2_mean', f2_mean_list),
    ('f3_mean', f3_mean_list),
    ('f4_mean', f4_mean_list),
    ('f1_median', f1_median_list),
    ('f2_median', f2_median_list),
    ('f3_median', f3_median_list),
    ('f4_median', f4_median_list),
    ('f1_sd', f1_sd_list),
    ('f2_sd', f2_sd_list),
    ('f3_sd', f3_sd_list),
    ('f4_sd', f4_sd_list),
    ('spec_cen_mean_relF0', spec_cen_relF0_mean_mean_list),
    ('spec_cen_sd_relF0', spec_cen_relF0_mean_sd_list),
    ('spec_cen_median_relF0', spec_cen_relF0_mean_median_list),
    ('spec_cen_mean_mF0dsd', spec_cen_m_mean_d_sd_mean_list),
    ('spec_cen_sd_mF0dsd', spec_cen_m_mean_d_sd_sd_list),
    ('spec_cen_median_mF0dsd', spec_cen_m_mean_d_sd_median_list),
    ('max_rel_F0grad_o_mean', max_rel_F0grad_d_mean_list),
    ('min_rel_F0grade_o_mean', min_rel_F0grad_d_mean_list),
    ('sd_rel_F0grad_o_mean', sd_rel_F0grad_d_mean_list),
    ('mean_rel_F0grade_o_mean', mean_rel_F0grad_d_mean_list),
    ('max_rel_F0grad_m_mean_o_sd', max_rel_F0grad_m_mean_d_sd_list),
    ('min_rel_F0grade_m_mean_o_sd', min_rel_F0grad_m_mean_d_sd_list),
    ('sd_rel_F0grad_m_mean_o_sd', sd_rel_F0grad_m_mean_d_sd_list),
    ('mean_rel_F0grade_m_mean_o_sd', mean_rel_F0grad_m_mean_d_sd_list),
    ('max_F0_Range_rel_mean', max_rel_F0range_d_mean_list),
    ('min_F0_Range_rel_mean', min_rel_F0range_d_mean_list),
    ('sd_F0_Range_rel_mean', sd_rel_F0range_d_mean_list),
    ('mean_F0_Range_rel_mean', mean_rel_F0range_d_mean_list),
    ('max_F0_range_m_mean_o_sd', max_rel_F0range_m_mean_d_sd_list),
    ('min_F0_range_m_mean_o_sd', min_rel_F0range_m_mean_d_sd_list),
    ('sd_F0_range_m_mean_o_sd', sd_rel_F0range_m_mean_d_sd_list),
    ('mean_F0_range_m_mean_o_sd', mean_rel_F0range_m_mean_d_sd_list),
    ('F0_absRange_mean', F0_dev_step_mean_list),
    ('F0_absRange_amx', F0_dev_step_max_list),
]

# Stack the lists side by side (one row per recording) and store every column
# as float32.
dataDavid = pd.DataFrame(
    np.column_stack([values for _, values in feature_columns]),
    dtype=np.float32,
    columns=[name for name, _ in feature_columns],
)
pcaData = runPCA(dataDavid) # Run jitter and shimmer PCA
#dataDavid = pd.concat([dataDavid, pcaData], axis=1) # Add PCA data



In [14]:
#dataDavid

In [15]:
# pF: average of the z-scored median frequencies of formants 1 to 4.
dataDavid['pF'] = sum(
    zscore(dataDavid[column])
    for column in ('f1_median', 'f2_median', 'f3_median', 'f4_median')
) / 4

In [16]:
# fdisp: distance between the f4 and f1 medians divided by 3.
dataDavid['fdisp'] = dataDavid['f4_median'].sub(dataDavid['f1_median']).div(3)

In [17]:
# avgFormant: arithmetic mean of the four formant medians.
dataDavid['avgFormant'] = (
    dataDavid['f1_median']
    .add(dataDavid['f2_median'])
    .add(dataDavid['f3_median'])
    .add(dataDavid['f4_median'])
    .div(4)
)

In [18]:
# mff: geometric mean (fourth root of the product) of the four formant medians.
dataDavid['mff'] = (
    dataDavid['f1_median']
    .mul(dataDavid['f2_median'])
    .mul(dataDavid['f3_median'])
    .mul(dataDavid['f4_median'])
    .pow(0.25)
)

In [19]:
# reload the data again
#dataDavid.to_csv("processed_results.csv", index=False)
#dataDavid = pd.read_csv('processed_results.csv', header=0)

# fitch_vtl: each formant median contributes weight * 35000 / (4 * median)
# with weights 1, 3, 5, 7; the four terms are averaged.
# NOTE(review): 35000 is presumably the speed of sound in cm/s - confirm.
dataDavid['fitch_vtl'] = sum(
    weight * (35000 / (4 * dataDavid[column]))
    for weight, column in (
        (1, 'f1_median'),
        (3, 'f2_median'),
        (5, 'f3_median'),
        (7, 'f4_median'),
    )
) / 4

In [20]:
# delta_f: sum(x * y) / sum(x ** 2), with x = 0.5, 1.5, 2.5, 3.5 and y the
# medians of formants 1 to 4.
xysum = sum(
    position * dataDavid[column]
    for position, column in (
        (0.5, 'f1_median'),
        (1.5, 'f2_median'),
        (2.5, 'f3_median'),
        (3.5, 'f4_median'),
    )
)
xsquaredsum = sum(position ** 2 for position in (0.5, 1.5, 2.5, 3.5))
dataDavid['delta_f'] = xysum / xsquaredsum

In [21]:
# vtl_delta_f: 35000 divided by twice the delta_f column.
dataDavid['vtl_delta_f'] = dataDavid['delta_f'].mul(2).rdiv(35000)

In [22]:
#dataDavid

In [23]:
# Suppress every warning from this point on (no category or module filter is given).
import warnings
warnings.filterwarnings("ignore")

In [24]:
# Ratio of the F0 standard deviation to the mean F0.
dataDavid['F0_SD_div_F0_Mean'] = dataDavid['stdevF0Hz'].div(dataDavid['meanF0Hz'])

In [25]:
# Collect the COVAREP result of every recording matched by fileGlob, then
# join the combined table onto dataDavid by patient_id (left join, so every
# dataDavid row is kept).
# NOTE(review): assumes COVAREP(patient_id) returns a DataFrame - confirm.
covarep_frames = []
for count, wave_file in enumerate(glob.glob(fileGlob)):
    print("Started File " + str(count))
    patient_id = wave_file[-7:-4]
    covarep_frames.append(COVAREP(patient_id))
    print("Finished File " + str(count))

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call; concatenate once instead. An empty file list still yields an
# empty frame, as before.
resultsC = pd.concat(covarep_frames) if covarep_frames else pd.DataFrame()

dataDavid = pd.merge(dataDavid, resultsC, on ='patient_id', how='left')

Started File 0
Finished File 0
Started File 1
Finished File 1
Started File 2
Finished File 2
Started File 3
Finished File 3
Started File 4
Finished File 4
Started File 5
Finished File 5
Started File 6
Finished File 6
Started File 7
Finished File 7
Started File 8
Finished File 8
Started File 9
Finished File 9
Started File 10
Finished File 10
Started File 11
Finished File 11
Started File 12
Finished File 12
Started File 13
Finished File 13
Started File 14
Finished File 14
Started File 15
Finished File 15
Started File 16
Finished File 16
Started File 17
Finished File 17
Started File 18
Finished File 18
Started File 19
Finished File 19
Started File 20
Finished File 20
Started File 21
Finished File 21
Started File 22
Finished File 22
Started File 23
Finished File 23
Started File 24
Finished File 24
Started File 25
Finished File 25
Started File 26
Finished File 26
Started File 27
Finished File 27
Started File 28
Finished File 28
Started File 29
Finished File 29
Started File 30
Finished File 

In [26]:
# Read the duration table; the literal text 'Null' is treated as missing.
dataDuration = pd.read_csv(
    'C:/Users/admul/Desktop/School/Thesis/DAICWOZ Data/Altered Audio/DurationData.csv',
    sep=',',
    na_values='Null',
)

# Replace the header with fixed column names.
dataDuration.columns = [
    'patient_id',
    'response_dur_mean', 'response_dur_sd',
    'overlap_mean', 'overlap_sd',
    'response_no_overlap_mean', 'response_no_overlap_sd',
]

# Keep rows that have at least 4 non-missing values and leave out patient 304.
dataDuration = (
    dataDuration
    .dropna(thresh=4)
    .loc[lambda frame: frame['patient_id'] != 304]
)

# Inner join: only patients present in both tables remain in dataDavid.
dataDavid = dataDavid.merge(dataDuration, on='patient_id', how='inner')

In [27]:
# Write the merged feature table to CSV, leaving out the index column.
dataDavid.to_csv('C:/Users/admul/Desktop/School/Thesis/DAICWOZ Data/Altered Audio/Ellie Removed/model_prepped_data_Ellie_Lit_Removed_082722_15b.csv', index=False)

In [None]:
# Display the final feature table as the cell output.
dataDavid

If you want to compare the magnitude of your spectra, you should use scaling='spectrum'. If you compare integrals over a bandwidth, you should use scaling='density'.

https://github.com/scipy/scipy/issues/8651

https://stackoverflow.com/questions/71575836/proper-use-of-window-parameter-for-welch-power-spectral-density-scipy