In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install praat-parselmouth

Collecting praat-parselmouth
[?25l  Downloading https://files.pythonhosted.org/packages/21/b4/3f6c01ac90e2b26d4bb581b2c13e959e1e856e6f63d2ceac006dd695a25b/praat_parselmouth-0.4.0-cp37-cp37m-manylinux2010_x86_64.whl (10.7MB)
[K     |████████████████████████████████| 10.7MB 27.4MB/s 
Installing collected packages: praat-parselmouth
Successfully installed praat-parselmouth-0.4.0


In [None]:
import glob
import numpy as np
import pandas as pd
import parselmouth 
import statistics


from parselmouth.praat import call
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
def measurePitch(voiceID, f0min, f0max, unit):
    sound = parselmouth.Sound(voiceID) # read the sound
    duration = call(sound, "Get total duration") # duration
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max) #create a praat pitch object
    meanF0 = call(pitch, "Get mean", 0, 0, unit) # get mean pitch
    stdevF0 = call(pitch, "Get standard deviation", 0 ,0, unit) # get standard deviation
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
    rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    
    return duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer

In [None]:
def measureFormants(sound, wave_file, f0min,f0max):
    sound = parselmouth.Sound(sound) # read the sound
    pitch = call(sound, "To Pitch (cc)", 0, f0min, 15, 'no', 0.03, 0.45, 0.01, 0.35, 0.14, f0max)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    
    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    f1_list = []
    f2_list = []
    f3_list = []
    f4_list = []
    
    # Measure formants only at glottal pulses
    for point in range(0, numPoints):
        point += 1
        t = call(pointProcess, "Get time from index", point)
        f1 = call(formants, "Get value at time", 1, t, 'Hertz', 'Linear')
        f2 = call(formants, "Get value at time", 2, t, 'Hertz', 'Linear')
        f3 = call(formants, "Get value at time", 3, t, 'Hertz', 'Linear')
        f4 = call(formants, "Get value at time", 4, t, 'Hertz', 'Linear')
        f1_list.append(f1)
        f2_list.append(f2)
        f3_list.append(f3)
        f4_list.append(f4)
    
    f1_list = [f1 for f1 in f1_list if str(f1) != 'nan']
    f2_list = [f2 for f2 in f2_list if str(f2) != 'nan']
    f3_list = [f3 for f3 in f3_list if str(f3) != 'nan']
    f4_list = [f4 for f4 in f4_list if str(f4) != 'nan']
    
    # calculate mean formants across pulses
    f1_mean = statistics.mean(f1_list)
    f2_mean = statistics.mean(f2_list)
    f3_mean = statistics.mean(f3_list)
    f4_mean = statistics.mean(f4_list)
    
    # calculate median formants across pulses, this is what is used in all subsequent calcualtions
    # you can use mean if you want, just edit the code in the boxes below to replace median with mean
    f1_median = statistics.median(f1_list)
    f2_median = statistics.median(f2_list)
    f3_median = statistics.median(f3_list)
    f4_median = statistics.median(f4_list)
    
    return f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median

In [None]:
def runPCA(df):
    # z-score the Jitter and Shimmer measurements
    measures = ['localJitter', 'localabsoluteJitter', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
                'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 'apq11Shimmer', 'ddaShimmer']
    x = df.loc[:, measures].values
    x = StandardScaler().fit_transform(x)
    # PCA
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(x)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['JitterPCA', 'ShimmerPCA'])
    principalDf
    return principalDf

In [None]:
# create lists to put the results
file_list = []
duration_list = []
mean_F0_list = []
sd_F0_list = []
hnr_list = []
localJitter_list = []
localabsoluteJitter_list = []
rapJitter_list = []
ppq5Jitter_list = []
ddpJitter_list = []
localShimmer_list = []
localdbShimmer_list = []
apq3Shimmer_list = []
aqpq5Shimmer_list = []
apq11Shimmer_list = []
ddaShimmer_list = []
f1_mean_list = []
f2_mean_list = []
f3_mean_list = []
f4_mean_list = []
f1_median_list = []
f2_median_list = []
f3_median_list = []
f4_median_list = []

# Go through all the wave files in the folder and measure all the acoustics
for wave_file in glob.glob("/content/drive/MyDrive/IAA Trabajos/Modelos/pva_wav_sanos/*.wav"):
    sound = parselmouth.Sound(wave_file)
    (duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, 
     localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer) = measurePitch(
        sound, 75, 300, "Hertz")
    (f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median) = measureFormants(sound, wave_file, 75, 300)
    file_list.append(wave_file) # make an ID list
    duration_list.append(duration) # make duration list
    mean_F0_list.append(meanF0) # make a mean F0 list
    sd_F0_list.append(stdevF0) # make a sd F0 list
    hnr_list.append(hnr) #add HNR data
    
    # add raw jitter and shimmer measures
    localJitter_list.append(localJitter)
    localabsoluteJitter_list.append(localabsoluteJitter)
    rapJitter_list.append(rapJitter)
    ppq5Jitter_list.append(ppq5Jitter)
    ddpJitter_list.append(ddpJitter)
    localShimmer_list.append(localShimmer)
    localdbShimmer_list.append(localdbShimmer)
    apq3Shimmer_list.append(apq3Shimmer)
    aqpq5Shimmer_list.append(aqpq5Shimmer)
    apq11Shimmer_list.append(apq11Shimmer)
    ddaShimmer_list.append(ddaShimmer)
    
    # add the formant data
    f1_mean_list.append(f1_mean)
    f2_mean_list.append(f2_mean)
    f3_mean_list.append(f3_mean)
    f4_mean_list.append(f4_mean)
    f1_median_list.append(f1_median)
    f2_median_list.append(f2_median)
    f3_median_list.append(f3_median)
    f4_median_list.append(f4_median)

In [None]:
print(mean_F0_list)

[90.78417354490149, 136.81301242840948, 82.17452141402372, 114.82435669590275, 96.93112710895359, 132.71246836274003, 115.70040125506203, 116.85612316302634, 138.93561043626036, 144.29487540382843, 147.04510531747383, 129.81403305456016, 122.94046590007326, 150.54156937735135, 111.19873671000292, 94.1852976299248, 137.3434143633595, 152.9463435314179, 134.23359411685368, 102.61037672062243, 123.7565020784319, 116.99002462944496, 134.93284622507718, 151.61342370959466, 141.41776025055614, 115.62314753755949, 131.74146549679836, 159.2484674077816, 124.32862811537606, 107.1204427003632, 99.82456764355197, 118.96775277115091, 103.0617574316006, 141.22105757019722, 125.19165140965242, 143.97126159463394, 107.09722125586673, 107.96993092923488, 88.49746894836133, 138.0073426869155, 95.75729860873477, 106.34449346931152, 119.28371283664977, 134.11545031139676, 125.58687406440076, 146.60447238660805, 134.97489428995505, 114.07388750594774, 117.15646601323667, 143.1712831629671, 124.44812507836

In [None]:
# Add the data to Pandas
df = pd.DataFrame(np.column_stack([file_list, duration_list, mean_F0_list, sd_F0_list, hnr_list, 
                                   localJitter_list, localabsoluteJitter_list, rapJitter_list, 
                                   ppq5Jitter_list, ddpJitter_list, localShimmer_list, 
                                   localdbShimmer_list, apq3Shimmer_list, aqpq5Shimmer_list, 
                                   apq11Shimmer_list, ddaShimmer_list, f1_mean_list, 
                                   f2_mean_list, f3_mean_list, f4_mean_list, 
                                   f1_median_list, f2_median_list, f3_median_list, 
                                   f4_median_list]),
                                   columns=['voiceID', 'duration', 'meanF0Hz', 'stdevF0Hz', 'HNR', 
                                            'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                            'ppq5Jitter', 'ddpJitter', 'localShimmer', 
                                            'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                            'apq11Shimmer', 'ddaShimmer', 'f1_mean', 'f2_mean', 
                                            'f3_mean', 'f4_mean', 'f1_median', 
                                            'f2_median', 'f3_median', 'f4_median'])

#pcaData = runPCA(df) # Run jitter and shimmer PCA
#df = pd.concat([df, pcaData], axis=1) # Add PCA data
# reload the data so it's all numbers
df.to_csv("processed_results_sanos.csv", index=False)
df = pd.read_csv('processed_results_sanos.csv', header=0)
df.sort_values('voiceID').head(20)

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
41,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.1312,106.344493,3.130821,21.645715,0.006009,5.7e-05,0.002307,0.003618,0.00692,0.031649,0.280745,0.016429,0.019516,0.027407,0.049287,493.748977,1031.263321,2279.838566,4140.811754,492.531721,1030.645536,2280.24838,4143.74793
38,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.81562,88.497469,1.028905,23.212947,0.005423,6.1e-05,0.002853,0.003167,0.008558,0.021791,0.189909,0.010951,0.013614,0.020802,0.032853,600.681937,1068.75917,2434.914151,3590.283662,598.690548,1063.262166,2426.356613,3593.113387
39,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.58746,138.007343,1.097803,26.181949,0.00202,1.5e-05,0.000929,0.001232,0.002786,0.018728,0.16332,0.009198,0.011864,0.017983,0.027594,665.955223,1074.512144,2328.966716,3668.15797,663.081941,1074.340122,2330.274037,3667.759518
40,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.14072,95.757299,0.873109,23.465534,0.003639,3.8e-05,0.001888,0.002342,0.005665,0.020419,0.172523,0.009435,0.012199,0.018751,0.028306,563.992356,1099.115655,2522.007446,3357.26919,558.067346,1098.611914,2522.02381,3365.725831
110,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.87264,206.823746,0.989594,25.391994,0.002498,1.2e-05,0.001451,0.001488,0.004352,0.026423,0.229471,0.012609,0.015719,0.027528,0.037827,800.349069,1191.892927,2507.268873,4219.88259,803.571806,1219.282305,2556.203083,4217.815923
43,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,0.61788,134.11545,0.601765,31.046801,0.001992,1.5e-05,0.000872,0.001222,0.002616,0.014474,0.125855,0.006772,0.008998,0.015632,0.020317,625.315342,989.80134,2406.66262,3854.594934,623.790526,985.98425,2408.213415,3924.181564
42,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.33082,119.283713,1.177208,26.246406,0.002241,1.9e-05,0.001167,0.001375,0.0035,0.023878,0.206397,0.012157,0.01552,0.023621,0.036472,614.55319,1159.485242,2212.696772,3308.420499,614.792179,1161.423654,2217.060004,3297.644274
45,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.42586,146.604472,1.520067,25.059149,0.0031,2.1e-05,0.00164,0.001641,0.004921,0.031854,0.273764,0.017702,0.020661,0.024765,0.053107,567.236045,948.313997,2211.340681,3501.768679,564.454176,937.081183,2228.554959,3509.412157
44,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.68252,125.586874,1.053643,23.583349,0.004986,4e-05,0.002754,0.002938,0.008262,0.021177,0.184988,0.009764,0.013404,0.023536,0.029291,502.074269,1085.820822,2421.120099,3744.687127,501.33877,1088.196967,2420.263505,3728.257022
109,/content/drive/MyDrive/IAA Trabajos/pva_wav_sa...,1.93918,234.610097,1.401614,29.238748,0.002331,1e-05,0.001321,0.001312,0.003963,0.01413,0.12314,0.007228,0.008991,0.013315,0.021685,894.672033,1391.487984,3039.764077,3875.266242,945.979232,1407.165458,3144.511709,3910.195504


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 24 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   voiceID              144 non-null    object 
 1   duration             144 non-null    float64
 2   meanF0Hz             144 non-null    float64
 3   stdevF0Hz            144 non-null    float64
 4   HNR                  144 non-null    float64
 5   localJitter          144 non-null    float64
 6   localabsoluteJitter  144 non-null    float64
 7   rapJitter            144 non-null    float64
 8   ppq5Jitter           144 non-null    float64
 9   ddpJitter            144 non-null    float64
 10  localShimmer         144 non-null    float64
 11  localdbShimmer       144 non-null    float64
 12  apq3Shimmer          144 non-null    float64
 13  apq5Shimmer          144 non-null    float64
 14  apq11Shimmer         144 non-null    float64
 15  ddaShimmer           144 non-null    flo

In [None]:
df.voiceID = df.voiceID.str.replace('/content/drive/MyDrive/IAA Trabajos/Modelos/pva_wav_sanos/','')

In [None]:
df.head(10)

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,9-a_n.wav,0.83818,90.784174,0.947235,25.407817,0.006059,6.7e-05,0.003237,0.003859,0.009712,0.019526,0.171497,0.00793,0.011484,0.027055,0.023789,641.228106,1072.190481,2136.205482,3374.665433,641.699583,1072.672415,2136.637191,3379.368924
1,43-a_n.wav,0.84934,136.813012,0.751588,27.096361,0.001998,1.5e-05,0.001014,0.001222,0.003041,0.009837,0.085955,0.00542,0.005866,0.007634,0.01626,687.991629,1159.419654,2178.392632,3366.234438,687.554816,1159.95748,2179.437399,3366.339045
2,98-a_n.wav,0.90162,82.174521,1.020332,22.921196,0.004288,5.2e-05,0.001936,0.002594,0.005807,0.043013,0.369315,0.016265,0.025184,0.044935,0.048795,680.371789,1102.93773,2609.615425,3414.517949,684.944218,1102.338219,2615.2574,3419.859496
3,706-a_n.wav,0.81964,114.824357,0.969736,20.727258,0.004635,4e-05,0.002372,0.002763,0.007116,0.045824,0.402363,0.024257,0.028993,0.04092,0.072772,641.543794,1184.566012,2187.001724,4055.033562,657.455262,1193.523262,2186.825689,4037.905096
4,679-a_n.wav,1.68502,96.931127,1.30441,23.928955,0.003973,4.1e-05,0.001891,0.002115,0.005672,0.021713,0.187253,0.010614,0.012648,0.022631,0.031842,675.528536,1116.511773,2724.800406,3247.023765,672.207101,1115.429549,2725.685239,3247.704953
5,860-a_n.wav,1.13794,132.712468,3.174869,25.634514,0.003778,2.8e-05,0.001852,0.002077,0.005556,0.025115,0.223945,0.013409,0.015427,0.02005,0.040227,640.658393,1096.721985,2322.065879,3530.87954,634.039106,1095.374393,2341.931565,3512.960645
6,957-a_n.wav,1.26272,115.700401,1.404188,22.842807,0.003079,2.7e-05,0.001188,0.001436,0.003563,0.024956,0.21869,0.013515,0.015975,0.020189,0.040545,641.963767,1097.731706,2470.129013,3702.121769,638.702381,1100.549146,2464.983432,3685.354801
7,955-a_n.wav,0.73694,116.856123,1.943195,26.247186,0.003607,3.1e-05,0.001215,0.00163,0.003644,0.020239,0.176203,0.008775,0.010732,0.019424,0.026325,610.005533,981.264246,2217.50922,3426.126149,609.326472,976.137374,2219.462982,3422.863898
8,956-a_n.wav,0.87786,138.93561,1.845303,16.85793,0.007746,5.6e-05,0.004145,0.004857,0.012435,0.039984,0.357861,0.020502,0.023894,0.032005,0.061507,633.147582,1075.281435,2621.712859,3996.920404,638.300107,1088.354362,2620.43217,4010.609276
9,951-a_n.wav,1.233,144.294875,1.115464,27.801579,0.002045,1.4e-05,0.000954,0.001237,0.002861,0.012072,0.104307,0.00575,0.007842,0.011338,0.017251,648.449774,1221.834022,2603.921877,3741.922455,646.713433,1231.703201,2585.734378,3737.485829


In [None]:
df_sanos_genero_sexo = pd.read_excel('/content/drive/MyDrive/IAA Trabajos/Modelos/sanos.xlsx')

In [None]:
df_sanos_genero_sexo.head(5)

Unnamed: 0,NRO,IDAUDIO,EDAD,GENERO
0,9,9-a_n.wav,25,M
1,23,23-a_n.wav,26,F
2,25,25-a_n.wav,27,F
3,26,26-a_n.wav,26,F
4,33,33-a_n.wav,27,F


Haciendo el merge

In [None]:
df_sanos = pd.merge(df, pd.DataFrame(df_sanos_genero_sexo,columns = ['IDAUDIO','EDAD','GENERO']) , left_on='voiceID', right_on='IDAUDIO')

In [None]:
df_sanos

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,IDAUDIO,EDAD,GENERO
0,9-a_n.wav,0.83818,90.784174,0.947235,25.407817,0.006059,0.000067,0.003237,0.003859,0.009712,0.019526,0.171497,0.007930,0.011484,0.027055,0.023789,641.228106,1072.190481,2136.205482,3374.665433,641.699583,1072.672415,2136.637191,3379.368924,9-a_n.wav,25,M
1,43-a_n.wav,0.84934,136.813012,0.751588,27.096361,0.001998,0.000015,0.001014,0.001222,0.003041,0.009837,0.085955,0.005420,0.005866,0.007634,0.016260,687.991629,1159.419654,2178.392632,3366.234438,687.554816,1159.957480,2179.437399,3366.339045,43-a_n.wav,32,M
2,98-a_n.wav,0.90162,82.174521,1.020332,22.921196,0.004288,0.000052,0.001936,0.002594,0.005807,0.043013,0.369315,0.016265,0.025184,0.044935,0.048795,680.371789,1102.937730,2609.615425,3414.517949,684.944218,1102.338219,2615.257400,3419.859496,98-a_n.wav,27,M
3,706-a_n.wav,0.81964,114.824357,0.969736,20.727258,0.004635,0.000040,0.002372,0.002763,0.007116,0.045824,0.402363,0.024257,0.028993,0.040920,0.072772,641.543794,1184.566012,2187.001724,4055.033562,657.455262,1193.523262,2186.825689,4037.905096,706-a_n.wav,27,M
4,679-a_n.wav,1.68502,96.931127,1.304410,23.928955,0.003973,0.000041,0.001891,0.002115,0.005672,0.021713,0.187253,0.010614,0.012648,0.022631,0.031842,675.528536,1116.511773,2724.800406,3247.023765,672.207101,1115.429549,2725.685239,3247.704953,679-a_n.wav,29,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2040-a_n.wav,1.47340,233.085114,1.261945,24.920506,0.002633,0.000011,0.001419,0.001705,0.004257,0.019656,0.172557,0.009137,0.012375,0.020402,0.027410,636.099570,1420.801066,2495.878129,4213.178157,640.604560,1419.040430,2497.791388,4207.868930,2040-a_n.wav,26,F
140,2206-a_n.wav,1.93166,226.738675,1.483066,19.585316,0.005091,0.000022,0.002969,0.003152,0.008907,0.033643,0.299413,0.018620,0.020871,0.026655,0.055860,802.176490,1451.605153,2801.535410,4029.009589,805.163043,1448.675822,2802.416454,4024.320392,2206-a_n.wav,26,F
141,2250-a_n.wav,0.71944,224.485784,1.676993,22.927223,0.004787,0.000021,0.002603,0.003395,0.007809,0.023115,0.200756,0.012485,0.014693,0.018622,0.037455,780.952966,1263.634291,2153.119867,3896.618065,800.234704,1349.095660,2316.573070,4256.561518,2250-a_n.wav,29,F
142,2203-a_n.wav,0.71770,228.453707,3.780583,26.383066,0.002159,0.000009,0.001233,0.001274,0.003699,0.022832,0.198038,0.012298,0.014395,0.018396,0.036893,894.175393,1393.370278,2736.448972,3763.905466,892.456542,1393.817853,2738.257683,3765.157281,2203-a_n.wav,26,F


In [None]:
media_audio_sanos = df_sanos.duration.mean()

In [None]:
media_audio_sanos

1.3441861111111106

In [None]:
df_sanos

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,IDAUDIO,EDAD,GENERO
0,9-a_n.wav,0.83818,90.784174,0.947235,25.407817,0.006059,0.000067,0.003237,0.003859,0.009712,0.019526,0.171497,0.007930,0.011484,0.027055,0.023789,641.228106,1072.190481,2136.205482,3374.665433,641.699583,1072.672415,2136.637191,3379.368924,9-a_n.wav,25,M
1,43-a_n.wav,0.84934,136.813012,0.751588,27.096361,0.001998,0.000015,0.001014,0.001222,0.003041,0.009837,0.085955,0.005420,0.005866,0.007634,0.016260,687.991629,1159.419654,2178.392632,3366.234438,687.554816,1159.957480,2179.437399,3366.339045,43-a_n.wav,32,M
2,98-a_n.wav,0.90162,82.174521,1.020332,22.921196,0.004288,0.000052,0.001936,0.002594,0.005807,0.043013,0.369315,0.016265,0.025184,0.044935,0.048795,680.371789,1102.937730,2609.615425,3414.517949,684.944218,1102.338219,2615.257400,3419.859496,98-a_n.wav,27,M
3,706-a_n.wav,0.81964,114.824357,0.969736,20.727258,0.004635,0.000040,0.002372,0.002763,0.007116,0.045824,0.402363,0.024257,0.028993,0.040920,0.072772,641.543794,1184.566012,2187.001724,4055.033562,657.455262,1193.523262,2186.825689,4037.905096,706-a_n.wav,27,M
4,679-a_n.wav,1.68502,96.931127,1.304410,23.928955,0.003973,0.000041,0.001891,0.002115,0.005672,0.021713,0.187253,0.010614,0.012648,0.022631,0.031842,675.528536,1116.511773,2724.800406,3247.023765,672.207101,1115.429549,2725.685239,3247.704953,679-a_n.wav,29,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2040-a_n.wav,1.47340,233.085114,1.261945,24.920506,0.002633,0.000011,0.001419,0.001705,0.004257,0.019656,0.172557,0.009137,0.012375,0.020402,0.027410,636.099570,1420.801066,2495.878129,4213.178157,640.604560,1419.040430,2497.791388,4207.868930,2040-a_n.wav,26,F
140,2206-a_n.wav,1.93166,226.738675,1.483066,19.585316,0.005091,0.000022,0.002969,0.003152,0.008907,0.033643,0.299413,0.018620,0.020871,0.026655,0.055860,802.176490,1451.605153,2801.535410,4029.009589,805.163043,1448.675822,2802.416454,4024.320392,2206-a_n.wav,26,F
141,2250-a_n.wav,0.71944,224.485784,1.676993,22.927223,0.004787,0.000021,0.002603,0.003395,0.007809,0.023115,0.200756,0.012485,0.014693,0.018622,0.037455,780.952966,1263.634291,2153.119867,3896.618065,800.234704,1349.095660,2316.573070,4256.561518,2250-a_n.wav,29,F
142,2203-a_n.wav,0.71770,228.453707,3.780583,26.383066,0.002159,0.000009,0.001233,0.001274,0.003699,0.022832,0.198038,0.012298,0.014395,0.018396,0.036893,894.175393,1393.370278,2736.448972,3763.905466,892.456542,1393.817853,2738.257683,3765.157281,2203-a_n.wav,26,F


AUDIOS PARKINSON

In [None]:
# create lists to put the results
file_list = []
duration_list = []
mean_F0_list = []
sd_F0_list = []
hnr_list = []
localJitter_list = []
localabsoluteJitter_list = []
rapJitter_list = []
ppq5Jitter_list = []
ddpJitter_list = []
localShimmer_list = []
localdbShimmer_list = []
apq3Shimmer_list = []
aqpq5Shimmer_list = []
apq11Shimmer_list = []
ddaShimmer_list = []
f1_mean_list = []
f2_mean_list = []
f3_mean_list = []
f4_mean_list = []
f1_median_list = []
f2_median_list = []
f3_median_list = []
f4_median_list = []

# Go through all the wave files in the folder and measure all the acoustics
n=0
for wave_file in glob.glob("/content/drive/MyDrive/IAA Trabajos/Modelos/pva_wav_parkinson/*.wav"):
    sound = parselmouth.Sound(wave_file).extract_part(to_time=media_audio_sanos, preserve_times=True)
    ##sound = parselmouth.Sound(wave_file)
    try:
      (duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, 
      localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer) = measurePitch(sound, 75, 300, "Hertz")
      (f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median) = measureFormants(sound, wave_file, 75, 300)
      file_list.append(wave_file) # make an ID list
      duration_list.append(duration) # make duration list
      mean_F0_list.append(meanF0) # make a mean F0 list
      sd_F0_list.append(stdevF0) # make a sd F0 list
      hnr_list.append(hnr) #add HNR data
      
      # add raw jitter and shimmer measures
      localJitter_list.append(localJitter)
      localabsoluteJitter_list.append(localabsoluteJitter)
      rapJitter_list.append(rapJitter)
      ppq5Jitter_list.append(ppq5Jitter)
      ddpJitter_list.append(ddpJitter)
      localShimmer_list.append(localShimmer)
      localdbShimmer_list.append(localdbShimmer)
      apq3Shimmer_list.append(apq3Shimmer)
      aqpq5Shimmer_list.append(aqpq5Shimmer)
      apq11Shimmer_list.append(apq11Shimmer)
      ddaShimmer_list.append(ddaShimmer)
      
      # add the formant data
      f1_mean_list.append(f1_mean)
      f2_mean_list.append(f2_mean)
      f3_mean_list.append(f3_mean)
      f4_mean_list.append(f4_mean)
      f1_median_list.append(f1_median)
      f2_median_list.append(f2_median)
      f3_median_list.append(f3_median)
      f4_median_list.append(f4_median)
      n=n+1
      print(n)

    except statistics.StatisticsError:
      print("Error en el audio N°: ",n)
      continue
    

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
Error en el audio N°:  40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
Error en el audio N°:  126
127
128
129
130
131
Error en el audio N°:  131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
Error en el audio N°:  199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
2

In [None]:
len(file_list)

381

In [None]:
# Add the data to Pandas
df1 = pd.DataFrame(np.column_stack([file_list, duration_list, mean_F0_list, sd_F0_list, hnr_list, 
                                   localJitter_list, localabsoluteJitter_list, rapJitter_list, 
                                   ppq5Jitter_list, ddpJitter_list, localShimmer_list, 
                                   localdbShimmer_list, apq3Shimmer_list, aqpq5Shimmer_list, 
                                   apq11Shimmer_list, ddaShimmer_list, f1_mean_list, 
                                   f2_mean_list, f3_mean_list, f4_mean_list, 
                                   f1_median_list, f2_median_list, f3_median_list, 
                                   f4_median_list]),
                                   columns=['voiceID', 'duration', 'meanF0Hz', 'stdevF0Hz', 'HNR', 
                                            'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                            'ppq5Jitter', 'ddpJitter', 'localShimmer', 
                                            'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                            'apq11Shimmer', 'ddaShimmer', 'f1_mean', 'f2_mean', 
                                            'f3_mean', 'f4_mean', 'f1_median', 
                                            'f2_median', 'f3_median', 'f4_median'])

"""pcaData = runPCA(df1) # Run jitter and shimmer PCA"""
#df1 = pd.concat([df1, pcaData], axis=1) # Add PCA data
# reload the data so it's all numbers
df1.to_csv("processed_results_parkinson.csv", index=False)
df1 = pd.read_csv('processed_results_parkinson.csv', header=0)
df1.sort_values('voiceID').head(20)

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
124,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,208.119435,7.16564,20.518833,0.004767,2.3e-05,0.002653,0.002381,0.007958,0.03831,0.347434,0.021208,0.023568,0.026405,0.063625,724.38447,1355.130801,1615.180044,2833.185159,707.708899,1370.012995,1587.878195,2826.024034
338,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,216.260595,3.315187,17.560326,0.004744,2.2e-05,0.002629,0.002636,0.007887,0.075709,0.694425,0.040798,0.047434,0.06258,0.122394,878.065978,1330.581542,1621.925941,3008.854533,877.235528,1341.082674,1586.47298,3011.783424
2,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,145.216545,47.756632,18.180372,0.006082,4.2e-05,0.002934,0.003381,0.008802,0.038977,0.372766,0.020415,0.023436,0.028529,0.061245,705.966373,1089.058662,1847.852808,2880.268414,699.242806,1084.103901,1860.582235,2879.436019
122,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,123.572036,2.569603,17.745942,0.006029,4.9e-05,0.002584,0.003242,0.007751,0.054718,0.486278,0.028789,0.035105,0.046422,0.086367,668.915432,1186.011955,2317.299632,2968.926517,667.551582,1189.716325,2315.088245,2941.217579
364,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,221.402817,5.345403,21.275465,0.003231,1.5e-05,0.001226,0.001735,0.003677,0.03496,0.301582,0.014848,0.021836,0.038813,0.044544,838.731088,1150.628264,1561.071097,2973.402758,859.150298,1223.568332,1421.494896,2970.539123
308,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,214.640097,4.028785,17.900713,0.005841,2.7e-05,0.003125,0.004012,0.009374,0.047697,0.422048,0.025723,0.030637,0.035169,0.077169,709.226541,1276.260753,1847.354805,2962.101914,703.979776,1269.237922,1824.378779,2994.757361
71,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,155.801664,1.102093,16.074331,0.003502,2.2e-05,0.00202,0.002101,0.00606,0.063016,0.550154,0.037272,0.036575,0.041491,0.111815,750.517352,1272.737565,2448.470712,2902.181539,750.101362,1272.507219,2451.515631,2901.206146
329,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,264.866972,1.707236,1.553103,0.064643,0.000252,0.031329,0.032897,0.093986,0.297195,1.847494,0.103162,,,0.309485,736.48698,1087.480827,1909.515147,2757.766825,746.127651,1018.056998,1903.980799,2737.952906
202,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,156.350516,3.630683,21.389564,0.003136,2e-05,0.000829,0.001266,0.002486,0.050152,0.438405,0.026982,0.03444,0.04443,0.080947,729.747787,1234.085845,2101.048182,2912.976605,731.971201,1245.242409,2092.160159,2910.231696
100,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,249.747861,46.574699,7.984642,0.035431,0.000141,0.018017,0.020376,0.054052,0.125696,1.061414,0.044645,0.053582,0.067949,0.133934,581.695512,1146.666957,2048.866117,2800.856308,601.520233,1154.137969,2115.849936,2776.073803


In [None]:
df1

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,200.839457,8.170530,16.541212,0.010257,0.000051,0.004700,0.006250,0.014099,0.043037,0.442595,0.021867,0.028317,0.035232,0.065602,812.484547,1229.509608,1891.952253,3013.080402,814.333564,1229.865457,1859.626617,3029.760981
1,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,214.720424,31.898131,8.979777,0.022646,0.000104,0.011647,0.012473,0.034942,0.120260,1.022657,0.061139,0.079808,0.106589,0.183417,680.121589,1200.468374,1773.776707,3035.635154,696.239767,1207.559266,1771.173980,3044.784242
2,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,145.216545,47.756632,18.180372,0.006082,0.000042,0.002934,0.003381,0.008802,0.038977,0.372766,0.020415,0.023436,0.028529,0.061245,705.966373,1089.058662,1847.852808,2880.268414,699.242806,1084.103901,1860.582235,2879.436019
3,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,221.788614,7.236109,12.807497,0.023551,0.000107,0.012546,0.015432,0.037638,0.073603,0.801078,0.036423,0.051388,0.062333,0.109268,755.080119,1342.718510,2433.654324,3374.342223,912.367103,1346.606937,2839.049219,3660.059876
4,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,135.297177,11.119186,3.296878,0.070588,0.000532,0.041229,0.052293,0.123688,0.198787,1.677030,0.071443,,,0.214330,670.433360,1128.772415,2170.207770,2998.324953,671.030343,1128.155931,2174.232334,2999.079586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,108.416125,5.114389,18.428638,0.007969,0.000074,0.002903,0.004255,0.008709,0.050586,0.474273,0.023114,0.031514,0.044150,0.069343,657.864950,1131.370475,2240.672396,2702.532044,660.732355,1135.279467,2233.482079,2668.037320
377,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,147.886980,2.696971,20.096379,0.003681,0.000025,0.001669,0.001991,0.005008,0.054365,0.530526,0.029984,0.032616,0.040272,0.089952,657.593807,1056.510262,2399.724944,3006.883565,657.732345,1047.193691,2471.422532,3015.213716
378,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,153.668240,14.556438,13.405543,0.004981,0.000032,0.002161,0.002535,0.006483,0.108995,1.060263,0.051857,0.067747,0.079355,0.155572,777.349936,1183.406754,1866.143043,3085.955342,755.216794,1191.496349,1868.048512,3099.075209
379,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,201.646817,2.471537,20.868889,0.002799,0.000014,0.001376,0.001551,0.004127,0.042447,0.358815,0.023672,0.025341,0.028839,0.071015,825.899948,1061.369201,1641.099739,3052.200468,820.763734,1038.761701,1651.811710,3057.694567


In [None]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381 entries, 0 to 380
Data columns (total 24 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   voiceID              381 non-null    object 
 1   duration             381 non-null    float64
 2   meanF0Hz             381 non-null    float64
 3   stdevF0Hz            381 non-null    float64
 4   HNR                  381 non-null    float64
 5   localJitter          381 non-null    float64
 6   localabsoluteJitter  381 non-null    float64
 7   rapJitter            381 non-null    float64
 8   ppq5Jitter           380 non-null    float64
 9   ddpJitter            381 non-null    float64
 10  localShimmer         381 non-null    float64
 11  localdbShimmer       381 non-null    float64
 12  apq3Shimmer          379 non-null    float64
 13  apq5Shimmer          375 non-null    float64
 14  apq11Shimmer         368 non-null    float64
 15  ddaShimmer           379 non-null    flo

In [None]:
df1

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,200.839457,8.170530,16.541212,0.010257,0.000051,0.004700,0.006250,0.014099,0.043037,0.442595,0.021867,0.028317,0.035232,0.065602,812.484547,1229.509608,1891.952253,3013.080402,814.333564,1229.865457,1859.626617,3029.760981
1,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,214.720424,31.898131,8.979777,0.022646,0.000104,0.011647,0.012473,0.034942,0.120260,1.022657,0.061139,0.079808,0.106589,0.183417,680.121589,1200.468374,1773.776707,3035.635154,696.239767,1207.559266,1771.173980,3044.784242
2,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,145.216545,47.756632,18.180372,0.006082,0.000042,0.002934,0.003381,0.008802,0.038977,0.372766,0.020415,0.023436,0.028529,0.061245,705.966373,1089.058662,1847.852808,2880.268414,699.242806,1084.103901,1860.582235,2879.436019
3,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,221.788614,7.236109,12.807497,0.023551,0.000107,0.012546,0.015432,0.037638,0.073603,0.801078,0.036423,0.051388,0.062333,0.109268,755.080119,1342.718510,2433.654324,3374.342223,912.367103,1346.606937,2839.049219,3660.059876
4,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,135.297177,11.119186,3.296878,0.070588,0.000532,0.041229,0.052293,0.123688,0.198787,1.677030,0.071443,,,0.214330,670.433360,1128.772415,2170.207770,2998.324953,671.030343,1128.155931,2174.232334,2999.079586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,108.416125,5.114389,18.428638,0.007969,0.000074,0.002903,0.004255,0.008709,0.050586,0.474273,0.023114,0.031514,0.044150,0.069343,657.864950,1131.370475,2240.672396,2702.532044,660.732355,1135.279467,2233.482079,2668.037320
377,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,147.886980,2.696971,20.096379,0.003681,0.000025,0.001669,0.001991,0.005008,0.054365,0.530526,0.029984,0.032616,0.040272,0.089952,657.593807,1056.510262,2399.724944,3006.883565,657.732345,1047.193691,2471.422532,3015.213716
378,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,153.668240,14.556438,13.405543,0.004981,0.000032,0.002161,0.002535,0.006483,0.108995,1.060263,0.051857,0.067747,0.079355,0.155572,777.349936,1183.406754,1866.143043,3085.955342,755.216794,1191.496349,1868.048512,3099.075209
379,/content/drive/MyDrive/IAA Trabajos/pva_wav_pa...,1.344186,201.646817,2.471537,20.868889,0.002799,0.000014,0.001376,0.001551,0.004127,0.042447,0.358815,0.023672,0.025341,0.028839,0.071015,825.899948,1061.369201,1641.099739,3052.200468,820.763734,1038.761701,1651.811710,3057.694567


In [None]:
df1.voiceID = df1.voiceID.str.replace('/content/drive/MyDrive/IAA Trabajos/Modelos/pva_wav_parkinson/','')

In [None]:
df1

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,pva_4486749_2014-01-07-195653.wav,1.344186,200.839457,8.170530,16.541212,0.010257,0.000051,0.004700,0.006250,0.014099,0.043037,0.442595,0.021867,0.028317,0.035232,0.065602,812.484547,1229.509608,1891.952253,3013.080402,814.333564,1229.865457,1859.626617,3029.760981
1,pva_7427219_2014-01-07-231044.wav,1.344186,214.720424,31.898131,8.979777,0.022646,0.000104,0.011647,0.012473,0.034942,0.120260,1.022657,0.061139,0.079808,0.106589,0.183417,680.121589,1200.468374,1773.776707,3035.635154,696.239767,1207.559266,1771.173980,3044.784242
2,pva_0099762_2014-01-08-001919.wav,1.344186,145.216545,47.756632,18.180372,0.006082,0.000042,0.002934,0.003381,0.008802,0.038977,0.372766,0.020415,0.023436,0.028529,0.061245,705.966373,1089.058662,1847.852808,2880.268414,699.242806,1084.103901,1860.582235,2879.436019
3,pva_9177692_2014-01-08-002323.wav,1.344186,221.788614,7.236109,12.807497,0.023551,0.000107,0.012546,0.015432,0.037638,0.073603,0.801078,0.036423,0.051388,0.062333,0.109268,755.080119,1342.718510,2433.654324,3374.342223,912.367103,1346.606937,2839.049219,3660.059876
4,pva_0296853_2014-01-08-010230.wav,1.344186,135.297177,11.119186,3.296878,0.070588,0.000532,0.041229,0.052293,0.123688,0.198787,1.677030,0.071443,,,0.214330,670.433360,1128.772415,2170.207770,2998.324953,671.030343,1128.155931,2174.232334,2999.079586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376,pva_3204173_2014-01-24-001933.wav,1.344186,108.416125,5.114389,18.428638,0.007969,0.000074,0.002903,0.004255,0.008709,0.050586,0.474273,0.023114,0.031514,0.044150,0.069343,657.864950,1131.370475,2240.672396,2702.532044,660.732355,1135.279467,2233.482079,2668.037320
377,pva_1655581_2014-01-24-033709.wav,1.344186,147.886980,2.696971,20.096379,0.003681,0.000025,0.001669,0.001991,0.005008,0.054365,0.530526,0.029984,0.032616,0.040272,0.089952,657.593807,1056.510262,2399.724944,3006.883565,657.732345,1047.193691,2471.422532,3015.213716
378,pva_2051504_2014-01-24-032302.wav,1.344186,153.668240,14.556438,13.405543,0.004981,0.000032,0.002161,0.002535,0.006483,0.108995,1.060263,0.051857,0.067747,0.079355,0.155572,777.349936,1183.406754,1866.143043,3085.955342,755.216794,1191.496349,1868.048512,3099.075209
379,pva_8154836_2014-01-24-055847.wav,1.344186,201.646817,2.471537,20.868889,0.002799,0.000014,0.001376,0.001551,0.004127,0.042447,0.358815,0.023672,0.025341,0.028839,0.071015,825.899948,1061.369201,1641.099739,3052.200468,820.763734,1038.761701,1651.811710,3057.694567


In [None]:
df_parkinson_genero_sexo = pd.read_excel('/content/drive/MyDrive/IAA Trabajos/Modelos/parkinson.xlsx')

In [None]:
df_parkinson_genero_sexo.IDAUDIO=df_parkinson_genero_sexo.IDAUDIO.str.replace('pav','pva')

In [None]:
df_parkinson_genero_sexo

Unnamed: 0,IDAUDIO,EDAD,GENERO
0,pva_3812301_2014-01-22-043016.wav,64,M
1,pva_6572221_2014-01-09-195232.wav,66,F
2,pva_2388093_2014-01-08-184005.wav,49,F
3,pva_2618085_2014-01-20-162335.wav,51,F
4,pva_4318819_2014-01-17-133237.wav,63,F
...,...,...,...
255,pva_0194489_2014-01-21-192606.wav,61,F
256,pva_5841487_2014-01-19-224515.wav,81,M
257,pva_2393731_2014-01-09-185255.wav,54,F
258,pva_9851720_2014-01-21-214713.wav,48,F


In [None]:
df_parkinson_genero_sexo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   IDAUDIO  260 non-null    object
 1   EDAD     260 non-null    int64 
 2   GENERO   260 non-null    object
dtypes: int64(1), object(2)
memory usage: 6.2+ KB


Haciendo merge

In [None]:
df_parkinson = pd.merge(df1, pd.DataFrame(df_parkinson_genero_sexo,columns = ['IDAUDIO','EDAD','GENERO']), left_on='voiceID', right_on='IDAUDIO')

In [None]:
df_parkinson

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,IDAUDIO,EDAD,GENERO
0,pva_4486749_2014-01-07-195653.wav,1.344186,200.839457,8.170530,16.541212,0.010257,0.000051,0.004700,0.006250,0.014099,0.043037,0.442595,0.021867,0.028317,0.035232,0.065602,812.484547,1229.509608,1891.952253,3013.080402,814.333564,1229.865457,1859.626617,3029.760981,pva_4486749_2014-01-07-195653.wav,53,F
1,pva_0099762_2014-01-08-001919.wav,1.344186,145.216545,47.756632,18.180372,0.006082,0.000042,0.002934,0.003381,0.008802,0.038977,0.372766,0.020415,0.023436,0.028529,0.061245,705.966373,1089.058662,1847.852808,2880.268414,699.242806,1084.103901,1860.582235,2879.436019,pva_0099762_2014-01-08-001919.wav,51,F
2,pva_0296853_2014-01-08-010230.wav,1.344186,135.297177,11.119186,3.296878,0.070588,0.000532,0.041229,0.052293,0.123688,0.198787,1.677030,0.071443,,,0.214330,670.433360,1128.772415,2170.207770,2998.324953,671.030343,1128.155931,2174.232334,2999.079586,pva_0296853_2014-01-08-010230.wav,62,M
3,pva_7796654_2014-01-08-031028.wav,1.344186,114.763184,4.507640,14.704079,0.010702,0.000093,0.005045,0.005974,0.015135,0.095745,0.875451,0.044557,0.067176,0.092615,0.133671,639.860913,1098.130458,1576.843641,2934.184458,636.538920,1112.212007,1562.150767,2962.291819,pva_7796654_2014-01-08-031028.wav,67,M
4,pva_1748808_2014-01-08-102953.wav,1.344186,181.496237,7.748818,16.444203,0.006176,0.000034,0.003138,0.003678,0.009415,0.061994,0.529230,0.030248,0.037124,0.048461,0.090743,790.558148,1218.161920,2478.447352,2899.010989,791.712816,1215.810282,2490.737363,2898.580614,pva_1748808_2014-01-08-102953.wav,68,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,pva_7525265_2014-01-23-194046.wav,1.344186,144.814484,1.830275,22.539868,0.002167,0.000015,0.000847,0.001055,0.002541,0.029463,0.265232,0.013974,0.017917,0.028580,0.041922,667.454626,1175.181894,1621.314975,2728.072253,678.859089,1170.697032,1664.209418,2721.585818,pva_7525265_2014-01-23-194046.wav,58,M
250,pva_1655581_2014-01-24-033709.wav,1.344186,147.886980,2.696971,20.096379,0.003681,0.000025,0.001669,0.001991,0.005008,0.054365,0.530526,0.029984,0.032616,0.040272,0.089952,657.593807,1056.510262,2399.724944,3006.883565,657.732345,1047.193691,2471.422532,3015.213716,pva_1655581_2014-01-24-033709.wav,78,M
251,pva_2051504_2014-01-24-032302.wav,1.344186,153.668240,14.556438,13.405543,0.004981,0.000032,0.002161,0.002535,0.006483,0.108995,1.060263,0.051857,0.067747,0.079355,0.155572,777.349936,1183.406754,1866.143043,3085.955342,755.216794,1191.496349,1868.048512,3099.075209,pva_2051504_2014-01-24-032302.wav,84,F
252,pva_8154836_2014-01-24-055847.wav,1.344186,201.646817,2.471537,20.868889,0.002799,0.000014,0.001376,0.001551,0.004127,0.042447,0.358815,0.023672,0.025341,0.028839,0.071015,825.899948,1061.369201,1641.099739,3052.200468,820.763734,1038.761701,1651.811710,3057.694567,pva_8154836_2014-01-24-055847.wav,77,F


UNIENDO DATASETS


Añadiendo etiquetas(status)

In [None]:
df_sanos = df_sanos.assign(status=0)

In [None]:
df_parkinson = df_parkinson.assign(status=1)

In [None]:
df_sanos.describe

<bound method NDFrame.describe of           voiceID  duration    meanF0Hz  ...  EDAD  GENERO  status
0       9-a_n.wav   0.83818   90.784174  ...    25       M       0
1      43-a_n.wav   0.84934  136.813012  ...    32       M       0
2      98-a_n.wav   0.90162   82.174521  ...    27       M       0
3     706-a_n.wav   0.81964  114.824357  ...    27       M       0
4     679-a_n.wav   1.68502   96.931127  ...    29       M       0
..            ...       ...         ...  ...   ...     ...     ...
139  2040-a_n.wav   1.47340  233.085114  ...    26       F       0
140  2206-a_n.wav   1.93166  226.738675  ...    26       F       0
141  2250-a_n.wav   0.71944  224.485784  ...    29       F       0
142  2203-a_n.wav   0.71770  228.453707  ...    26       F       0
143  2163-a_n.wav   0.90758  229.530945  ...    34       F       0

[144 rows x 28 columns]>

In [None]:
df_sanos

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,IDAUDIO,EDAD,GENERO,status
0,9-a_n.wav,0.83818,90.784174,0.947235,25.407817,0.006059,0.000067,0.003237,0.003859,0.009712,0.019526,0.171497,0.007930,0.011484,0.027055,0.023789,641.228106,1072.190481,2136.205482,3374.665433,641.699583,1072.672415,2136.637191,3379.368924,9-a_n.wav,25,M,0
1,43-a_n.wav,0.84934,136.813012,0.751588,27.096361,0.001998,0.000015,0.001014,0.001222,0.003041,0.009837,0.085955,0.005420,0.005866,0.007634,0.016260,687.991629,1159.419654,2178.392632,3366.234438,687.554816,1159.957480,2179.437399,3366.339045,43-a_n.wav,32,M,0
2,98-a_n.wav,0.90162,82.174521,1.020332,22.921196,0.004288,0.000052,0.001936,0.002594,0.005807,0.043013,0.369315,0.016265,0.025184,0.044935,0.048795,680.371789,1102.937730,2609.615425,3414.517949,684.944218,1102.338219,2615.257400,3419.859496,98-a_n.wav,27,M,0
3,706-a_n.wav,0.81964,114.824357,0.969736,20.727258,0.004635,0.000040,0.002372,0.002763,0.007116,0.045824,0.402363,0.024257,0.028993,0.040920,0.072772,641.543794,1184.566012,2187.001724,4055.033562,657.455262,1193.523262,2186.825689,4037.905096,706-a_n.wav,27,M,0
4,679-a_n.wav,1.68502,96.931127,1.304410,23.928955,0.003973,0.000041,0.001891,0.002115,0.005672,0.021713,0.187253,0.010614,0.012648,0.022631,0.031842,675.528536,1116.511773,2724.800406,3247.023765,672.207101,1115.429549,2725.685239,3247.704953,679-a_n.wav,29,M,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2040-a_n.wav,1.47340,233.085114,1.261945,24.920506,0.002633,0.000011,0.001419,0.001705,0.004257,0.019656,0.172557,0.009137,0.012375,0.020402,0.027410,636.099570,1420.801066,2495.878129,4213.178157,640.604560,1419.040430,2497.791388,4207.868930,2040-a_n.wav,26,F,0
140,2206-a_n.wav,1.93166,226.738675,1.483066,19.585316,0.005091,0.000022,0.002969,0.003152,0.008907,0.033643,0.299413,0.018620,0.020871,0.026655,0.055860,802.176490,1451.605153,2801.535410,4029.009589,805.163043,1448.675822,2802.416454,4024.320392,2206-a_n.wav,26,F,0
141,2250-a_n.wav,0.71944,224.485784,1.676993,22.927223,0.004787,0.000021,0.002603,0.003395,0.007809,0.023115,0.200756,0.012485,0.014693,0.018622,0.037455,780.952966,1263.634291,2153.119867,3896.618065,800.234704,1349.095660,2316.573070,4256.561518,2250-a_n.wav,29,F,0
142,2203-a_n.wav,0.71770,228.453707,3.780583,26.383066,0.002159,0.000009,0.001233,0.001274,0.003699,0.022832,0.198038,0.012298,0.014395,0.018396,0.036893,894.175393,1393.370278,2736.448972,3763.905466,892.456542,1393.817853,2738.257683,3765.157281,2203-a_n.wav,26,F,0


In [None]:
df_parkinson.describe

<bound method NDFrame.describe of                                voiceID  duration  ...  GENERO  status
0    pva_4486749_2014-01-07-195653.wav  1.344186  ...       F       1
1    pva_0099762_2014-01-08-001919.wav  1.344186  ...       F       1
2    pva_0296853_2014-01-08-010230.wav  1.344186  ...       M       1
3    pva_7796654_2014-01-08-031028.wav  1.344186  ...       M       1
4    pva_1748808_2014-01-08-102953.wav  1.344186  ...       M       1
..                                 ...       ...  ...     ...     ...
249  pva_7525265_2014-01-23-194046.wav  1.344186  ...       M       1
250  pva_1655581_2014-01-24-033709.wav  1.344186  ...       M       1
251  pva_2051504_2014-01-24-032302.wav  1.344186  ...       F       1
252  pva_8154836_2014-01-24-055847.wav  1.344186  ...       F       1
253  pva_7304504_2014-01-24-170501.wav  1.344186  ...       F       1

[254 rows x 28 columns]>

In [None]:
scaler = StandardScaler()

In [None]:
x=df_sanos.drop(["voiceID","IDAUDIO",'GENERO','status','EDAD'],axis=1)
x_voiceID = df_sanos["voiceID"]
x_IDAUDIO = df_sanos["IDAUDIO"]
x_GENERO = df_sanos["GENERO"]
x_status = df_sanos["status"]
x_edad = df_sanos["EDAD"]

In [None]:
df_sanos_escalado = pd.DataFrame(scaler.fit_transform(x),columns = x.columns)

In [None]:
df_sanos_escalado["voiceID"] = x_voiceID
df_sanos_escalado["IDAUDIO"] = x_IDAUDIO
df_sanos_escalado["GENERO"] = x_GENERO
df_sanos_escalado["status"] = x_status
df_sanos_escalado["EDAD"] = x_edad

In [None]:
df_sanos_escalado

Unnamed: 0,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,-1.206574,-1.354588,-0.303164,0.402110,0.472487,1.210365,0.359788,0.858594,0.359788,-0.469384,-0.477198,-0.557752,-0.504295,0.204492,-0.557752,-0.331076,-0.506054,-1.084220,-0.819439,-0.352048,-0.458909,-0.996196,-0.800032
1,-1.179963,-0.460067,-0.330749,0.849439,-0.733118,-0.615986,-0.671915,-0.819227,-0.671915,-0.920559,-0.935861,-0.739379,-0.979465,-1.227035,-0.739379,0.136170,0.134013,-0.948048,-0.845725,0.074579,0.076294,-0.874163,-0.837042
2,-1.055301,-1.521907,-0.292858,-0.256646,-0.053352,0.700375,-0.244209,0.053655,-0.244209,0.624239,0.583471,0.045494,0.654432,1.522463,0.045494,0.060035,-0.280438,0.443856,-0.695186,0.050291,-0.277008,0.368462,-0.685022
3,-1.250783,-0.887393,-0.299992,-0.837863,0.049755,0.286564,-0.041760,0.161088,-0.041760,0.755133,0.760671,0.623935,0.976582,1.226531,0.623935,-0.327921,0.318532,-0.920259,1.301831,-0.205461,0.282108,-0.853097,1.070475
4,0.812720,-1.235128,-0.252804,0.010330,-0.146719,0.308419,-0.265006,-0.250871,-0.265006,-0.367553,-0.392718,-0.363487,-0.405812,-0.121602,-0.363487,0.011643,-0.180835,0.815651,-1.217404,-0.068213,-0.196736,0.683318,-1.174010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,0.308111,1.410877,-0.258792,0.273012,-0.544773,-0.731950,-0.483751,-0.511712,-0.483751,-0.463321,-0.471516,-0.470390,-0.428963,-0.285927,-0.470390,-0.382318,2.051970,0.076734,1.794898,-0.362236,1.664904,0.033539,1.553240
140,1.400834,1.287541,-0.227615,-1.140387,0.185038,-0.341045,0.235198,0.408502,0.235198,0.187959,0.208666,0.215945,0.289649,0.175058,0.215945,1.277067,2.278004,1.063337,1.220692,1.168782,1.846619,0.902096,1.031889
141,-1.489710,1.243759,-0.200272,-0.255049,0.094878,-0.380661,0.065458,0.563383,0.065458,-0.302268,-0.320316,-0.228078,-0.232838,-0.417093,-0.228078,1.065009,0.898716,-1.029623,0.807919,1.122930,1.236026,-0.483157,1.691546
142,-1.493859,1.320871,0.096321,0.660473,-0.685554,-0.796603,-0.570102,-0.785928,-0.570102,-0.315474,-0.334889,-0.241619,-0.258063,-0.433727,-0.241619,2.196290,1.850689,0.853251,0.394144,1.980943,1.510248,0.719165,0.295762


In [None]:
y=df_parkinson.drop(["voiceID","IDAUDIO",'GENERO','status','EDAD'],axis=1)
y_voiceID = df_parkinson["voiceID"]
y_IDAUDIO = df_parkinson["IDAUDIO"]
y_GENERO = df_parkinson["GENERO"]
y_status = df_parkinson["status"]
y_edad = df_parkinson["EDAD"]

In [None]:
df_parkinson_escalado = pd.DataFrame(scaler.fit_transform(y),columns = y.columns)

In [None]:
df_parkinson_escalado["voiceID"] = y_voiceID
df_parkinson_escalado["IDAUDIO"] = y_IDAUDIO
df_parkinson_escalado["GENERO"] = y_GENERO
df_parkinson_escalado["status"] = y_status
df_parkinson_escalado["EDAD"] = y_edad

In [None]:
df_parkinson_escalado

Unnamed: 0,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,voiceID,IDAUDIO,GENERO,status,EDAD
0,1.0,0.705031,-0.119330,0.026961,0.076613,-0.118970,0.002240,0.104903,0.002240,-0.527179,-0.411514,-0.528957,-0.406286,-0.483514,-0.528957,1.193573,0.609390,-0.346559,0.857561,1.197119,0.604692,-0.430227,0.932420,pva_4486749_2014-01-07-195653.wav,pva_4486749_2014-01-07-195653.wav,F,1,53
1,1.0,-0.454084,2.877858,0.344638,-0.247594,-0.240427,-0.251267,-0.211987,-0.251267,-0.613066,-0.579602,-0.590965,-0.570144,-0.672466,-0.590965,0.089669,-0.637033,-0.492673,-0.218708,0.016991,-0.643435,-0.427193,-0.208540,pva_0099762_2014-01-08-001919.wav,pva_0099762_2014-01-08-001919.wav,F,1,51
2,1.0,-0.660791,0.103922,-2.539850,4.761496,6.023914,5.246498,5.190402,5.246498,2.767545,2.559932,1.587833,,,1.587833,-0.278579,-0.284596,0.575380,0.737987,-0.272297,-0.266226,0.568792,0.699550,pva_0296853_2014-01-08-010230.wav,pva_0296853_2014-01-08-010230.wav,M,1,62
3,1.0,-1.088695,-0.396659,-0.329083,0.111147,0.419462,0.051828,0.074443,0.051828,0.587811,0.630426,0.439852,0.898251,1.134116,0.439852,-0.595417,-0.556526,-1.390603,0.218211,-0.625968,-0.402751,-1.374851,0.420332,pva_7796654_2014-01-08-031028.wav,pva_7796654_2014-01-08-031028.wav,M,1,67
4,1.0,0.301942,-0.151260,0.008161,-0.240283,-0.336894,-0.221908,-0.179107,-0.221908,-0.126160,-0.202974,-0.171138,-0.110631,-0.110581,-0.171138,0.966338,0.508685,1.596665,-0.066824,0.965168,0.484340,1.573842,-0.063233,pva_1748808_2014-01-08-102953.wav,pva_1748808_2014-01-08-102953.wav,M,1,68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,1.0,-0.462462,-0.599371,1.189527,-0.551618,-0.580428,-0.550847,-0.468805,-0.550847,-0.814315,-0.838452,-0.865987,-0.755410,-0.671037,-0.865987,-0.309449,0.127262,-1.243257,-1.452062,-0.192022,0.098045,-1.050768,-1.406617,pva_7525265_2014-01-23-194046.wav,pva_7525265_2014-01-23-194046.wav,M,1,58
250,1.0,-0.398435,-0.533751,0.715969,-0.434049,-0.453690,-0.432826,-0.365476,-0.432826,-0.287538,-0.199855,-0.182390,-0.261947,-0.341424,-0.182390,-0.411642,-0.925881,1.335835,0.807344,-0.408653,-0.959490,1.512509,0.822007,pva_1655581_2014-01-24-033709.wav,pva_1655581_2014-01-24-033709.wav,M,1,78
251,1.0,-0.277960,0.364167,-0.580745,-0.333113,-0.358581,-0.362219,-0.305404,-0.362219,0.868098,1.075293,0.751551,0.917406,0.760315,0.751551,0.829454,0.200253,-0.432073,1.448119,0.590942,0.276145,-0.403484,1.458513,pva_2051504_2014-01-24-032302.wav,pva_2051504_2014-01-24-032302.wav,F,1,84
252,1.0,0.721856,-0.550819,0.865684,-0.502535,-0.594237,-0.474977,-0.414051,-0.474977,-0.539665,-0.613185,-0.451913,-0.506179,-0.663736,-0.451913,1.332604,-0.882761,-1.177705,1.174579,1.263053,-1.031691,-1.090136,1.144435,pva_8154836_2014-01-24-055847.wav,pva_8154836_2014-01-24-055847.wav,F,1,77


In [None]:
df_union = pd.concat([df_sanos, df_parkinson])

In [None]:
df_union_escalado = pd.concat([df_sanos_escalado, df_parkinson_escalado])

In [None]:
df_union

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,IDAUDIO,EDAD,GENERO,status
0,1000-a_n.wav,1.13120,106.344493,3.130821,21.645715,0.006009,0.000057,0.002307,0.003618,0.006920,0.031649,0.280745,0.016429,0.019516,0.027407,0.049287,493.748977,1031.263321,2279.838566,4140.811754,492.531721,1030.645536,2280.248380,4143.747930,1000-a_n.wav,31,M,0
1,145-a_n.wav,0.97002,224.922355,1.935428,23.573091,0.002358,0.000010,0.001238,0.001323,0.003714,0.024186,0.212943,0.012458,0.014558,0.020476,0.037374,690.083369,1355.901264,2985.243133,4071.046254,688.866021,1356.431954,2976.197315,4068.760685,145-a_n.wav,28,F,0
2,1278-a_n.wav,1.66352,200.923193,2.274124,25.433743,0.005071,0.000025,0.003127,0.002779,0.009380,0.030630,0.269344,0.017588,0.018340,0.019557,0.052763,799.080462,1320.220970,2902.116608,4050.867971,791.329638,1323.871591,2919.166164,4056.091223,1278-a_n.wav,34,F,0
3,1352-a_n.wav,1.34984,214.149866,1.105750,23.097469,0.007222,0.000034,0.004691,0.003468,0.014074,0.019403,0.167589,0.010699,0.011452,0.016516,0.032098,657.975046,1129.622174,2680.795743,3458.638514,656.745890,1128.599196,2677.815173,3458.432306,1352-a_n.wav,35,F,0
4,1180-a_n.wav,1.97650,220.829172,1.586846,21.516045,0.005504,0.000025,0.003326,0.003118,0.009978,0.024362,0.221930,0.012475,0.015435,0.020531,0.037426,753.356862,1281.375046,2476.119035,3746.676762,751.632864,1288.289241,2473.047470,3723.773439,1180-a_n.wav,28,F,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,pva_9227397_2014-01-13-190728.wav,1.34000,115.335579,2.463644,19.399020,0.004775,0.000041,0.002139,0.002171,0.006418,0.044065,0.405501,0.023233,0.028654,0.037740,0.069700,598.126849,1121.876784,2289.885995,2940.128002,596.538910,1127.717424,2293.476921,2940.294387,pva_9227397_2014-01-13-190728.wav,59,M,1
250,pva_9040339_2014-01-11-191437.wav,1.34000,251.244326,2.580495,21.321007,0.003770,0.000015,0.002097,0.001867,0.006291,0.038254,0.333767,0.020327,0.024234,0.030167,0.060982,716.296434,1232.842231,1853.325261,2871.533516,716.190940,1230.147721,1859.037024,2878.794528,pva_9040339_2014-01-11-191437.wav,71,F,1
251,pva_9245595_2014-01-12-201731.wav,1.34000,215.850798,6.011179,21.911093,0.003863,0.000018,0.001691,0.002437,0.005073,0.040784,0.358763,0.020680,0.026846,0.038979,0.062039,783.766292,1192.150521,1492.043931,2960.785840,786.437478,1193.335449,1455.385947,3001.089994,pva_9245595_2014-01-12-201731.wav,56,F,1
252,pva_9100523_2014-01-12-055713.wav,1.34000,186.423984,22.592263,7.674644,0.029789,0.000161,0.015419,0.018037,0.046257,0.159181,1.403237,0.088372,0.102084,0.126112,0.265117,835.637827,1356.036631,1894.254907,2796.264716,843.939702,1387.222313,1914.751981,2791.877854,pva_9100523_2014-01-12-055713.wav,68,F,1


In [None]:
df_union_escalado

Unnamed: 0,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer,apq5Shimmer,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,voiceID,IDAUDIO,GENERO,status,EDAD
0,-0.507866,-1.052190,0.004709,-0.594546,0.457746,0.851633,-0.072099,0.705331,-0.072099,0.095085,0.108571,0.057358,0.175047,0.230483,0.057358,-1.804638,-0.806369,-0.620600,1.569272,-1.739876,-0.716603,-0.586727,1.371111,1000-a_n.wav,1000-a_n.wav,M,0,31
1,-0.892201,1.252243,-0.163835,-0.083945,-0.626302,-0.760353,-0.567786,-0.755239,-0.567786,-0.252426,-0.254970,-0.230016,-0.244260,-0.280406,-0.230016,0.157070,1.575750,1.656310,1.351755,0.086778,1.281010,1.397586,1.158117,145-a_n.wav,145-a_n.wav,F,0,28
2,0.761453,0.785845,-0.116081,0.408978,0.179139,-0.243565,0.308373,0.171201,0.308373,0.047663,0.047441,0.141220,0.075554,-0.348159,0.141220,1.246133,1.313937,1.387994,1.288843,1.040079,1.081361,1.234977,1.122131,1278-a_n.wav,1278-a_n.wav,F,0,34
3,0.013482,1.042891,-0.280814,-0.209947,0.817894,0.053811,1.034264,0.609801,1.034264,-0.475126,-0.498156,-0.357296,-0.506949,-0.572325,-0.357296,-0.163746,-0.084634,0.673613,-0.557626,-0.212061,-0.115984,0.546829,-0.575460,1352-a_n.wav,1352-a_n.wav,F,0,35
4,1.507755,1.172696,-0.212983,-0.628898,0.307844,-0.254434,0.400842,0.387125,0.400842,-0.244195,-0.206786,-0.228762,-0.170083,-0.276367,-0.228762,0.789278,1.028894,0.012956,0.340428,0.670748,0.863182,-0.037012,0.178215,1180-a_n.wav,1180-a_n.wav,F,0,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,0.000000,-1.079966,-0.537340,0.584331,-0.332986,-0.232331,-0.337728,-0.308727,-0.337728,-0.436943,-0.493373,-0.482746,-0.436784,-0.401749,-0.482746,-1.024963,-0.349751,0.971003,0.265457,-1.035782,-0.275860,0.946351,0.251232,pva_9227397_2014-01-13-190728.wav,pva_9227397_2014-01-13-190728.wav,M,1,59
250,0.000000,1.755378,-0.528526,0.955560,-0.404684,-0.539148,-0.342800,-0.338497,-0.342800,-0.538266,-0.658688,-0.612099,-0.604016,-0.600580,-0.612099,0.191802,0.633406,-0.474193,-0.290104,0.186350,0.601913,-0.433198,-0.216658,pva_9040339_2014-01-11-191437.wav,pva_9040339_2014-01-11-191437.wav,F,1,71
251,0.000000,1.016994,-0.269741,1.069535,-0.398074,-0.505411,-0.391456,-0.282714,-0.391456,-0.494155,-0.601083,-0.596411,-0.505220,-0.369232,-0.596411,0.886524,0.272876,-1.670182,0.432769,0.903853,0.286452,-1.714978,0.713764,pva_9245595_2014-01-12-201731.wav,pva_9245595_2014-01-12-201731.wav,F,1,56
252,0.000000,0.403089,0.981012,-1.680214,1.452493,1.154100,1.254330,1.243250,1.254330,1.570181,1.805986,2.416752,2.341128,1.918576,2.416752,1.420634,1.724912,-0.338699,-0.899722,1.491184,1.947958,-0.256277,-0.877919,pva_9100523_2014-01-12-055713.wav,pva_9100523_2014-01-12-055713.wav,F,1,68


Exportando Dataset Final

In [None]:
df_union.to_excel("/content/drive/MyDrive/IAA Trabajos/Modelos/union.xlsx", index=False)

In [None]:
df_union_escalado.to_excel("/content/drive/MyDrive/IAA Trabajos/Modelos/union_escalado.xlsx", index=False)