In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import torchaudio
import torch
import numpy as np
import pandas as pd
import os
import pickle
import re
import torchaudio.transforms as T
import math
import librosa
import librosa.display
import matplotlib.patches as patches
from glob import glob
import util
from util import audio_file
from util import *
from Losses import FocalLoss,WeightedFocalLoss
from StackedLSTM2 import StackedLSTM
from AttentionLSTM import Attention_LSTM
from Toy_Model import ToyModel
import time
import parselmouth
from parselmouth.praat import call
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import statistics

In [3]:
#https://github.com/drfeinberg/PraatScripts/blob/master/Measure%20Pitch%2C%20HNR%2C%20Jitter%2C%20Shimmer%2C%20and%20Formants.ipynb

def measureFormants(sound, wave_file, f0min,f0max):
    """Measure formants F1-F4 at the glottal pulses of a sound.

    A periodic point process is derived from the sound, a Burg formant
    object is computed, and every formant is sampled (in Hertz, linear
    interpolation) at each pulse time. Undefined (NaN) samples are dropped
    before the per-formant mean and median are taken.

    Parameters
    ----------
    sound : anything accepted by ``parselmouth.Sound`` (the caller in this
        file passes an already-loaded ``parselmouth.Sound``).
    wave_file : unused; kept so existing call sites keep working.
    f0min, f0max : pitch floor / ceiling forwarded to
        "To PointProcess (periodic, cc)".

    Returns
    -------
    tuple of 8 floats
        ``(f1_mean, f2_mean, f3_mean, f4_mean,
        f1_median, f2_median, f3_median, f4_median)``.
        A formant with no defined sample at any pulse (e.g. a file with no
        detected pulses) yields ``nan`` for both its mean and median instead
        of raising ``statistics.StatisticsError``.
    """
    sound = parselmouth.Sound(sound) # read the sound
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    # One list of per-pulse samples for each of F1..F4.
    tracks = [[] for _ in range(4)]

    # Measure formants only at glottal pulses (Praat indices start at 1).
    for point in range(1, numPoints + 1):
        t = call(pointProcess, "Get time from index", point)
        for formant_number, track in enumerate(tracks, start=1):
            value = call(formants, "Get value at time", formant_number, t, 'Hertz', 'Linear')
            # Praat reports "undefined" as NaN; keep only real measurements.
            if not math.isnan(value):
                track.append(value)

    # Mean and median across pulses; NaN when a formant has no valid sample.
    means = [statistics.mean(track) if track else math.nan for track in tracks]
    medians = [statistics.median(track) if track else math.nan for track in tracks]

    return tuple(means + medians)

def measurePitch(voiceID, f0min, f0max, unit):
    """Collect duration, F0, HNR, jitter and shimmer measures for one sound.

    Parameters
    ----------
    voiceID : anything accepted by ``parselmouth.Sound``.
    f0min, f0max : pitch floor / ceiling forwarded to the Praat pitch,
        harmonicity (floor only) and point-process commands.
    unit : unit name forwarded to the pitch "Get mean" and
        "Get standard deviation" queries (the caller in this file passes
        "Hertz").

    Returns
    -------
    tuple of 15 values, in this order:
        duration, meanF0, stdevF0, hnr,
        local / local-absolute / rap / ppq5 / ddp jitter,
        local / local_dB / apq3 / apq5 / apq11 / dda shimmer.
    """
    snd = parselmouth.Sound(voiceID)
    total_duration = call(snd, "Get total duration")

    # Pitch statistics over the whole file (0, 0 = full time range).
    pitch_obj = call(snd, "To Pitch", 0.0, f0min, f0max)
    f0_mean = call(pitch_obj, "Get mean", 0, 0, unit)
    f0_stdev = call(pitch_obj, "Get standard deviation", 0 ,0, unit)

    # Harmonics-to-noise ratio.
    harmonicity_obj = call(snd, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr_mean = call(harmonicity_obj, "Get mean", 0, 0)

    # Jitter is queried on the point process alone; shimmer needs the sound too.
    pulses = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    jitter_args = (0, 0, 0.0001, 0.02, 1.3)
    jitter_commands = (
        "Get jitter (local)",
        "Get jitter (local, absolute)",
        "Get jitter (rap)",
        "Get jitter (ppq5)",
        "Get jitter (ddp)",
    )
    jitter_values = tuple(call(pulses, command, *jitter_args) for command in jitter_commands)

    shimmer_args = (0, 0, 0.0001, 0.02, 1.3, 1.6)
    shimmer_commands = (
        "Get shimmer (local)",
        "Get shimmer (local_dB)",
        "Get shimmer (apq3)",
        "Get shimmer (apq5)",
        "Get shimmer (apq11)",
        "Get shimmer (dda)",
    )
    shimmer_values = tuple(call([snd, pulses], command, *shimmer_args) for command in shimmer_commands)

    return (total_duration, f0_mean, f0_stdev, hnr_mean) + jitter_values + shimmer_values

def runPCA(df):
    # z-score the Jitter and Shimmer measurements
    measures = ['localJitter', 'localabsoluteJitter', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
                'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 'apq11Shimmer', 'ddaShimmer']
    x = df.loc[:, measures].values
    x = StandardScaler().fit_transform(x)
    # PCA
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(x)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['JitterPCA', 'ShimmerPCA'])
    principalDf
    return principalDf

In [None]:
# Load the dictionary whose keys are the audio file paths to analyse.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on pickles produced by this project.
pkl_path = '/project/graziul/ra/ajays/whitelisted_vad_dict.pkl'
with open(pkl_path, 'rb') as file:
    # The context manager closes the handle even if unpickling fails.
    vad_dict = pickle.load(file)

# create lists to put the results
# (one list per output column; the DataFrame cell reads them by name)
file_list = []
duration_list = []
mean_F0_list = []
sd_F0_list = []
hnr_list = []
localJitter_list = []
localabsoluteJitter_list = []
rapJitter_list = []
ppq5Jitter_list = []
ddpJitter_list = []
localShimmer_list = []
localdbShimmer_list = []
apq3Shimmer_list = []
aqpq5Shimmer_list = []
apq11Shimmer_list = []
ddaShimmer_list = []
f1_mean_list = []
f2_mean_list = []
f3_mean_list = []
f4_mean_list = []
f1_median_list = []
f2_median_list = []
f3_median_list = []
f4_median_list = []

# Iterating the dict yields its keys, i.e. the audio file paths.
for wave_file in vad_dict:
    print(wave_file)
    sound = parselmouth.Sound(wave_file)
    (duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter,
     localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer) = measurePitch(
        sound, 75, 300, "Hertz")
    (f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median) = measureFormants(
        sound, wave_file, 75, 300)

    # Append only after both measurements succeeded so all lists stay the same length.
    file_list.append(wave_file) # make an ID list
    duration_list.append(duration) # make duration list
    mean_F0_list.append(meanF0) # make a mean F0 list
    sd_F0_list.append(stdevF0) # make a sd F0 list
    hnr_list.append(hnr) #add HNR data

    # add raw jitter and shimmer measures
    localJitter_list.append(localJitter)
    localabsoluteJitter_list.append(localabsoluteJitter)
    rapJitter_list.append(rapJitter)
    ppq5Jitter_list.append(ppq5Jitter)
    ddpJitter_list.append(ddpJitter)
    localShimmer_list.append(localShimmer)
    localdbShimmer_list.append(localdbShimmer)
    apq3Shimmer_list.append(apq3Shimmer)
    aqpq5Shimmer_list.append(aqpq5Shimmer)
    apq11Shimmer_list.append(apq11Shimmer)
    ddaShimmer_list.append(ddaShimmer)

    # add the formant data
    f1_mean_list.append(f1_mean)
    f2_mean_list.append(f2_mean)
    f3_mean_list.append(f3_mean)
    f4_mean_list.append(f4_mean)
    f1_median_list.append(f1_median)
    f2_median_list.append(f2_median)
    f3_median_list.append(f3_median)
    f4_median_list.append(f4_median)

/project/graziul/data/Zone1/2018_08_10/201808100021-669974-27730.mp3


In [None]:
# Build the results table from a column -> values mapping so every column
# keeps its own dtype. (Stacking the filename column together with the
# numeric ones in a single NumPy array would turn every value into a string.)
df = pd.DataFrame({
    'voiceID': file_list,
    'duration': duration_list,
    'meanF0Hz': mean_F0_list,
    'stdevF0Hz': sd_F0_list,
    'HNR': hnr_list,
    'localJitter': localJitter_list,
    'localabsoluteJitter': localabsoluteJitter_list,
    'rapJitter': rapJitter_list,
    'ppq5Jitter': ppq5Jitter_list,
    'ddpJitter': ddpJitter_list,
    'localShimmer': localShimmer_list,
    'localdbShimmer': localdbShimmer_list,
    'apq3Shimmer': apq3Shimmer_list,
    'apq5Shimmer': aqpq5Shimmer_list,
    'apq11Shimmer': apq11Shimmer_list,
    'ddaShimmer': ddaShimmer_list,
    'f1_mean': f1_mean_list,
    'f2_mean': f2_mean_list,
    'f3_mean': f3_mean_list,
    'f4_mean': f4_mean_list,
    'f1_median': f1_median_list,
    'f2_median': f2_median_list,
    'f3_median': f3_median_list,
    'f4_median': f4_median_list,
})

pcaData = runPCA(df) # Run jitter and shimmer PCA
df = pd.concat([df, pcaData], axis=1) # Add PCA data
# Persist the results; the frame is already numeric, so no reload is needed.
df.to_csv("processed_results.csv", index=False)
df.sort_values('voiceID').head(20)

In [1]:
import pickle
import opensmile
import time
import tracemalloc
%load_ext memory_profiler
from memory_profiler import profile


# Load the dictionary whose keys are the audio file paths to analyse.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on pickles produced by this project.
pkl_path = '/project/graziul/ra/ajays/whitelisted_vad_dict.pkl'
with open(pkl_path, 'rb') as file:
    # The context manager closes the handle even if unpickling fails.
    vad_dict = pickle.load(file)

In [2]:
from feature_extractor import *
# Profile openSMILE with a minimal custom configuration on one sample file.
# Requested low-level descriptors and functionals; make_config_string comes
# from feature_extractor (not shown here) and presumably renders them into
# openSMILE config text — TODO confirm against that module.
lld_list = ['rms','log']
func_list = ['stddev','amean']
config_str = make_config_string(lld_list,func_list)

# Write the generated configuration to disk so opensmile.Smile can read it.
with open('my.conf', 'w') as fp:
    # NOTE(review): mode 'w' already empties the file, so this truncate looks redundant.
    fp.truncate(0)
    fp.write(config_str)

# Use the first key of vad_dict as the sample audio file.
test_file = list(vad_dict.keys())[0]
    
smile = opensmile.Smile(
    feature_set='my.conf',
    feature_level='func',
)

# The timed span covers tracemalloc.start() and the %memit run, so total_time
# includes the overhead of both profilers, not just process_file.
start_time = time.time()
tracemalloc.start()
%memit y = smile.process_file(test_file)
total_time = time.time() - start_time
# get_traced_memory() returns (current, peak); index 1 is the peak in bytes.
print("Memory Usage: " + str(tracemalloc.get_traced_memory()[1]) + " bytes")
print("Time taken to load = " + str(total_time) + " seconds")
tracemalloc.stop()

rms = 1
log = 1

Moments.stddev = 1
Moments.amean = 1

peak memory: 1251.50 MiB, increment: 1139.00 MiB
Memory Usage: 565819202 bytes
Time taken to load = 6.766913175582886 seconds


In [3]:
# Preview the feature table currently held in `y` (assigned by the most recent
# smile.process_file call).
y.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pcm_RMSenergy_variance,pcm_RMSenergy_stddev,pcm_RMSenergy_skewness,pcm_RMSenergy_kurtosis,pcm_RMSenergy_amean,pcm_LOGenergy_variance,pcm_LOGenergy_stddev,pcm_LOGenergy_skewness,pcm_LOGenergy_kurtosis,pcm_LOGenergy_amean
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
/project/graziul/data/Zone1/2018_08_10/201808100021-669974-27730.mp3,0 days,0 days 00:30:32.202448980,8.525001e-08,0.000292,-0.454968,1.685212,0.00056,2.627155,1.62085,-1.027766,2.508324,-15.458009


In [60]:
test_file = list(vad_dict.keys())[0]

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

start_time = time.time()
tracemalloc.start()
%memit y = smile.process_file(test_file)
print("Memory Usage: " + str(tracemalloc.get_traced_memory()[1]) + " bytes")
print("Time taken to load = " + str(total_time) + " seconds")
tracemalloc.stop()

peak memory: 1677.30 MiB, increment: 1454.83 MiB
Memory Usage: 565622259 bytes
Time taken to load = 40.93193602561951 seconds


In [51]:
# Sanity check: trace a trivial workload to see what tracemalloc reports.
tracemalloc.start()

# The list comprehension is the (tiny) allocation being measured.
summation = sum([elt for elt in range(2)])
print(summation)
# Capture the per-allocation snapshot before tracing is stopped.
snapshot = tracemalloc.take_snapshot()
# Prints the (current, peak) traced sizes in bytes.
print(tracemalloc.get_traced_memory())
tracemalloc.stop()
# Show the snapshot grouped by source line.
snapshot.statistics('lineno')

1
(27793, 51796)


[<Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/site-packages/IPython/core/compilerop.py' lineno=101>,)> size=5079 count=91>,
 <Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/json/decoder.py' lineno=353>,)> size=1525 count=18>,
 <Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/site-packages/zmq/utils/jsonapi.py' lineno=25>,)> size=1359 count=8>,
 <Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/site-packages/traitlets/traitlets.py' lineno=619>,)> size=1197 count=17>,
 <Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/codeop.py' lineno=143>,)> size=1152 count=15>,
 <Statistic traceback=<Traceback (<Frame filename='/home/ajays/.conda/envs/new_torch_env/lib/python3.9/site-packages/zmq/sugar/socket.py' lineno=622>,)> size=1056 

In [59]:
%load_ext memory_profiler
from memory_profiler import profile

# Baseline: run %memit on a trivial statement to see what it reports when the
# measured work allocates almost nothing.
%memit summation = sum([elt for elt in range(10)])

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler
peak memory: 222.47 MiB, increment: 0.00 MiB


In [9]:
# Preview the feature table currently held in `y`; judging by the column names
# in the saved output this is presumably the ComParE_2016 result — it depends
# on which profiling cell ran last.
y.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
/project/graziul/data/Zone1/2018_08_10/201808100021-669974-27730.mp3,0 days,0 days 00:30:32.202448980,8.911002,0.113087,0.0,0.012762,0.013052,0.013359,0.00029,0.000307,0.000597,0.011712,...,12.431165,0.563843,2.155996,2.155959,20.0,0.626891,91.520363,51.032475,90.906898,51.721867


In [8]:
# Display the list of feature column names. `cols` is assigned by a cell that
# appears later in this file (`cols = list(y.columns)`), so that cell must
# have been run first.
cols

['audspec_lengthL1norm_sma_range',
 'audspec_lengthL1norm_sma_maxPos',
 'audspec_lengthL1norm_sma_minPos',
 'audspec_lengthL1norm_sma_quartile1',
 'audspec_lengthL1norm_sma_quartile2',
 'audspec_lengthL1norm_sma_quartile3',
 'audspec_lengthL1norm_sma_iqr1-2',
 'audspec_lengthL1norm_sma_iqr2-3',
 'audspec_lengthL1norm_sma_iqr1-3',
 'audspec_lengthL1norm_sma_percentile1.0',
 'audspec_lengthL1norm_sma_percentile99.0',
 'audspec_lengthL1norm_sma_pctlrange0-1',
 'audspec_lengthL1norm_sma_stddev',
 'audspec_lengthL1norm_sma_skewness',
 'audspec_lengthL1norm_sma_kurtosis',
 'audspec_lengthL1norm_sma_meanSegLen',
 'audspec_lengthL1norm_sma_maxSegLen',
 'audspec_lengthL1norm_sma_minSegLen',
 'audspec_lengthL1norm_sma_segLenStddev',
 'audspec_lengthL1norm_sma_upleveltime25',
 'audspec_lengthL1norm_sma_upleveltime50',
 'audspec_lengthL1norm_sma_upleveltime75',
 'audspec_lengthL1norm_sma_upleveltime90',
 'audspec_lengthL1norm_sma_risetime',
 'audspec_lengthL1norm_sma_leftctime',
 'audspec_lengthL1

In [44]:
# List every column of `y` whose name contains one of the wanted substrings.
cols = list(y.columns)
features_wanted = ["mfcc"]
for feature in features_wanted:
    # Collect the matches first, then emit them in one write.
    matching_columns = [name for name in cols if feature in name]
    print("Searching for feature " + feature)
    print("Features found in columns: ", end = '')
    # Each match is followed by five spaces, including the last one.
    print(''.join(name + '     ' for name in matching_columns), end='')
    print('\n')

Searching for feature mfcc
Features found in columns: mfcc_sma[1]_range     mfcc_sma[1]_maxPos     mfcc_sma[1]_minPos     mfcc_sma[1]_quartile1     mfcc_sma[1]_quartile2     mfcc_sma[1]_quartile3     mfcc_sma[1]_iqr1-2     mfcc_sma[1]_iqr2-3     mfcc_sma[1]_iqr1-3     mfcc_sma[1]_percentile1.0     mfcc_sma[1]_percentile99.0     mfcc_sma[1]_pctlrange0-1     mfcc_sma[1]_stddev     mfcc_sma[1]_skewness     mfcc_sma[1]_kurtosis     mfcc_sma[1]_meanSegLen     mfcc_sma[1]_maxSegLen     mfcc_sma[1]_minSegLen     mfcc_sma[1]_segLenStddev     mfcc_sma[1]_upleveltime25     mfcc_sma[1]_upleveltime50     mfcc_sma[1]_upleveltime75     mfcc_sma[1]_upleveltime90     mfcc_sma[1]_risetime     mfcc_sma[1]_leftctime     mfcc_sma[1]_lpgain     mfcc_sma[1]_lpc0     mfcc_sma[1]_lpc1     mfcc_sma[1]_lpc2     mfcc_sma[1]_lpc3     mfcc_sma[1]_lpc4     mfcc_sma[2]_range     mfcc_sma[2]_maxPos     mfcc_sma[2]_minPos     mfcc_sma[2]_quartile1     mfcc_sma[2]_quartile2     mfcc_sma[2]_quartile3     mfcc_sma[2]_iqr