In [87]:
import json
import numpy as np
import os
import math
import librosa
from sklearn.model_selection import train_test_split
from Praat import Praat_IdeaLogic as PLogic
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import librosa.display
import glob
import joblib

In [88]:
def extract(audio_file):
    name=os.path.basename(audio_file)
    name=os.path.splitext(name)[0]
    label = int(name[7:8]) - 1
    
    return label, audio_file
    
    
def get_feats(audio_file):
    X, sample_rate = librosa.load(audio_file,
                                  res_type='kaiser_fast')
    
    # mfcc 
    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
    # praat
    praat = PLogic.get_sound_report(audio_file).to_numpy()[0]
    
    #print(mfccs.shape)
    #print(praat.shape)
    
    # augmentation
    #conc =np.column_stack((mfccs, praat))
    #conc = np.concatenate((mfccs,np.concatenate(praat)[:,None]),axis=1)
    conc = np.concatenate((mfccs,praat),axis=0)
    conc = np.mean(conc.T, axis=0)
    
    return conc

In [89]:
def return_feats(dir_name):
    mfcc = []
    praat = []
    y = []
    err_cnt = 0
    err_list = []
    exeption_list = []
    
    #progression bar 
    file_count = len(dir_name)
    bar = tqdm(desc="proccessing files", total=file_count)
    tot = 0
    
    for file in dir_name:
        bar.update(1)
        tot += 1
        label, audio_file = extract(file)
        
        try:
            #X.append(get_feats(audio_file))
            X, sample_rate = librosa.load(audio_file,
                                  res_type='kaiser_fast')
            # praat
            praat.append(PLogic.get_sound_report(audio_file).to_numpy()[0])
            # mfcc 
            mfcc.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0))
            y.append(label)
        except Exception as e:

            err_cnt += 1
            err_list.append(audio_file)
            exeption_list.append(str(e))
            print("{}.error at:".format(err_cnt), audio_file)
            
        
    return mfcc, praat, y, err_cnt, err_list, exeption_list

In [90]:
dir_name = glob.glob("../raw data/toronto_ds/conc_data/*.wav")

#dir_name = glob.glob("../raw data/Audio_Data/Actor_09/*.wav")
mfcc, praat, y,_,_,exception_list = return_feats(dir_name)

#X = np.asarray(X)
#y = np.asarray(y)

# X_name, y_name = '../feats/mfcc_and_praat/X1.joblib', '../feats/mfcc_and_praat/y2.joblib'
# X, y = np.asarray(X), np.asarray(y)
# y = np.asarray(y)
# joblib.dump(X, X_name)
# joblib.dump(y, y_name)

proccessing files:   0%|          | 0/5252 [00:00<?, ?it/s]

1.error at: ../raw data/toronto_ds/conc_data/03-01-04-01-01-02-05.wav
2.error at: ../raw data/toronto_ds/conc_data/03-01-04-01-01-01-09.wav
3.error at: ../raw data/toronto_ds/conc_data/03-01-04-02-01-02-09.wav


In [91]:
exception_list

['Unable to parse string "--undefined--" at position 22',
 'Unable to parse string "--undefined--" at position 22',
 'Unable to parse string "--undefined--" at position 22']

In [92]:
len(mfcc)

5249

In [93]:
len(praat)

5249

In [94]:
praat[0]

array([2.18133, 241.385, 261.606, 84.163, 164.979, 471.744, 391.0, 390.0,
       0.0038193809999999997, 0.001152713, 27.906999999999996, 0.0, 0.0,
       1.213, 4.6327e-05, 0.319, 0.469, 0.9570000000000001,
       5.6160000000000005, 0.555, 1.695, 2.745, 5.812, 5.086,
       0.9558629999999999, 0.056103999999999994, 18.000999999999998],
      dtype=object)

In [95]:
mfcc[0]

array([-4.60802155e+02,  9.50778580e+01,  1.56908245e+01, -3.87698460e+00,
       -1.35151701e+01,  7.23161793e+00, -9.96821785e+00,  8.15477943e+00,
       -2.18480358e+01, -8.31345022e-01, -1.66333771e+01, -5.64933109e+00,
       -3.32634783e+00,  2.75295830e+00, -1.21644902e+00,  6.06903028e+00,
       -1.64077926e+00,  1.20711823e+01,  1.93527591e+00, -4.95892382e+00,
        2.41293931e+00,  1.37753391e+00,  1.94042698e-01,  3.23400855e+00,
        2.00246423e-01,  3.43562508e+00,  5.11208153e+00,  1.05722771e+01,
        3.81392789e+00,  7.92001486e+00,  5.46202850e+00,  5.28175783e+00,
        5.18946791e+00,  5.83476830e+00,  7.77707100e+00,  9.17715168e+00,
        1.08836365e+01,  7.90964937e+00,  5.67073727e+00,  5.89068127e+00],
      dtype=float32)

In [96]:
praat = np.asarray(praat, dtype=float)

In [97]:
praat[0]

array([2.181330e+00, 2.413850e+02, 2.616060e+02, 8.416300e+01,
       1.649790e+02, 4.717440e+02, 3.910000e+02, 3.900000e+02,
       3.819381e-03, 1.152713e-03, 2.790700e+01, 0.000000e+00,
       0.000000e+00, 1.213000e+00, 4.632700e-05, 3.190000e-01,
       4.690000e-01, 9.570000e-01, 5.616000e+00, 5.550000e-01,
       1.695000e+00, 2.745000e+00, 5.812000e+00, 5.086000e+00,
       9.558630e-01, 5.610400e-02, 1.800100e+01])

In [98]:
X = np.concatenate((mfcc,praat),axis=1)

In [99]:
X.shape

(5249, 67)

In [101]:
X

array([[-4.60802155e+02,  9.50778580e+01,  1.56908245e+01, ...,
         9.55863000e-01,  5.61040000e-02,  1.80010000e+01],
       [-4.31706268e+02,  7.79564972e+01,  3.43442202e+00, ...,
         9.69402000e-01,  3.49360000e-02,  1.82910000e+01],
       [-4.76320831e+02,  2.66027317e+01, -3.15517826e+01, ...,
         8.23165000e-01,  2.52871000e-01,  8.00300000e+00],
       ...,
       [-6.63958801e+02,  4.46422653e+01, -2.06205139e+01, ...,
         9.73368000e-01,  3.46410000e-02,  2.26570000e+01],
       [-4.48102570e+02,  9.06678772e+01, -9.51643753e+00, ...,
         9.02997000e-01,  1.25523000e-01,  1.24860000e+01],
       [-5.04512207e+02,  7.59035797e+01,  8.97140217e+00, ...,
         9.83993000e-01,  1.76160000e-02,  2.17330000e+01]])

In [107]:
y_np = np.asarray(y)

In [108]:
type(y)

numpy.ndarray

In [111]:
X_name, y_name = '../feats/mfcc_and_praat/X1.joblib', '../feats/mfcc_and_praat/y2.joblib'
#X, y = np.asarray(X), np.asarray(y)
#y = np.asarray(y)
joblib.dump(X, X_name)
joblib.dump(y, y_name)

['../feats/mfcc_and_praat/y2.joblib']

In [110]:
X.shape

(5249, 67)