In [1]:
import json
import numpy as np
import os
import math
import librosa
from sklearn.model_selection import train_test_split
from Praat import Praat_IdeaLogic as PLogic
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import librosa.display
import glob
import joblib

In [None]:
def extract(audio_file):
    """Derive the class label encoded in an audio file's name.

    The label is the single character at index 7 of the file name (directory
    and extension stripped), parsed as an integer and shifted by one so that
    labels start at 0.

    Parameters
    ----------
    audio_file : str
        Path to the audio file.

    Returns
    -------
    tuple
        ``(label, audio_file)`` -- the 0-based integer label and the path,
        passed through unchanged.

    Raises
    ------
    ValueError
        If the file name has no character at index 7 or it is not a digit.
    """
    stem, _extension = os.path.splitext(os.path.basename(audio_file))
    label_char = stem[7:8]  # slice (not index) so short names yield '' instead of IndexError
    return int(label_char) - 1, audio_file
    
    
def get_feats(audio_file):
    """Build one fixed-length feature vector for an audio file.

    The vector is the per-coefficient mean of 40 MFCCs (averaged over all
    frames) followed by the values of the first row of the Praat sound
    report for the same file.

    Parameters
    ----------
    audio_file : str
        Path to the audio file; read by both librosa and the Praat helper.

    Returns
    -------
    numpy.ndarray
        1-D feature vector: 40 MFCC means followed by the Praat values.
    """
    X, sample_rate = librosa.load(audio_file,
                                  res_type='kaiser_fast')

    # MFCC matrix, shape (n_mfcc, n_frames).
    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
    # First row of the Praat report.
    # NOTE(review): assumed to be a 1-D numeric row of a DataFrame -- confirm
    # against Praat_IdeaLogic.get_sound_report.
    praat = PLogic.get_sound_report(audio_file).to_numpy()[0]

    # Reduce every feature block to one vector BEFORE joining them.
    # Concatenating the 2-D MFCC matrix directly with a 1-D Praat row raises
    # ValueError (np.concatenate requires equal dimensionality), so frame-wise
    # blocks are averaged over the frame axis first; blocks that are already
    # vectors are kept as they are.
    parts = []
    for block in (mfccs, praat):
        block = np.asarray(block)
        if block.ndim == 2:
            block = np.mean(block, axis=1)
        parts.append(np.ravel(block))

    return np.concatenate(parts)

In [None]:
def return_feats(dir_name):
    """Extract features and labels for a collection of audio files.

    Files that fail at any stage (label parsing or feature extraction) are
    skipped and recorded, so one bad file does not abort the whole run.

    Parameters
    ----------
    dir_name : sequence of str
        Paths of the audio files to process. Despite the name this is a list
        of files, not a directory; it must support ``len``.

    Returns
    -------
    X : list
        Feature vectors from ``get_feats``, one per successfully processed file.
    y : list
        Labels from ``extract``, aligned with ``X``.
    err_cnt : int
        Number of files that failed.
    err_list : list of str
        Paths of the files that failed.
    exeption_list : list of str
        ``str(exception)`` for each failure, aligned with ``err_list``.
    """
    X = []
    y = []
    err_cnt = 0
    err_list = []
    exeption_list = []

    # The context manager closes the progress bar even if the loop is
    # interrupted.
    with tqdm(desc="proccessing files", total=len(dir_name)) as bar:
        for file in dir_name:
            bar.update(1)

            try:
                # Label parsing is inside the try as well: a file with an
                # unexpected name is recorded as an error instead of
                # aborting the batch.
                label, audio_file = extract(file)
                feats = get_feats(audio_file)
            except Exception as e:
                err_cnt += 1
                err_list.append(file)
                exeption_list.append(str(e))
                print("{}.error at:".format(err_cnt), file)
                continue

            # Append only after both steps succeeded so X and y stay aligned.
            X.append(feats)
            y.append(label)

    return X, y, err_cnt, err_list, exeption_list

In [None]:
# Extract features for every clip of the dataset and persist them with joblib.
dir_name = glob.glob("toronto_ds/conc_data/*.wav")
# Alternative input set:
#dir_name = glob.glob("Audio_Data/Actor_04/*.wav")
X, y, _, _, exception_list = return_feats(dir_name)

X_name, y_name = 'Praat_Mfcc_feats/X.joblib', 'Praat_Mfcc_feats/y.joblib'
X, y = np.asarray(X), np.asarray(y)

# joblib.dump does not create missing directories; make sure the target
# folders exist so the extraction work is not lost to a failed write.
os.makedirs(os.path.dirname(X_name), exist_ok=True)
os.makedirs(os.path.dirname(y_name), exist_ok=True)
joblib.dump(X, X_name)
joblib.dump(y, y_name)

In [None]:
# Reload the persisted feature matrix and label vector from disk.
# joblib files are pickle-based: only load files produced by this notebook.
X = joblib.load(filename=X_name)
y = joblib.load(filename=y_name)