# Load and test Model

Use this notebook to load a pre-trained Spanish accent classifier and test it, first on a clip from the dataset and then on a recorded audio file.

The available models are listed below; set `model_name` in the first code cell to one of these names.

Binary models:

ES_ANDINO

ANDINO_CARIBE

ES_MX

ES_MX2

MX_ANDINO

MX_CARIBE

Multiclass models (SVM with a one-vs-one, OVO, classifier):

MULTI

MULTI_OVO

ES_MX_ANDINO_OVO

In [16]:
import python_speech_features as mfcc
import os
import numpy as np
import pandas as pd
import librosa
import pickle

from IPython.display import Audio 

# load model

# Binary Models

# ES_ANDINO
# ANDINO_CARIBE
# ES_MX
# ES_MX2
# MX_ANDINO
# MX_CARIBE

# Multiclass model using OVO Classifier SVM

# MULTI
# MULTI_OVO
# ES_MX_ANDINO_OVO

model_name = 'MX_CARIBE'

# Every artifact of the selected model lives in ./models/<model_name>/.
model_path = './models/'+model_name+'/'


def _load_pickle(artifact_name):
    """Unpickle ``artifact_name`` from ``model_path`` and close the file afterwards.

    Unpickling can execute arbitrary code, so only load artifacts that come
    from a source you trust.
    """
    with open(model_path + artifact_name, 'rb') as artifact_file:
        return pickle.load(artifact_file)


model = _load_pickle('model.pkl')
encoder = _load_pickle('encoder.pkl')
scaler = _load_pickle('scaler.pkl')
f_selector = _load_pickle('f_selector.pkl')


# Previously extracted features; the 'Unnamed: 0' column is dropped right after loading.
df_features = pd.read_csv('mfcc_features.csv')
df_features = df_features.drop(['Unnamed: 0'],axis=1)
# Not the last expression of the cell, so this count is computed but not displayed.
df_features.accent.value_counts()

# Clip metadata (file path, accent label, ...) for the clips tested below.
df_new_data = pd.read_csv('./preprocessed/train.csv')

# Class labels in encoder order; predictions are used as indices into this array.
classes = encoder.classes_


In [17]:
# Folder that holds the audio clips; feature_extraction resolves file names against it.
ds_path = "./clips/"

# Metadata table in which feature_extraction looks up each clip's accent label.
train_csv = df_new_data

def feature_extraction(filename, sampling_rate=48000, mfcc_num=20):
    """Build the feature row for one clip listed in ``train_csv``.

    Parameters
    ----------
    filename : str
        Clip file name. It is joined onto ``ds_path`` and must appear in the
        ``path`` column of ``train_csv``.
    sampling_rate : int
        Rate passed to ``librosa.load`` and to every feature extractor.
    mfcc_num : int
        Number of MFCCs requested from ``librosa.feature.mfcc``.

    Returns
    -------
    list
        ``[accent label, mean spectral centroid, mean spectral bandwidth,
        mean spectral rolloff, mean of MFCC 1, ..., mean of MFCC mfcc_num]``.

    Raises
    ------
    IndexError
        If ``filename`` has no row in ``train_csv``.
    """
    # Look the label up first so that an unknown clip fails with a clear
    # message before any time is spent decoding audio.
    matching_accents = train_csv[train_csv['path'] == filename].accents.values
    if len(matching_accents) == 0:
        raise IndexError(
            "no row with path {!r} found in train_csv".format(filename)
        )
    accents = matching_accents[0]

    # os.path.join also works when ds_path has no trailing separator.
    path = os.path.join(ds_path, filename)
    audio, _ = librosa.load(path, sr=sampling_rate)

    # Each spectral descriptor is collapsed to a single scalar with np.mean.
    features = [
        accents,
        np.mean(librosa.feature.spectral_centroid(y=audio, sr=sampling_rate)),
        np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sampling_rate)),
        np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sampling_rate)),
    ]

    # Named mfcc_matrix (not mfcc) so the module alias `mfcc` imported at the
    # top of the notebook is not shadowed. One mean per coefficient row.
    mfcc_matrix = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=mfcc_num)
    features.extend(np.mean(coefficient_row) for coefficient_row in mfcc_matrix)

    return features

In [18]:
# Positional index (iloc) of the clip in df_new_data that the next cells use as the worked example.
inx = 18765
df_new_data.iloc[inx]

Unnamed: 0                                                44613
client_id     0b7ae4a12ec82a5964a5e1bb271ea0106b65e34dc36c65...
path                               common_voice_es_19747165.wav
sentence      De nuevo, Crátero inició el asedio y Alejandro...
up_votes                                                      2
down_votes                                                    0
age                                                    fourties
gender                                                   female
accents                                                  México
locale                                                       es
segment                                                     NaN
Name: 18765, dtype: object

In [19]:

# Build the path from ds_path (the folder feature_extraction reads from) so the
# clip played here is always the same file that gets classified below.
path = os.path.join(ds_path, df_new_data.iloc[inx]['path'])

Audio(path, autoplay=True)

In [20]:
# File name (relative to the clips folder) of the example row selected earlier.
file_path = df_new_data['path'].iloc[inx]

# 13 MFCCs instead of the function default of 20: with the label and the three
# spectral means this yields a 17-entry feature row.
file_features = feature_extraction(filename=file_path, mfcc_num=13)
print("features: ", file_features)

# The raw values still have to be scaled and then passed through the
# feature selector before they can be given to the model.


features:  ['México', 1884.622756923559, 1742.0640012319645, 3505.8549259681095, -353.76996, 116.289505, -11.551197, 8.730063, 2.543114, -8.989427, -3.0980225, -29.225952, -15.900945, 4.048154, -14.510168, -2.7871382, 3.3598535]


In [21]:

# Scale the numeric features (everything after the accent label at index 0).
# The result gets its own name: overwriting file_features here would break a
# re-run of this cell, because file_features[1:] would then slice the
# already-scaled 2-D array instead of the raw feature list.
file_features_scaled = scaler.transform([file_features[1:]])

# Apply the fitted feature selector to the scaled row.
X_new_features = f_selector.transform(file_features_scaled)

X_new_features

array([[ 0.37935666, -0.56639967, -0.31544562, -0.70948822, -1.72154708,
        -0.58079178, -1.03388221, -3.36665712,  0.34766835, -1.29397315,
         0.1048925 ,  0.6290344 ]])

In [22]:
# Show the label order; the model's prediction is used as an index into this array.
print(classes)

prediction = model.predict(X_new_features)

# X_new_features holds a single row, so only the first prediction is looked up.
print(classes[prediction[0]])

['Caribe' 'México']
México


In [23]:
# File name of the example clip, shown for reference.
df_new_data.iloc[inx]['path']

'common_voice_es_19747165.wav'

# Test with Recording

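Read a phrase aloud or just say something: the `recordAudio` cell below records five seconds from your microphone and saves the result under `./recordings/`.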

In [24]:
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv

In [25]:
def getMFCC(filename, accent, sampling_rate=48000, mfcc_num=20):
    """Return the feature row for an arbitrary audio file.

    Unlike ``feature_extraction``, the file is opened exactly at ``filename``
    and the label is supplied by the caller instead of being looked up.

    Parameters
    ----------
    filename : str
        Path of the audio file to load.
    accent : object
        Value stored as the first entry of the returned row.
    sampling_rate : int
        Rate passed to ``librosa.load`` and to every feature extractor.
    mfcc_num : int
        Number of MFCCs requested from ``librosa.feature.mfcc``.

    Returns
    -------
    list
        ``[accent, mean spectral centroid, mean spectral bandwidth,
        mean spectral rolloff, mean of MFCC 1, ..., mean of MFCC mfcc_num]``.
    """
    signal, _ = librosa.load(filename, sr=sampling_rate)

    # Each spectral descriptor is collapsed to a single scalar with np.mean.
    centroid_mean = np.mean(librosa.feature.spectral_centroid(y=signal, sr=sampling_rate))
    bandwidth_mean = np.mean(librosa.feature.spectral_bandwidth(y=signal, sr=sampling_rate))
    rolloff_mean = np.mean(librosa.feature.spectral_rolloff(y=signal, sr=sampling_rate))

    # One row per coefficient; every row is reduced to its mean below.
    coefficients = librosa.feature.mfcc(y=signal, sr=sampling_rate, n_mfcc=mfcc_num)

    row = [accent, centroid_mean, bandwidth_mean, rolloff_mean]
    row.extend(np.mean(coefficient) for coefficient in coefficients)
    return row

In [26]:
def recordAudio(freq=44100, duration=5, output_dir="./recordings/"):
    """Record from the default input device and save the take as two WAV files.

    Parameters
    ----------
    freq : int
        Sampling frequency in Hz used for recording and for both files.
    duration : int or float
        Recording length in seconds.
    output_dir : str
        Folder that receives ``recording0.wav`` (written with
        ``scipy.io.wavfile.write``) and ``recording1.wav`` (written with
        ``wavio`` using ``sampwidth=2``). It is created if it does not exist.

    Returns
    -------
    None
    """
    # Writing into a missing folder fails, so make sure it exists first.
    os.makedirs(output_dir, exist_ok=True)

    # Start a two-channel recording with the given duration and sample frequency.
    recording = sd.rec(int(duration * freq),
                       samplerate=freq, channels=2)

    # Block until the recording has finished.
    sd.wait()

    # Save the NumPy array as an audio file with the given sampling frequency.
    write(os.path.join(output_dir, "recording0.wav"), freq, recording)

    # Save the same array a second time through wavio.
    wv.write(os.path.join(output_dir, "recording1.wav"), recording, freq, sampwidth=2)
    
# Start a recording as soon as this cell is executed.
recordAudio()

In [36]:
# Choose the audio file to classify.

# Pre-recorded file names (presumably stored as ./recordings/<name>.wav, as
# for the one selected below — TODO confirm):

# caribe
# caribe2
# homero
# homero2
# betty

# NOTE(review): recordAudio writes recording0.wav / recording1.wav into
# ./recordings/; point `path` at one of those to classify a fresh recording.
path = './recordings/homero.wav'

Audio(path, autoplay=True)

In [32]:
# Same 13-MFCC layout as used for the dataset clip. '_' is only a placeholder
# label; it occupies index 0 of the returned row.
test_features = getMFCC(filename=path, accent='_', mfcc_num=13)
print("features: ", test_features)

features:  ['_', 2983.644234600128, 3054.3904606547485, 5529.019788062284, -380.14337, 119.99996, -31.145166, -1.2101244, -0.8926419, 6.5436926, -12.706949, -25.984823, -9.341704, -12.41528, -15.938706, -9.043201, -4.9871845]


In [33]:
# Drop the placeholder label at index 0, then apply the scaler and the
# feature selector that were loaded together with the model.
file_features = scaler.transform([test_features[1:]])

X_new_features = f_selector.transform(file_features)

X_new_features

array([[ 3.12140909, -0.41376054, -1.26700911, -1.41362146, -2.0661354 ,
         0.99766227, -2.00234076, -2.93762948, -1.96753536, -1.5308569 ,
        -0.98333981, -0.78834557]])

In [34]:
# Show the label order; the model's prediction is used as an index into this array.
print(classes)
prediction = model.predict(X_new_features)

# Only one recording was transformed, so only the first prediction is looked up.
print(classes[prediction[0]])


['Caribe' 'México']
México
