In [13]:
import librosa
x,freq = librosa.load("Test_Set/0000.wav",sr=16000)
print("The duration of FR_001.wav in seconds:",len(x)/freq)

The duration of FR_001.wav in seconds: 5.3603125


In [14]:
x_mfcc=librosa.feature.mfcc(y=x,sr=freq, n_mfcc=40)
print(x_mfcc.shape)

(40, 168)


In [15]:
import numpy as np
def feature_extractor_1(audio_file_dir):

    #load the audio files
    x,freq = librosa.load(audio_file_dir,sr=16000)
    #extract 20 MFCCs
    mfcc=librosa.feature.mfcc(y=x,sr=freq,n_mfcc=20)
    #calculate the mean and variance of each MFFC 
    mean_mfccs=np.mean(mfcc,axis=1)
    var_mfccs=np.var(mfcc,axis=1)
    #return mean and variance as the audio file feature 
    return list(mean_mfccs)+list(var_mfccs)

In [16]:
def feature_extractor_2(audio_file_dir):

    #load the audio files
    x,freq = librosa.load(audio_file_dir,sr=16000)
    # trim the first 5 seconds (Sequence Truncation)
    length_of_5seconds=5*16000
    x_5sec=x[:length_of_5seconds]
    # extract 20 MFCCs
    mfccs_5sec=librosa.feature.mfcc(y=x_5sec,sr=freq,n_mfcc=20)
    # return mfcc of the first 5 sec as the audio file feature
    return mfccs_5sec

In [17]:
import csv
import numpy as np
#set data_dir to the directory of your data files
data_dir= "Dataset/"

# Read file info file to get the list of audio files and their labels
file_list=[]
label_list=[]
with open(data_dir+"Info.txt", 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column contains the file name
        file_list.append(row[0])
        # The last column contains the lable (language)
        label_list.append(row[-1]) 
        
        
# create a dictionary for labels
lang_dic={'EN':0,'FR':1,'AR':2,'JP':3}

# create a list of extracted feature (MFCC) for files
x_data=[]

for audio_file in file_list:
    file_feature = feature_extractor_1(data_dir+audio_file)
    #add extracted feature to dataset 
    x_data.append(file_feature)

# create a list of labels for files
y_data=[]
for lang_label in label_list:
    #convert the label to a value in {0,1,2,3} as the class label
    y_data.append(lang_dic[lang_label])

In [18]:
import random

# shuffle two lists
temp_list = list(zip(x_data, y_data))
random.shuffle(temp_list)
x_data, y_data = zip(*temp_list)

**Modèle de RandomForest**

In [100]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

rf = RandomForestClassifier(max_depth=2)
rf.fit(x_data, y_data)
# the resulted accuracy is on a small set which is same for train and test
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,test_size=0.3,shuffle=True,random_state=42)

print("Accuracy",rf.score(x_data, y_data))

Accuracy 0.597623089983022


In [102]:
#set data_dir to the directory of your data files
data_dir= "Test_Set/"

#Change below file name:
ourputfile_name="BETCHEM_WARREN_RandomForest_v1"

# Read file info file to get the list of audio files and their labels
file_list=[]
label_list=[]
with open(data_dir+"Info.csv", 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column contains the file name
        file_list.append(row[0])

lang_dic={'EN':0,'FR':1,'AR':2,'JP':3}
class2lang_dic={0:"EN",1:"FR",2:"AR",3:"JP"}
with open(data_dir+f"{ourputfile_name}.csv",'w') as file:
    file.write(f"ID,Label\n")
for test_sample in file_list[1:]:
    test_sample_feature=feature_extractor_1(data_dir+test_sample)
    predicted=class2lang_dic[rf.predict([test_sample_feature])[0]]
    print(f'{test_sample}:{predicted}')
    # save the predicted output in Output_evaluation.txt
    with open(data_dir+f"{ourputfile_name}.csv",'a+') as file:
        file.write(f"{test_sample},{predicted}\n")

0000.wav:JP
0001.wav:FR
0002.wav:EN
0003.wav:AR
0004.wav:FR
0005.wav:AR
0006.wav:FR
0007.wav:JP
0008.wav:AR
0009.wav:FR
0010.wav:EN
0011.wav:AR
0012.wav:AR
0013.wav:AR
0014.wav:FR
0015.wav:JP
0016.wav:FR
0017.wav:FR
0018.wav:AR
0019.wav:FR
0020.wav:FR
0021.wav:JP
0022.wav:JP
0023.wav:EN
0024.wav:EN
0025.wav:EN
0026.wav:EN
0027.wav:EN
0028.wav:AR
0029.wav:AR
0030.wav:EN
0031.wav:EN
0032.wav:FR
0033.wav:AR
0034.wav:AR
0035.wav:EN
0036.wav:FR
0037.wav:EN
0038.wav:JP
0039.wav:FR
0040.wav:FR
0041.wav:AR
0042.wav:EN
0043.wav:FR
0044.wav:EN
0045.wav:JP
0046.wav:FR
0047.wav:JP
0048.wav:FR
0049.wav:JP
0050.wav:FR
0051.wav:EN
0052.wav:AR
0053.wav:FR
0054.wav:EN
0055.wav:FR
0056.wav:EN
0057.wav:EN
0058.wav:FR
0059.wav:EN
0060.wav:EN
0061.wav:AR
0062.wav:FR
0063.wav:AR
0064.wav:FR
0065.wav:AR
0066.wav:FR
0067.wav:EN
0068.wav:AR
0069.wav:EN
0070.wav:AR
0071.wav:FR
0072.wav:EN
0073.wav:EN
0074.wav:FR
0075.wav:EN
0076.wav:FR
0077.wav:FR
0078.wav:EN
0079.wav:AR
0080.wav:AR
0081.wav:FR
0082.wav:FR
0083

**SVM model**

In [22]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,test_size=0.3,shuffle=True,random_state=42)


**GNB Model**

In [53]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,test_size=0.2,shuffle=True,random_state=42)
gnb = GaussianNB(var_smoothing=1e-8)
pred_y=gnb.fit(x_train,y_train).predict(x_test)
print("Accuracy of Gaussian Naive Bayes:", accuracy_score(y_test, pred_y))


Accuracy of Gaussian Naive Bayes: 0.3898305084745763


In [94]:
from sklearn.model_selection import GridSearchCV
"""param_grid={'var_smoothing':[1e-11,1e-10,1e-9,1e-8,1e-7]}
gnb=GaussianNB()
grid_search=GridSearchCV(gnb,param_grid,cv=5,scoring='accuracy')
grid_search.fit(x_train,y_train)

print("Meilleurs paramètres :",grid_search.best_params_)
print("Meilleur exactitude :",grid_search.best_score_)
"""
# Définir la grille d'hyperparamètres
param_grid = {'var_smoothing': [1e-12,1e-11, 1e-10, 1e-9, 1e-8, 1e-7,1e-6,1e-5]}

gnb = GaussianNB()

grid_search = GridSearchCV(gnb, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(x_train, y_train)

# Afficher les meilleurs paramètres
print("Meilleurs paramètres :", grid_search.best_params_)
print("Meilleure exactitude (validation croisée) :", grid_search.best_score_)

best_gnb = grid_search.best_estimator_
pred_y = best_gnb.predict(x_test)
accuracy = accuracy_score(y_test, pred_y)
print(f"Exactitude sur l'ensemble de validation : {accuracy:.4f}")

Meilleurs paramètres : {'var_smoothing': 1e-06}
Meilleure exactitude (validation croisée) : 0.4163269876819709
Exactitude sur l'ensemble de validation : 0.3983


In [96]:
#set data_dir to the directory of your data files
data_dir= "Test_Set/"

#Change below file name:
ourputfile_name="BETCHEM_WARREN_GridSearchCV_V1"

# Read file info file to get the list of audio files and their labels
file_list=[]
label_list=[]
with open(data_dir+"Info.csv", 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column contains the file name
        file_list.append(row[0])

lang_dic={'EN':0,'FR':1,'AR':2,'JP':3}
class2lang_dic={0:"EN",1:"FR",2:"AR",3:"JP"}
with open(data_dir+f"{ourputfile_name}.csv",'w') as file:
    file.write(f"ID,Label\n")
for test_sample in file_list[1:]:
    test_sample_feature=feature_extractor_1(data_dir+test_sample)
    predicted=class2lang_dic[best_gnb.predict([test_sample_feature])[0]]
    print(f'{test_sample}:{predicted}')
    # save the predicted output in Output_evaluation.txt
    with open(data_dir+f"{ourputfile_name}.csv",'a+') as file:
        file.write(f"{test_sample},{predicted}\n")

0000.wav:JP
0001.wav:FR
0002.wav:EN
0003.wav:FR
0004.wav:FR
0005.wav:FR
0006.wav:FR
0007.wav:FR
0008.wav:EN
0009.wav:FR
0010.wav:EN
0011.wav:FR
0012.wav:FR
0013.wav:JP
0014.wav:FR
0015.wav:JP
0016.wav:FR
0017.wav:FR
0018.wav:FR
0019.wav:FR
0020.wav:FR
0021.wav:EN
0022.wav:JP
0023.wav:EN
0024.wav:JP
0025.wav:AR
0026.wav:EN
0027.wav:AR
0028.wav:FR
0029.wav:FR
0030.wav:FR
0031.wav:EN
0032.wav:FR
0033.wav:FR
0034.wav:EN
0035.wav:FR
0036.wav:FR
0037.wav:JP
0038.wav:EN
0039.wav:FR
0040.wav:FR
0041.wav:FR
0042.wav:EN
0043.wav:FR
0044.wav:FR
0045.wav:JP
0046.wav:FR
0047.wav:FR
0048.wav:FR
0049.wav:JP
0050.wav:FR
0051.wav:EN
0052.wav:FR
0053.wav:FR
0054.wav:FR
0055.wav:FR
0056.wav:AR
0057.wav:EN
0058.wav:FR
0059.wav:EN
0060.wav:FR
0061.wav:JP
0062.wav:FR
0063.wav:FR
0064.wav:FR
0065.wav:FR
0066.wav:EN
0067.wav:FR
0068.wav:EN
0069.wav:AR
0070.wav:FR
0071.wav:FR
0072.wav:EN
0073.wav:AR
0074.wav:FR
0075.wav:EN
0076.wav:FR
0077.wav:FR
0078.wav:JP
0079.wav:FR
0080.wav:EN
0081.wav:FR
0082.wav:FR
0083

**MLP**

In [106]:
# Mélanger
import numpy as np
temp_list = list(zip(x_data, y_data))
random.shuffle(temp_list)
x_data, y_data = zip(*temp_list)
x_data = np.array(x_data)
y_data = np.array(y_data)


x_train, x_val, y_train, y_val = train_test_split(
    x_data, y_data, test_size=0.2, random_state=42, stratify=y_data
)
print(f"Entraînement : {len(x_train)}, Validation : {len(x_val)}")

Entraînement : 471, Validation : 118


In [108]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
x_train,x_val,y_train,y_val=train_test_split(x_data,y_data,test_size=0.3,shuffle=True,random_state=42)
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 10)],  # Architectures simples
    'activation': ['relu', 'tanh'],
    'solver': ['adam']
}


mlp = MLPClassifier(max_iter=1000, random_state=42)  # max_iter pour convergence
grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(x_train, y_train)

# Résultats
print("Meilleurs paramètres :", grid_search.best_params_)
print("Meilleure exactitude (CV) :", grid_search.best_score_)

# Validation
best_mlp = grid_search.best_estimator_
y_val_pred = best_mlp.predict(x_val)
print(f"Exactitude validation : {accuracy_score(y_val, y_val_pred):.4f}")


Meilleurs paramètres : {'activation': 'relu', 'hidden_layer_sizes': (100,), 'solver': 'adam'}
Meilleure exactitude (CV) : 0.4322950337937114
Exactitude validation : 0.3503


In [None]:
#set data_dir to the directory of your data files
data_dir= "Test_Set/"

#Change below file name:
ourputfile_name="BETCHEM_WARREN_RandomForest_v1"

# Read file info file to get the list of audio files and their labels
file_list=[]
label_list=[]
with open(data_dir+"Info.csv", 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column contains the file name
        file_list.append(row[0])

lang_dic={'EN':0,'FR':1,'AR':2,'JP':3}
class2lang_dic={0:"EN",1:"FR",2:"AR",3:"JP"}
with open(data_dir+f"{ourputfile_name}.csv",'w') as file:
    file.write(f"ID,Label\n")
for test_sample in file_list[1:]:
    test_sample_feature=feature_extractor_1(data_dir+test_sample)
    predicted=class2lang_dic[best_mlp.predict([test_sample_feature])[0]]
    print(f'{test_sample}:{predicted}')
    # save the predicted output in Output_evaluation.txt
    with open(data_dir+f"{ourputfile_name}.csv",'a+') as file:
        file.write(f"{test_sample},{predicted}\n")

0000.wav:JP
0001.wav:FR
0002.wav:FR
0003.wav:AR
0004.wav:FR
0005.wav:AR
0006.wav:FR
0007.wav:AR
0008.wav:AR
0009.wav:FR
0010.wav:JP
0011.wav:AR
0012.wav:FR
0013.wav:FR
0014.wav:FR
0015.wav:AR
0016.wav:FR
0017.wav:FR
0018.wav:FR
0019.wav:FR
0020.wav:FR
0021.wav:AR
0022.wav:JP
0023.wav:EN
0024.wav:FR
0025.wav:AR
0026.wav:EN
0027.wav:AR
0028.wav:AR
0029.wav:FR
0030.wav:FR
0031.wav:EN
0032.wav:FR
0033.wav:FR
0034.wav:AR
0035.wav:EN
0036.wav:FR
0037.wav:JP
0038.wav:FR
0039.wav:FR
0040.wav:FR
0041.wav:AR
0042.wav:AR
0043.wav:FR
0044.wav:FR
0045.wav:FR
0046.wav:FR
0047.wav:FR
0048.wav:FR
0049.wav:JP
0050.wav:FR
0051.wav:FR
0052.wav:FR
0053.wav:AR
0054.wav:FR
0055.wav:FR
0056.wav:FR
0057.wav:AR
0058.wav:FR
0059.wav:FR
0060.wav:EN
0061.wav:FR
0062.wav:EN
0063.wav:FR
0064.wav:FR
0065.wav:FR
0066.wav:FR
0067.wav:FR
0068.wav:AR
0069.wav:EN
0070.wav:AR
0071.wav:FR
0072.wav:EN
0073.wav:FR
0074.wav:FR
0075.wav:AR
0076.wav:FR
0077.wav:FR
0078.wav:JP
0079.wav:AR
0080.wav:AR
0081.wav:FR
0082.wav:FR
0083