In [1]:
import librosa

x,freq = librosa.load("Dataset/0002.wav",sr=16000)
print("The duration of FR_001.wav in seconds:",len(x)/freq)

The duration of FR_001.wav in seconds: 5.762


In [2]:
# This function will return n_mfcc number of MFCC per
#     a window of time in audio time series
x_mfcc=librosa.feature.mfcc(y=x,sr=freq, n_mfcc=20)
print(x_mfcc.shape)
# x_mfcc is an array with 40 values for a window of time
# The len(x_mfcc) is a proportion of wav file duration (5-6 seconds)

(20, 181)


In [None]:
import numpy as np
def feature_extractor_1(audio_file_dir):

    #load the audio files
    x,freq = librosa.load(audio_file_dir,sr=16000)
    #extract 20 MFCCs
    mfcc=librosa.feature.mfcc(y=x,sr=freq,n_mfcc=20)
    #calculate the mean and variance of each MFFC 
    mean_mfccs=np.mean(mfcc,axis=1)
    var_mfccs=np.var(mfcc,axis=1)
    #return mean and variance as the audio file feature 
    return list(mean_mfccs)+list(var_mfccs)

In [3]:
def feature_extractor_2(audio_file_dir):

    #load the audio files
    x,freq = librosa.load(audio_file_dir,sr=16000)
    # trim the first 5 seconds (Sequence Truncation)
    length_of_5seconds=5*16000
    x_5sec=x[:length_of_5seconds]
    # extract 20 MFCCs
    mfccs_5sec=librosa.feature.mfcc(y = x_5sec,sr=freq,n_mfcc=20)
    # return mfcc of the first 5 sec as the audio file feature
    return mfccs_5sec

In [None]:
def feature_extractor_2(audio_file_dir):
    import librosa
    import numpy as np

    # Charger le fichier audio
    x, freq = librosa.load(audio_file_dir, sr=16000)
    
    # Garder uniquement les 5 premières secondes
    length_of_5seconds = 5 * 16000
    x_5sec = x[:length_of_5seconds]

    # Extraire les MFCCs
    mfccs_5sec = librosa.feature.mfcc(y=x_5sec, sr=freq, n_mfcc=20)

    # Moyenne et variance des MFCCs
    mean_mfccs = np.mean(mfccs_5sec, axis=1)
    var_mfccs = np.var(mfccs_5sec, axis=1)

    # Retourner un vecteur concaténé
    return list(mean_mfccs) + list(var_mfccs)


In [4]:
import csv

#set data_dir to the directory of your data files
data_dir= "Dataset/"

# Read file info file to get the list of audio files and their labels
file_list=[]
label_list=[]
with open(data_dir+"Info.txt", 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column contains the file name
        file_list.append(row[0])
        # The last column contains the lable (language)
        label_list.append(row[-1]) 
        
        
# create a dictionary for labels
lang_dic={'EN':0,'FR':1,'AR':2,'JP':3}

# create a list of extracted feature (MFCC) for files
x_data=[]

for audio_file in file_list:
    #file_feature = feature_extractor_2(data_dir+audio_file)
    file_feature = feature_extractor_1(data_dir+audio_file)
    #add extracted feature to dataset 
    x_data.append(file_feature)

# create a list of labels for files
y_data=[]
for lang_label in label_list:
    #convert the label to a value in {0,1,2,3} as the class label
    y_data.append(lang_dic[lang_label])

In [5]:
import random

# shuffle two lists
temp_list = list(zip(x_data, y_data))
random.shuffle(temp_list)
x_data, y_data = zip(*temp_list)

In [6]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# 1. Division du jeu de données
X_train, X_test, y_train, y_test = train_test_split(
    x_data, y_data,
    test_size=0.33,
    shuffle=True,
    stratify=y_data,
    random_state=42
)

# 2. Pipeline : StandardScaler + SVC
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# 3. Grille d'hyperparamètres étendue
param_grid = {
    'svm__C': [0.01, 0.1, 1, 10, 100],
    'svm__kernel': ['linear', 'rbf'],
    'svm__gamma': ['scale', 'auto', 0.01, 0.001, 0.0001]
}

# 4. GridSearchCV avec validation croisée
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)
grid_search.fit(X_train, y_train)

# 5. Résultats
print("✅ Meilleurs hyperparamètres :", grid_search.best_params_)
print("✅ Score moyen en validation croisée :", grid_search.best_score_)

# 6. Évaluation sur le jeu de test
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("🎯 Accuracy sur le jeu de test :", accuracy_score(y_test, y_pred))
print("\n📊 Rapport de classification :\n")
print(classification_report(y_test, y_pred, target_names=["EN", "FR", "AR", "JP"]))


Fitting 5 folds for each of 50 candidates, totalling 250 fits


ValueError: 
All the 250 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
250 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\joblib\memory.py", line 326, in __call__
    return self.func(*args, **kwargs)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\base.py", line 921, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\preprocessing\_data.py", line 894, in fit
    return self.partial_fit(X, y, sample_weight)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\preprocessing\_data.py", line 930, in partial_fit
    X = validate_data(
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\utils\validation.py", line 2944, in validate_data
    out = check_array(X, input_name="X", **check_params)
  File "C:\Users\HP\anaconda3\envs\tp_ia_env\lib\site-packages\sklearn\utils\validation.py", line 1101, in check_array
    raise ValueError(
ValueError: Found array with dim 3. StandardScaler expected <= 2.


In [None]:
from joblib import dump
dump(grid_search.best_estimator_, "best_svm_pipeline.joblib")


In [None]:
import csv
import numpy as np
from joblib import load
#from feature_extractor import feature_extractor_1  # adapte si besoin

# Dossier contenant les fichiers de test et Info.csv
data_dir = "Test_Set/"
output_filename = "Benewinde_TP1_SVM_Version5"

# Chargement du modèle entraîné (Pipeline)
model = load("best_svm_pipeline.joblib")

# Dictionnaires de correspondance
class2lang_dic = {0: "EN", 1: "FR", 2: "AR", 3: "JP"}

# Lecture du fichier Info.csv
file_list = []
with open(data_dir + "Info.csv", 'r') as f:
    reader = csv.reader(f)
    next(reader)  # Skip header
    for row in reader:
        file_list.append(row[0])  # nom du fichier audio

# Fichier de sortie
with open(data_dir + f"{output_filename}.csv", 'w') as f:
    f.write("ID,Label\n")

    for filename in file_list:
        filepath = data_dir + filename
        features = feature_extractor_2(filepath)  # → vecteur numpy
        features = np.array(features).reshape(1, -1)
        predicted_class = model.predict(features)[0]
        predicted_lang = class2lang_dic[predicted_class]

        print(f"{filename}: {predicted_lang}")
        f.write(f"{filename},{predicted_lang}\n")
