# Importing necassary libraries

In [2]:
import numpy as np
import scipy.io
from python_speech_features import mfcc
import matplotlib.pyplot as plt
import IPython.display as ipd
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import soundfile as sf
import random
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import joblib
import math
import statistics




# Defining the function that reads the audios from a given path using scipy and returns 3 lists : audios, freqs, filepaths

In [3]:
def read_audios(path):
    audios = []
    freqs = []
    filepaths = []
    #walking through the directory that contains the dataset and reading each file that has the .wav extension
    for dp, dn, filenames in os.walk(path):
        for filename in filenames:
            if filename.endswith('.wav'):
                filepath = os.path.join(dp, filename)
                filepaths.append(filepath)
                with open(filepath, "rb") as f:
                    # load the audio using scipy
                    freq, data = scipy.io.wavfile.read(f, mmap=False)
                    # append the data and frequency to the respective lists
                    audios.append(data)
                    freqs.append(freq)
    return audios, freqs, filepaths
    

# Defining the funcyion that extracts the mfcc features then removes the frames of silence finally it saves the mffc features into a .txt file according to gender 

In [4]:
def extractMfccs_RemoveSilence_saveMfccs(audios,freqs,filepaths, directory):
    mfccs = []
 
    for audio, freq, filepath in zip(audios, freqs,filepaths):
        # extract the MFCC features
        mfcc_features = mfcc(audio, freq, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft= 2048, lowfreq=0,
                         highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=False)
        
        # calculate the energy
        energy = np.sum(mfcc_features**2, axis=1)
        # calculate the threshold for silence
        threshold = np.mean(energy) * 0.4
        #removing silence frames from mfccs
        voiced_indices = np.where(energy > threshold)[0]
        mfccs_voiced = mfcc_features[voiced_indices,:]
        mfccs.append(mfccs_voiced)
        
        # print the shape of the MFCCs before and after removing silence
        print(f"MFCCs before removing silence: {mfcc_features.shape}")
        print(f"MFCCs after removing silence: {mfccs_voiced.shape}")
        
       #saving mffcs 
       # extract the gender information from the file name
        gender = None
        if 'hommes' in filepath:
            gender = 'Hommes'
        elif 'femmes' in filepath:
            gender = 'Femmes'

        # save the MFCCs to the appropriate directory based on gender
        if gender is not None:
            gender_dir = os.path.join(directory, gender)
            if not os.path.exists(gender_dir):
                os.makedirs(gender_dir)
            mfcc_file = os.path.join(gender_dir,  os.path.splitext(os.path.basename(filepath))[0] + ".mfcc")
            np.savetxt(mfcc_file, mfccs_voiced, delimiter=',')
            
      
    
    return  mfccs

# Defining the function that splits the extraced mfccs into training and testing sets : 2/3 from male directory for training and 2/3 from female 

In [5]:
def train_test_split(mfcc_dir):
    # create separate lists for male and female file paths
    male_files = []
    female_files = []
    for root, dirs, files in os.walk(mfcc_dir):
        for file in files:
            if file.endswith('.mfcc'):
                if 'Hommes' in root:
                    male_files.append(os.path.join(root, file))
                elif 'Femmes' in root:
                    female_files.append(os.path.join(root, file))

    # shuffle the male and female lists independently
    random.shuffle(male_files)
    random.shuffle(female_files)

    # split the male and female lists into train and test based on the desired ratio
    male_train = male_files[:int(2/3*len(male_files))]
    male_test = male_files[int(2/3*len(male_files)):]

    female_train = female_files[:int(2/3*len(female_files))]
    female_test = female_files[int(2/3*len(female_files)):]

    
    
    # merge the train and test lists for both male and female
    train_files = male_train + female_train
    test_files = male_test + female_test

    # load the MFCC features from the saved files for the train and test sets
    train_mfccs = []
    test_mfccs = []

    for file in train_files:
        train_mfccs.append(np.loadtxt(file, delimiter=','))

    for file in test_files:
        test_mfccs.append(np.loadtxt(file, delimiter=','))

    # print the shapes of the train and test MFCC feature arrays
    print(f"Train male MFCCs shape: {np.array(male_train).shape}")
    print(f"Test male MFCCs shape: {np.array( male_test).shape}")
    print(f"Train female MFCCs shape: {np.array(female_train).shape}")
    print(f"Test female MFCCs shape: {np.array( female_test).shape}")
    print(f"Train MFCCs shape: {np.array(train_mfccs).shape}")
    print(f"Test MFCCs shape: {np.array(test_mfccs).shape}")
    
    return train_mfccs, test_mfccs


# Defining the functions that train the different GMM models and than save them as a pkl file

In [5]:
def gmm16(train_mfccs):
    # Initialize the GMM model with 16 classes
    gmm = GaussianMixture(n_components=16, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model16_arabic.pkl')

    return gmm


In [6]:
def gmm32(train_mfccs):
    # Initialize the GMM model with 32 classes
    gmm = GaussianMixture(n_components=32, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model32_arabic.pkl')

    return gmm

In [7]:
def gmm64(train_mfccs):
    # Initialize the GMM model with 64 classes
    gmm = GaussianMixture(n_components=64, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model64_arabic.pkl')

    return gmm

In [8]:
def gmm128(train_mfccs):
    # Initialize the GMM model with 128 classes
    gmm = GaussianMixture(n_components=128, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model128_arabic.pkl')

    return gmm

In [9]:
def gmm256(train_mfccs):
    # Initialize the GMM model with 254 classes
    gmm = GaussianMixture(n_components=256, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model256_arabic.pkl')

    return gmm

In [10]:
def gmm512(train_mfccs):
    # Initialize the GMM model with 254 classes
    gmm = GaussianMixture(n_components=512, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model512_arabic.pkl')

    return gmm

In [11]:
def gmm1024(train_mfccs):
    # Initialize the GMM model with 254 classes
    gmm = GaussianMixture(n_components=1024, covariance_type='diag', random_state=0)

    # Fit the GMM model to the training data
    gmm.fit(train_mfccs)
    
    # Save the trained GMM model to a file
    joblib.dump(gmm, r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\arabic\gmm_model1024_arabic.pkl')

    return gmm

# Getting the audios frequencies and filepaths from the russe directory using the function defined above

In [6]:
audios, freqs, filepaths= read_audios(r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\dataset\arabic')

  freq, data = scipy.io.wavfile.read(f, mmap=False)


# Extracting the mfcc features - Removing silence and saving the mfccs

In [7]:
mfccs = extractMfccs_RemoveSilence_saveMfccs(audios, freqs, filepaths,r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\mfcc\arabic')

MFCCs before removing silence: (11999, 13)
MFCCs after removing silence: (11674, 13)
MFCCs before removing silence: (397, 13)
MFCCs after removing silence: (397, 13)
MFCCs before removing silence: (357, 13)
MFCCs after removing silence: (265, 13)
MFCCs before removing silence: (345, 13)
MFCCs after removing silence: (211, 13)
MFCCs before removing silence: (733, 13)
MFCCs after removing silence: (519, 13)
MFCCs before removing silence: (385, 13)
MFCCs after removing silence: (250, 13)
MFCCs before removing silence: (385, 13)
MFCCs after removing silence: (285, 13)
MFCCs before removing silence: (277, 13)
MFCCs after removing silence: (162, 13)
MFCCs before removing silence: (385, 13)
MFCCs after removing silence: (281, 13)
MFCCs before removing silence: (297, 13)
MFCCs after removing silence: (216, 13)
MFCCs before removing silence: (405, 13)
MFCCs after removing silence: (220, 13)
MFCCs before removing silence: (297, 13)
MFCCs after removing silence: (201, 13)
MFCCs before removing si

MFCCs before removing silence: (577, 13)
MFCCs after removing silence: (415, 13)
MFCCs before removing silence: (421, 13)
MFCCs after removing silence: (311, 13)
MFCCs before removing silence: (373, 13)
MFCCs after removing silence: (238, 13)
MFCCs before removing silence: (361, 13)
MFCCs after removing silence: (236, 13)
MFCCs before removing silence: (289, 13)
MFCCs after removing silence: (187, 13)
MFCCs before removing silence: (361, 13)
MFCCs after removing silence: (238, 13)
MFCCs before removing silence: (4499, 13)
MFCCs after removing silence: (4294, 13)
MFCCs before removing silence: (453, 13)
MFCCs after removing silence: (240, 13)
MFCCs before removing silence: (565, 13)
MFCCs after removing silence: (404, 13)
MFCCs before removing silence: (277, 13)
MFCCs after removing silence: (160, 13)
MFCCs before removing silence: (325, 13)
MFCCs after removing silence: (190, 13)
MFCCs before removing silence: (517, 13)
MFCCs after removing silence: (395, 13)
MFCCs before removing sile

MFCCs before removing silence: (499, 13)
MFCCs after removing silence: (498, 13)
MFCCs before removing silence: (467, 13)
MFCCs after removing silence: (306, 13)
MFCCs before removing silence: (957, 13)
MFCCs after removing silence: (939, 13)
MFCCs before removing silence: (726, 13)
MFCCs after removing silence: (616, 13)
MFCCs before removing silence: (561, 13)
MFCCs after removing silence: (452, 13)
MFCCs before removing silence: (359, 13)
MFCCs after removing silence: (299, 13)
MFCCs before removing silence: (298, 13)
MFCCs after removing silence: (262, 13)
MFCCs before removing silence: (273, 13)
MFCCs after removing silence: (211, 13)
MFCCs before removing silence: (633, 13)
MFCCs after removing silence: (433, 13)
MFCCs before removing silence: (647, 13)
MFCCs after removing silence: (462, 13)
MFCCs before removing silence: (1353, 13)
MFCCs after removing silence: (1139, 13)
MFCCs before removing silence: (1115, 13)
MFCCs after removing silence: (1066, 13)
MFCCs before removing si

## -----> We can see here that the size of the mfcc features has decreased after removing the frames ot silence

# Splitting into teest and train sets according to gender

In [8]:
train_mfccs, test_mfccs = train_test_split(r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\mfcc\arabic')

Train male MFCCs shape: (84,)
Test male MFCCs shape: (43,)
Train female MFCCs shape: (90,)
Test female MFCCs shape: (45,)
Train MFCCs shape: (174,)
Test MFCCs shape: (88,)


  print(f"Train MFCCs shape: {np.array(train_mfccs).shape}")
  print(f"Test MFCCs shape: {np.array(test_mfccs).shape}")


# Stacking vertically the train and test MFCC features so that we can fit the gmm models

In [10]:
#stack vertically the train MFCC features 
mfcc_train = []
for train_mfcc in train_mfccs:
    mfcc_train.append(train_mfcc)
mfcc_train = np.concatenate(mfcc_train, axis=0)

#stack vertically the test MFCC features 
mfcc_test = []
for test_mfcc in test_mfccs:
    mfcc_test.append(test_mfcc)
mfcc_test = np.concatenate(mfcc_test, axis=0)


# Saving the test set into a txt file 

In [16]:
#Save the test mfccs in a file
test_mfccs = np.vstack(test_mfccs)
test_mfccs = np.array(test_mfccs, dtype=float)
np.savetxt(r'C:\Users\ASUS ROG STRIX\Desktop\Projet\Langues\gmm\Test\arabicTest', test_mfccs, delimiter=',')



In [11]:
mfcc_train.shape

(171671, 13)

# Training the different Gmm Models 

In [17]:
gmm16 = gmm16(mfcc_train)

In [18]:
gmm32 = gmm32(mfcc_train)

In [19]:
gmm64 = gmm64(mfcc_train)

In [20]:
gmm128= gmm128(mfcc_train)

In [21]:
gmm256= gmm256(mfcc_train)

In [22]:
gmm512= gmm512(mfcc_train)

In [23]:
gmm1024= gmm1024(mfcc_train)

# Evaluate the performance of each GMM model on the test set using the score_samples() function that returns an array containing the log-likelihood of each frame of the mfcc features

In [33]:
scores = []
for model in [gmm16, gmm32, gmm64, gmm128, gmm256, gmm512, gmm1024]:
    score = model.score_samples(mfcc_test)
    scores.append(score)

# Print the scores
print('GMM16 score:', scores[0])
print('GMM32 score:', scores[1])
print('GMM64 score:', scores[2])
print('GMM128 score:', scores[3])
print('GMM256 score:', scores[4])
print('GMM512 score:', scores[5])
print('GMM1024 score:', scores[6])



GMM16 score: [ 74.38788144  74.38788144  74.38788144 ... -53.0198821  -52.35035207
 -53.71331347]
GMM32 score: [ 74.38788144  74.38788144  74.38788144 ... -51.54623094 -50.37937909
 -49.50712375]
GMM64 score: [ 74.38788144  74.38788144  74.38788144 ... -50.44499313 -49.61367804
 -48.61092702]
GMM128 score: [ 74.38788144  74.38788144  74.38788144 ... -50.8258868  -49.52303145
 -49.38540205]
GMM256 score: [ 74.38788144  74.38788144  74.38788144 ... -50.51583039 -50.00529989
 -51.511581  ]
GMM512 score: [ 74.38788144  74.38788144  74.38788144 ... -47.39984341 -46.38587491
 -51.8235976 ]
GMM1024 score: [ 74.38788144  74.38788144  74.38788144 ... -45.89844145 -44.95651933
 -49.60262791]


# As we can see in the following lines our mfcc_test set contains 121202 frames so we expect the same size of the scores array 

In [25]:
mfcc_test.shape

(121202, 13)

# Indeed the size of the scores array is the same 

In [26]:
 scores[0].shape

(121202,)

# In order to compare between the different GMM Models we need to calculate the score for the hole test set and we can do that by calculating the mean of the individual scores

In [34]:
#calculationg the score of the hole test set
print('GMM16 score:', scores[0].mean())
print('GMM32 score:', scores[1].mean())
print('GMM64 score:', scores[2].mean())
print('GMM128 score:', scores[3].mean())
print('GMM256 score:', scores[4].mean())
print('GMM512 score:', scores[5].mean())
print('GMM1024 score:', scores[6].mean())

GMM16 score: -49.0838183651333
GMM32 score: -48.79275546945979
GMM64 score: -48.6336561571651
GMM128 score: -48.43012194174043
GMM256 score: -48.38550958441413
GMM512 score: -48.39897400435833
GMM1024 score: -48.503701691179806


### From the results above we can see that the best score (the closest one to 0) is given by the model using 256 gaussians 