## Imports

In [1]:
!pip install python_speech_features

Collecting python_speech_features
  Downloading https://files.pythonhosted.org/packages/ff/d1/94c59e20a2631985fbd2124c45177abaa9e0a4eee8ba8a305aa26fc02a8e/python_speech_features-0.6.tar.gz
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-cp36-none-any.whl size=5887 sha256=2fd7eba659e1d9c5165efc2a4c83c5d9c26ce928c672ef6b15bac42a5c277449
  Stored in directory: /root/.cache/pip/wheels/3c/42/7c/f60e9d1b40015cd69b213ad90f7c18a9264cd745b9888134be
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [2]:
import python_speech_features as mfcc
from scipy.io.wavfile import read
import numpy as np
import pandas as pd
import os
import time

from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Dataset

In [4]:
!unzip "/content/drive/MyDrive/Colab Notebooks/ML-Project/LUMS_FALL2020_PROJECT_DATA.zip"

Archive:  /content/drive/MyDrive/Colab Notebooks/ML-Project/LUMS_FALL2020_PROJECT_DATA.zip
   creating: Gender_Recognition/Test/
   creating: Gender_Recognition/Test/SPK083_M/
  inflating: Gender_Recognition/Test/SPK083_M/1.wav  
  inflating: Gender_Recognition/Test/SPK083_M/10.wav  
  inflating: Gender_Recognition/Test/SPK083_M/2.wav  
  inflating: Gender_Recognition/Test/SPK083_M/3.wav  
  inflating: Gender_Recognition/Test/SPK083_M/4.wav  
  inflating: Gender_Recognition/Test/SPK083_M/5.wav  
  inflating: Gender_Recognition/Test/SPK083_M/6.wav  
  inflating: Gender_Recognition/Test/SPK083_M/7.wav  
  inflating: Gender_Recognition/Test/SPK083_M/8.wav  
  inflating: Gender_Recognition/Test/SPK083_M/9.wav  
   creating: Gender_Recognition/Test/SPK084_M/
  inflating: Gender_Recognition/Test/SPK084_M/1.wav  
  inflating: Gender_Recognition/Test/SPK084_M/10.wav  
  inflating: Gender_Recognition/Test/SPK084_M/2.wav  
  inflating: Gender_Recognition/Test/SPK084_M/3.wav  
  inflating: Gender

## Data Reading Function

In [5]:
def get_MFCC(audio, sr):
    features = mfcc.mfcc(audio, sr, 0.025, 0.01, 13, appendEnergy = True)
    return np.mean(features, axis=0)

In [6]:
def dataset_SR(path):
  speaker=os.listdir(path)
  output=np.array([]);
  for a,s in enumerate(speaker):
    speaker_id=s
    # print(speaker_id)
    path_to_speaker=os.path.join(path,speaker_id)
    speaker_wav_files=os.listdir(path_to_speaker)
    for b,wav_files in enumerate(speaker_wav_files):
      wav_path=os.path.join(path_to_speaker,wav_files)
      sr, audio = read(wav_path)
      features = get_MFCC(audio, sr)
      # print(speaker_id)
      features=np.append(features,speaker_id)
      features=np.reshape(features,(1,features.shape[0]))
      if a==0 and b==0:
        # first iteration
        output= features
        continue
      output=np.append(output,features,axis=0)
  return output

In [7]:
def dataset_GR(path):
  speaker=os.listdir(path)
  output=np.array([]);
  for a,s in enumerate(speaker):
    speaker_id=s
    # print(speaker_id)
    path_to_speaker=os.path.join(path,speaker_id)
    speaker_wav_files=os.listdir(path_to_speaker)
    for b,wav_files in enumerate(speaker_wav_files):
      wav_path=os.path.join(path_to_speaker,wav_files)
      sr, audio = read(wav_path)
      features = get_MFCC(audio, sr)
      features=np.append(features,speaker_id[-1])
      features=np.reshape(features,(1,features.shape[0]))
      if a==0 and b==0:
        # first iteration
        output= features
        continue
      output=np.append(output,features,axis=0)
  return output

## Preprocession Functions

In [8]:
def normalize(array):
  means=np.mean(array, axis=0)
  std=np.std(array, axis=0)
  array=(array-means)/std
  return array

In [9]:
def preprocessing(df_train, df_valid, df_test):
  # merge train and validation data
  df_train = pd.concat([df_train, df_valid])
  # seperate out labels
  Y_train=df_train[13]
  Y_test=df_test[13]
  # seperate out features
  X_train=df_train.drop(columns=[13])
  X_test=df_test.drop(columns=[13])
  # convert to numeric
  X_train = X_train.apply(pd.to_numeric)
  X_test = X_test.apply(pd.to_numeric)
  # normalize
  normalised_train=normalize(X_train)
  normalised_test=normalize(X_test)

  return normalised_train, Y_train, normalised_test, Y_test

## Printing

In [21]:
def printing(pred, true):
       
        print("Accuracy: ",accuracy_score(pred,true))
        
        print("Classification report: ")
        print(classification_report(pred, true))

        print("Confusion matrix: ")
        print(confusion_matrix(pred,true))

## Import Models

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

## Multi Layer Perceptron

In [23]:
def mlp(X_train, Y_train, X_test, Y_test):
  parameters = {'random_state':[1],
                'activation':['logistic'],
                'solver':['sgd'],
                'max_iter':[5000],
                'learning_rate_init':[0.4,0.1,0.01],
                'hidden_layer_sizes':[(128,64),(64,),(64,32),(32,)],}

  MLP = MLPClassifier()
  #girdi search
  GSC = GridSearchCV(MLP, param_grid=parameters,scoring='f1_macro',cv=3,n_jobs=-1)
  #fitting 
  GSC.fit(X_train,Y_train)
  #getting predictions
  pred = GSC.predict(X_test)
  #pritnting 
  printing(pred, Y_test)

## Linear Support Vector

In [24]:
def lsv(X_train, Y_train, X_test, Y_test, epochs):
  LS = LinearSVC(random_state=0, verbose=1, max_iter=epochs,dual=False)
  #fitting
  LS.fit(X_train,Y_train)
  #getting prediction
  pred = LS.predict(X_test)
  #printing
  printing(pred, Y_test)

## Gaussian Naive Bayes

In [25]:
def gnb(X_train, Y_train, X_test, Y_test):
  GNB = GaussianNB()
  #fitting
  GNB.fit(X_train,Y_train)
  #getting predictions
  pred = GNB.predict(X_test)
  #pritnting
  printing(pred, Y_test)

# Speaker Recognition

In [26]:
train_directory = '/content/Speaker_Recognition/Train'
validation_directory = '/content/Speaker_Recognition/Valid'
test_directory = '/content/Speaker_Recognition/Test'

start = time.time()
df_train=dataset_SR(train_directory)
df_test=dataset_SR(test_directory)
df_valid=dataset_SR(validation_directory)
end = time.time()
print("Time taken: ", end-start)

Time taken:  40.54160284996033


In [27]:
df_train=pd.DataFrame(df_train)
df_test=pd.DataFrame(df_test)
df_valid=pd.DataFrame(df_valid)

X_train, Y_train, X_test, Y_test = preprocessing(df_train, df_valid, df_test)
X_train[:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.706351,-1.25918,-1.192947,0.344377,0.504414,0.347208,-0.12206,0.108936,-0.089015,-0.511568,0.934145,-1.386556,-0.262353
1,0.800554,-1.004594,-1.188084,0.400683,0.523351,0.40166,0.146293,-0.045304,0.113524,-0.018626,0.903321,-1.446329,-0.584506
2,0.815667,-0.942063,-1.516333,0.528028,0.492324,0.322008,-0.008886,-0.204547,-0.141738,0.147593,0.743696,-1.626888,-0.025419
3,0.541876,-0.767699,-0.699089,0.349104,0.189481,-0.516332,-0.325511,-0.261449,0.737683,-0.478824,0.870766,0.65874,-0.971759
4,0.949246,-0.934397,-1.335288,0.394932,0.343917,0.536746,0.22494,-0.251582,-0.319061,0.072074,0.934603,-1.699131,-0.74034


## Multi Layer Perceptron

In [28]:
mlp(X_train, Y_train, X_test, Y_test)

Accuracy:  0.9612676056338029
Classification report: 
              precision    recall  f1-score   support

    SPK001_M       1.00      1.00      1.00         2
    SPK002_M       1.00      1.00      1.00         2
    SPK003_M       1.00      1.00      1.00         2
    SPK004_M       1.00      1.00      1.00         2
    SPK005_F       1.00      1.00      1.00         2
    SPK006_M       1.00      1.00      1.00         2
    SPK007_M       1.00      1.00      1.00         2
    SPK008_M       1.00      1.00      1.00         2
    SPK009_M       1.00      1.00      1.00         2
    SPK010_M       1.00      1.00      1.00         2
    SPK011_F       1.00      1.00      1.00         2
    SPK012_F       1.00      1.00      1.00         2
    SPK013_M       1.00      1.00      1.00         2
    SPK014_M       1.00      1.00      1.00         2
    SPK015_M       1.00      1.00      1.00         2
    SPK016_F       1.00      1.00      1.00         2
    SPK017_F       1.00    

### Result


*   Accuracy  96 %
*   F1 Score  96 %



## Linear Support Vector

In [29]:
lsv(X_train, Y_train, X_test, Y_test, 2000)

[LibLinear]Accuracy:  0.9366197183098591
Classification report: 
              precision    recall  f1-score   support

    SPK001_M       1.00      1.00      1.00         2
    SPK002_M       1.00      1.00      1.00         2
    SPK003_M       1.00      0.67      0.80         3
    SPK004_M       1.00      1.00      1.00         2
    SPK005_F       1.00      1.00      1.00         2
    SPK006_M       1.00      1.00      1.00         2
    SPK007_M       1.00      0.50      0.67         4
    SPK008_M       1.00      1.00      1.00         2
    SPK009_M       1.00      1.00      1.00         2
    SPK010_M       1.00      0.67      0.80         3
    SPK011_F       1.00      1.00      1.00         2
    SPK012_F       1.00      1.00      1.00         2
    SPK013_M       1.00      1.00      1.00         2
    SPK014_M       1.00      1.00      1.00         2
    SPK015_M       1.00      1.00      1.00         2
    SPK016_F       1.00      1.00      1.00         2
    SPK017_F    

  _warn_prf(average, modifier, msg_start, len(result))


### Result

*   Accuracy  94 %
*   F1 Score  93 %

## Gaussian Naive Bayes

In [31]:
gnb(X_train, Y_train, X_test, Y_test)

Accuracy:  0.926056338028169
Classification report: 
              precision    recall  f1-score   support

    SPK001_M       1.00      1.00      1.00         2
    SPK002_M       1.00      1.00      1.00         2
    SPK003_M       1.00      1.00      1.00         2
    SPK004_M       1.00      1.00      1.00         2
    SPK005_F       1.00      1.00      1.00         2
    SPK006_M       1.00      1.00      1.00         2
    SPK007_M       1.00      1.00      1.00         2
    SPK008_M       1.00      1.00      1.00         2
    SPK009_M       1.00      1.00      1.00         2
    SPK010_M       1.00      1.00      1.00         2
    SPK011_F       1.00      1.00      1.00         2
    SPK012_F       1.00      1.00      1.00         2
    SPK013_M       1.00      1.00      1.00         2
    SPK014_M       1.00      1.00      1.00         2
    SPK015_M       1.00      1.00      1.00         2
    SPK016_F       1.00      1.00      1.00         2
    SPK017_F       1.00     

  _warn_prf(average, modifier, msg_start, len(result))


### Result


*   Accuracy  93 %
*   F1 Score  92 %

# Gender Recognition

In [37]:
train_directory = '/content/Gender_Recognition/Train'
validation_directory = '/content/Gender_Recognition/Valid'
test_directory = '/content/Gender_Recognition/Test'

start = time.time()
df_train=dataset_GR(train_directory)
df_test=dataset_GR(test_directory)
df_valid=dataset_GR(validation_directory)
end = time.time()
print("Time taken: ", end-start)

Time taken:  41.69181847572327


In [38]:
df_train=pd.DataFrame(df_train)
df_test=pd.DataFrame(df_test)
df_valid=pd.DataFrame(df_valid)

X_train, Y_train, X_test, Y_test = preprocessing(df_train, df_valid, df_test)
X_train[:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.733551,-1.337125,-1.247207,0.381128,0.520135,0.309692,-0.142023,0.13958,-0.121805,-0.51261,0.960405,-1.35266,-0.246407
1,0.704131,-1.356192,-0.947108,0.340502,0.522066,0.213659,0.508412,-0.050513,-0.460753,-0.163316,0.967768,-1.129964,-0.56059
2,0.828374,-1.067164,-1.242303,0.437738,0.539174,0.364807,0.136633,-0.013852,0.077864,-0.03771,0.929068,-1.410477,-0.567978
3,0.843586,-1.000857,-1.573315,0.565769,0.507981,0.284186,-0.024504,-0.172263,-0.17378,0.122426,0.766783,-1.585123,-0.009903
4,0.82316,-1.31533,-1.33135,0.157453,0.380109,0.568063,0.125015,-0.315372,0.051533,0.417568,0.755349,-1.306833,-0.509923


## Multilayer Perceptron

In [39]:
mlp(X_train, Y_train, X_test, Y_test)

Accuracy:  0.8588235294117647
Classification report: 
              precision    recall  f1-score   support

           F       0.68      0.71      0.69        38
           M       0.92      0.90      0.91       132

    accuracy                           0.86       170
   macro avg       0.80      0.81      0.80       170
weighted avg       0.86      0.86      0.86       170

Confusion matrix: 
[[ 27  11]
 [ 13 119]]


### Result

*   Accuracy  86 %
*   F1 Score  80 %

Linear Support Vector

In [40]:
lsv(X_train, Y_train, X_test, Y_test, 2000)

[LibLinear]Accuracy:  0.8235294117647058
Classification report: 
              precision    recall  f1-score   support

           F       0.53      0.66      0.58        32
           M       0.92      0.86      0.89       138

    accuracy                           0.82       170
   macro avg       0.72      0.76      0.74       170
weighted avg       0.84      0.82      0.83       170

Confusion matrix: 
[[ 21  11]
 [ 19 119]]


### Result
*   Accuracy  82 %
*   F1 Score  74 %

Gaussian Naive Bayes

In [41]:
gnb(X_train, Y_train, X_test, Y_test)

Accuracy:  0.8470588235294118
Classification report: 
              precision    recall  f1-score   support

           F       0.72      0.66      0.69        44
           M       0.88      0.91      0.90       126

    accuracy                           0.85       170
   macro avg       0.80      0.79      0.79       170
weighted avg       0.84      0.85      0.84       170

Confusion matrix: 
[[ 29  15]
 [ 11 115]]


### Result
*   Accuracy  85 %
*   F1 Score  79 %