**Importing libraries**

In [26]:
import random
import os
import numpy as np
from scipy.io import wavfile as wav
from scipy.signal import spectrogram
from librosa.feature import melspectrogram, mfcc
# had to uses pip to install librosa
import matplotlib.pyplot as plt
import itertools

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score

**Defining path** 

In [27]:
path = os.getcwd() + '\\data\\'
print(path)

c:\Users\Emili\DSIM_project\VER\data\


In [28]:
len(os.listdir(path + '\\auth_speaker'))

1428

In [29]:
def load_data_feature_selection(feature_extractor, val_split, train_size=0.8):
  
  # Note: train size must be 0.8 (see preprocessing.ipynb)
  train2 = []
  X_train2 = []
  y_train2 = []

  val = []
  X_val = []
  y_val = []
  
  unauth_split = train_size/(23-22*train_size)

  random.seed(10) # For reproducibility

  
  # Authorized speaker
  track_num = 0
  tracks = os.listdir(path + 'auth_speaker')
  random.shuffle(tracks)
  for track in tracks:
    track_num = track_num + 1
    _, signal = wav.read(path + 'auth_speaker\\' + track)
    if track_num <=np.floor(train_size*len(tracks)):
      if track_num <=np.floor(train_size*len(tracks)*(1-val_split)):
        train2.append((feature_extractor(signal), 'authorized'))
      else:
        val.append((feature_extractor(signal), 'authorized'))
        
  # Impostor speaker
  for speaker in os.listdir(path + 'unauth_speakers'):
    track_num = 0
    tracks = os.listdir(path + 'unauth_speakers\\' + speaker)
    random.shuffle(tracks)
    for track in tracks:
      track_num = track_num + 1
      _, signal = wav.read(path + 'unauth_speakers\\' + speaker + '\\' + track)
      if track_num <=np.floor(unauth_split*len(tracks)):
        if track_num <=np.floor(unauth_split*len(tracks)*(1-val_split)):
          train2.append((feature_extractor(signal), 'impostor'))
        else:
          val.append((feature_extractor(signal), 'impostor'))
          
  random.shuffle(train2)
  random.shuffle(val)
  
  # Separate features and labels
  X_train2 = [row[0] for row in train2]
  y_train2 = [row[1] for row in train2]
  X_val = [row[0] for row in val]
  y_val = [row[1] for row in val]


  # Normalize          
  eps = 0.001
  X_train2 = np.array(X_train2)
  X_train2_mean = X_train2.mean(axis=0)
  X_train2_std = X_train2.std(axis=0)
  X_train2 = (X_train2 - X_train2_mean + eps)/(X_train2_std + eps)
  X_train2 = [row for row in X_train2]
  X_val = [row for row in (np.array(X_val) - X_train2_mean + eps)/(X_train2_std + eps)]


  return X_train2, X_val, y_train2, y_val

## **Features**

**Temporal features**

We are going to use all these features since they are all scalar (-> they don't slow the training down compared to the frequency features)

In [30]:
def energy(input):
    return np.sum((input*1.0)**2, keepdims=True)

In [31]:
def standard_dev(input):
    return np.std(input, keepdims=True)

In [32]:
def zcr (input):
  k=0
  for i in range(0, len(input)-1):
    if input[i]*input[i+1]<0:
      k=k+1

  return np.array(k, ndmin = 1)

**Frequency features**

We are going to apply a selection process for these features since they can be pretty "heavy" computationally-wise

In [33]:
def feats_spectrogram(input, rate = 8000):
  _, _, spec = spectrogram(input, fs = rate)
  out_spec = spec.flatten()
  return out_spec

In [34]:
def feats_melspec(input, rate = 8000):
  input = melspectrogram(y = input*1.0, sr = rate)
  output = input.flatten()
  return output

In [35]:
def feats_mfcc(input, rate = 8000):
  input = mfcc(y = input*1.0, sr = rate)
  output = input.flatten()
  return output

## **Feature selection**

We are going to use the random forest classifier to select the most important features because it is generally a fast and robust method. The performance criteria will be recall on the impostor class since for the verification task the most important thing is that we don't let impostors in (the same criteria will be used for the selection of the classifiers and of the hyperparameters of the classifier).

In [37]:
function_set = {feats_melspec, feats_mfcc, feats_spectrogram}

for combo in range(len(function_set) + 1):
    for subset in itertools.combinations(function_set, combo):
        
        def combo(input):
            return np.concatenate([standard_dev(input),energy(input), zcr(input)] +
                                  [f(input) for f in subset])
        
        X_train2, X_val, y_train2, y_val = load_data_feature_selection(feature_extractor = combo,
                                                                       train_size = 0.8,
                                                                       val_split = 0.3)
        
        
        model = RandomForestClassifier(random_state=10)
        model.fit(X_train2, y_train2)

        predictions = model.predict(X_val)
        

        print('\n\nFrequency features used:', [element.__name__ for element in subset])

        # Accuracy (on train2 set)
        print(f"\nAccuracy on train set: {model.score(X_train2, y_train2)}")


        # Recall on impostor class (on val set)
        val_impostor_recall = recall_score(y_val, predictions, pos_label='impostor')
        print(f"\nRecall on impostor class, val set: {val_impostor_recall}")



Frequency features used: []

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.7130434782608696


Frequency features used: ['feats_melspec']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.8


Frequency features used: ['feats_spectrogram']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.7391304347826086


Frequency features used: ['feats_mfcc']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.8434782608695652


Frequency features used: ['feats_melspec', 'feats_spectrogram']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.8289855072463768


Frequency features used: ['feats_melspec', 'feats_mfcc']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.8347826086956521


Frequency features used: ['feats_spectrogram', 'feats_mfcc']

Accuracy on train set: 1.0

Recall on impostor class, val set: 0.8173913043478261


Frequency features used: ['feats_melspec', 'feats_spectrogram', 'feats_mfcc']


This time the best frequency feature seems to be **feats_mfcc**, which is also a shorter feature vector than feats_melspec, used in the identification task

In [36]:
def combo(input):
  return np.concatenate((standard_dev(input),energy(input), zcr(input), feats_mfcc(input)))