In [1]:
import glob
import os
from os.path import join

import librosa
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
def get_name(path):
    """Return the file name of ``path`` without its directory and extension."""
    file_name = os.path.basename(path)
    stem, _extension = os.path.splitext(file_name)
    return stem

In [3]:
def get_y_frames(X, y):
    """Expand one label per recording into one boolean label per frame.

    Parameters
    ----------
    X : sequence of arrays
        ``X[i].shape[0]`` is read as the number of frames of recording ``i``.
    y : sequence
        One label per recording; any value that is truthy after conversion
        to ``bool`` is treated as ``True``.

    Returns
    -------
    numpy.ndarray of bool
        1-D array in which ``y[i]`` is repeated ``X[i].shape[0]`` times, in
        recording order. Empty when ``y`` is empty.
    """
    if len(y) == 0:
        return np.zeros(0, dtype=bool)

    frame_counts = [X[i].shape[0] for i in range(len(y))]
    # Use the builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
    labels = np.asarray(y).astype(bool)
    return np.repeat(labels, frame_counts)


def extract_male_female(X, y):
    """Split the stacked frames of all recordings into two groups by label.

    ``X`` is a sequence of per-recording arrays that are stacked row-wise;
    ``y`` holds one label per recording and is expanded to one boolean per
    row by ``get_y_frames``.

    Returns ``(X_male, X_female)``: the rows whose expanded label is True,
    followed by the rows whose expanded label is False.
    """
    is_male = get_y_frames(X, y)
    all_frames = np.vstack(X)
    return all_frames[is_male], all_frames[~is_male]


In [4]:
from sklearn.mixture import GaussianMixture

def fit_gmm(X_male, X_female, n_components, cov_type):
    """Fit one Gaussian mixture per class and return ``(gmm_male, gmm_female)``.

    Both mixtures share the same hyper-parameters: ``n_components``
    components, ``cov_type`` covariance, 3 initialisations, at most 1000
    iterations and a covariance regularisation of 1e-3.
    """
    def new_mixture():
        # Single place for the hyper-parameters shared by both models.
        return GaussianMixture(
            n_components=n_components,
            covariance_type=cov_type,
            n_init=3,
            max_iter=1000,
            reg_covar=1e-3,
        )

    gmm_male, gmm_female = new_mixture(), new_mixture()

    # The male model is fitted first, then the female model.
    gmm_male.fit(X_male)
    gmm_female.fit(X_female)
    return gmm_male, gmm_female


def predict_gmm(gmm_male, gmm_female, X_test):
    """Classify each recording by comparing summed per-frame scores.

    Parameters
    ----------
    gmm_male, gmm_female : objects with ``score_samples(X)``
        ``score_samples`` must return one score per row of ``X``.
    X_test : sequence of 2-D arrays
        One array per recording, one row per frame.

    Returns
    -------
    numpy.ndarray of bool
        ``True`` where the summed male score is strictly greater than the
        summed female score (ties yield ``False``). Empty when ``X_test``
        is empty.
    """
    if len(X_test) == 0:
        return np.zeros(0, dtype=bool)

    # Score every frame of every recording in one call per model.
    X_frames = np.vstack(X_test)
    log_prob_male_frames = gmm_male.score_samples(X_frames)
    log_prob_female_frames = gmm_female.score_samples(X_frames)

    log_prob_male = np.zeros(len(X_test))
    log_prob_female = np.zeros(len(X_test))

    # Walk the stacked scores recording by recording and sum each span.
    start = 0
    for i, recording in enumerate(X_test):
        stop = start + recording.shape[0]
        log_prob_male[i] = log_prob_male_frames[start:stop].sum()
        log_prob_female[i] = log_prob_female_frames[start:stop].sum()
        start = stop

    return log_prob_male > log_prob_female


In [5]:
def fit_predict(mfcc_list, labels_list, test_size=0.33, n_components=20, cov_type='diag', random_state=42):
    """Run one stratified train/test split, fit the two GMMs and predict.

    Parameters
    ----------
    mfcc_list : list of arrays
        One feature array per recording.
    labels_list : list of str
        One label per recording; ``'M'`` is mapped to ``True`` and every
        other value to ``False``.
    test_size : float
        Fraction of recordings held out for testing.
    n_components, cov_type
        Forwarded to ``fit_gmm``.
    random_state : int
        Seed for the train/test split.

    Returns
    -------
    (y_test, y_pred) : tuple of boolean arrays
        True labels and predictions for the held-out recordings.
    """
    # random_state must be forwarded here; otherwise the split ignores the
    # seed and repeated runs with different seeds are not reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        mfcc_list, labels_list,
        test_size=test_size,
        stratify=labels_list,
        random_state=random_state,
    )
    y_train = np.array(y_train) == 'M'
    y_test = np.array(y_test) == 'M'

    X_train_male, X_train_female = extract_male_female(X_train, y_train)
    gmm_male, gmm_female = fit_gmm(X_train_male, X_train_female, n_components, cov_type)
    y_pred = predict_gmm(gmm_male, gmm_female, X_test)

    return y_test, y_pred


def calc_metrics(y_test, y_pred, metrics, metrics_names):
    """Evaluate every metric and return ``{name: value}``.

    ``metrics[i]`` is called as ``metrics[i](y_test, y_pred)`` and stored
    under ``metrics_names[i]``.
    """
    return {
        metrics_names[position]: metric(y_test, y_pred)
        for position, metric in enumerate(metrics)
    }

In [6]:
def cross_val(mfcc_list, labels_list, random_state_list, metrics, metrics_names, test_size=0.33, n_components=20, cov_type='spherical'):
    """Average metrics over repeated train/test runs, one run per seed.

    Parameters
    ----------
    mfcc_list, labels_list
        Features and labels, forwarded to ``fit_predict``.
    random_state_list : sequence of int
        One seed per run; must not be empty.
    metrics, metrics_names
        Parallel sequences of metric callables and the names under which
        their results are reported.
    test_size, n_components, cov_type
        Forwarded to ``fit_predict``.

    Returns
    -------
    dict
        ``{metric_name: mean value over all runs}``.

    Raises
    ------
    ValueError
        If ``random_state_list`` is empty (there is nothing to average).
    """
    n_runs = len(random_state_list)
    if n_runs == 0:
        raise ValueError('random_state_list must contain at least one seed')

    totals = {metric_name: 0.0 for metric_name in metrics_names}
    for random_state in random_state_list:
        y_test, y_pred = fit_predict(mfcc_list, labels_list, test_size, n_components, cov_type, random_state)
        run_stats = calc_metrics(y_test, y_pred, metrics, metrics_names)
        for metric_name in metrics_names:
            totals[metric_name] += run_stats[metric_name]

    return {metric_name: total / n_runs for metric_name, total in totals.items()}

In [7]:
from sklearn.metrics import accuracy_score

def male_acc(y_test, y_pred):
    """Accuracy computed only on the samples whose true label equals 1."""
    male_mask = (y_test == 1)
    true_subset = y_test[male_mask]
    pred_subset = y_pred[male_mask]
    return accuracy_score(true_subset, pred_subset)


def female_acc(y_test, y_pred):
    """Accuracy computed only on the samples whose true label equals 0."""
    female_mask = (y_test == 0)
    true_subset = y_test[female_mask]
    pred_subset = y_pred[female_mask]
    return accuracy_score(true_subset, pred_subset)

In [8]:
def cross_val_many(data_path, n_mfcc_many, random_state_list,
        metrics, metrics_names, test_size=0.33, n_components=20, cov_type='spherical'):
    """Run ``cross_val`` once per MFCC dimensionality.

    ``data_path`` is an iterable of audio file paths. For every value in
    ``n_mfcc_many`` each file is loaded again, its MFCCs are computed with
    that many coefficients and transposed, and the label is taken from the
    first character of the file's base name.

    Returns ``{n_mfcc: averaged metrics dict from cross_val}``.
    """
    results_by_dim = {}
    for n_mfcc in n_mfcc_many:
        features, genders = [], []
        for wav_path in tqdm(data_path):
            signal, sample_rate = librosa.load(wav_path)
            coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
            # Transposed so that the first axis of each stored array is the
            # one stacked and counted per recording downstream.
            features.append(coefficients.T)
            genders.append(get_name(wav_path)[0])

        results_by_dim[n_mfcc] = cross_val(
            features, genders, random_state_list, metrics, metrics_names,
            test_size, n_components, cov_type)

    return results_by_dim

In [9]:
def cross_val_many_datasets(dataset_names, n_mfcc_many, random_state_list,
        metrics, metrics_names, test_size=0.33, n_components=20, cov_type='spherical'):
    """Run ``cross_val_many`` on every dataset directory under ``data/``.

    For each name the sorted list of ``data/<name>/*.wav`` files is
    evaluated; the result is ``{dataset_name: {n_mfcc: metrics dict}}``.
    """
    return {
        dataset_name: cross_val_many(
            sorted(glob.glob('data/' + dataset_name + '/*.wav')),
            n_mfcc_many, random_state_list,
            metrics, metrics_names, test_size, n_components, cov_type)
        for dataset_name in dataset_names
    }

In [10]:
# Seed NumPy's global RNG so the list of seeds drawn below is the same on every run.
np.random.seed(42)
# Five integer seeds in [0, 1000000); each one drives one repeated run in cross_val.
random_state_list = np.random.randint(0, 1000000, size=5)

In [11]:
# Parallel lists: metrics[i] is reported under metrics_names[i].
metrics = [accuracy_score, male_acc, female_acc]
metrics_names = ['Accuracy', 'Male Accuracy', 'Female Accuracy']

In [12]:
# Sub-directories of data/ to evaluate; each is globbed for *.wav files.
dataset_names = ['khanty_4', 'SHRUTI_small', 'TIMIT_DR1']

In [13]:
# MFCC dimensionalities to compare; features are recomputed for each value.
n_mfcc_many = [13, 20, 26]

In [14]:
# Full evaluation grid (datasets x MFCC sizes x seeds). n_components is set to 15
# here; cov_type is not passed, so the function default 'spherical' is used.
stats = cross_val_many_datasets(dataset_names, n_mfcc_many, random_state_list, metrics, metrics_names, test_size=0.33, n_components=15)

100%|██████████| 520/520 [01:24<00:00,  6.14it/s]
100%|██████████| 520/520 [01:19<00:00,  6.55it/s]
100%|██████████| 520/520 [01:17<00:00,  6.72it/s]
100%|██████████| 539/539 [01:12<00:00,  7.42it/s]
100%|██████████| 539/539 [01:18<00:00,  6.91it/s]
100%|██████████| 539/539 [01:15<00:00,  7.17it/s]
100%|██████████| 490/490 [00:29<00:00, 16.66it/s]
100%|██████████| 490/490 [00:30<00:00, 15.95it/s]
100%|██████████| 490/490 [00:30<00:00, 15.85it/s]


In [21]:
# Print the averaged metrics as an indented report: dataset -> MFCC size -> metric.
for dataset, stats_by_dim in stats.items():
    print('Results for dataset ' + dataset)
    for dim, stats_by_metric in stats_by_dim.items():
        print('  Num of mfcc features: ' + str(dim))
        for metric, value in stats_by_metric.items():
            print('    %s: %.4g' %(metric, value))

Results for dataset khanty_4
  Num of mfcc features: 13
    Accuracy: 0.9465
    Male Accuracy: 0.9143
    Female Accuracy: 0.9686
  Num of mfcc features: 20
    Accuracy: 0.9244
    Male Accuracy: 0.8971
    Female Accuracy: 0.9431
  Num of mfcc features: 26
    Accuracy: 0.9256
    Male Accuracy: 0.9
    Female Accuracy: 0.9431
Results for dataset SHRUTI_small
  Num of mfcc features: 13
    Accuracy: 0.9652
    Male Accuracy: 0.9372
    Female Accuracy: 0.9913
  Num of mfcc features: 20
    Accuracy: 0.9685
    Male Accuracy: 0.9465
    Female Accuracy: 0.9891
  Num of mfcc features: 26
    Accuracy: 0.9652
    Male Accuracy: 0.9465
    Female Accuracy: 0.9826
Results for dataset TIMIT_DR1
  Num of mfcc features: 13
    Accuracy: 0.9963
    Male Accuracy: 0.9961
    Female Accuracy: 0.9967
  Num of mfcc features: 20
    Accuracy: 0.9963
    Male Accuracy: 0.998
    Female Accuracy: 0.9933
  Num of mfcc features: 26
    Accuracy: 0.9938
    Male Accuracy: 0.9961
    Female Accuracy: 0