In [7]:
import glob
import os
from os.path import join

import librosa
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [8]:
from google_drive_downloader import GoogleDriveDownloader as gdd

In [9]:
gdd.download_file_from_google_drive(
    file_id='1Gb0lEZUxqYBHuDaDpLkwd-6x420zdWvE',
    dest_path='data/khanty_4.zip'
)

Downloading 1Gb0lEZUxqYBHuDaDpLkwd-6x420zdWvE into data/khanty_4.zip... Done.


In [None]:
!unzip data/khanty_4.zip -d khanty_4

In [11]:
def get_name(path):
    return os.path.splitext(os.path.split(path)[1])[0]

In [17]:
paths = sorted(glob.glob('khanty_4/*.wav'))

In [18]:
mfcc_list = []
labels_list = []
for path in tqdm(paths):
    y, sr = librosa.load(path)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=26).T
    mfcc_list.append(mfcc)
    labels_list.append(get_name(path)[0])

100%|██████████| 520/520 [01:16<00:00,  6.81it/s]


In [279]:
def get_y_frames(X, y):
    n_frames = X[0].shape[0]
    y_res = np.vstack([y] * n_frames).T.flatten()
    return y_res


def extract_male_female(X, y):
    y_frames = get_y_frames(X, y)
    X_frames = np.vstack(X)

    X_male = X_frames[y_frames]
    X_female = X_frames[~y_frames]

    return X_male, X_female


In [294]:
from sklearn.mixture import GaussianMixture

def fit_gmm(X_male, X_female, n_components, cov_type):
    gmm_male = GaussianMixture(n_components=n_components, covariance_type=cov_type, n_init=3, max_iter=1000, reg_covar=1e-3)
    gmm_female = GaussianMixture(n_components=n_components, covariance_type=cov_type, n_init=3, max_iter=1000, reg_covar=1e-3)

    gmm_male.fit(X_male)
    gmm_female.fit(X_female)
    return gmm_male, gmm_female


def predict_gmm(gmm_male, gmm_female, X_test):
    n_frames = X_test[0].shape[0]

    X_frames = np.vstack(X_test)
    log_prob_male = gmm_male.score_samples(X_frames)
    log_prob_female = gmm_female.score_samples(X_frames)

    log_prob_male = log_prob_male.reshape(-1, n_frames).sum(axis=1)
    log_prob_female = log_prob_female.reshape(-1, n_frames).sum(axis=1)

    prediction = log_prob_male > log_prob_female
    return prediction


In [295]:
def fit_predict(mfcc_list, labels_list, test_size=0.33, n_components=20, cov_type='diag', random_state=42):
    X_train, X_test, y_train, y_test = train_test_split(mfcc_list, labels_list, test_size=test_size,
                                                            stratify=labels_list)
    y_train = np.array(y_train) == 'M'
    y_test = np.array(y_test) == 'M'

    X_train_male, X_train_female = extract_male_female(X_train, y_train)
    gmm_male, gmm_female = fit_gmm(X_train_male, X_train_female, n_components, cov_type)
    y_pred = predict_gmm(gmm_male, gmm_female, X_test)

    return y_test, y_pred


def calc_metrics(y_test, y_pred, metrics, metrics_names):
    res = dict()
    for i in range(len(metrics)):
        res[metrics_names[i]] = metrics[i](y_test, y_pred)

    return res

In [305]:
def cross_val(mfcc_list, labels_list, random_state_list, metrics, metrics_names, test_size=0.33, n_components=17, cov_type='diag'):
    stats = None
    for random_state in random_state_list:
        y_test, y_pred = fit_predict(mfcc_list, labels_list, test_size, n_components, cov_type, random_state)
        if stats is None:
            stats = calc_metrics(y_test, y_pred, metrics, metrics_names)
        else:
            cur_stats = calc_metrics(y_test, y_pred, metrics, metrics_names)
            for metric_name in metrics_names:
                stats[metric_name] += cur_stats[metric_name]

    for key in stats.keys():
        stats[key] /= len(random_state_list)

    return stats

In [306]:
np.random.seed(42)
random_state_list = np.random.randint(0, 1000000, size=1)

In [307]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

metrics = [accuracy_score, precision_score, recall_score, f1_score]
metrics_names = ['Accuracy', 'Recision', 'Precall', 'F-measure']

In [308]:
stats = cross_val(mfcc_list, labels_list, random_state_list, metrics, metrics_names)

In [309]:
stats

{'Accuracy': 0.9534883720930233,
 'Recision': 0.9428571428571428,
 'Precall': 0.9428571428571428,
 'F-measure': 0.9428571428571428}