In [1]:
import warnings
for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import hub
from torch.utils.data import Dataset, DataLoader
import torchaudio

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score, recall_score, precision_score, balanced_accuracy_score, accuracy_score
from sklearn.model_selection import train_test_split

import scipy

from tqdm import tqdm

from datasets import load_dataset, Dataset, Audio
import librosa

from flaml import AutoML

In [2]:
SEED = 1984

np.random.seed(SEED)
torch.manual_seed(SEED)

gen = torch.Generator()
gen.manual_seed(SEED)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
SR = 8_000
SEQUENCE_LENGTH = 300 * SR
MFCC = 64
print(f"It's {DEVICE} time!!!")

It's cuda time!!!


In [3]:
DATA_DIR = os.path.join('..', 'data')
VOICES_DIR = os.path.join(DATA_DIR, 'Voices_wav')
APHASIA_DIR = os.path.join(VOICES_DIR, 'Aphasia')
NORM_DIR = os.path.join(VOICES_DIR, 'Norm')

In [4]:
meta_data_aphasia = pd.read_excel(os.path.join(DATA_DIR, "Demo RAT discourse production.xlsx"), sheet_name='пациенты')
meta_data_norm = pd.read_excel(os.path.join(DATA_DIR, "Demo RAT discourse production.xlsx"), sheet_name='норма')

meta_data_norm.drop(meta_data_norm.index[-1], axis=0, inplace=True)

In [5]:
meta_data_aphasia.rename(columns={'Stroked hemisphere (L/R/LR)': 'Stroked hemisphere',
                          'Aphasia/Norm (A/N), A includes all patients, even with just dysarthria': 'Aphasia/Norm', 
                          'Aphasia_Severity \n0 - no aphasia\n1 - very mild\n2 - mild\n3 - mild-moderate\n4 - moderate\n5 - moderate-severe\n6 - severe\n7 - very severe': 'Aphasia_Severity',
                          'Aphasia_Types\n1 - efferent motor\n2 - afferent motor\n3 - complex motor\n4 - dynamic\n5 - acoustic-mnestic\n6 - sensory\n7 - semantic\n8 - other': 'Aphasia_Types',
                          'Dominant_Aphasia\n1 - efferent motor\n2 - afferent motor\n3 - complex motor\n4 - dynamic\n5 - acoustic-mnestic\n6 - sensory\n7 - semantic\n8 - other': 'Dominant_Aphasia',
                          'Dysarthria (1/0)': 'Dysarthria',
                          'Dysarthria_Severity \n0 - no dysarthria\n1 - very mild\n2 - mild\n3 - mild-moderate\n4 - moderate\n5 - moderate-severe\n6 - severe\n7 - very severe': 'Dysarthria_Severity',
                          }, inplace=True)

In [6]:
all_data = {"filename": [], "label": []}

for filename in os.listdir(APHASIA_DIR):
    all_data["filename"].append(os.path.join(APHASIA_DIR, filename))
    all_data["label"].append(1)
    
for filename in os.listdir(NORM_DIR):
    all_data["filename"].append(os.path.join(NORM_DIR, filename))
    all_data["label"].append(0)
    
all_data = pd.DataFrame(all_data)

In [7]:
mfcc_class = torchaudio.transforms.MFCC(sample_rate=SR, n_mfcc=MFCC, log_mels=True, melkwargs={"n_fft": 20_000, "win_length": 10_000, "hop_length": 5_000, "n_mels": 200})

In [8]:
def preprocess_function_mfcc(path):
    x, sr = librosa.load(path, sr=SR)
    x = x[..., :SEQUENCE_LENGTH]
    x = np.pad(x, (0, SEQUENCE_LENGTH - x.shape[0]), mode='constant')
    mfcc = mfcc_class(torch.Tensor(x)).numpy().flatten().squeeze()
    # print(mfcc.shape)
    return mfcc

In [9]:
all_data["mfcc"] = all_data["filename"].apply(preprocess_function_mfcc)

In [10]:
train_data, test_data = train_test_split(all_data, shuffle=True, stratify=all_data["label"], random_state=SEED, test_size=0.2)

In [11]:
train_data, val_data = train_test_split(train_data, shuffle=True, stratify=train_data["label"], random_state=SEED, test_size=0.2)

In [12]:
def custom_balanced_accuracy(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    *args,
):
    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_acc = balanced_accuracy_score(y_val, np.argmax(y_pred, axis=-1), sample_weight=weight_val)
    return 1 - val_acc, {
        "val_acc": val_acc,
        "pred_time": pred_time,
    }

In [13]:
pre_automl = AutoML()
pre_automl.fit(np.vstack(train_data["mfcc"]), train_data["label"], task="classification", time_budget=300, X_val=np.vstack(val_data["mfcc"]), y_val=val_data["label"], metric=custom_balanced_accuracy, seed=SEED, estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'])

automl = AutoML()
automl.fit(np.vstack(train_data["mfcc"]), train_data["label"], task="classification", time_budget=1200, X_val=np.vstack(val_data["mfcc"]), y_val=val_data["label"], metric=custom_balanced_accuracy, seed=SEED, estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'], starting_points=pre_automl.best_config_per_estimator)

[flaml.automl.logger: 02-15 12:10:07] {1728} INFO - task = classification
[flaml.automl.logger: 02-15 12:10:07] {1736} INFO - Data split method: stratified
[flaml.automl.logger: 02-15 12:10:07] {1739} INFO - Evaluation method: holdout
[flaml.automl.logger: 02-15 12:10:07] {1838} INFO - Minimizing error metric: customized metric
[flaml.automl.logger: 02-15 12:10:07] {1955} INFO - List of ML learners in AutoML Run: ['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree', 'catboost']
[flaml.automl.logger: 02-15 12:10:07] {2258} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 02-15 12:10:10] {2393} INFO - Estimated sufficient time budget=30609s. Estimated necessary time budget=308s.
[flaml.automl.logger: 02-15 12:10:10] {2442} INFO -  at 3.1s,	estimator lgbm's best error=0.5000,	best estimator lgbm's best error=0.5000
[flaml.automl.logger: 02-15 12:10:10] {2258} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 02-15 12:10:14] {2442} INFO -  at 6.3s

In [14]:
preds = automl.predict(np.vstack(test_data["mfcc"])) 

print(f"Accuracy: {accuracy_score(test_data["label"], preds):.3f}")
print(f"Precision: {precision_score(test_data["label"], preds):.3f}")
print(f"Recall: {recall_score(test_data["label"], preds):.3f}")
print(f"F1 Score: {f1_score(test_data["label"], preds):.3f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(test_data["label"], preds):.3f}")

Accuracy: 0.889
Precision: 0.913
Recall: 0.943
F1 Score: 0.927
Balanced Accuracy: 0.834


In [15]:
preds

array([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0])

In [16]:
print(automl.best_estimator)

xgboost


In [17]:
print(automl.best_config)

{'n_estimators': 14, 'max_leaves': 6, 'min_child_weight': 0.020084476498521228, 'learning_rate': 0.9322339457205505, 'subsample': 0.6985476489319331, 'colsample_bylevel': 0.7881305523753805, 'colsample_bytree': 0.8613798823867586, 'reg_alpha': 0.001814934398047496, 'reg_lambda': 1.1573797309448801}


In [18]:
automl.model.estimator

In [19]:
automl = AutoML()
automl.fit(np.vstack(train_data["mfcc"]), train_data["label"], task="classification", time_budget=1200,
           X_val=np.vstack(val_data["mfcc"]), y_val=val_data["label"], metric=custom_balanced_accuracy, seed=SEED,
           estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'])
preds = automl.predict(np.vstack(test_data["mfcc"]))

[flaml.automl.logger: 02-15 12:35:08] {1728} INFO - task = classification
[flaml.automl.logger: 02-15 12:35:08] {1736} INFO - Data split method: stratified
[flaml.automl.logger: 02-15 12:35:08] {1739} INFO - Evaluation method: holdout
[flaml.automl.logger: 02-15 12:35:08] {1838} INFO - Minimizing error metric: customized metric
[flaml.automl.logger: 02-15 12:35:08] {1955} INFO - List of ML learners in AutoML Run: ['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree', 'catboost']
[flaml.automl.logger: 02-15 12:35:08] {2258} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 02-15 12:35:09] {2393} INFO - Estimated sufficient time budget=14264s. Estimated necessary time budget=143s.
[flaml.automl.logger: 02-15 12:35:09] {2442} INFO -  at 1.4s,	estimator lgbm's best error=0.5000,	best estimator lgbm's best error=0.5000
[flaml.automl.logger: 02-15 12:35:09] {2258} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 02-15 12:35:13] {2442} INFO -  at 4.9s

In [20]:
print(f"Accuracy: {accuracy_score(test_data["label"], preds):.3f}")
print(f"Precision: {precision_score(test_data["label"], preds):.3f}")
print(f"Recall: {recall_score(test_data["label"], preds):.3f}")
print(f"F1 Score: {f1_score(test_data["label"], preds):.3f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(test_data["label"], preds):.3f}")

Accuracy: 0.889
Precision: 0.913
Recall: 0.943
F1 Score: 0.927
Balanced Accuracy: 0.834


In [21]:
preds

array([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0])

In [22]:
print(automl.best_estimator)
print(automl.best_config)
automl.model.estimator

xgboost
{'n_estimators': 14, 'max_leaves': 6, 'min_child_weight': 0.020084476498521228, 'learning_rate': 0.9322339457205505, 'subsample': 0.6985476489319331, 'colsample_bylevel': 0.7881305523753805, 'colsample_bytree': 0.8613798823867586, 'reg_alpha': 0.001814934398047496, 'reg_lambda': 1.1573797309448801}
