In [1]:
import warnings

for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

import os
import time
import torch
import torchaudio

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score, recall_score, precision_score, balanced_accuracy_score, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import scipy

from tqdm import tqdm
import librosa

from src.utils import *

# Imported after the wildcard import above so these names cannot be shadowed by it.
from flaml import AutoML
from sklearn.preprocessing import StandardScaler

In [2]:
# Notebook-wide constants.
SEED = 1984
SR = 16_000  # presumably the audio sample rate in Hz; not referenced in the cells shown here

# Seed the global PyTorch and NumPy random number generators.
torch.manual_seed(SEED)
np.random.seed(SEED)

# A separate, explicitly seeded torch generator (manual_seed returns the generator itself).
gen = torch.Generator().manual_seed(SEED)

In [3]:
# Dataset locations, expressed relative to the current working directory.
DATA_DIR = os.path.join('..', 'data')
VOICES_DIR = os.path.join(DATA_DIR, 'Voices_wav')

# Two sub-folders of VOICES_DIR (presumably one per speaker group - confirm against the data layout).
APHASIA_DIR, NORM_DIR = (
    os.path.join(VOICES_DIR, folder_name) for folder_name in ('Aphasia', 'Norm')
)

In [4]:
# Load the pre-computed train / validation / test arrays from DATA_DIR.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code - only use it on files you produced or trust.
train_data, val_data, test_data = (
    np.load(os.path.join(DATA_DIR, file_name), allow_pickle=True)
    for file_name in (
        'train_data_careful_whisper.npy',
        'val_data_careful_whisper.npy',
        'test_data_careful_whisper.npy',
    )
)

In [5]:
# In every array the last column is the target; all preceding columns are features.
train_features = train_data[..., :-1]
train_target = train_data[..., -1]

val_features = val_data[..., :-1]
val_target = val_data[..., -1]

test_features = test_data[..., :-1]
test_target = test_data[..., -1]

In [6]:
# Standardize the features. The scaler's statistics are fitted on the training
# split only and then reused for validation and test, so no information from
# those splits leaks into the preprocessing.
# (StandardScaler is imported in the notebook's top import cell.)
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)

In [7]:
# Sanity check: show the shape of every feature matrix and target vector.
tuple(
    part.shape
    for part in (
        train_features, train_target,
        val_features, val_target,
        test_features, test_target,
    )
)

((472, 23), (472,), (159, 23), (159,), (172, 23), (172,))

In [8]:
# Stage 1: short FLAML search (150 s budget) over tree-based learners, scored
# on the explicit validation split.
# NOTE(review): the search is bounded by wall-clock time, so the set of
# configurations tried may vary with machine speed even though the seed is fixed.
pre_automl = AutoML()
pre_automl.fit(
    train_features,
    train_target,
    task="classification",
    time_budget=150,
    X_val=val_features,
    y_val=val_target,
    seed=SEED,
    estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'],
    verbose=False,
)

# Stage 2: longer search (800 s budget) warm-started from the best
# configuration stage 1 found for each estimator.
automl = AutoML()
automl.fit(
    train_features,
    train_target,
    task="classification",
    time_budget=800,
    X_val=val_features,
    y_val=val_target,
    seed=SEED,
    estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'],
    starting_points=pre_automl.best_config_per_estimator,
    verbose=False,
)

In [9]:
# Score the tuned model on the held-out test split.
preds = automl.predict(test_features)

# print() instead of a bare expression so the report's line breaks render.
print(classification_report(y_true=test_target, y_pred=preds))

              precision    recall  f1-score   support

         0.0       0.73      0.71      0.72        42
         1.0       0.91      0.92      0.91       130

    accuracy                           0.87       172
   macro avg       0.82      0.81      0.82       172
weighted avg       0.87      0.87      0.87       172



In [10]:
# Load the "_mc" variant of the arrays (presumably the multi-class labelling -
# confirm). This rebinds the names used by the cells above.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code - only use it on files you produced or trust.
train_data, val_data, test_data = (
    np.load(os.path.join(DATA_DIR, file_name), allow_pickle=True)
    for file_name in (
        'train_data_careful_whisper_mc.npy',
        'val_data_careful_whisper_mc.npy',
        'test_data_careful_whisper_mc.npy',
    )
)

# In every array the last column is the target; all preceding columns are features.
train_features = train_data[..., :-1]
train_target = train_data[..., -1]

val_features = val_data[..., :-1]
val_target = val_data[..., -1]

test_features = test_data[..., :-1]
test_target = test_data[..., -1]

In [11]:
# Fresh scaler for this dataset: fitted on the training split only, then
# applied unchanged to the validation and test splits.
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features, test_features = map(scaler.transform, (val_features, test_features))

In [12]:
# Stage 1: short FLAML search (150 s budget) over tree-based learners, scored
# on the explicit validation split.
# NOTE(review): the search is bounded by wall-clock time, so the set of
# configurations tried may vary with machine speed even though the seed is fixed.
pre_automl = AutoML()
pre_automl.fit(
    train_features,
    train_target,
    task="classification",
    time_budget=150,
    X_val=val_features,
    y_val=val_target,
    seed=SEED,
    estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'],
    verbose=False,
)

# Stage 2: longer search (800 s budget) warm-started from the best
# configuration stage 1 found for each estimator.
automl = AutoML()
automl.fit(
    train_features,
    train_target,
    task="classification",
    time_budget=800,
    X_val=val_features,
    y_val=val_target,
    seed=SEED,
    estimator_list=['lgbm', 'xgboost', 'xgb_limitdepth', 'rf', 'extra_tree', 'catboost'],
    starting_points=pre_automl.best_config_per_estimator,
    verbose=False,
)

In [13]:
# Score the tuned model on the held-out test split.
preds = automl.predict(test_features)

# zero_division=0 states explicitly that a precision/recall with a zero
# denominator (e.g. a class that is never predicted) is reported as 0. That is
# the value scikit-learn already falls back to, so the printed report is
# unchanged, but the cell no longer relies on the notebook-wide UserWarning
# filter to hide the accompanying warning.
# NOTE(review): the recorded report sums to a support of 140, whereas the
# two-class report earlier in the notebook covers 172 test samples - confirm
# that the "_mc" test file is meant to contain fewer samples.
print(classification_report(test_target, preds, zero_division=0))

              precision    recall  f1-score   support

         0.0       0.74      0.83      0.79        42
         1.0       0.27      0.46      0.34        26
         2.0       0.43      0.28      0.34        36
         3.0       0.36      0.30      0.33        30
         4.0       0.00      0.00      0.00         6

    accuracy                           0.47       140
   macro avg       0.36      0.37      0.36       140
weighted avg       0.46      0.47      0.46       140

