In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="7"
from cuml.ensemble import RandomForestClassifier
from cuml import LogisticRegression
from sklearn.ensemble import VotingClassifier
from cuml.common.device_selection import set_global_device_type, get_global_device_type
from model.tuning import *
from dataclasses import dataclass, asdict
from itertools import product
from tqdm import tqdm

set_global_device_type('gpu')
print('new device type:', get_global_device_type())

new device type: DeviceType.device


In [2]:
def ensemble_classifier(max_depth, n_estimators, C, l1_ratio, penalty, rfc_weight, n_bins: int = 128):
    # define classifiers
    rfc = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators, n_bins=n_bins)
    lr = LogisticRegression(C=C, l1_ratio=l1_ratio, penalty=penalty)
    
    # define voting ensemble
    voting_model = VotingClassifier(
        estimators=[
            ('rfc', rfc), 
            ('lr', lr),
        ],
        voting='soft',
        weights=[rfc_weight, 1.0-rfc_weight],
    )
    return voting_model

@dataclass
class AblationEvalObj:
    token_length: bool
    patient_demographics: bool
    mds_updrs: bool
    moca: bool
    acc: float
    micro_f1: float
    macro_f1: float
    dict = asdict

In [3]:
# define model to optimize
model_function = ensemble_classifier
model_name = "voting_ensemble"

# define hyperparameter ranges to search
# we aren't optimizing hyperparams so we'll leave this blank
hparam_range_dict = {}

# define optimizer
hparam_optimizer = HyperParamOptimizer(
    model_function=model_function, 
    model_name=model_name, 
    hparam_range_dict=hparam_range_dict
)

In [4]:
# load data into optimizer
train_val_path = "./data/fallreports_2023-9-21_train.csv"
test_path = "./data/fallreports_2023-9-21_test.csv"
hparam_optimizer.load_data(
    train_val_path=train_val_path, 
    test_path=test_path
)

Data loaded...


In [5]:
# set model parameters
ngram_list = [1, 2]
target_feature = 'fog_q_class'
vector_type = 'tf-idf'
n_features = 250
seed = 13
k = 5
constant_dict = {}

In [6]:
ensemble_best_param_dict = {
    'C': 2045.7891312130546,
    'l1_ratio': 0.049833394439413,
    'max_depth': 48.12718850106587,
    'n_estimators': 97.06033604146855,
    'penalty': 'elasticnet',
    'rfc_weight': 0.855137554079295
}

# set sample size to -1 to bootstrap results
sample_size = -1

n_replications = 3

augment_cat_list = ['token_length', 'patient_demographics', 'mds_updrs', 'moca']
level_list = [True, False]

eval_list = []

# iterate over all factor combinations and repeat all n_replications times
for curr_level_list in tqdm(list(product(level_list, repeat=len(augment_cat_list)))*n_replications):
    # build current augmentation dict
    curr_aug_dict = dict(zip(augment_cat_list, curr_level_list))
    
    print(f"\nAugmentations: {curr_aug_dict}")
    
    # update model params
    hparam_optimizer.set_params(
        ngram_list=ngram_list, 
        target_feature=target_feature, 
        vector_type=vector_type, 
        n_features=n_features, 
        augment_dict=curr_aug_dict, 
        seed=seed,
        k=k,
        constant_dict=constant_dict
    )
    
    # train and evaluate model
    acc, micro_f1, macro_f1 = hparam_optimizer.objective_function(
        ensemble_best_param_dict, 
        sample_size=sample_size, 
        log=False, 
        return_all=True,
        rfc=True
    )
    eval_list.append(
        AblationEvalObj(
            token_length=curr_aug_dict['token_length'],
            patient_demographics=curr_aug_dict['token_length'],
            mds_updrs=curr_aug_dict['mds_updrs'],
            moca=curr_aug_dict['moca'], 
            acc=acc, 
            micro_f1=micro_f1, 
            macro_f1=macro_f1
        )
    )

  0%|          | 0/48 [00:00<?, ?it/s]


Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:37:53.015226] QWL-QN: max iterations reached
[W] [15:37:53.016017] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:37:57.727871] QWL-QN: max iterations reached
[W] [15:37:57.728052] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:38:00.910103] QWL-QN: max iterations reached
[W] [15:38:00.910200] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:38:05.645536] QWL-QN: max iterations reached
[W] [15:38:

  2%|▏         | 1/48 [00:27<21:15, 27.14s/it]

[W] [15:38:08.817514] QWL-QN: max iterations reached
[W] [15:38:08.817688] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8676
Micro-Averaged F1: 0.8646
Macro-Averaged F1: 0.8639

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:38:13.500099] QWL-QN: max iterations reached
[W] [15:38:13.500201] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:38:16.814978] QWL-QN: max iterations reached
[W] [15:38:16.815122] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

  4%|▍         | 2/48 [00:44<16:28, 21.48s/it]

[W] [15:38:26.337222] QWL-QN: max iterations reached
[W] [15:38:26.337330] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8451
Micro-Averaged F1: 0.8433
Macro-Averaged F1: 0.8426

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:38:29.598721] QWL-QN: max iterations reached
[W] [15:38:29.598923] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:38:32.647579] QWL-QN: max iterations reached
[W] [15:38:32.647679] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

  6%|▋         | 3/48 [01:00<14:13, 18.97s/it]

[W] [15:38:42.311463] QWL-QN: max iterations reached
[W] [15:38:42.311622] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8986
Micro-Averaged F1: 0.9006
Macro-Averaged F1: 0.9006

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:38:45.466053] QWL-QN: max iterations reached
[W] [15:38:45.466236] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:38:48.663168] QWL-QN: max iterations reached
[W] [15:38:48.663318] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

  8%|▊         | 4/48 [01:16<13:01, 17.77s/it]

[W] [15:38:58.281621] QWL-QN: max iterations reached
[W] [15:38:58.281856] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8141
Micro-Averaged F1: 0.8103
Macro-Averaged F1: 0.8098

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:39:01.472344] QWL-QN: max iterations reached
[W] [15:39:01.472508] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:39:04.711719] QWL-QN: max iterations reached
[W] [15:39:04.711929] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 10%|█         | 5/48 [01:32<12:15, 17.11s/it]


Accuracy: 0.8085
Micro-Averaged F1: 0.8023
Macro-Averaged F1: 0.8025

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 12%|█▎        | 6/48 [01:48<11:40, 16.68s/it]


Accuracy: 0.6761
Micro-Averaged F1: 0.6761
Macro-Averaged F1: 0.6750

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:39:33.450809] QWL-QN: max iterations reached
[W] [15:39:33.450919] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:39:36.628754] QWL-QN: max iterations reached
[W] [15:39:36.628889] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:39:39.861215] QWL-QN: max iterations reached
[W] [15:39:39.861398] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 15%|█▍        | 7/48 [02:04<11:16, 16.51s/it]


Accuracy: 0.8113
Micro-Averaged F1: 0.8035
Macro-Averaged F1: 0.8035

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 17%|█▋        | 8/48 [02:20<10:50, 16.26s/it]


Accuracy: 0.6535
Micro-Averaged F1: 0.6455
Macro-Averaged F1: 0.6455

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:40:05.371757] QWL-QN: max iterations reached
[W] [15:40:05.371899] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:40:08.691330] QWL-QN: max iterations reached
[W] [15:40:08.691520] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:40:11.764045] QWL-QN: max iterations reached
[W] [15:40:11.764140] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 19%|█▉        | 9/48 [02:36<10:35, 16.29s/it]

[W] [15:40:18.284225] QWL-QN: max iterations reached
[W] [15:40:18.284353] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8676
Micro-Averaged F1: 0.8653
Macro-Averaged F1: 0.8652

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:40:21.281990] QWL-QN: max iterations reached
[W] [15:40:21.282148] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:40:24.433712] QWL-QN: max iterations reached
[W] [15:40:24.433859] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 21%|██        | 10/48 [02:52<10:12, 16.12s/it]

[W] [15:40:34.018333] QWL-QN: max iterations reached
[W] [15:40:34.018598] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8479
Micro-Averaged F1: 0.8457
Macro-Averaged F1: 0.8454

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:40:37.137885] QWL-QN: max iterations reached
[W] [15:40:37.138094] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:40:40.335653] QWL-QN: max iterations reached
[W] [15:40:40.335956] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 23%|██▎       | 11/48 [03:08<09:53, 16.04s/it]

[W] [15:40:49.877609] QWL-QN: max iterations reached
[W] [15:40:49.877908] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8873
Micro-Averaged F1: 0.8864
Macro-Averaged F1: 0.8864

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:40:53.075408] QWL-QN: max iterations reached
[W] [15:40:53.075662] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:40:56.255721] QWL-QN: max iterations reached
[W] [15:40:56.255873] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inp

 25%|██▌       | 12/48 [03:24<09:36, 16.00s/it]


Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:41:18.451896] QWL-QN: max iterations reached
[W] [15:41:18.452075] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.


 27%|██▋       | 13/48 [03:40<09:18, 15.97s/it]

[W] [15:41:21.669670] QWL-QN: max iterations reached
[W] [15:41:21.669766] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8197
Micro-Averaged F1: 0.8118
Macro-Averaged F1: 0.8122

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 29%|██▉       | 14/48 [03:55<08:58, 15.84s/it]


Accuracy: 0.7042
Micro-Averaged F1: 0.6957
Macro-Averaged F1: 0.6963

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 31%|███▏      | 15/48 [04:11<08:46, 15.96s/it]


Accuracy: 0.8254
Micro-Averaged F1: 0.8229
Macro-Averaged F1: 0.8229

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 33%|███▎      | 16/48 [04:27<08:27, 15.85s/it]


Accuracy: 0.6676
Micro-Averaged F1: 0.6609
Macro-Averaged F1: 0.6595

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:42:12.584221] QWL-QN: max iterations reached
[W] [15:42:12.584442] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:42:15.728128] QWL-QN: max iterations reached
[W] [15:42:15.728258] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:42:20.468780] QWL-QN: max iterations reached
[W] [15:42:20.468907] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input 

 35%|███▌      | 17/48 [04:46<08:43, 16.90s/it]

[W] [15:42:28.422385] QWL-QN: max iterations reached
[W] [15:42:28.422512] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8479
Micro-Averaged F1: 0.8402
Macro-Averaged F1: 0.8396

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:42:31.652541] QWL-QN: max iterations reached
[W] [15:42:31.652707] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:42:36.459881] QWL-QN: max iterations reached
[W] [15:42:36.459999] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 38%|███▊      | 18/48 [05:06<08:55, 17.84s/it]


Accuracy: 0.8310
Micro-Averaged F1: 0.8295
Macro-Averaged F1: 0.8295

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:42:50.871397] QWL-QN: max iterations reached
[W] [15:42:50.871516] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:42:55.500874] QWL-QN: max iterations reached
[W] [15:42:55.501004] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:42:58.753363] QWL-QN: max iterations reached
[W] [15:42:58.753711] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 40%|███▉      | 19/48 [05:25<08:44, 18.08s/it]


Accuracy: 0.8789
Micro-Averaged F1: 0.8789
Macro-Averaged F1: 0.8788

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:43:11.152903] QWL-QN: max iterations reached
[W] [15:43:11.153031] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:43:14.556272] QWL-QN: max iterations reached
[W] [15:43:14.556625] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:43:17.705191] QWL-QN: max iterations reached
[W] [15:43:17.705296] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 42%|████▏     | 20/48 [05:44<08:33, 18.33s/it]


Accuracy: 0.8197
Micro-Averaged F1: 0.8118
Macro-Averaged F1: 0.8113

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:43:30.365915] QWL-QN: max iterations reached
[W] [15:43:30.366184] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:43:34.978135] QWL-QN: max iterations reached
[W] [15:43:34.978253] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:43:43.189550] QWL-QN: max iterations reached
[W] [15:43:43.189773] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 44%|████▍     | 21/48 [06:07<08:51, 19.69s/it]


Accuracy: 0.7831
Micro-Averaged F1: 0.7806
Macro-Averaged F1: 0.7809

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 46%|████▌     | 22/48 [06:27<08:39, 19.99s/it]


Accuracy: 0.7042
Micro-Averaged F1: 0.6974
Macro-Averaged F1: 0.6974

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:44:27.571995] QWL-QN: max iterations reached
[W] [15:44:27.572238] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.


 48%|████▊     | 23/48 [06:46<08:07, 19.52s/it]


Accuracy: 0.8141
Micro-Averaged F1: 0.8059
Macro-Averaged F1: 0.8063

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 50%|█████     | 24/48 [07:06<07:51, 19.63s/it]


Accuracy: 0.6648
Micro-Averaged F1: 0.6551
Macro-Averaged F1: 0.6553

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:44:51.492333] QWL-QN: max iterations reached
[W] [15:44:51.492627] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:44:54.636261] QWL-QN: max iterations reached
[W] [15:44:54.636525] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:44:59.388652] QWL-QN: max iterations reached
[W] [15:44:59.388884] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 52%|█████▏    | 25/48 [07:26<07:35, 19.79s/it]


Accuracy: 0.8592
Micro-Averaged F1: 0.8563
Macro-Averaged F1: 0.8559

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:45:10.615031] QWL-QN: max iterations reached
[W] [15:45:10.615132] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:45:15.270381] QWL-QN: max iterations reached
[W] [15:45:15.270622] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:45:20.044806] QWL-QN: max iterations reached
[W] [15:45:20.044976] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 54%|█████▍    | 26/48 [07:44<07:06, 19.40s/it]

[W] [15:45:26.503170] QWL-QN: max iterations reached
[W] [15:45:26.503488] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8563
Micro-Averaged F1: 0.8539
Macro-Averaged F1: 0.8535

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:45:29.689592] QWL-QN: max iterations reached
[W] [15:45:29.689814] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:45:32.884665] QWL-QN: max iterations reached
[W] [15:45:32.884892] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 56%|█████▋    | 27/48 [08:00<06:25, 18.34s/it]


Accuracy: 0.8789
Micro-Averaged F1: 0.8761
Macro-Averaged F1: 0.8753

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:45:45.635417] QWL-QN: max iterations reached
[W] [15:45:45.635571] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:45:48.802620] QWL-QN: max iterations reached
[W] [15:45:48.802834] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:45:51.926686] QWL-QN: max iterations reached
[W] [15:45:51.926851] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inp

 58%|█████▊    | 28/48 [08:16<05:52, 17.63s/it]

[W] [15:45:58.315524] QWL-QN: max iterations reached
[W] [15:45:58.315671] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8507
Micro-Averaged F1: 0.8473
Macro-Averaged F1: 0.8473

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 60%|██████    | 29/48 [08:32<05:25, 17.12s/it]


Accuracy: 0.8141
Micro-Averaged F1: 0.8103
Macro-Averaged F1: 0.8104

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 62%|██████▎   | 30/48 [08:48<05:00, 16.69s/it]


Accuracy: 0.6930
Micro-Averaged F1: 0.6841
Macro-Averaged F1: 0.6841

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:46:39.775999] QWL-QN: max iterations reached
[W] [15:46:39.776161] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:46:42.988561] QWL-QN: max iterations reached
[W] [15:46:42.988759] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.


 65%|██████▍   | 31/48 [09:04<04:41, 16.55s/it]

[W] [15:46:46.162124] QWL-QN: max iterations reached
[W] [15:46:46.162252] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8169
Micro-Averaged F1: 0.8071
Macro-Averaged F1: 0.8056

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 67%|██████▋   | 32/48 [09:19<04:19, 16.21s/it]


Accuracy: 0.6930
Micro-Averaged F1: 0.6746
Macro-Averaged F1: 0.6764

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:47:05.114748] QWL-QN: max iterations reached
[W] [15:47:05.114925] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:47:08.402475] QWL-QN: max iterations reached
[W] [15:47:08.402609] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:47:11.580561] QWL-QN: max iterations reached
[W] [15:47:11.580736] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input 

 69%|██████▉   | 33/48 [09:36<04:03, 16.25s/it]

[W] [15:47:17.947631] QWL-QN: max iterations reached
[W] [15:47:17.947761] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8732
Micro-Averaged F1: 0.8696
Macro-Averaged F1: 0.8698

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:47:21.062195] QWL-QN: max iterations reached
[W] [15:47:21.062300] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:47:24.244531] QWL-QN: max iterations reached
[W] [15:47:24.244652] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 71%|███████   | 34/48 [09:57<04:09, 17.79s/it]


Accuracy: 0.8225
Micro-Averaged F1: 0.8195
Macro-Averaged F1: 0.8194

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:47:41.846123] QWL-QN: max iterations reached
[W] [15:47:41.846283] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:47:44.949045] QWL-QN: max iterations reached
[W] [15:47:44.949215] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:47:48.060911] QWL-QN: max iterations reached
[W] [15:47:48.061211] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 73%|███████▎  | 35/48 [10:16<03:56, 18.17s/it]


Accuracy: 0.8761
Micro-Averaged F1: 0.8728
Macro-Averaged F1: 0.8725

Augmentations: {'token_length': True, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:48:02.265726] QWL-QN: max iterations reached
[W] [15:48:02.265851] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:48:07.029583] QWL-QN: max iterations reached
[W] [15:48:07.029782] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:48:10.224687] QWL-QN: max iterations reached
[W] [15:48:10.224976] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 75%|███████▌  | 36/48 [10:39<03:53, 19.42s/it]


Accuracy: 0.8028
Micro-Averaged F1: 0.7953
Macro-Averaged F1: 0.7953

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:48:24.496648] QWL-QN: max iterations reached
[W] [15:48:24.496796] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:48:27.595343] QWL-QN: max iterations reached
[W] [15:48:27.595511] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:48:30.772111] QWL-QN: max iterations reached
[W] [15:48:30.772267] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 77%|███████▋  | 37/48 [10:57<03:29, 19.01s/it]


Accuracy: 0.8254
Micro-Averaged F1: 0.8155
Macro-Averaged F1: 0.8147

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 79%|███████▉  | 38/48 [11:15<03:09, 18.94s/it]


Accuracy: 0.6761
Micro-Averaged F1: 0.6647
Macro-Averaged F1: 0.6643

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:49:04.352592] QWL-QN: max iterations reached
[W] [15:49:04.352896] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:07.486893] QWL-QN: max iterations reached
[W] [15:49:07.487093] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:10.729547] QWL-QN: max iterations reached
[W] [15:49:10.729803] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 81%|████████▏ | 39/48 [11:32<02:43, 18.20s/it]

[W] [15:49:13.975942] QWL-QN: max iterations reached
[W] [15:49:13.976125] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.7972
Micro-Averaged F1: 0.7870
Macro-Averaged F1: 0.7865

Augmentations: {'token_length': True, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 83%|████████▎ | 40/48 [11:47<02:19, 17.39s/it]


Accuracy: 0.6479
Micro-Averaged F1: 0.6459
Macro-Averaged F1: 0.6462

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:49:32.912176] QWL-QN: max iterations reached
[W] [15:49:32.912312] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:36.136726] QWL-QN: max iterations reached
[W] [15:49:36.136881] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:39.419144] QWL-QN: max iterations reached
[W] [15:49:39.419275] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input

 85%|████████▌ | 41/48 [12:04<02:00, 17.18s/it]


Accuracy: 0.8845
Micro-Averaged F1: 0.8845
Macro-Averaged F1: 0.8843

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:49:49.017277] QWL-QN: max iterations reached
[W] [15:49:49.017409] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:52.205111] QWL-QN: max iterations reached
[W] [15:49:52.205277] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:49:55.400854] QWL-QN: max iterations reached
[W] [15:49:55.401099] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 88%|████████▊ | 42/48 [12:20<01:40, 16.70s/it]

[W] [15:50:01.763967] QWL-QN: max iterations reached
[W] [15:50:01.764225] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8254
Micro-Averaged F1: 0.8218
Macro-Averaged F1: 0.8203

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:50:04.974851] QWL-QN: max iterations reached
[W] [15:50:04.975002] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:50:08.188327] QWL-QN: max iterations reached
[W] [15:50:08.188595] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inpu

 90%|████████▉ | 43/48 [12:36<01:22, 16.51s/it]

[W] [15:50:17.828884] QWL-QN: max iterations reached
[W] [15:50:17.829049] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8986
Micro-Averaged F1: 0.8989
Macro-Averaged F1: 0.8982

Augmentations: {'token_length': False, 'patient_demographics': True, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...
[W] [15:50:21.000927] QWL-QN: max iterations reached
[W] [15:50:21.001082] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
[W] [15:50:24.204683] QWL-QN: max iterations reached
[W] [15:50:24.205074] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the inp

 92%|█████████▏| 44/48 [12:52<01:05, 16.36s/it]

[W] [15:50:33.863508] QWL-QN: max iterations reached
[W] [15:50:33.863631] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.

Accuracy: 0.8254
Micro-Averaged F1: 0.8218
Macro-Averaged F1: 0.8215

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 94%|█████████▍| 45/48 [13:08<00:48, 16.24s/it]


Accuracy: 0.8169
Micro-Averaged F1: 0.8159
Macro-Averaged F1: 0.8161

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': True, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 96%|█████████▌| 46/48 [13:23<00:32, 16.08s/it]


Accuracy: 0.7127
Micro-Averaged F1: 0.7052
Macro-Averaged F1: 0.7054

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': True}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


 98%|█████████▊| 47/48 [13:40<00:16, 16.12s/it]


Accuracy: 0.8225
Micro-Averaged F1: 0.8195
Macro-Averaged F1: 0.8191

Augmentations: {'token_length': False, 'patient_demographics': False, 'mds_updrs': False, 'moca': False}
Parameters updated...
Randomly sampling 5 sets with size -1 from train set...


100%|██████████| 48/48 [13:54<00:00, 17.38s/it]


Accuracy: 0.6648
Micro-Averaged F1: 0.6448
Macro-Averaged F1: 0.6444





In [7]:
eval_df = pd.DataFrame([eval_obj.dict() for eval_obj in eval_list])
eval_df

Unnamed: 0,token_length,patient_demographics,mds_updrs,moca,acc,micro_f1,macro_f1
0,True,True,True,True,0.867606,0.864553,0.863915
1,True,True,True,False,0.84507,0.843304,0.842623
2,True,True,False,True,0.898592,0.900552,0.900593
3,True,True,False,False,0.814085,0.810344,0.809766
4,True,True,True,True,0.808451,0.802325,0.802504
5,True,True,True,False,0.676056,0.676056,0.674984
6,True,True,False,True,0.811268,0.803519,0.803505
7,True,True,False,False,0.653521,0.645533,0.645528
8,False,False,True,True,0.867606,0.865329,0.865225
9,False,False,True,False,0.847887,0.845714,0.845444


In [8]:
eval_df.to_csv("./ablation_study_results.csv")

In [15]:
len(list(product(level_list, repeat=len(augment_cat_list)))*3)

48