# LOS Tabular

## Setup

In [9]:
# Upgrade the packaging tools, then install AutoGluon. The leading `!` makes
# the notebook run each line as a shell command from this cell.
# NOTE(review): the AutoGluon install is unpinned; the trailing comment below
# mentions 0.4.1, but the training log recorded in this notebook reports
# AutoGluon 0.6.0 - confirm which version the results should be tied to.
!pip install -U pip
!pip install -U setuptools wheel
!pip install autogluon  # autogluon==0.4.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0m

In [10]:
# import all required modules
import pandas as pd
import numpy as np
import os
import re
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Lift the pandas display limits: each of these options is set to None.
for _display_option in ('display.max_rows', 'display.max_columns',
                        'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)
del _display_option

# Install a blanket 'ignore' filter so warnings are not shown in the outputs.
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import auc
from sklearn.metrics import cohen_kappa_score
from sklearn.inspection import permutation_importance
from autogluon.tabular import TabularPredictor

In [11]:
# Colab detection: a successful import of `google.colab` is taken to mean the
# notebook is running on Google Colab.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  # Only a failed import means "not on Colab"; any other error should surface
  # instead of being silently swallowed by a bare `except`.
  IN_COLAB=False

if IN_COLAB:
  print("We're running Colab")

  # Mount the Google Drive at mount
  mount='/content/gdrive'
  print("Colab: mounting Google drive on ", mount)
  # connect your colab with the drive
  drive.mount(mount)

  # Repository directory on the mounted Google Drive
  path_to_repo = mount + "/MyDrive/MIMIC-III Text Mining/LOS_FINAL/"

else:
  # Local run: the repository root is the first line of repo_info.txt.
  # readline() keeps the trailing newline, which would otherwise end up in the
  # middle of every path derived below, so strip surrounding whitespace.
  with open("repo_info.txt", "r") as repo_info:
    path_to_repo = repo_info.readline().strip()
  # The paths below are built by plain string concatenation, so make sure the
  # root ends with a path separator (a no-op when it already does).
  path_to_repo = os.path.join(path_to_repo, "")

print(path_to_repo)

# Project sub-directories, all derived from the repository root.
path_to_data = f"{path_to_repo}data/"
path_to_raw = f"{path_to_data}raw/"
path_to_processed = f"{path_to_data}processed/"
path_to_lda = f"{path_to_data}lda/"
path_to_icd = f"{path_to_data}icd_codes/"
path_to_models = f"{path_to_repo}models/"
path_to_results = f"{path_to_repo}results/"

We're running Colab
Colab: mounting Google drive on  /content/gdrive
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/


## Import the dataset

In [12]:
def load_datasets(method):
    """
    Read the processed train and test sets for one processing method.

    The two feather files are looked up under `path_to_processed`; their names
    are assembled from `method` plus the module-level `seed_tag`,
    `preproc_tag` and `lemma_tag`, which are read at call time and therefore
    must be defined before this function is called.

    method: string for the processing method we want to load
    returns: (train, test) tuple of DataFrames
    """
    # The train and test file names share every component except the split name.
    name_parts = (path_to_processed, method, seed_tag, preproc_tag, lemma_tag)
    train = pd.read_feather('{}{}train_{}{}{}'.format(*name_parts))
    test = pd.read_feather('{}{}test_{}{}{}'.format(*name_parts))
    return train, test

In [13]:
# PARAMETERS

# Single seed used for this session.
session_seed = 42

# Seed Python's built-in random module.
random.seed(session_seed)

# Suffix derived from the seed; load_datasets embeds it in the file names.
seed_tag = f'_{session_seed}'

## Train the Tabular Model (Structured Dataset)

In [14]:
# Model Parameters
label = 'los_cat'    # target column, passed to TabularPredictor as `label`
metric = 'roc_auc'   # passed to TabularPredictor as `eval_metric`
save_path = f'{path_to_models}tabular'  # passed to TabularPredictor as `path`

In [15]:
# compute other metrics
def perf_evaluator(y_test, y_pred, y_pred_proba):
    """
    Compute two extra classification metrics and return them as a tuple.

    y_test: true labels
    y_pred: predicted class labels, compared with y_test for Cohen's kappa
    y_pred_proba: predicted scores used to build the precision-recall curve
    returns: (Cohen's kappa, area under the precision-recall curve)
    """
    agreement = cohen_kappa_score(y_test, y_pred)
    # The thresholds returned alongside the curve are not needed here.
    pr_precision, pr_recall, _ = precision_recall_curve(y_test, y_pred_proba)
    return agreement, auc(pr_recall, pr_precision)

In [16]:
# Iterate over our main methods of vectorization

# key -> (lemmatize, spacy): these flags only determine the `lemma_tag`
# file-name suffix that load_datasets uses to pick the processed files.
vect_dict = {'tabular': (False, False),
             'stemming': (False, False),
             'spacy': (True, True)}

# PARAMETERS (identical for every run, so computed once outside the loops)
preprocessing = True # set to true if we want to clean and perform some preprocessing
preproc_heavier = True # set to True if we want a heavier preprocessing
# Plain strings instead of 0-d numpy arrays produced by np.where on scalars.
preproc_tag_2 = '_heavier' if preproc_heavier else ''
preproc_tag = f'_preproc{preproc_tag_2}' if preprocessing else preproc_tag_2

for key, (lemmatize, spacy) in vect_dict.items():
    print(key)

    # `lemma_tag` is read as a module-level name by load_datasets.
    # lemmatize False -> '' (stemming); True -> '_lemma' or '_lemma_spacy'.
    if not lemmatize:
        lemma_tag = ''
    elif spacy:
        lemma_tag = '_lemma_spacy'
    else:
        lemma_tag = '_lemma'

    # Iterate over all the methods
    if key != 'tabular':
        method_list = ['frequency', 'onehot','tf_idf']
    else:
        # The LDA columns are dropped for this run, so one method is enough.
        method_list = ['frequency']

    # Make sure the output directory exists before anything is written to it.
    results_dir = f'{path_to_results}{key}/'
    os.makedirs(results_dir, exist_ok=True)

    for method in method_list:
        print(method)
        # Load the LDA datasets
        train, test = load_datasets(f'lda_{method}')
        if key == 'tabular':
            # If we are just running the tabular dataset, drop the LDA columns.
            # fullmatch with `+` selects only names of the form F<digits>;
            # `re.match(r"F[0-9]*")` would accept any name starting with "F".
            lda_topics = [col for col in train.columns if re.fullmatch(r"F[0-9]+", col)]
            # Check the selection before dropping anything.
            assert len(lda_topics) == 300, \
                f"expected 300 LDA topic columns, found {len(lda_topics)}"
            train = train.drop(columns=lda_topics)
            test = test.drop(columns=lda_topics)

        # run the tabular predictor ensemble of models
        # NOTE(review): every run uses the same `save_path`, so each fit writes
        # its models to the same directory as the previous one - confirm this
        # is intended.
        predictor = TabularPredictor(label=label, eval_metric=metric,path=save_path)
        predictor.fit(train)

        # evaluate performance on the test set
        per_tab = predictor.evaluate(test)
        print(f"\nTest set performance:\n{per_tab}")
        # save the class and probability predictions
        y_pred = predictor.predict(test)
        # Second probability column; presumably the positive class (the
        # recorded log shows "class 1 = True") - TODO confirm.
        y_pred_proba = predictor.predict_proba(test).iloc[:,1]
        # Use `label` rather than repeating the hard-coded column name.
        kappa, prc_auc = perf_evaluator(test[label], y_pred, y_pred_proba)
        perf_dict = {"Cohen's Kappa": kappa, "PRC AUC": prc_auc}
        print(f"\nPerformance metrics:\n{perf_dict}")

        # compare the different models
        leaderboard = predictor.leaderboard(test)
        print(f"\nModel Leaderboard:\n{leaderboard}")

        # compute feature importance
        importance = predictor.feature_importance(test)
        # importance relative to the top feature (largest importance = 100)
        importance['percent'] = 100*importance['importance']/importance['importance'].max()
        print(f"\nFeature Importance:\n{importance}")
        perf_dict.update(per_tab)
        # save performances
        df_perf = pd.DataFrame.from_dict(perf_dict, orient='index', columns=['performances'])
        df_perf.to_excel(results_dir+f'df_perf_{method}.xlsx')
        # save leaderboard
        leaderboard.to_excel(results_dir+f'leaderboard_{method}.xlsx')
        # save importance
        importance.to_excel(results_dir+f'importance_{method}.xlsx')



tabular
frequency


Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 46
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 0 = False
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    52812.1 MB
	Train Data (Original)  Memory Usage: 28.97 MB (0.1% o


Test set performance:
{'roc_auc': 0.9441635626643294, 'accuracy': 0.9471802372826263, 'balanced_accuracy': 0.7051531003505697, 'mcc': 0.5348751697099995, 'f1': 0.5376955903271693, 'precision': 0.7411764705882353, 'recall': 0.421875}

Performance metrics:
{"Cohen's Kappa": 0.5119144639388208, 'PRC AUC': 0.6552421524932525}


Computing feature importance via permutation shuffling for 46 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.944164   0.947699        1.041939       0.656059  73.975200                 0.007094                0.000753           0.783005            2       True         14
1            LightGBMXT    0.942363   0.941051        0.058727       0.016287   1.245087                 0.058727                0.016287           1.245087            1       True          3
2              CatBoost    0.942151   0.942311        0.027428       0.015730  10.103743                 0.027428                0.015730          10.103743            1       True          7
3              LightGBM    0.939862   0.943836        0.029292       0.057942   1.338549                 0.029292                0.057942           1.338549            1       True          4
4         LightGBMLarge    0.939767   0.

	238.18s	= Expected runtime (47.64s per shuffle set)
	89.1s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                    importance    stddev   p_value  n  p99_high   p99_low  \
urea_n_max            0.040859  0.005030  0.000027  5  0.051215  0.030503   
platelets_min         0.037140  0.004077  0.000017  5  0.045534  0.028746   
platelets_max         0.025476  0.003742  0.000054  5  0.033180  0.017772   
urea_n_min            0.015645  0.001430  0.000008  5  0.018590  0.012700   
platelets_mean        0.008963  0.001691  0.000145  5  0.012446  0.005481   
temp_max              0.003965  0.001156  0.000776  5  0.006344  0.001586   
dest_discharge        0.003747  0.001076  0.000734  5  0.005962  0.001531   
calcium_min           0.003073  0.000855  0.000651  5  0.004834  0.001312   
icd_chapter           0.002958  0.001045  0.001592  5  0.005109  0.000807   
type_stay             0.002354  0.000970  0.002797  5  0.004351  0.000357   
sapsii                0.001872  0.000872  0.004326  5  0.003668  0.000076   
magnesium_max         0.001794  0.002063  0.061858  5  

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9614525948416177, 'accuracy': 0.9556313993174061, 'balanced_accuracy': 0.75701824057844, 'mcc': 0.6251368235402176, 'f1': 0.6325706594885598, 'precision': 0.7966101694915254, 'recall': 0.5245535714285714}

Performance metrics:
{"Cohen's Kappa": 0.6100234505644875, 'PRC AUC': 0.741333430988384}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.961453   0.967880        1.555887       0.837194  132.059252                 0.006361                0.001035           0.795139            2       True         14
1            LightGBMXT    0.961382   0.960593        0.122420       0.034318    7.969986                 0.122420                0.034318           7.969986            1       True          3
2               XGBoost    0.959168   0.964295        0.242553       0.077601   11.218064                 0.242553                0.077601          11.218064            1       True         11
3              LightGBM    0.958845   0.965110        0.057483       0.025610    7.573507                 0.057483                0.025610           7.573507            1       True          4
4              CatBoost    0.958015

	2729.14s	= Expected runtime (545.83s per shuffle set)
	1552.23s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.551558e-02  0.001623  0.000014  5  1.885725e-02   
platelets_max       1.069421e-02  0.001842  0.000102  5  1.448671e-02   
urea_n_min          7.634106e-03  0.000427  0.000001  5  8.512764e-03   
platelets_min       7.552602e-03  0.001921  0.000462  5  1.150842e-02   
F274                3.007885e-03  0.001187  0.002394  5  5.452509e-03   
F87                 2.323465e-03  0.000579  0.000428  5  3.516305e-03   
F195                1.560149e-03  0.000238  0.000063  5  2.050978e-03   
temp_max            1.532089e-03  0.000547  0.001663  5  2.659280e-03   
F32                 1.498479e-03  0.000368  0.000405  5  2.256923e-03   
F242                7.356131e-04  0.000383  0.006367  5  1.524858e-03   
type_stay           7.078836e-04  0.000504  0.017403  5  1.745432e-03   
platelets_mean      6.936652e-04  0.000420  0.010444  5  1.557499e-03   
temp_min            6.420014e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9574801239514211, 'accuracy': 0.9525434747277751, 'balanced_accuracy': 0.7409550284837861, 'mcc': 0.5947600104023104, 'f1': 0.6021798365122616, 'precision': 0.7727272727272727, 'recall': 0.49330357142857145}

Performance metrics:
{"Cohen's Kappa": 0.5782496245345707, 'PRC AUC': 0.7100290688264739}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.957480   0.962171        2.189123       1.246771  171.293816                 0.006998                0.000728           0.778202            2       True         14
1            LightGBMXT    0.956786   0.955055        0.113779       0.028713    6.659258                 0.113779                0.028713           6.659258            1       True          3
2              LightGBM    0.955568   0.958349        0.053634       0.025345    7.282212                 0.053634                0.025345           7.282212            1       True          4
3              CatBoost    0.954848   0.959577        0.048685       0.034009   55.866892                 0.048685                0.034009          55.866892            1       True          7
4               XGBoost    0.952576

	3558.71s	= Expected runtime (711.74s per shuffle set)
	1904.28s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.564855e-02  0.001403  0.000008  5  1.853806e-02   
platelets_max       9.237703e-03  0.002218  0.000370  5  1.380532e-02   
platelets_min       8.693818e-03  0.001550  0.000116  5  1.188509e-02   
urea_n_min          7.748114e-03  0.000780  0.000012  5  9.354886e-03   
F99                 4.960271e-03  0.001409  0.000703  5  7.860947e-03   
F43                 4.285802e-03  0.000713  0.000089  5  5.754090e-03   
F27                 1.871785e-03  0.000571  0.000923  5  3.047750e-03   
type_stay           1.659208e-03  0.000915  0.007699  5  3.542585e-03   
F255                1.428691e-03  0.000547  0.002140  5  2.554631e-03   
temp_max            1.419779e-03  0.000720  0.005808  5  2.902606e-03   
sofa                1.220288e-03  0.000340  0.000652  5  1.919911e-03   
temp_min            1.033541e-03  0.000459  0.003646  5  1.978038e-03   
platelets_mean      9.261352e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9509077250532114, 'accuracy': 0.9470177149358037, 'balanced_accuracy': 0.7040370289219983, 'mcc': 0.5329663154297363, 'f1': 0.5356125356125355, 'precision': 0.7401574803149606, 'recall': 0.41964285714285715}

Performance metrics:
{"Cohen's Kappa": 0.5097836611661015, 'PRC AUC': 0.672867661949538}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.950908   0.957590        1.990423       1.047642  149.293048                 0.008681                0.000823           0.791242            2       True         14
1        NeuralNetTorch    0.946431   0.950866        0.815032       0.445904   39.438359                 0.815032                0.445904          39.438359            1       True         12
2            LightGBMXT    0.946172   0.951509        0.099565       0.026177    6.467818                 0.099565                0.026177           6.467818            1       True          3
3              CatBoost    0.945572   0.951813        0.047403       0.034631   30.464145                 0.047403                0.034631          30.464145            1       True          7
4               XGBoost    0.944515

	3310.97s	= Expected runtime (662.19s per shuffle set)
	1820.74s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n  p99_high  \
urea_n_max          3.355475e-02  0.003378  0.000012  5  0.040509   
platelets_min       2.522345e-02  0.004034  0.000076  5  0.033530   
platelets_max       1.888511e-02  0.003485  0.000133  5  0.026060   
urea_n_min          1.548192e-02  0.001470  0.000010  5  0.018509   
F1                  4.646741e-03  0.000816  0.000109  5  0.006326   
type_stay           3.732115e-03  0.001389  0.001932  5  0.006592   
platelets_mean      3.599073e-03  0.001221  0.001372  5  0.006113   
temp_max            3.221781e-03  0.001075  0.001290  5  0.005435   
F123                1.916931e-03  0.000348  0.000124  5  0.002633   
calcium_min         1.800385e-03  0.000671  0.001938  5  0.003181   
sapsii              1.658637e-03  0.000923  0.007951  5  0.003560   
sofa                1.477088e-03  0.000588  0.002461  5  0.002687   
F162                1.463540e-03  0.000760  0.006284  5  0.003028   
F57          

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9632367440841367, 'accuracy': 0.9549813099301154, 'balanced_accuracy': 0.7515255258545135, 'mcc': 0.6177787853794582, 'f1': 0.6241519674355495, 'precision': 0.7958477508650519, 'recall': 0.5133928571428571}

Performance metrics:
{"Cohen's Kappa": 0.6013905646570001, 'PRC AUC': 0.7470710652889236}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.963237   0.965553        1.258567       0.639527  102.629956                 0.007984                0.000741           0.764444            2       True         14
1              LightGBM    0.960444   0.961802        0.048933       0.029830    6.622396                 0.048933                0.029830           6.622396            1       True          4
2              CatBoost    0.960042   0.963860        0.049791       0.034942   44.193413                 0.049791                0.034942          44.193413            1       True          7
3               XGBoost    0.959787   0.961949        0.219402       0.077141   12.973148                 0.219402                0.077141          12.973148            1       True         11
4            LightGBMXT    0.958592

	1901.48s	= Expected runtime (380.3s per shuffle set)
	1397.8s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                    importance    stddev   p_value  n      p99_high  \
urea_n_max            0.014754  0.001142  0.000004  5  1.710619e-02   
platelets_max         0.011392  0.001991  0.000108  5  1.549158e-02   
platelets_min         0.008261  0.001899  0.000312  5  1.217037e-02   
urea_n_min            0.007854  0.000708  0.000008  5  9.311197e-03   
F163                  0.003327  0.000704  0.000227  5  4.777442e-03   
F281                  0.002342  0.000421  0.000120  5  3.209467e-03   
F284                  0.001982  0.000346  0.000107  5  2.693686e-03   
temp_max              0.001900  0.000493  0.000498  5  2.915383e-03   
F182                  0.001122  0.000243  0.000249  5  1.623270e-03   
platelets_mean        0.000954  0.000436  0.004043  5  1.851463e-03   
type_stay             0.000901  0.000490  0.007333  5  1.909080e-03   
F216                  0.000824  0.000683  0.027106  5  2.230835e-03   
F53                   0.000811  0.000224  0.000637  5  1

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9576980562163515, 'accuracy': 0.9489679830976759, 'balanced_accuracy': 0.7287426051709027, 'mcc': 0.5627475824471101, 'f1': 0.5733695652173914, 'precision': 0.7326388888888888, 'recall': 0.47098214285714285}

Performance metrics:
{"Cohen's Kappa": 0.5475904799370575, 'PRC AUC': 0.6977060081125575}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.957698   0.964112        1.701102       0.964519  129.680367                 0.005869                0.000674           0.774817            2       True         14
1            LightGBMXT    0.954369   0.956888        0.099353       0.026811    7.471813                 0.099353                0.026811           7.471813            1       True          3
2               XGBoost    0.953163   0.957512        0.226892       0.077332   12.658849                 0.226892                0.077332          12.658849            1       True         11
3              CatBoost    0.953030   0.958926        0.049984       0.033946   39.381089                 0.049984                0.033946          39.381089            1       True          7
4        NeuralNetTorch    0.952196

	2905.49s	= Expected runtime (581.1s per shuffle set)
	1612.3s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev       p_value  n  p99_high  \
urea_n_max          1.622854e-02  0.001668  1.320748e-05  5  0.019663   
platelets_min       1.094566e-02  0.002111  1.581980e-04  5  0.015293   
urea_n_min          9.830424e-03  0.000743  3.896476e-06  5  0.011361   
platelets_max       8.231864e-03  0.002034  4.131128e-04  5  0.012420   
F39                 4.592533e-03  0.000238  8.562927e-07  5  0.005082   
type_stay           2.548440e-03  0.000905  1.625790e-03  5  0.004412   
F23                 2.250251e-03  0.000717  1.083675e-03  5  0.003726   
F175                2.143587e-03  0.000545  4.613502e-04  5  0.003266   
temp_max            1.996354e-03  0.001008  5.723929e-03  5  0.004073   
platelets_mean      1.787784e-03  0.000670  1.981797e-03  5  0.003167   
F278                1.418181e-03  0.000273  1.561124e-04  5  0.001980   
sofa                1.302693e-03  0.000261  1.827681e-04  5  0.001840   
F256                1.194887e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/tabular/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 


Test set performance:
{'roc_auc': 0.9531876017278077, 'accuracy': 0.9488054607508533, 'balanced_accuracy': 0.7173422436459247, 'mcc': 0.5543387184765703, 'f1': 0.5594405594405595, 'precision': 0.7490636704119851, 'recall': 0.44642857142857145}

Performance metrics:
{"Cohen's Kappa": 0.534105995366047, 'PRC AUC': 0.6828141267633003}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.953188   0.957610        1.946153       1.098867  133.877510                 0.006784                0.000717           0.670780            2       True         14
1              CatBoost    0.951734   0.951627        0.046385       0.032380   26.638586                 0.046385                0.032380          26.638586            1       True          7
2            LightGBMXT    0.949801   0.949965        0.101268       0.025269    6.258527                 0.101268                0.025269           6.258527            1       True          3
3              LightGBM    0.948555   0.952760        0.045216       0.021150    6.107953                 0.045216                0.021150           6.107953            1       True          4
4        NeuralNetTorch    0.947751

	3036.56s	= Expected runtime (607.31s per shuffle set)
	1604.95s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n  p99_high   p99_low  \
urea_n_max          2.692636e-02  0.002261  0.000006  5  0.031581  0.022271   
platelets_min       2.049098e-02  0.003156  0.000065  5  0.026989  0.013993   
platelets_max       1.808519e-02  0.003357  0.000136  5  0.024997  0.011174   
urea_n_min          1.385697e-02  0.001052  0.000004  5  0.016023  0.011690   
F205                9.817731e-03  0.002183  0.000275  5  0.014312  0.005324   
type_stay           4.665051e-03  0.001606  0.001450  5  0.007972  0.001358   
platelets_mean      3.820603e-03  0.001213  0.001073  5  0.006319  0.001322   
temp_max            3.201463e-03  0.001079  0.001339  5  0.005423  0.000980   
F123                2.379211e-03  0.000292  0.000027  5  0.002980  0.001779   
F122                2.130760e-03  0.000284  0.000037  5  0.002715  0.001547   
F16                 1.982486e-03  0.000583  0.000804  5  0.003183  0.000782   
F185                1.827332e-0