# LOS Tabular

## Setup

In [3]:
# First install package from terminal:
!pip install -U pip
!pip install -U setuptools wheel
!pip install autogluon  # autogluon==0.4.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autogluon
  Using cached autogluon-0.6.0-py3-none-any.whl (9.8 kB)
Collecting autogluon.timeseries[all]==0.6.0
  Using cached autogluon.timeseries-0.6.0-py3-none-any.whl (101 kB)
Collecting autogluon.tabular[all]==0.6.0
  Using cached autogluon.tabular-0.6.0-py3-none-any.whl (285 kB)
Collecting autogluon.core[all]==0.6.0
  Using cached autogluon.core-0.6.0-py3-none-any.whl (224 kB)
Collecting autogluon.features==0.6.0
  Using cached autogluon.features-0.6.0-py3-none-any.whl (59 kB)
Collecting autogluon.vision==0.6.0
  Using cached autogluon.vision-0.6.0-py3-none-any.whl (49 kB)
Collecting autogluon.text==0.6.0
  Using cached autogluon.text-0.6.0-py3-none-any.whl (62 kB)
Colle

In [4]:
# import all required modules
import pandas as pd
import numpy as np
import os
import re
import random
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_rows', None)  ###
pd.set_option('display.max_columns', None)  ###
pd.set_option('display.width', None)  ###
pd.set_option('display.max_colwidth', None)  ###

import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import auc
from sklearn.metrics import cohen_kappa_score
from sklearn.inspection import permutation_importance
from autogluon.tabular import TabularPredictor

In [5]:
# Detect whether the notebook runs on Google Colab: the import below is the
# probe. Only an import failure means "not on Colab"; any other error
# is a real problem and must not be swallowed.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  IN_COLAB=False

if IN_COLAB:
  print("We're running Colab")
  # Mount the Google Drive at mount
  mount='/content/gdrive'
  print("Colab: mounting Google drive on ", mount)
  # connect your colab with the drive
  drive.mount(mount)
  # Directory on the Google Drive that holds the project
  path_to_repo = mount + "/MyDrive/MIMIC-III Text Mining/LOS_FINAL/"
else:
  # Setup Repository: the first line of repo_info.txt holds the project root.
  # strip() removes the trailing newline that readline() keeps and that would
  # otherwise end up in the middle of every derived path.
  with open("repo_info.txt", "r") as repo_info:
      path_to_repo = repo_info.readline().strip()

# The paths below are built by plain string concatenation, so a non-empty root
# must end with a separator.
if path_to_repo and not path_to_repo.endswith(("/", "\\")):
  path_to_repo += "/"

print(path_to_repo)

# Project layout, all relative to the repository root
path_to_data = f"{path_to_repo}data/"
path_to_raw = f"{path_to_data}raw/"
path_to_processed = f"{path_to_data}processed/"
path_to_lda = f"{path_to_data}lda/"
path_to_icd = f"{path_to_data}icd_codes/"
path_to_models = f"{path_to_repo}models/"
path_to_results = f"{path_to_repo}results/"

We're running Colab
Colab: mounting Google drive on  /content/gdrive
Mounted at /content/gdrive
/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/


## Import the dataset

In [6]:
def load_datasets(method):
    """
    Read the processed train and test sets for one processing method.

    The file names are assembled from the module-level variables
    path_to_processed, seed_tag, preproc_tag and lemma_tag, so all four
    must be defined before this function is called.

    method: string for the processing method we want to load
    returns: tuple (train, test) as read by pd.read_feather
    """
    # suffix shared by the train and the test file name
    suffix = f'{seed_tag}{preproc_tag}{lemma_tag}'
    frames = []
    for split in ('train', 'test'):
        frames.append(pd.read_feather(f'{path_to_processed}{method}{split}_{suffix}'))
    train, test = frames
    return train, test

In [7]:
# PARAMETERS

# Seed for the whole session; seed_tag is the suffix derived from it that is
# used when building the names of the processed dataset files.
session_seed = 42
seed_tag = '_' + str(session_seed)

# Seed Python's built-in random module
random.seed(a=session_seed)

## Train the Tabular Model (Structured DataSet)

In [8]:
# Model Parameters
# label: name of the target column handed to TabularPredictor
# metric: evaluation metric handed to TabularPredictor (eval_metric)
label, metric = 'los_cat', 'roc_auc'

In [9]:
# compute other metrics
def perf_evaluator(y_test, y_pred, y_pred_proba):
    """
    Compute Cohen's kappa and the area under the precision-recall curve.

    y_test: true labels
    y_pred: predicted class labels, compared with y_test for the kappa
    y_pred_proba: scores passed with y_test to precision_recall_curve
    returns: tuple (kappa, prc_auc)
    """
    agreement = cohen_kappa_score(y_test, y_pred)
    # the thresholds returned by precision_recall_curve are not needed here
    precisions, recalls, _ = precision_recall_curve(y_test, y_pred_proba)
    return agreement, auc(recalls, precisions)

In [10]:
# Iterate over our main methods of vectorization

# key -> (lemmatize, spacy): flags that select which variant of the processed
# datasets is loaded. 'tabular' uses the same flags as 'stemming' and then
# drops the LDA topic columns, leaving only the structured features.
vect_dict = {'tabular': (False, False),
             'stemming': (False, False),
             'spacy': (True, True)}

for key, value in vect_dict.items():
    print(key)
    # PARAMETERS
    # NOTE: lemma_tag and preproc_tag are module-level names that
    # load_datasets() reads to build the file names, so they have to be set
    # before it is called. They are kept as plain Python strings.

    lemmatize, spacy = value # lemmatize: set to false if we want to do stemming
    if lemmatize:
        lemma_tag = "_lemma_spacy" if spacy else "_lemma"
    else:
        lemma_tag = ""

    preprocessing = True # set to true if we want to clean and perform some preprocessing
    preproc_heavier = True # set to True if we want a heavier preprocessing
    preproc_tag_2 = '_heavier' if preproc_heavier else ''
    preproc_tag = f'_preproc{preproc_tag_2}' if preprocessing else preproc_tag_2

    # Iterate over all the methods
    if key != 'tabular':
        method_list = ['frequency', 'onehot','tf_idf']
    else:
        # the LDA columns are dropped anyway, so one method is enough
        method_list = ['frequency']

    # One results folder per key; create it up front so the exports at the
    # end of each iteration cannot fail on a missing directory.
    results_dir = f'{path_to_results}{key}/'
    os.makedirs(results_dir, exist_ok = True)

    for method in method_list:
        print(method)
        # Load the LDA datasets
        train, test = load_datasets(f'lda_{method}')
        if key == 'tabular':
            # If we are just running the tabular dataset, drop the LDA columns.
            # fullmatch with at least one digit only selects names of the form
            # F<number>; "F[0-9]*" with re.match would also select any other
            # column that merely starts with "F".
            lda_topics = [col for col in train.columns if re.fullmatch(r"F[0-9]+", col)]
            # check before dropping anything
            assert len(lda_topics) == 300, f"expected 300 LDA topic columns, found {len(lda_topics)}"
            train = train.drop(columns = lda_topics)
            test = test.drop(columns = lda_topics)

        # 'tabular' and 'stemming' share the same tags, so without a distinct
        # prefix the 'stemming'/'frequency' run would write into (and overwrite)
        # the model directory of the 'tabular' run. The text runs keep the
        # original "text..." directory names.
        model_prefix = 'tabular' if key == 'tabular' else 'text'
        save_path = f'{path_to_models}{model_prefix}{preproc_tag}{lemma_tag}{method}'
        os.makedirs(save_path, exist_ok = True)

        # run the tabular predictor ensemble of models
        predictor = TabularPredictor(label=label, eval_metric=metric,path=save_path)
        predictor.fit(train)

        # evaluate performance on the test set
        per_tab = predictor.evaluate(test)
        print(f"\nTest set performance:\n{per_tab}")
        # save the class and probability predictions
        y_pred = predictor.predict(test)
        # second probability column is used as the positive-class score
        y_pred_proba = predictor.predict_proba(test).iloc[:,1]
        perf = perf_evaluator(test[label], y_pred, y_pred_proba)
        perf_dict = {"Cohen's Kappa": perf[0], "PRC AUC": perf[1]}
        print(f"\nPerformance metrics:\n{perf_dict}")

        # compare the different models
        leaderboard = predictor.leaderboard(test)
        print(f"\nModel Leaderboard:\n{leaderboard}")

        # compute feature importance
        importance = predictor.feature_importance(test)
        # importance expressed relative to the most important feature (= 100)
        importance['percent'] = 100*importance['importance']/importance['importance'].max()
        print(f"\nFeature Importance:\n{importance}")
        # merge the metrics returned by predictor.evaluate with ours
        perf_dict.update(per_tab)
        # save performances
        df_perf = pd.DataFrame.from_dict(perf_dict, orient='index', columns=['performances'])
        df_perf.to_excel(f'{results_dir}df_perf_{method}.xlsx')
        # save leaderboard
        leaderboard.to_excel(f'{results_dir}leaderboard_{method}.xlsx')
        # save importance
        importance.to_excel(f'{results_dir}importance_{method}.xlsx')

tabular
frequency


Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavierfrequency/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 46
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = True, class 0 = False
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    53204.8 MB
	Train Data (Original)  Memory U


Test set performance:
{'roc_auc': 0.9441635626643294, 'accuracy': 0.9471802372826263, 'balanced_accuracy': 0.7051531003505697, 'mcc': 0.5348751697099995, 'f1': 0.5376955903271693, 'precision': 0.7411764705882353, 'recall': 0.421875}

Performance metrics:
{"Cohen's Kappa": 0.5119144639388208, 'PRC AUC': 0.6552421524932525}


Computing feature importance via permutation shuffling for 46 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.944164   0.947699        0.985626       0.613746  72.914350                 0.005695                0.000650           0.712254            2       True         14
1            LightGBMXT    0.942363   0.941051        0.060758       0.056628   2.971784                 0.060758                0.056628           2.971784            1       True          3
2              CatBoost    0.942151   0.942311        0.026469       0.015857   9.102162                 0.026469                0.015857           9.102162            1       True          7
3              LightGBM    0.939862   0.943836        0.025571       0.030734   1.252226                 0.025571                0.030734           1.252226            1       True          4
4         LightGBMLarge    0.939767   0.

	222.18s	= Expected runtime (44.44s per shuffle set)
	83.08s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                    importance    stddev   p_value  n  p99_high   p99_low  \
urea_n_max            0.040859  0.005030  0.000027  5  0.051215  0.030503   
platelets_min         0.037140  0.004077  0.000017  5  0.045534  0.028746   
platelets_max         0.025476  0.003742  0.000054  5  0.033180  0.017772   
urea_n_min            0.015645  0.001430  0.000008  5  0.018590  0.012700   
platelets_mean        0.008963  0.001691  0.000145  5  0.012446  0.005481   
temp_max              0.003965  0.001156  0.000776  5  0.006344  0.001586   
dest_discharge        0.003747  0.001076  0.000734  5  0.005962  0.001531   
calcium_min           0.003073  0.000855  0.000651  5  0.004834  0.001312   
icd_chapter           0.002958  0.001045  0.001592  5  0.005109  0.000807   
type_stay             0.002354  0.000970  0.002797  5  0.004351  0.000357   
sapsii                0.001872  0.000872  0.004326  5  0.003668  0.000076   
magnesium_max         0.001794  0.002063  0.061858  5  

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavierfrequency/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])


stemming
frequency


Selected class <--> label mapping:  class 1 = True, class 0 = False
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    52668.71 MB
	Train Data (Original)  Memory Usage: 88.04 MB (0.2% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 5 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
		Fitting CategoryFeatureGenerator...
			Fitting CategoryMemoryMinimizeFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', [])  : 330 | ['age', 'urea_n_min', 'urea_n_max', 'urea_n_mean', 'platelets_min', ...]
		(


Test set performance:
{'roc_auc': 0.9614525948416177, 'accuracy': 0.9556313993174061, 'balanced_accuracy': 0.75701824057844, 'mcc': 0.6251368235402176, 'f1': 0.6325706594885598, 'precision': 0.7966101694915254, 'recall': 0.5245535714285714}

Performance metrics:
{"Cohen's Kappa": 0.6100234505644875, 'PRC AUC': 0.741333430988384}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.961453   0.967880        1.527310       0.777922  127.991765                 0.005371                0.000818           0.827550            2       True         14
1            LightGBMXT    0.961382   0.960593        0.117026       0.029755    8.841268                 0.117026                0.029755           8.841268            1       True          3
2               XGBoost    0.959168   0.964295        0.198862       0.073686   13.223769                 0.198862                0.073686          13.223769            1       True         11
3              LightGBM    0.958845   0.965110        0.049478       0.022981    8.663222                 0.049478                0.022981           8.663222            1       True          4
4              CatBoost    0.958015

	2229.77s	= Expected runtime (445.95s per shuffle set)
	1476.56s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.551558e-02  0.001623  0.000014  5  1.885725e-02   
platelets_max       1.069421e-02  0.001842  0.000102  5  1.448671e-02   
urea_n_min          7.634106e-03  0.000427  0.000001  5  8.512764e-03   
platelets_min       7.552602e-03  0.001921  0.000462  5  1.150842e-02   
F274                3.007885e-03  0.001187  0.002394  5  5.452509e-03   
F87                 2.323465e-03  0.000579  0.000428  5  3.516305e-03   
F195                1.560149e-03  0.000238  0.000063  5  2.050978e-03   
temp_max            1.532089e-03  0.000547  0.001663  5  2.659280e-03   
F32                 1.498479e-03  0.000368  0.000405  5  2.256923e-03   
F242                7.356131e-04  0.000383  0.006367  5  1.524858e-03   
type_stay           7.078836e-04  0.000504  0.017403  5  1.745432e-03   
platelets_mean      6.936652e-04  0.000420  0.010444  5  1.557499e-03   
temp_min            6.420014e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavieronehot/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  cla


Test set performance:
{'roc_auc': 0.9574801239514211, 'accuracy': 0.9525434747277751, 'balanced_accuracy': 0.7409550284837861, 'mcc': 0.5947600104023104, 'f1': 0.6021798365122616, 'precision': 0.7727272727272727, 'recall': 0.49330357142857145}

Performance metrics:
{"Cohen's Kappa": 0.5782496245345707, 'PRC AUC': 0.7100290688264739}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.957480   0.962171        2.068727       1.160965  166.483453                 0.009129                0.000848           0.743292            2       True         14
1            LightGBMXT    0.956786   0.955055        0.122847       0.027102    8.654832                 0.122847                0.027102           8.654832            1       True          3
2              LightGBM    0.955568   0.958349        0.056862       0.022716    6.753857                 0.056862                0.022716           6.753857            1       True          4
3              CatBoost    0.954848   0.959577        0.053827       0.031939   56.447277                 0.053827                0.031939          56.447277            1       True          7
4               XGBoost    0.952576

	3220.85s	= Expected runtime (644.17s per shuffle set)
	1839.5s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.564855e-02  0.001403  0.000008  5  1.853806e-02   
platelets_max       9.237703e-03  0.002218  0.000370  5  1.380532e-02   
platelets_min       8.693818e-03  0.001550  0.000116  5  1.188509e-02   
urea_n_min          7.748114e-03  0.000780  0.000012  5  9.354886e-03   
F99                 4.960271e-03  0.001409  0.000703  5  7.860947e-03   
F43                 4.285802e-03  0.000713  0.000089  5  5.754090e-03   
F27                 1.871785e-03  0.000571  0.000923  5  3.047750e-03   
type_stay           1.659208e-03  0.000915  0.007699  5  3.542585e-03   
F255                1.428691e-03  0.000547  0.002140  5  2.554631e-03   
temp_max            1.419779e-03  0.000720  0.005808  5  2.902606e-03   
sofa                1.220288e-03  0.000340  0.000652  5  1.919911e-03   
temp_min            1.033541e-03  0.000459  0.003646  5  1.978038e-03   
platelets_mean      9.261352e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heaviertf_idf/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  cla


Test set performance:
{'roc_auc': 0.9509077250532114, 'accuracy': 0.9470177149358037, 'balanced_accuracy': 0.7040370289219983, 'mcc': 0.5329663154297363, 'f1': 0.5356125356125355, 'precision': 0.7401574803149606, 'recall': 0.41964285714285715}

Performance metrics:
{"Cohen's Kappa": 0.5097836611661015, 'PRC AUC': 0.672867661949538}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.950908   0.957590        1.853817       1.015373  139.945394                 0.007184                0.000708           0.714411            2       True         14
1        NeuralNetTorch    0.946431   0.950866        0.777103       0.454662   36.521026                 0.777103                0.454662          36.521026            1       True         12
2            LightGBMXT    0.946172   0.951509        0.059967       0.026338    6.155473                 0.059967                0.026338           6.155473            1       True          3
3              CatBoost    0.945572   0.951813        0.046338       0.033517   28.974746                 0.046338                0.033517          28.974746            1       True          7
4               XGBoost    0.944515

	3065.74s	= Expected runtime (613.15s per shuffle set)
	1764.08s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n  p99_high  \
urea_n_max          3.355475e-02  0.003378  0.000012  5  0.040509   
platelets_min       2.522345e-02  0.004034  0.000076  5  0.033530   
platelets_max       1.888511e-02  0.003485  0.000133  5  0.026060   
urea_n_min          1.548192e-02  0.001470  0.000010  5  0.018509   
F1                  4.646741e-03  0.000816  0.000109  5  0.006326   
type_stay           3.732115e-03  0.001389  0.001932  5  0.006592   
platelets_mean      3.599073e-03  0.001221  0.001372  5  0.006113   
temp_max            3.221781e-03  0.001075  0.001290  5  0.005435   
F123                1.916931e-03  0.000348  0.000124  5  0.002633   
calcium_min         1.800385e-03  0.000671  0.001938  5  0.003181   
sapsii              1.658637e-03  0.000923  0.007951  5  0.003560   
sofa                1.477088e-03  0.000588  0.002461  5  0.002687   
F162                1.463540e-03  0.000760  0.006284  5  0.003028   
F57          

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavier_lemma_spacyfrequency/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> labe


Test set performance:
{'roc_auc': 0.9625414736446727, 'accuracy': 0.9559564440110515, 'balanced_accuracy': 0.7561650964066609, 'mcc': 0.6271152767181905, 'f1': 0.6332882273342355, 'precision': 0.8041237113402062, 'recall': 0.5223214285714286}

Performance metrics:
{"Cohen's Kappa": 0.6109812797938377, 'PRC AUC': 0.7457523314903282}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.962541   0.967763        1.495082       0.705599  111.232781                 0.005051                0.000658           0.772848            2       True         14
1              LightGBM    0.961113   0.965526        0.133708       0.024154    7.682338                 0.133708                0.024154           7.682338            1       True          4
2            LightGBMXT    0.961061   0.962966        0.134448       0.026550    7.107605                 0.134448                0.026550           7.107605            1       True          3
3              CatBoost    0.960174   0.964616        0.054241       0.033455   37.505077                 0.054241                0.033455          37.505077            1       True          7
4               XGBoost    0.959537

	2423.53s	= Expected runtime (484.71s per shuffle set)
	1341.92s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.601479e-02  0.001347  0.000006  5  1.878859e-02   
platelets_max       1.009642e-02  0.002305  0.000305  5  1.484326e-02   
platelets_min       8.003599e-03  0.001936  0.000380  5  1.198913e-02   
urea_n_min          6.984312e-03  0.000521  0.000004  5  8.056918e-03   
F268                3.143858e-03  0.000301  0.000010  5  3.763079e-03   
F240                1.457530e-03  0.000628  0.003286  5  2.751170e-03   
temp_max            1.420715e-03  0.000707  0.005433  5  2.875990e-03   
F180                1.008491e-03  0.000278  0.000629  5  1.581152e-03   
temp_min            9.270315e-04  0.000303  0.001194  5  1.550878e-03   
F88                 9.140265e-04  0.000128  0.000045  5  1.177164e-03   
platelets_mean      9.107087e-04  0.000637  0.016515  5  2.222703e-03   
F60                 8.942041e-04  0.000326  0.001791  5  1.565439e-03   
F174                7.472056e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavier_lemma_spacyonehot/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label m


Test set performance:
{'roc_auc': 0.9572872323776136, 'accuracy': 0.9509182512595482, 'balanced_accuracy': 0.7441923203330412, 'mcc': 0.585532797330772, 'f1': 0.598404255319149, 'precision': 0.7401315789473685, 'recall': 0.5022321428571429}

Performance metrics:
{"Cohen's Kappa": 0.5732845091432833, 'PRC AUC': 0.7008862858093646}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.957287   0.963838        1.432368       0.773556  92.325605                 0.005193                0.000702           0.749760            2       True         14
1               XGBoost    0.956157   0.957539        0.218260       0.071151  11.295985                 0.218260                0.071151          11.295985            1       True         11
2              LightGBM    0.956145   0.959127        0.048340       0.021543   6.761216                 0.048340                0.021543           6.761216            1       True          4
3            LightGBMXT    0.955842   0.956802        0.097736       0.025667   5.933954                 0.097736                0.025667           5.933954            1       True          3
4              CatBoost    0.955448   0.

	2274.22s	= Expected runtime (454.84s per shuffle set)
	1426.23s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          1.449954e-02  0.001545  0.000015  5  1.768096e-02   
F258                1.320886e-02  0.001795  0.000040  5  1.690483e-02   
platelets_max       1.011489e-02  0.002591  0.000474  5  1.544894e-02   
platelets_min       7.848911e-03  0.001820  0.000324  5  1.159679e-02   
urea_n_min          7.527661e-03  0.000735  0.000011  5  9.041292e-03   
F27                 2.708754e-03  0.000428  0.000073  5  3.590890e-03   
type_stay           2.008422e-03  0.000840  0.002947  5  3.737441e-03   
temp_max            1.668932e-03  0.000764  0.004062  5  3.241532e-03   
F184                1.439365e-03  0.000447  0.000984  5  2.359187e-03   
F81                 1.386829e-03  0.000334  0.000376  5  2.075339e-03   
sapsii              1.364721e-03  0.000645  0.004557  5  2.693554e-03   
sofa                1.157361e-03  0.000264  0.000302  5  1.700316e-03   
F232                1.078531e-

	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "/content/gdrive/MyDrive/MIMIC-III Text Mining/LOS_FINAL/models/text_preproc_heavier_lemma_spacytf_idf/"
AutoGluon Version:  0.6.0
Python Version:     3.7.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Aug 26 08:44:51 UTC 2022
Train Data Rows:    24611
Train Data Columns: 346
Label Column: los_cat
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [False, True]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label m


Test set performance:
{'roc_auc': 0.9540104231876799, 'accuracy': 0.9501056395254347, 'balanced_accuracy': 0.713929666958808, 'mcc': 0.5619936115772544, 'f1': 0.5608011444921316, 'precision': 0.7808764940239044, 'recall': 0.4375}

Performance metrics:
{"Cohen's Kappa": 0.5365683599150255, 'PRC AUC': 0.6896537155769118}


Computing feature importance via permutation shuffling for 346 features using 5000 rows with 5 shuffle sets...


                  model  score_test  score_val  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   WeightedEnsemble_L2    0.954010   0.958579        2.152935       1.184751  148.810848                 0.008920                0.000807           0.813180            2       True         14
1            LightGBMXT    0.951707   0.952669        0.098294       0.025141    5.956255                 0.098294                0.025141           5.956255            1       True          3
2              CatBoost    0.950442   0.954894        0.049232       0.032575   51.169433                 0.049232                0.032575          51.169433            1       True          7
3       NeuralNetFastAI    0.949247   0.945986        0.289735       0.178510   31.906057                 0.289735                0.178510          31.906057            1       True         10
4        NeuralNetTorch    0.948618

	3676.79s	= Expected runtime (735.36s per shuffle set)
	1876.69s	= Actual runtime (Completed 5 of 5 shuffle sets)



Feature Importance:
                      importance    stddev   p_value  n      p99_high  \
urea_n_max          2.676254e-02  0.002712  0.000012  5  3.234729e-02   
platelets_min       2.317750e-02  0.002924  0.000030  5  2.919710e-02   
platelets_max       1.785079e-02  0.003304  0.000135  5  2.465285e-02   
urea_n_min          1.260551e-02  0.000781  0.000002  5  1.421360e-02   
F59                 6.488700e-03  0.000708  0.000017  5  7.946028e-03   
F205                4.661517e-03  0.001152  0.000413  5  7.033221e-03   
platelets_mean      2.857339e-03  0.001086  0.002084  5  5.092804e-03   
temp_max            2.626842e-03  0.001033  0.002359  5  4.753057e-03   
type_stay           2.342231e-03  0.001291  0.007704  5  5.001403e-03   
F198                1.901800e-03  0.000243  0.000031  5  2.401390e-03   
dest_discharge      1.658785e-03  0.000670  0.002597  5  3.037543e-03   
F122                1.552509e-03  0.000424  0.000604  5  2.424806e-03   
sofa                1.386918e-