In [7]:
import pandas as pd
import numpy as np
import os 
import sys
sys.path.append(os.path.abspath('..'))
import utils.utils as utils
from sklearn.preprocessing import MinMaxScaler

train = pd.read_csv('../data/mimic-iv-private/anchor_year_group_datasets/2014_-_2016/small_train_dataset.csv')
test = pd.read_csv('../data/mimic-iv-private/anchor_year_group_datasets/2017_-_2019/test_dataset.csv')

In [8]:
len(train['pain'].unique())

108

Normalize Vital Signs

In [9]:

# List of vital signs columns
vital_signs_cols = ['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain']

# Ensure vital signs are numeric and handle missing values
train[vital_signs_cols] = train[vital_signs_cols].apply(pd.to_numeric, errors='coerce')
test[vital_signs_cols] = test[vital_signs_cols].apply(pd.to_numeric, errors='coerce')

In [10]:
train['pain'].unique()

array([ 0.  ,  7.  , 10.  ,  6.  , 13.  ,   nan,  2.  ,  3.  ,  8.  ,
        9.  ,  5.  ,  1.  ,  4.  ,  1.5 ,  8.59,  8.6 ,  0.5 ,  7.5 ,
       11.  ,  4.5 ,  9.5 ,  6.5 ])

Fill in NAs

In [11]:
train[vital_signs_cols] = train[vital_signs_cols].fillna(train[vital_signs_cols].mean())
test[vital_signs_cols] = test[vital_signs_cols].fillna(test[vital_signs_cols].mean())

# Normalize vital signs using Min-Max scaling
scaler = MinMaxScaler()
vital_signs_normalized = scaler.fit_transform(train[vital_signs_cols])
vital_signs_normalized_test = scaler.fit_transform(test[vital_signs_cols])
print(f"NaN in embeddings: {np.isnan(vital_signs_normalized).any()}")
train[vital_signs_cols] = vital_signs_normalized
test[vital_signs_cols] = vital_signs_normalized_test


NaN in embeddings: False


In [12]:

train['gender'] = train['gender'].map({'F': 0, 'M': 1})
test['gender'] = test['gender'].map({'F': 0, 'M': 1})

In [13]:
from sklearn.preprocessing import OneHotEncoder

# Create the encoder; set sparse=False to get a dense array and handle_unknown to ignore unseen categories.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

# Fit the encoder on the train data for the specified columns
train_categories = train[['arrival_transport', 'race']]
test_categories = test[['arrival_transport', 'race']]

encoder.fit(train_categories)

# Transform both train and test data
train_encoded = encoder.transform(train_categories)
test_encoded = encoder.transform(test_categories)

# Get the new column names from the encoder
encoded_columns = encoder.get_feature_names_out(['arrival_transport', 'race'])

# Convert the arrays to DataFrames with the appropriate index
train_encoded_df = pd.DataFrame(train_encoded, columns=encoded_columns, index=train.index)
test_encoded_df = pd.DataFrame(test_encoded, columns=encoded_columns, index=test.index)

# Drop the original columns and join the new dummy columns back to train and test
train = train.drop(columns=['arrival_transport', 'race']).join(train_encoded_df)
test = test.drop(columns=['arrival_transport', 'race']).join(test_encoded_df)

In [14]:
train = train.drop(columns=['subject_id', 'stay_id','chiefcomplaint','anchor_year_group','has_null'])
test = test.drop(columns=['subject_id', 'stay_id','chiefcomplaint','anchor_year_group','has_null'])

In [15]:
# import re

# # Define a function to extract numeric values or map text descriptions
# def clean_pain(value):
#     # Attempt to extract numeric values using regex
#     match = re.search(r'\d+(\.\d+)?', str(value))  # Matches numbers like '8', '9.5'
#     if match:
#         return float(match.group())
#     # Map text descriptions to numeric values
#     text_mapping = {
#         'none': 0, 'mild': 2, 'moderate': 5, 'severe': 8, 'very bad': 10,
#         'unbearable': 10, 'uncomfortable': 4, 'not bad': 1
#     }
#     value_lower = str(value).lower()
#     for key, num in text_mapping.items():
#         if key in value_lower:
#             return num
#     # Return NaN for unprocessable values
#     return None

# # Apply the function to the pain column
# train['pain'] = train['pain'].apply(clean_pain)
# test['pain'] = test['pain'].apply(clean_pain)
# # Fill missing values with the median pain value
# train['pain'] = train['pain'].fillna(train['pain'].median())
# test['pain'] = test['pain'].fillna(test['pain'].median())

#convert acuity to int
train['acuity'] = train['acuity'].apply(lambda x: int(x))
test['acuity'] = test['acuity'].apply(lambda x: int(x))
# Adjust class labels to start from 0
train['acuity'] = train['acuity'] - 1
test['acuity'] = test['acuity'] - 1


In [16]:
train['race_AMERICAN INDIAN/ALASKA NATIVE'].unique()

array([0., 1.])

In [17]:
train

Unnamed: 0,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,gender,anchor_age,...,race_PATIENT DECLINED TO ANSWER,race_PORTUGUESE,race_SOUTH AMERICAN,race_UNABLE TO OBTAIN,race_UNKNOWN,race_WHITE,race_WHITE - BRAZILIAN,race_WHITE - EASTERN EUROPEAN,race_WHITE - OTHER EUROPEAN,race_WHITE - RUSSIAN
0,0.929664,0.484211,0.333333,0.98,0.007348,0.009293,0.000000,3,1,34.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.933741,0.421053,0.272727,0.96,0.005929,0.010279,0.538462,2,0,21.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.919470,0.415789,0.333333,0.97,0.008463,0.011124,0.769231,2,1,43.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.912334,0.357895,0.333333,0.98,0.009020,0.012672,0.000000,2,1,52.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.929664,0.384211,0.333333,1.00,0.005777,0.010279,0.769231,2,1,41.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10482,0.927625,0.368421,0.363636,0.99,0.007703,0.013658,0.000000,2,1,48.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10483,0.919470,0.436842,0.333333,0.99,0.005473,0.011124,0.331821,2,0,21.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
10484,0.925663,0.215789,0.272727,0.99,0.005777,0.011124,0.538462,1,1,45.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
10485,0.949032,0.421053,0.333333,0.99,0.008058,0.010701,0.000000,2,1,60.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Logistic Regression

In [18]:
# Logistic Regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X_train = train.drop(columns=['acuity'])
y_train = train['acuity']
X_test = test.drop(columns=['acuity'])
y_test = test['acuity']

# Train logistic regression
model = LogisticRegression(max_iter=1000, random_state=42, n_jobs=4)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate predictions
metrics = utils.evaluate_predictions(y_pred, y_test, ordinal=True, by_class=True)
print(metrics)


{'overall': {'accuracy': 0.559, 'precision': 0.5447694486731739, 'recall': 0.559, 'f1_score': 0.4773628847343133, 'adjusted_accuracy': 0.961, 'adjusted_precision': 0.9580664654594232, 'adjusted_recall': 0.961, 'adjusted_f1': 0.9538848131746304, 'mae': 0.48, 'mse': 0.558, 'quadratic_kappa': np.float64(0.29881174980396885)}, 'by_class': {'0': {'precision': 0.5128205128205128, 'recall': 0.3389830508474576, 'f1-score': 0.40816326530612246, 'support': 59.0}, '1': {'precision': 0.65, 'recall': 0.17955801104972377, 'f1-score': 0.2813852813852814, 'support': 362.0}, '2': {'precision': 0.5518044237485448, 'recall': 0.9367588932806324, 'f1-score': 0.6945054945054945, 'support': 506.0}, '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 70.0}, '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3.0}, 'accuracy': 0.559, 'macro avg': {'precision': 0.3429249873138115, 'recall': 0.2910599910355628, 'f1-score': 0.27681080823937965, 'support': 1000.0}, 'weighted avg': {'p

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
import json
# Save evaluation metrics to JSON
def save_metrics(metrics,  parameters):
    output_file = f"{parameters}_metrics.json"
    with open(output_file, 'w') as f:
        json.dump(metrics, f, indent=2)
        
save_path = "../results/Triage-MIMIC/Triage-MIMIC_LogisticRegression"
save_metrics(metrics, save_path)
print("Evaluation complete. Metrics and plots saved.")


Evaluation complete. Metrics and plots saved.


In [18]:
pd.DataFrame(y_pred+1).to_csv('../results/Triage-MIMIC/Triage-MIMIC_LogisticRegression.csv', index=False)

### XGBoost

In [22]:
from xgboost import XGBClassifier

from sklearn.metrics import (accuracy_score, f1_score, precision_score, recall_score, cohen_kappa_score, 
                             classification_report, mean_absolute_error, mean_squared_error)

# Train XGBoost classifier on full dataset
model = XGBClassifier(use_label_encoder=False, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate predictions
metrics = utils.evaluate_predictions(y_pred, y_test, ordinal=True, flexibility=1, by_class=True)
print(metrics)


Parameters: { "use_label_encoder" } are not used.



{'overall': {'accuracy': 0.21, 'precision': 0.38990399860626535, 'recall': 0.21, 'f1_score': 0.2111853840664098, 'adjusted_accuracy': 0.766, 'adjusted_precision': 0.8847723289203068, 'adjusted_recall': 0.766, 'adjusted_f1': 0.805125150025705, 'mae': 1.048, 'mse': 1.616, 'quadratic_kappa': np.float64(0.215680183888052)}, 'by_class': {'0': {'precision': 0.06405693950177936, 'recall': 0.3050847457627119, 'f1-score': 0.10588235294117647, 'support': 59.0}, '1': {'precision': 0.3767123287671233, 'recall': 0.30386740331491713, 'f1-score': 0.3363914373088685, 'support': 362.0}, '2': {'precision': 0.47619047619047616, 'recall': 0.07905138339920949, 'f1-score': 0.13559322033898305, 'support': 506.0}, '3': {'precision': 0.12574850299401197, 'recall': 0.6, 'f1-score': 0.2079207920792079, 'support': 70.0}, '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3.0}, 'accuracy': 0.21, 'macro avg': {'precision': 0.20854164949067816, 'recall': 0.2576007064953677, 'f1-score': 0.157157560533

In [24]:
import json
# Save evaluation metrics to JSON
def save_metrics(metrics,  parameters):
    output_file = f"{parameters}_metrics.json"
    with open(output_file, 'w') as f:
        json.dump(metrics, f, indent=2)
        
save_path = "../results/Triage-MIMIC/Triage-MIMIC_XGBoost"
save_metrics(metrics, save_path)
print("Evaluation complete. Metrics and plots saved.")


Evaluation complete. Metrics and plots saved.


In [73]:
test.drop(columns=['acuity']).values.shape

(2500, 7)

### BioBERT

In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Load training and test data
embeddings = np.load('../data/mimic-iv-private/anchor_year_group_datasets/2014_-_2016/train_chiefcomplaint_embeddings_reduced.npy', allow_pickle=True)
test_embeddings= np.load('../data/mimic-iv-private/anchor_year_group_datasets/2017_-_2019/test_chiefcomplaint_embeddings_reduced.npy', allow_pickle=True)

# Combine vitals with embeddings
X_train = np.hstack([train.drop(columns=['acuity']).values, embeddings])  # Combine vitals and embeddings
y_train = train['acuity'].values

X_test = np.hstack([test.drop(columns=['acuity']).values, test_embeddings])  # Replace with inference embeddings if different
y_test = test['acuity'].values

# Subsets of training data
train_sizes = [1.0]  # 1%, 10%, 100%
metrics_results = []

# Train and evaluate for each subset
X_train_subset, y_train_subset = X_train, y_train

# Train an MLP classifier
model = MLPClassifier(
    hidden_layer_sizes=(1000, 1000),  # Small network
    random_state=42,
    early_stopping=True
)
print("Training MLP...")
model.fit(X_train_subset, y_train_subset)

Training MLP...


In [27]:
# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate predictions using utils.evaluate_predictions
metrics = utils.evaluate_predictions(y_pred, y_test, ordinal=True, flexibility=1, by_class=True)

# Save metrics
save_path = "../results/Triage-MIMIC/Triage-MIMIC_BioBERT"
save_metrics(metrics, save_path)
print("Evaluation complete. Metrics and plots saved.")

Evaluation complete. Metrics and plots saved.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [76]:
display(results_df)

Unnamed: 0,0_precision,0_recall,0_f1-score,0_support,1_precision,1_recall,1_f1-score,1_support,2_precision,2_recall,...,macro avg_f1-score,macro avg_support,weighted avg_precision,weighted avg_recall,weighted avg_f1-score,weighted avg_support,MSE,QWK,undertriage_rate,overtriage_rate
0,0.565217,0.220339,0.317073,59.0,0.627841,0.610497,0.619048,362.0,0.681282,0.798419,...,0.404855,1000.0,0.64473,0.656,0.639527,1000.0,0.432,0.497833,0.156,0.188


In [76]:
utils.evaluate_predictions(y_pred,y_test,ordinal=True, by_class=True)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'overall': {'accuracy': 0.7036,
  'precision': 0.69382771984375,
  'recall': 0.7036,
  'f1_score': 0.6916408420473397,
  'adjusted_accuracy': 0.9916,
  'adjusted_precision': 0.9916944355529995,
  'adjusted_recall': 0.9916,
  'adjusted_f1': 0.991091579591339,
  'mae': 0.3048,
  'mse': 0.3216,
  'quadratic_kappa': 0.5809021487834903},
 'by_class': {'0': {'precision': 0.7894736842105263,
   'recall': 0.3488372093023256,
   'f1-score': 0.4838709677419355,
   'support': 86.0},
  '1': {'precision': 0.6786155747836835,
   'recall': 0.6559139784946236,
   'f1-score': 0.6670716889428918,
   'support': 837.0},
  '2': {'precision': 0.7272727272727273,
   'recall': 0.8155061019382628,
   'f1-score': 0.7688663282571913,
   'support': 1393.0},
  '3': {'precision': 0.4835164835164835,
   'recall': 0.24858757062146894,
   'f1-score': 0.3283582089552239,
   'support': 177.0},
  '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
  'accuracy': 0.7036,
  'macro avg': {'precision': 0

In [88]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Load training and test data
embeddings = np.load('../data/mimic-iv-private/symptom_embeddings.npy', allow_pickle=True)
test_embeddings = np.load('../data/mimic-iv-private/symptom_embeddings_test.npy', allow_pickle=True)

# Combine vitals with embeddings
X_train = np.hstack([train.drop(columns=['acuity']).values, embeddings])  # Combine vitals and embeddings
y_train = train['acuity'].values

X_test = np.hstack([test.drop(columns=['acuity']).values, test_embeddings])  # Replace with inference embeddings if different
y_test = test['acuity'].values

# Subsets of training data
train_sizes = [0.01, 0.1]  # 1%, 10%, 100%
metrics_results = []

# Train and evaluate for each subset
for size in train_sizes:
    # Create a subset of the training data
    subset_size = int(len(X_train) * size)
    X_train_subset, _, y_train_subset, _ = train_test_split(
        X_train, y_train, train_size=subset_size, random_state=42, stratify=y_train
    )

    # Train an MLP classifier
    model = MLPClassifier(
        hidden_layer_sizes=(775, 64),  # Small network
        random_state=42,
        early_stopping=True
    )
    model.fit(X_train_subset, y_train_subset)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the predictions
    evaluation_metrics = utils.evaluate_predictions(y_pred,y_test,ordinal=True, by_class=True)

    # Store results
    metrics_results.append(evaluation_metrics)

# Display results
results_df = pd.DataFrame(metrics_results)
print(results_df)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                             overall  \
0  {'accuracy': 0.6396, 'precision': 0.6202773153...   
1  {'accuracy': 0.6776, 'precision': 0.6687991886...   

                                            by_class  
0  {'0': {'precision': 0.42424242424242425, 'reca...  
1  {'0': {'precision': 0.53125, 'recall': 0.19767...  


In [91]:
results_df.loc[1]['overall']

{'accuracy': 0.6776,
 'precision': 0.6687991886557905,
 'recall': 0.6776,
 'f1_score': 0.6661262548531941,
 'adjusted_accuracy': 0.9864,
 'adjusted_precision': 0.9861556950476779,
 'adjusted_recall': 0.9864,
 'adjusted_f1': 0.985951754713654,
 'mae': 0.336,
 'mse': 0.3632,
 'quadratic_kappa': 0.5416948648827627}

In [92]:
results_df.loc[1]['by_class']

{'0': {'precision': 0.53125,
  'recall': 0.19767441860465115,
  'f1-score': 0.288135593220339,
  'support': 86.0},
 '1': {'precision': 0.6739446870451238,
  'recall': 0.5531660692951016,
  'f1-score': 0.6076115485564304,
  'support': 837.0},
 '2': {'precision': 0.7041383570105003,
  'recall': 0.8183776022972002,
  'f1-score': 0.7569721115537849,
  'support': 1393.0},
 '3': {'precision': 0.45962732919254656,
  'recall': 0.4180790960451977,
  'f1-score': 0.4378698224852071,
  'support': 177.0},
 '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 'accuracy': 0.6776,
 'macro avg': {'precision': 0.47379207464963413,
  'recall': 0.39745943724843014,
  'f1-score': 0.4181178151631523,
  'support': 2500.0},
 'weighted avg': {'precision': 0.6687991886557905,
  'recall': 0.6776,
  'f1-score': 0.6661262548531941,
  'support': 2500.0}}