In [10]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the train and test data
train_df = pd.read_csv("../data/kaggle/train_numeric.csv")
test_df  = pd.read_csv("../data/kaggle/test_numeric.csv")

In [3]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 688 entries, 0 to 687
Data columns (total 19 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Group                     688 non-null    int64  
 1   Sex                       688 non-null    int64  
 2   Age                       688 non-null    int64  
 3   Patients number per hour  688 non-null    int64  
 4   Arrival mode              688 non-null    int64  
 5   Injury                    688 non-null    int64  
 6   Chief_complain            688 non-null    object 
 7   Mental                    688 non-null    int64  
 8   Pain                      688 non-null    int64  
 9   NRS_pain                  370 non-null    float64
 10  SBP                       670 non-null    float64
 11  DBP                       670 non-null    float64
 12  HR                        672 non-null    float64
 13  RR                        675 non-null    float64
 14  BT        

### Logistic Regression & XGBoost

In [12]:
from sklearn.preprocessing import MinMaxScaler

# List of vital signs column
vital_signs_cols = ['NRS_pain', 'SBP', 'DBP', 'HR', 'RR', 'BT']

# Normalize vital signs using Min-Max scaling for train data
scaler = MinMaxScaler()
train_df[vital_signs_cols] = scaler.fit_transform(train_df[vital_signs_cols])

# Use same scaler for test data to maintain consistency
test_df[vital_signs_cols] = scaler.transform(test_df[vital_signs_cols])

In [13]:
train_df = train_df.drop(columns=['Length of stay_min','Group','Chief_complain','Diagnosis in ED','KTAS_RN'])
test_df = test_df.drop(columns=['Length of stay_min','Group','Chief_complain','Diagnosis in ED','KTAS_RN'])

In [16]:
from sklearn.preprocessing import OneHotEncoder

# Create encoder for categorical variables
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

# Fit encoder on train data for specified columns
train_categories = train_df[['Sex', 'Arrival mode', 'Mental']]
test_categories = test_df[['Sex', 'Arrival mode', 'Mental']]

encoder.fit(train_categories)

# Transform both train and test data
train_encoded = encoder.transform(train_categories)
test_encoded = encoder.transform(test_categories)

# Get encoded column names
encoded_columns = encoder.get_feature_names_out(['Sex', 'Arrival mode', 'Mental'])

# Convert to DataFrames
train_encoded_df = pd.DataFrame(train_encoded, columns=encoded_columns, index=train_df.index)
test_encoded_df = pd.DataFrame(test_encoded, columns=encoded_columns, index=test_df.index)

# Drop original columns and join encoded columns
train_df = train_df.drop(columns=['Sex', 'Arrival mode', 'Mental']).join(train_encoded_df)
test_df = test_df.drop(columns=['Sex', 'Arrival mode', 'Mental']).join(test_encoded_df)

# Scale Age separately since it's numeric
age_scaler = MinMaxScaler()
train_df['Age'] = age_scaler.fit_transform(train_df[['Age']])
test_df['Age'] = age_scaler.transform(test_df[['Age']])


In [18]:
train_df.info()
# Drop specified columns from both train and test dataframes
train_df = train_df.drop(columns=['Patients number per hour', 'Injury', 'Pain'])
test_df = test_df.drop(columns=['Patients number per hour', 'Injury', 'Pain'])


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 688 entries, 0 to 687
Data columns (total 24 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Age                       688 non-null    float64
 1   Patients number per hour  688 non-null    int64  
 2   Injury                    688 non-null    int64  
 3   Pain                      688 non-null    int64  
 4   NRS_pain                  688 non-null    float64
 5   SBP                       688 non-null    float64
 6   DBP                       688 non-null    float64
 7   HR                        688 non-null    float64
 8   RR                        688 non-null    float64
 9   BT                        688 non-null    float64
 10  KTAS_expert               688 non-null    int64  
 11  Sex_1                     688 non-null    float64
 12  Sex_2                     688 non-null    float64
 13  Arrival mode_1            688 non-null    float64
 14  Arrival mo

In [19]:
# Fill all NAs with the mean

numeric_cols = train_df.select_dtypes(include=["number"]).columns
for col in numeric_cols:
    train_df[col].fillna(train_df[col].mean(), inplace=True)
    test_df[col].fillna(test_df[col].mean(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[col].fillna(train_df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df[col].fillna(test_df[col].mean(), inplace=True)


In [25]:
# Optionally, drop rows with missing target values (if any)
train_df = train_df.dropna(subset=["KTAS_expert",])
test_df  = test_df.dropna(subset=["KTAS_expert", ])

# Define the target column and feature set.
# Here we assume that all columns except 'KTAS_expert' are features.
target_col = "KTAS_expert"
features = [col for col in train_df.columns if col != target_col]

X_train = train_df[features]
y_train = train_df[target_col]

X_test = test_df[features]
y_test = test_df[target_col]

# Build and train the Logistic Regression model.
# For multiclass problems, LogisticRegression can use multinomial mode.
logreg = LogisticRegression(max_iter=1000, multi_class='auto')
logreg.fit(X_train, y_train)

# Make predictions on the test set.
preds_lr = logreg.predict(X_test)

# Print the results.
metrics = utils.evaluate_predictions(preds_lr, y_test, ordinal=True, by_class=True)
print("Overall Metrics:", metrics)
output_filepath = "../results/Triage-KTAS/Triage-KTAS_LogReg"
utils.save_metrics(metrics, output_filepath)
print("Evaluation complete. Metrics and plots saved.")



Overall Metrics: {'overall': {'accuracy': 0.47150259067357514, 'precision': 0.500462952576997, 'recall': 0.47150259067357514, 'f1_score': 0.4182469761820638, 'adjusted_accuracy': 0.9050086355785838, 'adjusted_precision': 0.923734415106249, 'adjusted_recall': 0.9050086355785838, 'adjusted_f1': 0.9010054221304983, 'mae': 0.6234887737478411, 'mse': 0.8134715025906736, 'quadratic_kappa': np.float64(0.36918621461764967)}, 'by_class': {'1': {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666666, 'support': 12.0}, '2': {'precision': 0.631578947368421, 'recall': 0.08108108108108109, 'f1-score': 0.1437125748502994, 'support': 148.0}, '3': {'precision': 0.46360153256704983, 'recall': 0.5576036866359447, 'f1-score': 0.5062761506276151, 'support': 217.0}, '4': {'precision': 0.45733788395904434, 'recall': 0.73224043715847, 'f1-score': 0.5630252100840336, 'support': 183.0}, '5': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0}, 'accuracy': 0.47150259067357514, 'macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [20]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))
import utils.utils as utils 

In [23]:
y_train = y_train - 1

In [26]:
y_test = y_test - 1

In [37]:
y_train = y_train - 1

In [39]:
import numpy as np
np.unique(y_train)

array([0, 1, 2, 3, 4])

In [41]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Build and train the XGBoost model.
# Setting 'use_label_encoder' to False to avoid warnings and specifying an evaluation metric.
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
xgb_model.fit(X_train, y_train)

# Make predictions on the test set.
preds_xgb = xgb_model.predict(X_test)

# Print the results.
metrics = utils.evaluate_predictions(preds_xgb, y_test, ordinal=True, by_class=True)
print("Overall Metrics:", metrics)
output_filepath = "../results/Triage-KTAS/Triage-KTAS_XGB"
utils.save_metrics(metrics, output_filepath)
print("Evaluation complete. Metrics and plots saved.")



Parameters: { "use_label_encoder" } are not used.



Overall Metrics: {'overall': {'accuracy': 0.34196891191709844, 'precision': 0.3882290643130567, 'recall': 0.34196891191709844, 'f1_score': 0.2907779324106367, 'adjusted_accuracy': 0.772020725388601, 'adjusted_precision': 0.82223869589665, 'adjusted_recall': 0.772020725388601, 'adjusted_f1': 0.7412777747224233, 'mae': 0.9119170984455959, 'mse': 1.4749568221070812, 'quadratic_kappa': np.float64(0.05918862044165041)}, 'by_class': {'0': {'precision': 1.0, 'recall': 0.16666666666666666, 'f1-score': 0.2857142857142857, 'support': 12.0}, '1': {'precision': 0.4, 'recall': 0.06756756756756757, 'f1-score': 0.11560693641618497, 'support': 148.0}, '2': {'precision': 0.4298245614035088, 'recall': 0.22580645161290322, 'f1-score': 0.29607250755287007, 'support': 217.0}, '3': {'precision': 0.3238095238095238, 'recall': 0.7431693989071039, 'f1-score': 0.45107794361525705, 'support': 183.0}, '4': {'precision': 0.05555555555555555, 'recall': 0.05263157894736842, 'f1-score': 0.05405405405405406, 'support'

### BioBERT + MLP

In [42]:
# Load the train and test data
train_df = pd.read_csv("../data/kaggle/train_numeric.csv")
test_df  = pd.read_csv("../data/kaggle/test_numeric.csv")

In [43]:
from sentence_transformers import SentenceTransformer
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [45]:
train_df = pd.read_csv("../data/kaggle/train_numeric.csv")
test_df  = pd.read_csv("../data/kaggle/test_numeric.csv")
train_df = train_df.drop(columns=['Length of stay_min','Group','KTAS_RN'])
test_df = test_df.drop(columns=['Length of stay_min','Group','KTAS_RN'])

In [46]:

# Load the pre-trained SentenceTransformer model for clinical text
# This model is fine-tuned for biomedical and clinical text embeddings
model_name = 'pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb'
model = SentenceTransformer(model_name)

# from transformers import AutoTokenizer, AutoModel
# tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
# model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")


In [47]:
# Process chief complaints in batches of 10,000
print("Computing symptom embeddings in batches...")
# Create embeddings for the chiefcomplaint column for train and test datasets
train_texts = train_df['Chief_complain'].tolist()
test_texts = test_df['Chief_complain'].tolist()

# Use the model to encode the texts; show_progress_bar=True gives you a progress update
train_chief_embeddings = model.encode(train_texts, show_progress_bar=True)
test_chief_embeddings = model.encode(test_texts, show_progress_bar=True)

# Save chief complaint embeddings
np.save('../data/kaggle/KTAS_train_chiefcomplaint_embeddings.npy', train_chief_embeddings)
np.save('../data/kaggle/KTAS_test_chiefcomplaint_embeddings.npy', test_chief_embeddings)

# Create embeddings for diagnosis column
train_texts = train_df['Diagnosis in ED'].tolist()
test_texts = test_df['Diagnosis in ED'].tolist()

# Use the model to encode the texts; show_progress_bar=True gives you a progress update
train_diagnosis_embeddings = model.encode(train_texts, show_progress_bar=True)
test_diagnosis_embeddings = model.encode(test_texts, show_progress_bar=True)

# Save diagnosis embeddings
np.save('../data/kaggle/KTAS_train_diagnosis_embeddings.npy', train_diagnosis_embeddings)
np.save('../data/kaggle/KTAS_test_diagnosis_embeddings.npy', test_diagnosis_embeddings)


Computing symptom embeddings in batches...


Batches: 100%|██████████| 22/22 [00:03<00:00,  5.69it/s]
Batches: 100%|██████████| 19/19 [00:00<00:00, 25.50it/s]
Batches: 100%|██████████| 22/22 [00:00<00:00, 22.67it/s]
Batches: 100%|██████████| 19/19 [00:01<00:00, 15.26it/s]


In [48]:
import numpy as np
from sklearn.decomposition import PCA

# ----- For Chief Complaint Embeddings -----

# Perform PCA on train chief complaint embeddings
print("Performing PCA on train chief complaint embeddings...")
pca_chief = PCA(n_components=25)
train_embeddings = pca_chief.fit_transform(train_chief_embeddings)

# Transform test chief complaint embeddings using same PCA
print("Performing PCA on test chief complaint embeddings...")
test_embeddings = pca_chief.transform(test_chief_embeddings)

# ----- For Diagnosis Embeddings -----

# Perform PCA on train diagnosis embeddings
print("Performing PCA on train diagnosis embeddings...")
pca_diag = PCA(n_components=25)
train_embeddings_two = pca_diag.fit_transform(train_diagnosis_embeddings)

# Transform test diagnosis embeddings using same PCA
print("Performing PCA on test diagnosis embeddings...")
test_embeddings_two = pca_diag.transform(test_diagnosis_embeddings)


Performing PCA on train chief complaint embeddings...
Performing PCA on test chief complaint embeddings...
Performing PCA on train diagnosis embeddings...
Performing PCA on test diagnosis embeddings...


In [49]:
# Fill all NAs with the mean

numeric_cols = train_df.select_dtypes(include=["number"]).columns
for col in numeric_cols:
    train_df[col].fillna(train_df[col].mean(), inplace=True)
    test_df[col].fillna(test_df[col].mean(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[col].fillna(train_df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df[col].fillna(test_df[col].mean(), inplace=True)


In [51]:
train_df = train_df.drop(columns=['Chief_complain','Diagnosis in ED'])
test_df = test_df.drop(columns=['Chief_complain','Diagnosis in ED'])

# Define the target column and feature set.
# Here we assume that all columns except 'KTAS_expert' are features.
target_col = "KTAS_expert"
features = [col for col in train_df.columns if col != target_col]

X_train = train_df[features]
y_train = train_df[target_col].values

X_test = test_df[features]
y_test = test_df[target_col].values

# # (Optional) Convert all features to numeric in case they are not.
# X_train = X_train.apply(pd.to_numeric, errors='coerce')
# X_test  = X_test.apply(pd.to_numeric, errors='coerce')

# # (Optional) Fill any remaining missing values with the median of each column.
# X_train.fillna(X_train.median(), inplace=True)
# X_test.fillna(X_test.median(), inplace=True)


In [52]:
# Combine vitals data with both chief complaint and diagnosis embeddings
X_train = np.hstack([X_train.values, train_embeddings, train_embeddings_two])  # Combine vitals, complaint embeddings, and diagnosis embeddings
X_test = np.hstack([X_test.values, test_embeddings, test_embeddings_two])  # Combine test vitals and both types of embeddings

In [57]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import os 
import sys
sys.path.append(os.path.abspath('..'))
import utils.utils as utils 

metrics_results = []

# Train an MLP classifier
model = MLPClassifier(
    hidden_layer_sizes=(500, 500),  # Small network
    random_state=42,
    early_stopping=True
)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)


# Print the results.
metrics = utils.evaluate_predictions(preds_xgb, y_test, ordinal=True, by_class=True)
print("Overall Metrics:", metrics)
output_filepath = "../results/Triage-KTAS/Triage-KTAS_BioBERT"
utils.save_metrics(metrics, output_filepath)
print("Evaluation complete. Metrics and plots saved.")

Overall Metrics: {'overall': {'accuracy': 0.3298791018998273, 'precision': 0.31228373178195923, 'recall': 0.3298791018998273, 'f1_score': 0.2605722812103006, 'adjusted_accuracy': 0.8635578583765112, 'adjusted_precision': 0.8832116659259983, 'adjusted_recall': 0.8635578583765112, 'adjusted_f1': 0.8531444782160913, 'mae': 0.8238341968911918, 'mse': 1.1692573402417963, 'quadratic_kappa': np.float64(0.07352555360430724)}, 'by_class': {'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12.0}, '2': {'precision': 0.2719298245614035, 'recall': 0.20945945945945946, 'f1-score': 0.2366412213740458, 'support': 148.0}, '3': {'precision': 0.36666666666666664, 'recall': 0.7096774193548387, 'f1-score': 0.4835164835164835, 'support': 217.0}, '4': {'precision': 0.3333333333333333, 'recall': 0.03278688524590164, 'f1-score': 0.05970149253731343, 'support': 183.0}, '5': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
