In [2]:
# External libraries for data processing
import numpy as np
import pandas as pd
import sklearn as sk
#To render graphs within notebook
%matplotlib inline
import matplotlib.pyplot as plt
import joblib 
import os

# Versions of libraries
print("Numpy version: {}".format(np.__version__))
print("Pandas version: {}".format(pd.__version__))
print("Scikit version: {}".format(sk.__version__))

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, classification_report

Numpy version: 1.24.3
Pandas version: 1.5.3
Scikit version: 1.3.0


In [3]:
from joblib import dump
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

#### Global functions

In [4]:
path = "C:/Project/Data/"

In [5]:
def convert_to_days(duration_str):
    parts = duration_str.split(' days ')  # Split string into form ['22', '20:55:00']
    days = float(parts[0])  # Extract number of days and convert to float
    time_parts = parts[1].split(':')  # Split time part (hh:mm:ss) ['20', '55', '00']
    hours = float(time_parts[0])  # Extract hours and convert to float
    minutes = float(time_parts[1])  # Extract minutes and convert to float
    seconds = float(time_parts[2])  # Extract seconds and convert to float
    total_days = days + (hours / 24) + (minutes / (24 * 60)) + (seconds / (24 * 3600))  # Calculate total days
    return total_days

In [6]:
file = "hosp/admissions.csv"
full_path = path + file

df_admissions = pd.read_csv(full_path)

df_admissions['dischtime'] = pd.to_datetime(df_admissions['dischtime'], format='%d/%m/%Y %H:%M')
df_admissions['admittime'] = pd.to_datetime(df_admissions['admittime'], format='%d/%m/%Y %H:%M')

df_admittime= pd.DataFrame()
df_admittime['hadm_id'] = df_admissions['hadm_id']
df_admittime['admittime'] = df_admissions['admittime']

In [7]:
file = "hosp/transfers.csv"
full_path = path + file

df_transfers = pd.read_csv(full_path)

In [8]:
# drop dishcarged samples
df_transfers = df_transfers[df_transfers['eventtype'] != 'discharge']

# convert time to datetime
df_transfers['intime'] = pd.to_datetime(df_transfers['intime'])
df_transfers['outtime'] = pd.to_datetime(df_transfers['outtime'])

In [9]:
# Target variable (careunit based on transfer_id)

df_target = df_transfers.drop(columns=['subject_id', 'eventtype','hadm_id','intime','outtime'])

In [10]:
df_target

Unnamed: 0,transfer_id,careunit
26,31766090,Med/Surg
27,39182916,Med/Surg
28,33930547,Med/Surg
29,39351025,Medicine
30,37834930,Medicine
...,...,...
1185,31983963,PACU
1186,38367109,Cardiac Surgery
1187,39362807,Medicine/Cardiology
1188,38425947,Medicine/Cardiology


# Loading pretrained transfer prediction learners

In [11]:
path = "C:/Users/jenni/OneDrive/Desktop/IP/target_learners/"

### emar

In [12]:
file = "bagging_clf_emar.joblib"
full_path = path + file

bagging_clf_emar = joblib.load(full_path)

In [13]:
bagging_clf_emar

### microbiologyevents

In [14]:
file = "bagging_clf_microbio.joblib"
full_path = path + file

bagging_clf_microbio = joblib.load(full_path)

In [15]:
bagging_clf_microbio

### prescriptions

In [16]:
file = "bagging_clf_prescriptions.joblib"
full_path = path + file

bagging_clf_prescriptions = joblib.load(full_path)

In [17]:
bagging_clf_prescriptions

### ingredientevents

In [18]:
file = "bagging_clf_ingredient.joblib"
full_path = path + file

bagging_clf_ingredient = joblib.load(full_path)

In [19]:
bagging_clf_ingredient

### inputevents

In [20]:
file = "bagging_clf_input.joblib"
full_path = path + file

bagging_clf_input = joblib.load(full_path)

In [21]:
bagging_clf_input

### procedureevents

In [22]:
file = "bagging_clf_procedure_events.joblib"
full_path = path + file

bagging_clf_procedure_events = joblib.load(full_path)

In [23]:
bagging_clf_procedure_events

## Load and preprocess evaluation data

In [24]:
folder_name = 'EnsembleEvaluationData'

### emar

In [25]:
file_path = os.path.join(folder_name, 'df_emar_evaluation.csv')

df_emar = pd.read_csv(file_path)

In [26]:
df_emar

Unnamed: 0,subject_id,hadm_id,emar_id,emar_seq,poe_id,pharmacy_id,enter_provider_id,charttime,medication,event_txt,scheduletime,storetime
0,10015272,27993466.0,10015272-31,31,10015272-48,88758875.0,,2137-06-13 08:36:00,Metoprolol Tartrate,,2137-06-13 08:36:00,2137-06-13 08:36:00
1,10020786,23488445.0,10020786-27,27,10020786-47,82871676.0,,2189-06-09 15:45:00,CefePIME,,2189-06-09 15:45:00,2189-06-09 17:31:00
2,10020786,23488445.0,10020786-26,26,10020786-47,82871676.0,,2189-06-09 15:15:00,CefePIME,,2189-06-09 15:15:00,2189-06-09 15:15:00
3,10021312,25020332.0,10021312-28,28,10021312-39,,P33K2X,2113-08-16 18:08:00,Lidocaine 1%,,2113-08-16 18:08:00,2113-08-16 18:08:00
4,10004235,25970245.0,10004235-83,83,10004235-554,,P54TSS,2196-06-14 22:50:00,Magnesium Sulfate,,2196-06-14 22:50:00,2196-06-14 22:51:00
...,...,...,...,...,...,...,...,...,...,...,...,...
7570,10035631,29276678.0,10035631-1673,1673,10035631-1997,,P52GRX,2116-03-12 06:41:00,HYDROmorphone (Dilaudid),Administered Bolus from IV Drip,2116-03-12 06:41:00,2116-03-12 06:41:00
7571,10035631,29276678.0,10035631-1667,1667,10035631-1997,,P52GRX,2116-03-12 02:39:00,HYDROmorphone (Dilaudid),Administered Bolus from IV Drip,2116-03-12 02:39:00,2116-03-12 02:40:00
7572,10035631,29276678.0,10035631-1666,1666,10035631-1997,,P52GRX,2116-03-12 02:15:00,HYDROmorphone (Dilaudid),Administered Bolus from IV Drip,2116-03-12 02:15:00,2116-03-12 02:39:00
7573,10037861,24540843.0,10037861-371,371,10037861-385,,,2117-03-17 19:00:00,Midazolam,Infusion Reconciliation Not Done,2117-03-17 19:00:00,2117-03-17 18:33:00


In [27]:
# Want a df of transfer_id, careunit and predicted value 

In [28]:
df_emar = pd.merge(df_emar, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_emar = df_emar[["transfer_id", "careunit"]]

KeyError: 'transfer_id'

In [None]:
target_emar

#### Preprocessing

In [None]:
# change transfer_id to careunit

df_emar = pd.merge(df_emar, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_emar.drop(columns=['transfer_id','hadm_id','charttime'], inplace=True)

In [None]:
data = df_emar.drop(columns=['careunit'])
target = pd.DataFrame(df_emar['careunit'])

In [29]:
data['delay']= data['delay'].astype(str)
data['delay']= data['delay'].apply(convert_to_days)

NameError: name 'data' is not defined

#### Testing the learner

In [24]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_emar = bagging_clf_emar.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_emar)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_emar, zero_division=0))

Accuracy: 0.053417576105686385
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.12      0.23      0.16        48
    Cardiac Vascular Intensive Care Unit (CVICU)       0.00      0.00      0.00         0
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         2
                                Discharge Lounge       0.00      0.00      0.00         0
                            Emergency Department       0.00      0.00      0.00         2
                Emergency Department Observation       0.31      0.60      0.41        43
                             Hematology/Oncology       0.00      0.00      0.00       551
                Hematology/Oncology Intermediate       0.00      0.00      0.00       136
                                        Med/Surg       0.10      0.14      0.11        29
                                    Med/Surg/

In [177]:
target_emar['pred_emar'] = y_pred_emar

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_emar['pred_emar'] = y_pred_emar


In [178]:
target_emar

Unnamed: 0,transfer_id,careunit,pred_emar
0,30185783.0,Medicine,Medicine
1,37652315.0,Med/Surg/Trauma,Medicine
2,37652315.0,Med/Surg/Trauma,Medicine
3,39739186.0,Transplant,PACU
4,33747001.0,PACU,Hematology/Oncology
...,...,...,...
1736,39136387.0,Hematology/Oncology,Medicine/Cardiology
1737,39136387.0,Hematology/Oncology,Medicine
1738,39136387.0,Hematology/Oncology,Medicine
1739,39136387.0,Hematology/Oncology,Medicine


### microbiologyevents

In [179]:
folder_name = 'EnsembleEvaluationData'

In [180]:
file_path = os.path.join(folder_name, 'df_microbio_evaluation.csv')

df_microbio = pd.read_csv(file_path)

In [181]:
# Want a df of transfer_id, careunit and predicted value 

In [182]:
df_microbio = pd.merge(df_microbio, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_microbio = df_microbio[["transfer_id", "careunit"]]

In [183]:
target_microbio

Unnamed: 0,transfer_id,careunit
0,35065627.0,Surgical Intensive Care Unit (SICU)
1,34170353.0,Trauma SICU (TSICU)
2,34976513.0,Hematology/Oncology
3,34976513.0,Hematology/Oncology
4,32627049.0,Medicine/Cardiology
...,...,...
178,32627049.0,Medicine/Cardiology
179,32627049.0,Medicine/Cardiology
180,34302052.0,Neurology
181,34976513.0,Hematology/Oncology


#### Preprocessing

In [47]:
# change transfer_id to careunit

df_microbio = pd.merge(df_microbio, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_microbio.drop(columns=['transfer_id'], inplace=True)

In [48]:
data = df_microbio.drop(columns=['hadm_id','careunit'])
target = pd.DataFrame(df_microbio['careunit'])

In [49]:
# Converting duration strings to floats

data['delay']= data['delay'].astype(str)
data['delay']= data['delay'].apply(convert_to_days)
data['days_since_admission'] = data['days_since_admission'].astype(str)
data['days_since_admission'] = data['days_since_admission'].apply(convert_to_days)

#### Testing the learner

In [50]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_microbio = bagging_clf_microbio.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_microbio)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_microbio, zero_division=0))

Accuracy: 0.13114754098360656
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.73      0.44      0.55        18
    Cardiac Vascular Intensive Care Unit (CVICU)       0.00      0.00      0.00         2
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         0
                                Discharge Lounge       0.00      0.00      0.00         1
                             Hematology/Oncology       0.08      0.04      0.05        27
                Hematology/Oncology Intermediate       0.67      0.33      0.44         6
                                        Med/Surg       0.00      0.00      0.00         0
                                    Med/Surg/GYN       0.00      0.00      0.00         0
                                 Med/Surg/Trauma       0.04      0.60      0.08         5
              Medical Intensive Care Unit (MIC

In [184]:
target_microbio['pred_microbio'] = y_pred_microbio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_microbio['pred_microbio'] = y_pred_microbio


In [185]:
target_microbio

Unnamed: 0,transfer_id,careunit,pred_microbio
0,35065627.0,Surgical Intensive Care Unit (SICU),Cardiac Surgery
1,34170353.0,Trauma SICU (TSICU),Transplant
2,34976513.0,Hematology/Oncology,Med/Surg/Trauma
3,34976513.0,Hematology/Oncology,Med/Surg/Trauma
4,32627049.0,Medicine/Cardiology,Med/Surg
...,...,...,...
178,32627049.0,Medicine/Cardiology,Med/Surg/Trauma
179,32627049.0,Medicine/Cardiology,Med/Surg/Trauma
180,34302052.0,Neurology,Medical Intensive Care Unit (MICU)
181,34976513.0,Hematology/Oncology,Med/Surg/Trauma


### prescriptions

In [186]:
folder_name = 'EnsembleEvaluationData'

In [187]:
file_path = os.path.join(folder_name, 'df_prescriptions_evaluation.csv')

df_prescriptions = pd.read_csv(file_path)

In [188]:
# Want a df of transfer_id, careunit and predicted value 

In [189]:
df_prescriptions = pd.merge(df_prescriptions, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_prescriptions = df_prescriptions[["transfer_id", "careunit"]]

In [190]:
target_prescriptions

Unnamed: 0,transfer_id,careunit
0,39793139.0,Med/Surg/Trauma
1,31077365.0,Medical Intensive Care Unit (MICU)
2,31077365.0,Medical Intensive Care Unit (MICU)
3,34953924.0,Cardiac Surgery
4,33916615.0,Med/Surg
...,...,...
1624,38564981.0,Cardiac Surgery
1625,30642078.0,Medicine
1626,39544317.0,Medicine
1627,30744153.0,Neurology


#### Preprocessing

In [81]:
# change transfer_id to careunit

df_prescriptions = pd.merge(df_prescriptions, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_prescriptions.drop(columns=['transfer_id'], inplace=True)

In [82]:
data = df_prescriptions.drop(columns=['hadm_id','careunit'])
target = pd.DataFrame(df_prescriptions['careunit'])

#### Testing the learner

In [83]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_prescriptions = bagging_clf_prescriptions.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_prescriptions)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_prescriptions, zero_division=0))

Accuracy: 0.13014119091467158
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.18      0.43      0.26       127
    Cardiac Vascular Intensive Care Unit (CVICU)       0.01      0.03      0.02        32
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         0
                                Discharge Lounge       0.00      0.00      0.00        22
                            Emergency Department       0.00      0.00      0.00        21
                             Hematology/Oncology       0.10      0.06      0.07       163
                Hematology/Oncology Intermediate       0.00      0.00      0.00        78
                                        Med/Surg       0.15      0.28      0.20        64
                                    Med/Surg/GYN       0.00      0.00      0.00        14
                                 Med/Surg/Trau

In [191]:
target_prescriptions['pred_prescriptions'] = y_pred_prescriptions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_prescriptions['pred_prescriptions'] = y_pred_prescriptions


In [192]:
target_prescriptions

Unnamed: 0,transfer_id,careunit,pred_prescriptions
0,39793139.0,Med/Surg/Trauma,Cardiac Surgery
1,31077365.0,Medical Intensive Care Unit (MICU),Med/Surg/GYN
2,31077365.0,Medical Intensive Care Unit (MICU),Neurology
3,34953924.0,Cardiac Surgery,Hematology/Oncology
4,33916615.0,Med/Surg,Med/Surg
...,...,...,...
1624,38564981.0,Cardiac Surgery,Medical Intensive Care Unit (MICU)
1625,30642078.0,Medicine,Cardiac Surgery
1626,39544317.0,Medicine,Medical Intensive Care Unit (MICU)
1627,30744153.0,Neurology,Medicine/Cardiology


### ingredientevents

In [193]:
folder_name = 'EnsembleEvaluationData'

In [194]:
file_path = os.path.join(folder_name, 'df_ingredient_evaluation.csv')

df_ingredient = pd.read_csv(file_path)

In [195]:
# Want a df of transfer_id, careunit and predicted value 

In [196]:
df_ingredient = pd.merge(df_ingredient, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_ingredient = df_ingredient[["transfer_id", "careunit"]]

In [197]:
target_ingredient

Unnamed: 0,transfer_id,careunit
0,35186527.0,Hematology/Oncology Intermediate
1,35186527.0,Hematology/Oncology Intermediate
2,35186527.0,Hematology/Oncology Intermediate
3,35186527.0,Hematology/Oncology Intermediate
4,35186527.0,Hematology/Oncology Intermediate
...,...,...
2516,37348935.0,Neurology
2517,37348935.0,Neurology
2518,37348935.0,Neurology
2519,37348935.0,Neurology


#### Preprocessing

In [88]:
df_ingredient = pd.merge(df_ingredient, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_ingredient.drop(columns=['transfer_id'], inplace=True)
data = df_ingredient.drop(columns=['hadm_id','careunit'])
target = pd.DataFrame(df_ingredient['careunit'])

#### Testing the learner

In [89]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_ingredient = bagging_clf_ingredient.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_ingredient)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_ingredient, zero_division=0))

Accuracy: 0.09163030543435145
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.16      0.55      0.25       110
    Cardiac Vascular Intensive Care Unit (CVICU)       0.00      0.00      0.00         0
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         0
                             Hematology/Oncology       0.19      0.02      0.03       550
                Hematology/Oncology Intermediate       0.00      0.00      0.00       121
                                        Med/Surg       0.00      0.00      0.00         0
                                    Med/Surg/GYN       0.00      0.00      0.00         0
                                 Med/Surg/Trauma       0.05      0.18      0.08       137
              Medical Intensive Care Unit (MICU)       0.25      0.02      0.04       250
Medical/Surgical Intensive Care Unit (MICU/SIC

In [198]:
target_ingredient['pred_ingredient'] = y_pred_ingredient

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_ingredient['pred_ingredient'] = y_pred_ingredient


In [199]:
target_ingredient

Unnamed: 0,transfer_id,careunit,pred_ingredient
0,35186527.0,Hematology/Oncology Intermediate,Medicine
1,35186527.0,Hematology/Oncology Intermediate,Medicine
2,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
3,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
4,35186527.0,Hematology/Oncology Intermediate,Cardiac Surgery
...,...,...,...
2516,37348935.0,Neurology,Medicine/Cardiology
2517,37348935.0,Neurology,Medicine
2518,37348935.0,Neurology,Medicine
2519,37348935.0,Neurology,Medicine/Cardiology


### inputevents

In [156]:
folder_name = 'EnsembleEvaluationData'

In [157]:
file_path = os.path.join(folder_name, 'df_input_evaluation.csv')

df_input = pd.read_csv(file_path)

In [158]:
# Want a df of transfer_id, careunit and predicted value 

In [159]:
df_input = pd.merge(df_input, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_input = df_input[["transfer_id", "careunit"]]

In [160]:
target_input

Unnamed: 0,transfer_id,careunit
0,35186527.0,Hematology/Oncology Intermediate
1,35186527.0,Hematology/Oncology Intermediate
2,35186527.0,Hematology/Oncology Intermediate
3,35186527.0,Hematology/Oncology Intermediate
4,35186527.0,Hematology/Oncology Intermediate
...,...,...
1918,37348935.0,Neurology
1919,37348935.0,Neurology
1920,37348935.0,Neurology
1921,37348935.0,Neurology


#### Preprocessing 

In [92]:
# change transfer_id to careunit

df_input = pd.merge(df_input, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_input.drop(columns=['transfer_id'], inplace=True)
data = df_input.drop(columns=['hadm_id','careunit'])
target = pd.DataFrame(df_input['careunit'])

#### Testing the learner

In [93]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_input = bagging_clf_input.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_input)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_input, zero_division=0))

Accuracy: 0.02080083203328133
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.00      0.00      0.00        81
    Cardiac Vascular Intensive Care Unit (CVICU)       0.00      0.00      0.00         0
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         0
                             Hematology/Oncology       0.00      0.00      0.00       439
                Hematology/Oncology Intermediate       0.00      0.00      0.00        71
                                        Med/Surg       0.00      0.00      0.00         0
                                    Med/Surg/GYN       0.00      0.00      0.00         0
                                 Med/Surg/Trauma       0.00      0.00      0.00       121
              Medical Intensive Care Unit (MICU)       0.00      0.00      0.00       177
Medical/Surgical Intensive Care Unit (MICU/SIC

In [162]:
target_input['pred_input'] = y_pred_input

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_input['pred_input'] = y_pred_input


In [163]:
target_input

Unnamed: 0,transfer_id,careunit,pred_input
0,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
1,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
2,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
3,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
4,35186527.0,Hematology/Oncology Intermediate,Medicine/Cardiology
...,...,...,...
1918,37348935.0,Neurology,Neuro Stepdown
1919,37348935.0,Neurology,Neuro Stepdown
1920,37348935.0,Neurology,Neuro Stepdown
1921,37348935.0,Neurology,Neuro Stepdown


### procedureevents

In [200]:
folder_name = 'EnsembleEvaluationData'

In [201]:
file_path = os.path.join(folder_name, 'df_procedure_events_evaluation.csv')

df_procedure_events = pd.read_csv(file_path)

In [202]:
# Want a df of transfer_id, careunit and predicted value 

In [203]:
df_procedure_events = pd.merge(df_procedure_events, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
target_procedure_events = df_procedure_events[["transfer_id", "careunit"]]

In [204]:
target_procedure_events

Unnamed: 0,transfer_id,careunit
0,39136387.0,Hematology/Oncology
1,39136387.0,Hematology/Oncology
2,39136387.0,Hematology/Oncology
3,39136387.0,Hematology/Oncology
4,39136387.0,Hematology/Oncology
...,...,...
169,37348935.0,Neurology
170,37348935.0,Neurology
171,35186527.0,Hematology/Oncology Intermediate
172,35186527.0,Hematology/Oncology Intermediate


#### Preprocessing 

In [96]:
# change transfer_id to careunit

df_procedure_events = pd.merge(df_procedure_events, df_target[['transfer_id', 'careunit']], on='transfer_id', how='left')
df_procedure_events.drop(columns=['transfer_id'], inplace=True)
data = df_procedure_events.drop(columns=['hadm_id','careunit'])
target = pd.DataFrame(df_procedure_events['careunit'])

#### Testing the learner

In [97]:
# Convert DataFrame to 1D array using ravel()
target = target.values.ravel()

# Make predictions on the testing set
y_pred_procedure_events = bagging_clf_procedure_events.predict(data)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_procedure_events)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(target, y_pred_procedure_events, zero_division=0))

Accuracy: 0.017241379310344827
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.00      0.00      0.00        12
                             Hematology/Oncology       0.00      0.00      0.00        24
                Hematology/Oncology Intermediate       0.00      0.00      0.00         7
                                        Med/Surg       0.00      0.00      0.00         0
                                    Med/Surg/GYN       0.00      0.00      0.00         0
                                 Med/Surg/Trauma       0.00      0.00      0.00        16
              Medical Intensive Care Unit (MICU)       0.00      0.00      0.00         5
Medical/Surgical Intensive Care Unit (MICU/SICU)       0.00      0.00      0.00         0
                                        Medicine       0.25      0.15      0.19        20
                             Medicine/Cardiol

In [205]:
target_procedure_events['pred_procedure_events'] = y_pred_procedure_events

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_procedure_events['pred_procedure_events'] = y_pred_procedure_events


In [206]:
target_procedure_events

Unnamed: 0,transfer_id,careunit,pred_procedure_events
0,39136387.0,Hematology/Oncology,Medical/Surgical Intensive Care Unit (MICU/SICU)
1,39136387.0,Hematology/Oncology,Medical/Surgical Intensive Care Unit (MICU/SICU)
2,39136387.0,Hematology/Oncology,Medical/Surgical Intensive Care Unit (MICU/SICU)
3,39136387.0,Hematology/Oncology,Medical/Surgical Intensive Care Unit (MICU/SICU)
4,39136387.0,Hematology/Oncology,Medical/Surgical Intensive Care Unit (MICU/SICU)
...,...,...,...
169,37348935.0,Neurology,Neuro Stepdown
170,37348935.0,Neurology,Neuro Stepdown
171,35186527.0,Hematology/Oncology Intermediate,Medicine
172,35186527.0,Hematology/Oncology Intermediate,Medicine


In [None]:
# Can see which units get commonly confused 

In [99]:
pd.DataFrame(target).value_counts()

Neurology                             64
Hematology/Oncology                   24
Medicine/Cardiology                   23
Medicine                              20
Med/Surg/Trauma                       16
Cardiac Surgery                       12
Hematology/Oncology Intermediate       7
Medical Intensive Care Unit (MICU)     5
Vascular                               3
dtype: int64

In [100]:
pd.DataFrame(y_pred_procedure_events).value_counts()

Medical/Surgical Intensive Care Unit (MICU/SICU)    47
Transplant                                          40
Med/Surg/Trauma                                     21
Neuro Stepdown                                      17
Med/Surg                                            15
Medicine                                            12
PACU                                                 7
Medical Intensive Care Unit (MICU)                   5
Hematology/Oncology                                  4
Med/Surg/GYN                                         3
Medicine/Cardiology                                  2
Trauma SICU (TSICU)                                  1
dtype: int64

## Combining predictions

In [226]:
target_input

Unnamed: 0,transfer_id,pred_input,y_input
0,30281852.0,Med/Surg/Trauma,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine
5,32627049.0,Medicine,Medicine/Cardiology
6,32787491.0,PACU,Medicine
7,32832189.0,Neuro Stepdown,Neurology
8,32932646.0,Trauma SICU (TSICU),Vascular
9,33281088.0,Medicine,Medical/Surgical Intensive Care Unit (MICU/SICU)


In [225]:
target_input = target_input.rename(columns={'careunit': 'y_input'})

In [228]:
target_procedure_events = target_procedure_events.rename(columns={'careunit': 'y_procedure_events'})

In [229]:
target_procedure_events

Unnamed: 0,transfer_id,pred_procedure_events,y_procedure_events
0,30281852.0,Med/Surg/Trauma,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Med/Surg/Trauma,Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine
5,32627049.0,Transplant,Medicine/Cardiology
6,32787491.0,PACU,Medicine
7,32832189.0,Neuro Stepdown,Neurology
8,34302052.0,Transplant,Neurology
9,34372637.0,Med/Surg/GYN,Medicine


In [230]:
target_microbio = target_microbio.rename(columns={'careunit': 'y_microbio'})

In [231]:
target_microbio

Unnamed: 0,transfer_id,pred_microbio,y_microbio
0,30281852.0,Med/Surg,Neurology
1,30642078.0,Med/Surg/Trauma,Medicine
2,30896594.0,Med/Surg/GYN,Medical Intensive Care Unit (MICU)
3,31077365.0,Hematology/Oncology,Medical Intensive Care Unit (MICU)
4,31983963.0,Medical/Surgical Intensive Care Unit (MICU/SICU),PACU
5,32537286.0,Med/Surg/Trauma,Medicine
6,32627049.0,Med/Surg/Trauma,Medicine/Cardiology
7,32787491.0,Medicine,Medicine
8,32832189.0,Med/Surg/Trauma,Neurology
9,33082827.0,Med/Surg,Cardiac Surgery


In [232]:
target_ingredient = target_ingredient.rename(columns={'careunit': 'y_ingredient'})

In [233]:
target_ingredient

Unnamed: 0,transfer_id,pred_ingredient,y_ingredient
0,30281852.0,Medicine,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Medicine/Cardiology,Neurology
3,32051325.0,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medicine,Medicine
5,32627049.0,Medicine,Medicine/Cardiology
6,32787491.0,Medicine,Medicine
7,32832189.0,Medicine,Neurology
8,32932646.0,Medicine,Vascular
9,33281088.0,Cardiac Surgery,Medical/Surgical Intensive Care Unit (MICU/SICU)


In [234]:
target_prescriptions = target_prescriptions.rename(columns={'careunit': 'y_prescriptions'})

In [235]:
target_prescriptions

Unnamed: 0,transfer_id,pred_prescriptions,y_prescriptions
0,30145190.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology
1,30185783.0,Cardiac Surgery,Medicine
2,30265082.0,Hematology/Oncology,Hematology/Oncology
3,30281852.0,Medicine,Neurology
4,30642078.0,Cardiac Surgery,Medicine
...,...,...,...
67,39544317.0,Med/Surg,Medicine
68,39641848.0,Medicine,Transplant
69,39670756.0,Medical Intensive Care Unit (MICU),Hematology/Oncology
70,39764235.0,Med/Surg,Emergency Department


In [221]:
target_emar = target_emar.rename(columns={'careunit': 'y_emar'})

In [236]:
target_emar

Unnamed: 0,transfer_id,pred_emar,y_emar
0,30145190.0,Coronary Care Unit (CCU),Neurology
1,30145612.0,Medicine,Medicine
2,30185783.0,Med/Surg/GYN,Medicine
3,30458338.0,Medicine,Hematology/Oncology Intermediate
4,30619804.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Med/Surg
5,30642078.0,Hematology/Oncology,Medicine
6,30744153.0,Medicine,Neurology
7,31043383.0,Trauma SICU (TSICU),Medicine/Cardiology
8,31077365.0,Hematology/Oncology,Medical Intensive Care Unit (MICU)
9,31306648.0,Medicine/Cardiology,Medicine


In [None]:
# Change individual prediction tables so they predict once per transfer_id (just take the majority vote)
# Group by id and take vote using mode 

In [169]:
target_input = target_input.groupby('transfer_id')[['pred_input','careunit']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [170]:
target_input

Unnamed: 0,transfer_id,pred_input,careunit
0,30281852.0,Med/Surg/Trauma,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine
5,32627049.0,Medicine,Medicine/Cardiology
6,32787491.0,PACU,Medicine
7,32832189.0,Neuro Stepdown,Neurology
8,32932646.0,Trauma SICU (TSICU),Vascular
9,33281088.0,Medicine,Medical/Surgical Intensive Care Unit (MICU/SICU)


In [207]:
target_procedure_events = target_procedure_events.groupby('transfer_id')[['pred_procedure_events','careunit']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [208]:
target_procedure_events

Unnamed: 0,transfer_id,pred_procedure_events,careunit
0,30281852.0,Med/Surg/Trauma,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Med/Surg/Trauma,Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine
5,32627049.0,Transplant,Medicine/Cardiology
6,32787491.0,PACU,Medicine
7,32832189.0,Neuro Stepdown,Neurology
8,34302052.0,Transplant,Neurology
9,34372637.0,Med/Surg/GYN,Medicine


In [238]:
target_microbio = target_microbio.groupby('transfer_id')[['pred_microbio','y_microbio']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [239]:
target_microbio

Unnamed: 0,transfer_id,pred_microbio,y_microbio
0,30281852.0,Med/Surg,Neurology
1,30642078.0,Med/Surg/Trauma,Medicine
2,30896594.0,Med/Surg/GYN,Medical Intensive Care Unit (MICU)
3,31077365.0,Hematology/Oncology,Medical Intensive Care Unit (MICU)
4,31983963.0,Medical/Surgical Intensive Care Unit (MICU/SICU),PACU
5,32537286.0,Med/Surg/Trauma,Medicine
6,32627049.0,Med/Surg/Trauma,Medicine/Cardiology
7,32787491.0,Medicine,Medicine
8,32832189.0,Med/Surg/Trauma,Neurology
9,33082827.0,Med/Surg,Cardiac Surgery


In [240]:
target_ingredient = target_ingredient.groupby('transfer_id')[['pred_ingredient','y_ingredient']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [241]:
target_ingredient

Unnamed: 0,transfer_id,pred_ingredient,y_ingredient
0,30281852.0,Medicine,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU)
2,31026953.0,Medicine/Cardiology,Neurology
3,32051325.0,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medicine,Medicine
5,32627049.0,Medicine,Medicine/Cardiology
6,32787491.0,Medicine,Medicine
7,32832189.0,Medicine,Neurology
8,32932646.0,Medicine,Vascular
9,33281088.0,Cardiac Surgery,Medical/Surgical Intensive Care Unit (MICU/SICU)


In [242]:
target_prescriptions = target_prescriptions.groupby('transfer_id')[['pred_prescriptions','y_prescriptions']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [243]:
target_prescriptions

Unnamed: 0,transfer_id,pred_prescriptions,y_prescriptions
0,30145190.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology
1,30185783.0,Cardiac Surgery,Medicine
2,30265082.0,Hematology/Oncology,Hematology/Oncology
3,30281852.0,Medicine,Neurology
4,30642078.0,Cardiac Surgery,Medicine
...,...,...,...
67,39544317.0,Med/Surg,Medicine
68,39641848.0,Medicine,Transplant
69,39670756.0,Medical Intensive Care Unit (MICU),Hematology/Oncology
70,39764235.0,Med/Surg,Emergency Department


In [244]:
target_emar = target_emar.groupby('transfer_id')[['pred_emar','y_emar']].apply(lambda x: x.mode().iloc[0]).reset_index()

In [245]:
target_emar

Unnamed: 0,transfer_id,pred_emar,y_emar
0,30145190.0,Coronary Care Unit (CCU),Neurology
1,30145612.0,Medicine,Medicine
2,30185783.0,Med/Surg/GYN,Medicine
3,30458338.0,Medicine,Hematology/Oncology Intermediate
4,30619804.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Med/Surg
5,30642078.0,Hematology/Oncology,Medicine
6,30744153.0,Medicine,Neurology
7,31043383.0,Trauma SICU (TSICU),Medicine/Cardiology
8,31077365.0,Hematology/Oncology,Medical Intensive Care Unit (MICU)
9,31306648.0,Medicine/Cardiology,Medicine


In [292]:
# Merge on transfer_id
merged_df = pd.merge(target_input, target_procedure_events, on='transfer_id', how='outer')
merged_df = pd.merge(merged_df, target_microbio, on='transfer_id', how='outer')
merged_df = pd.merge(merged_df, target_ingredient, on='transfer_id', how='outer')
merged_df = pd.merge(merged_df, target_prescriptions, on='transfer_id', how='outer')
merged_df = pd.merge(merged_df, target_emar, on='transfer_id', how='outer')
merged_df

Unnamed: 0,transfer_id,pred_input,y_input,pred_procedure_events,y_procedure_events,pred_microbio,y_microbio,pred_ingredient,y_ingredient,pred_prescriptions,y_prescriptions,pred_emar,y_emar
0,30281852.0,Med/Surg/Trauma,Neurology,Med/Surg/Trauma,Neurology,Med/Surg,Neurology,Medicine,Neurology,Medicine,Neurology,,
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Med/Surg/GYN,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),,
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology,Med/Surg/Trauma,Neurology,,,Medicine/Cardiology,Neurology,Med/Surg/Trauma,Neurology,,
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery,,,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Medicine,Med/Surg/Trauma,Medicine,Medicine,Medicine,Cardiac Surgery,Medicine,Med/Surg/GYN,Medicine
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,36190790.0,,,,,,,,,,,Medicine,Neurology
96,36974713.0,,,,,,,,,,,Medicine,Med/Surg/GYN
97,38598607.0,,,,,,,,,,,Vascular,Emergency Department Observation
98,39257822.0,,,,,,,,,,,Cardiac Surgery,Medicine


In [293]:
def mode_without_nan(row):
    return row.mode() if not row.isnull().all() else np.nan

In [294]:
# Select columns that begin with 'y_'
y_columns = merged_df.filter(like='y_')

# Calculate mode across rows, ignoring NaN values
merged_df['careunit'] = y_columns.apply(mode_without_nan, axis=1)

In [295]:
merged_df

Unnamed: 0,transfer_id,pred_input,y_input,pred_procedure_events,y_procedure_events,pred_microbio,y_microbio,pred_ingredient,y_ingredient,pred_prescriptions,y_prescriptions,pred_emar,y_emar,careunit
0,30281852.0,Med/Surg/Trauma,Neurology,Med/Surg/Trauma,Neurology,Med/Surg,Neurology,Medicine,Neurology,Medicine,Neurology,,,Neurology
1,30896594.0,Medicine,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Med/Surg/GYN,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),,,Medical Intensive Care Unit (MICU)
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Neurology,Med/Surg/Trauma,Neurology,,,Medicine/Cardiology,Neurology,Med/Surg/Trauma,Neurology,,,Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery,Medical/Surgical Intensive Care Unit (MICU/SICU),Cardiac Surgery,,,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medicine,Medical Intensive Care Unit (MICU),Medicine,Med/Surg/Trauma,Medicine,Medicine,Medicine,Cardiac Surgery,Medicine,Med/Surg/GYN,Medicine,Medicine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,36190790.0,,,,,,,,,,,Medicine,Neurology,Neurology
96,36974713.0,,,,,,,,,,,Medicine,Med/Surg/GYN,Med/Surg/GYN
97,38598607.0,,,,,,,,,,,Vascular,Emergency Department Observation,Emergency Department Observation
98,39257822.0,,,,,,,,,,,Cardiac Surgery,Medicine,Medicine


In [296]:
y_columns.columns

Index(['y_input', 'y_procedure_events', 'y_microbio', 'y_ingredient',
       'y_prescriptions', 'y_emar'],
      dtype='object', name=0)

In [297]:
merged_df.drop(columns=y_columns.columns, inplace=True)

In [298]:
merged_df

Unnamed: 0,transfer_id,pred_input,pred_procedure_events,pred_microbio,pred_ingredient,pred_prescriptions,pred_emar,careunit
0,30281852.0,Med/Surg/Trauma,Med/Surg/Trauma,Med/Surg,Medicine,Medicine,,Neurology
1,30896594.0,Medicine,Medicine,Med/Surg/GYN,Medicine,Medicine,,Medical Intensive Care Unit (MICU)
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Med/Surg/Trauma,,Medicine/Cardiology,Med/Surg/Trauma,,Neurology
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),Med/Surg/Trauma,Medicine,Cardiac Surgery,Med/Surg/GYN,Medicine
...,...,...,...,...,...,...,...,...
95,36190790.0,,,,,,Medicine,Neurology
96,36974713.0,,,,,,Medicine,Med/Surg/GYN
97,38598607.0,,,,,,Vascular,Emergency Department Observation
98,39257822.0,,,,,,Cardiac Surgery,Medicine


In [299]:
def mode_without_nan(row):
    return row.mode()[0] if not row.isnull().all() else np.nan

In [300]:
# Add a new column containing the mode of values across rows, ignoring NaN values
merged_df['Vote'] = merged_df.drop(columns=['transfer_id','careunit']).apply(mode_without_nan, axis=1)

In [301]:
merged_df

Unnamed: 0,transfer_id,pred_input,pred_procedure_events,pred_microbio,pred_ingredient,pred_prescriptions,pred_emar,careunit,Vote
0,30281852.0,Med/Surg/Trauma,Med/Surg/Trauma,Med/Surg,Medicine,Medicine,,Neurology,Med/Surg/Trauma
1,30896594.0,Medicine,Medicine,Med/Surg/GYN,Medicine,Medicine,,Medical Intensive Care Unit (MICU),Medicine
2,31026953.0,Cardiac Vascular Intensive Care Unit (CVICU),Med/Surg/Trauma,,Medicine/Cardiology,Med/Surg/Trauma,,Neurology,Med/Surg/Trauma
3,32051325.0,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery,Cardiac Surgery
4,32537286.0,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),Med/Surg/Trauma,Medicine,Cardiac Surgery,Med/Surg/GYN,Medicine,Medical Intensive Care Unit (MICU)
...,...,...,...,...,...,...,...,...,...
95,36190790.0,,,,,,Medicine,Neurology,Medicine
96,36974713.0,,,,,,Medicine,Med/Surg/GYN,Medicine
97,38598607.0,,,,,,Vascular,Emergency Department Observation,Vascular
98,39257822.0,,,,,,Cardiac Surgery,Medicine,Cardiac Surgery


In [302]:
# Evaluate the model
accuracy = accuracy_score(merged_df['careunit'], merged_df['Vote'])
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(merged_df['careunit'], merged_df['Vote'], zero_division=0))

Accuracy: 0.11
Classification Report:
                                                  precision    recall  f1-score   support

                                 Cardiac Surgery       0.18      0.60      0.27         5
    Cardiac Vascular Intensive Care Unit (CVICU)       0.00      0.00      0.00         3
                        Coronary Care Unit (CCU)       0.00      0.00      0.00         1
                                Discharge Lounge       0.00      0.00      0.00         2
                            Emergency Department       0.00      0.00      0.00         7
                Emergency Department Observation       1.00      0.17      0.29         6
                             Hematology/Oncology       0.09      0.20      0.13         5
                Hematology/Oncology Intermediate       0.00      0.00      0.00         4
                                        Med/Surg       0.25      0.29      0.27         7
                                    Med/Surg/GYN       0.00  

In [None]:
# Should be 183 evaluation transfers
# The missing ones could be transfers that happened in the same day and no other recorded information happened between that 
# time (so a prediction cannot be made)