In [155]:
# External libraries for data processing
import numpy as np
import pandas as pd
import sklearn as sk
#To render graphs within notebook
%matplotlib inline
import matplotlib.pyplot as plt
import joblib 
import os
from tqdm import tqdm

# Versions of libraries
print("Numpy version: {}".format(np.__version__))
print("Pandas version: {}".format(pd.__version__))
print("Scikit version: {}".format(sk.__version__))

Numpy version: 1.24.3
Pandas version: 1.5.3
Scikit version: 1.3.0


In [156]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

#### Global functions

In [157]:
path = "C:/Project/Data/"

In [158]:
def convert_to_days(duration_str):
    parts = duration_str.split(' days ')  # Split string into form ['22', '20:55:00']
    days = float(parts[0])  # Extract number of days and convert to float
    time_parts = parts[1].split(':')  # Split time part (hh:mm:ss) ['20', '55', '00']
    hours = float(time_parts[0])  # Extract hours and convert to float
    minutes = float(time_parts[1])  # Extract minutes and convert to float
    seconds = float(time_parts[2])  # Extract seconds and convert to float
    total_days = days + (hours / 24) + (minutes / (24 * 60)) + (seconds / (24 * 3600))  # Calculate total days
    return total_days

In [159]:
file = "hosp/admissions.csv"
full_path = path + file

df_admissions = pd.read_csv(full_path)

df_admissions['dischtime'] = pd.to_datetime(df_admissions['dischtime'], format='%d/%m/%Y %H:%M')
df_admissions['admittime'] = pd.to_datetime(df_admissions['admittime'], format='%d/%m/%Y %H:%M')

df_admittime= pd.DataFrame()
df_admittime['hadm_id'] = df_admissions['hadm_id']
df_admittime['admittime'] = df_admissions['admittime']

### Target variable calculation

In [160]:
file = "hosp/procedures_icd.csv"
full_path = path + file

df_procedures = pd.read_csv(full_path)

In [161]:
df_procedures

Unnamed: 0,subject_id,hadm_id,seq_num,chartdate,icd_code,icd_version
0,10011398,27505812,3,2146-12-15,3961,9
1,10011398,27505812,2,2146-12-15,3615,9
2,10011398,27505812,1,2146-12-15,3614,9
3,10014729,23300884,4,2125-03-23,3897,9
4,10014729,23300884,1,2125-03-20,3403,9
...,...,...,...,...,...,...
717,10004733,27411876,3,2174-12-20,4513,9
718,10021118,24490144,4,2161-11-19,5A1221Z,10
719,10021118,24490144,3,2161-11-19,06BP4ZZ,10
720,10021118,24490144,1,2161-11-19,02100Z9,10


In [162]:
file = "hosp/d_icd_procedures.csv"
full_path = path + file

df_codes = pd.read_csv(full_path)

In [163]:
# drop unneeded columns 
df_procedures = df_procedures.drop(columns=['subject_id', 'seq_num','icd_version'])

In [164]:
# convert time to datetime
df_procedures['chartdate'] = pd.to_datetime(df_procedures['chartdate'])

In [165]:
df_procedures

Unnamed: 0,hadm_id,chartdate,icd_code
0,27505812,2146-12-15,3961
1,27505812,2146-12-15,3615
2,27505812,2146-12-15,3614
3,23300884,2125-03-23,3897
4,23300884,2125-03-20,3403
...,...,...,...
717,27411876,2174-12-20,4513
718,24490144,2161-11-19,5A1221Z
719,24490144,2161-11-19,06BP4ZZ
720,24490144,2161-11-19,02100Z9


# Loading pretrained procedure prediction learners

In [166]:
path = "C:/Users/jenni/OneDrive/Desktop/IP/procedures_learners/"

### emar

In [167]:
file = "logistic_clf_emar.joblib"
full_path = path + file

logistic_clf_emar = joblib.load(full_path)

In [168]:
logistic_clf_emar

### microbiologyevents

In [169]:
file = "logistic_clf_microbio.joblib"
full_path = path + file

logistic_clf_microbio = joblib.load(full_path)

In [170]:
logistic_clf_microbio

### pharmacy

In [171]:
file = "logistic_clf_pharmacy.joblib"
full_path = path + file

logistic_clf_pharmacy = joblib.load(full_path)

In [173]:
logistic_clf_pharmacy

### icustays

In [174]:
file = "logistic_clf_icustays.joblib"
full_path = path + file

logistic_clf_icustays = joblib.load(full_path)

In [175]:
logistic_clf_icustays

### inputevents

In [176]:
file = "logistic_clf_input.joblib"
full_path = path + file

logistic_clf_input = joblib.load(full_path)

In [177]:
logistic_clf_input

### outputevents

In [178]:
file = "logistic_clf_output.joblib"
full_path = path + file

logistic_clf_output = joblib.load(full_path)

In [179]:
logistic_clf_output

### procedureevents

In [180]:
file = "logistic_clf_procedure_events.joblib"
full_path = path + file

logistic_clf_procedure_events = joblib.load(full_path)

In [181]:
logistic_clf_procedure_events

### datetimeevents

In [182]:
file = "logistic_clf_datetime_events.joblib"
full_path = path + file

logistic_clf_datetime_events = joblib.load(full_path)

In [183]:
logistic_clf_datetime_events

## Load and preprocess evaluation data

In [184]:
folder_name = 'EnsembleEvaluationData'

### emar

In [188]:
file_path = os.path.join(folder_name, 'df_emar_evaluation.csv')

df_emar = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_emar_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing

In [189]:
df_emar.drop(columns=['hadm_id','charttime'], inplace=True)

In [193]:
data = df_emar

In [196]:
# Convert '0 days' to timedelta
zero_timedelta = pd.Timedelta(0)

# Replace '0 days' with the desired timedelta value
data['delay'] = data['delay'].replace('0 days', zero_timedelta)

In [197]:
data['delay']= data['delay'].astype(str)
data['delay']= data['delay'].apply(convert_to_days)

#### Testing the learner

In [287]:
y_pred_emar

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [288]:
y_pred_emar.size

472491

In [289]:
np.sum(np.sum(y_pred_emar, axis=1))
# 2201 available to predict 
# underpredicting

1112

In [199]:
from sklearn.metrics import accuracy_score, classification_report

# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_emar = logistic_clf_emar.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_emar)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_emar))

Accuracy: 0.41927782751417486
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.75      0.08      0.15       109
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00        77
           6       0.11      0.03      0.04       228
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00        30
          15       0.00      0.00      0.00         0
          16       0.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### microbiologyevents

In [None]:
# ONLY PREDICTING 0 FOR ALL

In [200]:
folder_name = 'EnsembleEvaluationData'

In [201]:
file_path = os.path.join(folder_name, 'df_microbio_evaluation.csv')

df_microbio = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_microbio_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing

In [203]:
data = df_microbio.drop(columns=['hadm_id'])

In [206]:
# Converting duration strings to floats

data['delay']= data['delay'].astype(str)
data['delay']= data['delay'].apply(convert_to_days)
data['days_since_admission'] = data['days_since_admission'].astype(str)
data['days_since_admission'] = data['days_since_admission'].apply(convert_to_days)

#### Testing the learner

In [220]:
pd.DataFrame(y_pred_microbio)[3].value_counts()

0    245
Name: 3, dtype: int64

In [291]:
y_pred_microbio.size

31115

In [292]:
np.sum(np.sum(y_pred_microbio, axis=1))
# 194 available to predict 
# underpredicting

0

In [208]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_microbio = logistic_clf_microbio.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_microbio)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_microbio))

Accuracy: 0.30612244897959184
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         5
           9       0.00      0.00      0.00        14
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### pharmacy

In [226]:
folder_name = 'EnsembleEvaluationData'

In [227]:
file_path = os.path.join(folder_name, 'df_pharmacy_evaluation.csv')

df_pharmacy = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_pharmacy_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing

In [230]:
data = df_pharmacy.drop(columns=['hadm_id','duration'])

In [233]:
data['verification_delay'] = data['verification_delay'].replace('0 days', zero_timedelta)

In [234]:
data['medication_duration']= data['medication_duration'].astype(str)
data['medication_duration']= data['medication_duration'].apply(convert_to_days)
data['verification_delay'] = data['verification_delay'].astype(str)
data['verification_delay'] = data['verification_delay'].apply(convert_to_days)

#### Testing the learner

In [293]:
y_pred_pharmacy.size

312417

In [294]:
np.sum(np.sum(y_pred_pharmacy, axis=1))
# 1438 available to predict 
# underpredicting

124

In [236]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_pharmacy = logistic_clf_pharmacy.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_pharmacy)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_pharmacy))

Accuracy: 0.2930474333983106
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00        12
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00        26
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00        42
          11       0.20      0.03      0.05       112
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### icustays

In [296]:
folder_name = 'EnsembleEvaluationData'

In [297]:
file_path = os.path.join(folder_name, 'df_icustays_evaluation.csv')

df_icustays = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_icustays_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing

In [298]:
data = df_icustays.drop(columns=['hadm_id'])
file_path = os.path.join(folder_name, 'df_icustays_evaluation_target.csv')
target = pd.read_csv(file_path)
# target = target.drop(columns=['hadm_id','chartdate'])

In [299]:
data['days_since_admission']= data['days_since_admission'].astype(str)
data['days_since_admission']= data['days_since_admission'].apply(convert_to_days)

#### Testing the learner

In [301]:
y_pred_icustays.size

150

In [302]:
np.sum(np.sum(y_pred_icustays, axis=1))
# 1 available to predict 

6

In [300]:
# Make predictions on the testing set
y_pred_icustays = logistic_clf_icustays.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_icustays)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target.values, y_pred_icustays))

Accuracy: 0.0
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [67]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# # Make predictions on the testing set
# y_pred_icustays = logistic_clf_icustays.predict(data)

# # Evaluate the model
# accuracy = accuracy_score(target, y_pred_icustays)
# print("Accuracy:", accuracy)

# print("Classification Report:")
# print(classification_report(target, y_pred_icustays))

Accuracy: 0.0
Classification Report:
              precision    recall  f1-score   support

     0B978ZX       0.00      0.00      0.00       1.0
     0D598ZZ       0.00      0.00      0.00       1.0
        3491       0.00      0.00      0.00       1.0
        3893       0.00      0.00      0.00       0.0
        3897       0.00      0.00      0.00       0.0
        4311       0.00      0.00      0.00       0.0
        5491       0.00      0.00      0.00       1.0
        8604       0.00      0.00      0.00       1.0
        9604       0.00      0.00      0.00       0.0

    accuracy                           0.00       5.0
   macro avg       0.00      0.00      0.00       5.0
weighted avg       0.00      0.00      0.00       5.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### inputevents

In [243]:
folder_name = 'EnsembleEvaluationData'

In [244]:
file_path = os.path.join(folder_name, 'df_input_evaluation.csv')

df_input = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_input_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing 

In [250]:
data = df_input.drop(columns=['hadm_id','duration'])

In [251]:
data

Unnamed: 0,amount,rate,patientweight,totalamount,isopenbag,originalamount,originalrate,recording_delay,rateuom_N/A,rateuom_grams/hour,...,itemid_229014,itemid_229058,itemid_229069,itemid_229072,itemid_229295,itemid_229296,itemid_229297,itemid_229420,itemid_229615,itemid_229639
0,1.000000,0.000000,59.5,200.0,0,1.000000,1.000000,-0.000694,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2.000000,0.000000,59.5,100.0,0,2.000000,0.033333,-0.040972,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2.000000,0.000000,59.5,100.0,0,2.000000,0.033333,-0.041667,1,0,...,0,0,0,0,0,0,0,0,0,0
3,2.000000,0.000000,59.5,100.0,0,2.000000,0.033333,-0.041667,1,0,...,0,0,0,0,0,0,0,0,0,0
4,71.800001,6.000000,59.5,100.0,0,100.000000,6.000000,-0.498611,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2723,0.500000,0.000000,96.0,0.0,0,0.500000,0.500000,-0.000694,1,0,...,0,0,0,0,0,0,0,0,0,0
2724,0.500000,0.000000,96.0,0.0,0,0.500000,0.500000,0.057639,1,0,...,0,0,0,0,0,0,0,0,0,0
2725,0.319770,0.030008,96.0,250.0,0,8.000000,0.030000,-0.076389,0,0,...,0,0,0,0,0,0,0,0,0,0
2726,0.110377,0.009998,96.0,250.0,0,7.532536,0.010000,-0.075694,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Testing the learner

In [303]:
y_pred_input.size

332816

In [304]:
np.sum(np.sum(y_pred_input, axis=1))
# 2173 available to predict 

6129

In [252]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_input = logistic_clf_input.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_input)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_input))

Accuracy: 0.27089442815249265
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.18      0.01      0.02       285
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00        50
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### outputevents

In [253]:
folder_name = 'EnsembleEvaluationData'

In [254]:
file_path = os.path.join(folder_name, 'df_output_evaluation.csv')

df_output = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_output_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing 

In [255]:
data = df_output.drop(columns=['hadm_id'])

In [256]:
data['days_since_admission']= data['days_since_admission'].astype(str)
data['days_since_admission']= data['days_since_admission'].apply(convert_to_days)
data['recording_delay']= data['recording_delay'].astype(str)
data['recording_delay']= data['recording_delay'].apply(convert_to_days)

#### Testing the learner

In [305]:
y_pred_output.size

117000

In [306]:
np.sum(np.sum(y_pred_output, axis=1))
# 814 available to predict 
# underpredicting

113

In [259]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_output = logistic_clf_output.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_output)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_output))

Accuracy: 0.21128205128205127
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00        77
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         8
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### procedureevents

In [261]:
folder_name = 'EnsembleEvaluationData'

In [262]:
file_path = os.path.join(folder_name, 'df_procedure_events_evaluation.csv')

df_procedure_events = pd.read_csv(file_path)

file_path = os.path.join(folder_name, 'df_procedure_events_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing 

In [263]:
data = df_procedure_events.drop(columns=['hadm_id'])

#### Testing the learner

In [307]:
y_pred_procedure_events.size

15729

In [308]:
np.sum(np.sum(y_pred_procedure_events, axis=1))
# 123 available to predict 

487

In [266]:
# Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_procedure_events = logistic_clf_procedure_events.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_procedure_events)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_procedure_events))

Accuracy: 0.17006802721088435
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00        11
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         4
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### datetimeevents

In [271]:
folder_name = 'EnsembleEvaluationData'

In [272]:
file_path = os.path.join(folder_name, 'df_datetime_events_evaluation.csv')

df_datetime_events = pd.read_csv(file_path)


file_path = os.path.join(folder_name, 'df_datetime_events_evaluation_target.csv')

target = pd.read_csv(file_path)

#### Preprocessing 

In [273]:
data = df_datetime_events.drop(columns=['hadm_id'])

#### Testing the learner

In [309]:
y_pred_datetime_events.size

377454

In [310]:
np.sum(np.sum(y_pred_datetime_events, axis=1))
# 2442 available to predict 
# underpredicting

297

In [276]:
# # Standardize features
# scaler = StandardScaler()
# data = scaler.fit_transform(data)

# Make predictions on the testing set
y_pred_datetime_events = logistic_clf_datetime_events.predict(data.values)

# Evaluate the model
accuracy = accuracy_score(target, y_pred_datetime_events)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(target, y_pred_datetime_events))

Accuracy: 0.1799265605875153
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00       100
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00        12
          16       0.00      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [284]:
y_pred_datetime_events.size

377454

In [283]:
np.sum(np.sum(y_pred_datetime_events, axis=1))
# 2442 available to predict 
# underpredicting

297

In [285]:
np.sum(y_pred_datetime_events, axis=1).size

2451

Instead of combining the predictions (because they are all trash) I am going to analyse each individually to provide insights into which procedures are easiest to predict, the most informative tables etc. and try to explain why