In [2]:
import streamlit as st
import pandas as pd
import pickle as pkl
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler 
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Page title shown at the top of the Streamlit app.
st.title("Hospital Length of Stay Predictor")

2025-02-25 19:14:18.079 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [27]:
# Section 1 - Model based on Hospital-LOS

# The header names the dataset file that is loaded just below.
st.header("Base Model (Hospital-LOS.csv)")

# Load Base Data
# The path is relative, so the CSV must sit in the working directory.

data_base = pd.read_csv('Hospital-LOS.csv')

# Sanity check: confirm the CSV was parsed into a DataFrame.
print(type(data_base))



<class 'pandas.core.frame.DataFrame'>


In [17]:
# List the column names of the base dataset (features, id column and target).
print(data_base.columns)

Index(['Available Extra Rooms in Hospital', 'Department', 'Ward_Facility_Code',
       'doctor_name', 'staff_available', 'patientid', 'Age', 'gender',
       'Type of Admission', 'Severity of Illness', 'health_conditions',
       'Visitors with Patient', 'Insurance', 'Stay (in days)'],
      dtype='object')


In [33]:
# Debug check: print the Python types of the raw frame and the feature frame.
# NOTE(review): in the visible notebook, X_base is first assigned further down
# (from data_base.drop(...)), so this cell raises NameError when the notebook is
# executed top to bottom on a fresh kernel. Move it below that assignment or drop it.
print(type(data_base))
print(type(X_base))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [41]:
# Console evaluation of the base pipeline on the held-out test split.
# NOTE(review): `model`, `X_test_base` and `y_test_base` are only assigned in a
# later cell of the visible notebook, so this cell fails with NameError on
# Restart & Run All; it should run after the training cell.
# NOTE(review): these three metric functions are already imported in the first
# cell, so this import is redundant.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Make predictions on the test set
y_pred_base = model.predict(X_test_base)

# Calculate metrics
mae = mean_absolute_error(y_test_base, y_pred_base)
mse = mean_squared_error(y_test_base, y_pred_base)
r2 = r2_score(y_test_base, y_pred_base)

# Report the metrics on stdout (not in the Streamlit page).
print("MAE:", mae)
print("MSE:", mse)
print("R^2 Score:", r2)


MAE: 1.2405153187399511
MSE: 3.4202028539614244
R^2 Score: 0.9447086063740399


In [43]:
# Define target and features
# 'patientid' is dropped together with the target so it is not used as a feature.
X_base = data_base.drop(columns=['Stay (in days)', 'patientid'])
y_base = data_base['Stay (in days)']

# Identify categorical and numeric columns
categorical_cols = X_base.select_dtypes(include=['object']).columns
numeric_cols = X_base.select_dtypes(include=['number']).columns

# Split data (fixed seed so the hold-out set is the same on every run)
X_train_base, X_test_base, y_train_base, y_test_base = train_test_split(X_base, y_base, test_size=0.2, random_state=42)


@st.cache_resource(show_spinner="Training base model...")
def _fit_base_pipeline(X_train, y_train, cat_cols, num_cols):
    """Build and fit the base preprocessing + random-forest pipeline.

    Streamlit re-executes the script on every widget interaction; caching the
    fitted pipeline means the forest is only retrained when the training data
    or the column lists change.

    Parameters:
        X_train: training feature frame.
        y_train: training target values.
        cat_cols: list of column names to one-hot encode.
        num_cols: list of column names to standard-scale.

    Returns:
        The fitted sklearn Pipeline with steps 'preprocessor' and 'regressor'.
    """
    column_encoder = ColumnTransformer([
        # handle_unknown='ignore' lets prediction accept categories unseen in training.
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
        ('num', StandardScaler(), num_cols)
    ])
    pipeline = Pipeline([
        ('preprocessor', column_encoder),
        ('regressor', RandomForestRegressor(random_state=42))
    ])
    pipeline.fit(X_train, y_train)
    return pipeline


# Train model (served from the cache on reruns with unchanged inputs)
model = _fit_base_pipeline(
    X_train_base, y_train_base, list(categorical_cols), list(numeric_cols)
)

# Keep the `preprocessor` name pointing at the fitted transformer inside the pipeline.
preprocessor = model.named_steps['preprocessor']

In [45]:
# Score the base pipeline on the held-out split and show the metrics in the page.
y_pred_base = model.predict(X_test_base)
st.write("**Evaluation Metrics:**")
for metric_label, metric_fn in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R² Score:", r2_score),
):
    st.write(metric_label, metric_fn(y_test_base, y_pred_base))

st.subheader("Predict LOS for a New Patient (Base Model)")


def _base_choices(column):
    """Return the distinct values found in `column` of the base dataset."""
    return data_base[column].unique()


# Input widgets, one per feature column of the base dataset.
# Categorical fields offer exactly the values present in the data.
available_rooms = st.number_input("Available Extra Rooms in Hospital", min_value=0, value=3)
department = st.selectbox("Department", options=_base_choices('Department'))
ward_code = st.selectbox("Ward Facility Code", options=_base_choices('Ward_Facility_Code'))
doctor_name = st.selectbox("Doctor Name", options=_base_choices('doctor_name'))
staff_available = st.number_input("Staff Available", min_value=0, value=15)
age_group = st.selectbox("Age Group", options=_base_choices('Age'))
gender = st.selectbox("Gender", options=_base_choices('gender'))
admission_type = st.selectbox("Type of Admission", options=_base_choices('Type of Admission'))
severity = st.selectbox("Severity of Illness", options=_base_choices('Severity of Illness'))
health_conditions = st.selectbox("Health Conditions", options=_base_choices('health_conditions'))
visitors = st.number_input("Visitors with Patient", min_value=0, value=2)
insurance = st.selectbox("Insurance", options=_base_choices('Insurance'))

# Collect the widget values under the column names the pipeline was trained on.
new_patient_base = dict([
    ('Available Extra Rooms in Hospital', available_rooms),
    ('Department', department),
    ('Ward_Facility_Code', ward_code),
    ('doctor_name', doctor_name),
    ('staff_available', staff_available),
    ('Age', age_group),
    ('gender', gender),
    ('Type of Admission', admission_type),
    ('Severity of Illness', severity),
    ('health_conditions', health_conditions),
    ('Visitors with Patient', visitors),
    ('Insurance', insurance),
])

# Predict only when the button is pressed; the input is a single-row frame.
if st.button("Predict LOS (Base Model)"):
    new_patient_df = pd.DataFrame([new_patient_base])
    predicted_stay = model.predict(new_patient_df)
    st.success(f"Predicted Length of Stay: {predicted_stay[0]:.2f} days")




In [47]:
# Section 2: Updated Model with Co-morbidities
#############################
st.header("Updated Model with Co-morbidities (Hospital_LOS_with_Comorbidities.csv)")

# Load updated dataset
# The path is relative, so the CSV must sit in the working directory.
data_updated = pd.read_csv('Hospital_LOS_with_Comorbidities.csv')

# Define target and features
# 'patientid' is dropped together with the target so it is not used as a feature.
X_updated = data_updated.drop(columns=['Stay (in days)', 'patientid'])
y_updated = data_updated['Stay (in days)']

# Identify columns for updated data
categorical_cols_updated = X_updated.select_dtypes(include=['object']).columns
numeric_cols_updated = X_updated.select_dtypes(include=['number']).columns

# Split data (fixed seed so the hold-out set is the same on every run)
X_train_updated, X_test_updated, y_train_updated, y_test_updated = train_test_split(X_updated, y_updated, test_size=0.2, random_state=42)


@st.cache_resource(show_spinner="Training updated model...")
def _fit_updated_pipeline(X_train, y_train, cat_cols, num_cols):
    """Build and fit the preprocessing + random-forest pipeline for the updated data.

    Streamlit re-executes the script on every widget interaction; caching the
    fitted pipeline means the forest is only retrained when the training data
    or the column lists change.

    Parameters:
        X_train: training feature frame.
        y_train: training target values.
        cat_cols: list of column names to one-hot encode.
        num_cols: list of column names to standard-scale.

    Returns:
        The fitted sklearn Pipeline with steps 'preprocessor' and 'regressor'.
    """
    column_encoder = ColumnTransformer([
        # handle_unknown='ignore' lets prediction accept categories unseen in training.
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
        ('num', StandardScaler(), num_cols)
    ])
    pipeline = Pipeline([
        ('preprocessor', column_encoder),
        ('regressor', RandomForestRegressor(random_state=42))
    ])
    pipeline.fit(X_train, y_train)
    return pipeline


# Train updated model (served from the cache on reruns with unchanged inputs)
updated_model = _fit_updated_pipeline(
    X_train_updated,
    y_train_updated,
    list(categorical_cols_updated),
    list(numeric_cols_updated),
)

# Keep the `preprocessor_updated` name pointing at the fitted transformer inside the pipeline.
preprocessor_updated = updated_model.named_steps['preprocessor']



In [49]:
# Score the updated pipeline on its held-out split and show the metrics in the page.
y_pred_updated = updated_model.predict(X_test_updated)
st.write("**Updated Model Evaluation Metrics:**")
for metric_label, metric_fn in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R² Score:", r2_score),
):
    st.write(metric_label, metric_fn(y_test_updated, y_pred_updated))

st.subheader("Predict LOS for a New Patient (Updated Model)")


def _updated_choices(column):
    """Return the distinct values found in `column` of the updated dataset."""
    return data_updated[column].unique()


def _comorbidity_input(label, key):
    """Render a number input with minimum 0 and default 0 for one co-morbidity field."""
    return st.number_input(label, min_value=0, value=0, key=key)


# Input widgets for the columns shared with the base dataset.
# Each widget gets an explicit key and an "(Updated)" label suffix.
available_rooms_upd = st.number_input("Available Extra Rooms in Hospital (Updated)", min_value=0, value=3, key="rooms_upd")
department_upd = st.selectbox("Department (Updated)", options=_updated_choices('Department'), key="dept_upd")
ward_code_upd = st.selectbox("Ward Facility Code (Updated)", options=_updated_choices('Ward_Facility_Code'), key="ward_upd")
doctor_name_upd = st.selectbox("Doctor Name (Updated)", options=_updated_choices('doctor_name'), key="doc_upd")
staff_available_upd = st.number_input("Staff Available (Updated)", min_value=0, value=15, key="staff_upd")
age_group_upd = st.selectbox("Age Group (Updated)", options=_updated_choices('Age'), key="age_upd")
gender_upd = st.selectbox("Gender (Updated)", options=_updated_choices('gender'), key="gender_upd")
admission_type_upd = st.selectbox("Type of Admission (Updated)", options=_updated_choices('Type of Admission'), key="adm_upd")
severity_upd = st.selectbox("Severity of Illness (Updated)", options=_updated_choices('Severity of Illness'), key="sev_upd")
health_conditions_upd = st.selectbox("Health Conditions (Updated)", options=_updated_choices('health_conditions'), key="hc_upd")
visitors_upd = st.number_input("Visitors with Patient (Updated)", min_value=0, value=2, key="visitors_upd")
insurance_upd = st.selectbox("Insurance (Updated)", options=_updated_choices('Insurance'), key="ins_upd")

# Additional co-morbidity fields
# NOTE(review): these inputs have no upper bound; if the columns are 0/1 flags, confirm whether a max is wanted.
septicemia = _comorbidity_input("Septicemia", "septicemia")
chf = _comorbidity_input("CHF", "chf")
pneumonia = _comorbidity_input("Pneumonia", "pneumonia")
copd = _comorbidity_input("COPD_Bronchiectasis", "copd")
cardiac = _comorbidity_input("Cardiac_Dysrhythmias", "cardiac")
cerebrovascular = _comorbidity_input("Acute_Cerebrovascular_Disease", "cerebrovascular")
renal_failure = _comorbidity_input("Acute_Renal_Failure", "renal_failure")
skin_infections = _comorbidity_input("Skin_Infections", "skin_infections")
uti = _comorbidity_input("UTI", "uti")

# Collect the widget values under the column names the updated pipeline was trained on.
new_patient_updated = dict([
    ('Available Extra Rooms in Hospital', available_rooms_upd),
    ('Department', department_upd),
    ('Ward_Facility_Code', ward_code_upd),
    ('doctor_name', doctor_name_upd),
    ('staff_available', staff_available_upd),
    ('Age', age_group_upd),
    ('gender', gender_upd),
    ('Type of Admission', admission_type_upd),
    ('Severity of Illness', severity_upd),
    ('health_conditions', health_conditions_upd),
    ('Visitors with Patient', visitors_upd),
    ('Insurance', insurance_upd),
    ('Septicemia', septicemia),
    ('CHF', chf),
    ('Pneumonia', pneumonia),
    ('COPD_Bronchiectasis', copd),
    ('Cardiac_Dysrhythmias', cardiac),
    ('Acute_Cerebrovascular_Disease', cerebrovascular),
    ('Acute_Renal_Failure', renal_failure),
    ('Skin_Infections', skin_infections),
    ('UTI', uti),
])

# Predict only when the button is pressed; the input is a single-row frame.
if st.button("Predict LOS (Updated Model)"):
    new_patient_updated_df = pd.DataFrame([new_patient_updated])
    predicted_stay_updated = updated_model.predict(new_patient_updated_df)
    st.success(f"Predicted Length of Stay: {predicted_stay_updated[0]:.2f} days")



In [None]:
# Launch the Streamlit server from a shell for the script file streamlit_app.py.
# NOTE(review): this runs a separate .py file, not the cells of this notebook —
# presumably an exported copy of the code above; confirm the two are kept in sync.
!streamlit run streamlit_app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://192.168.0.235:8502[0m
[0m
