In [1]:
import os

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load data
# The CSV location can be overridden with the HOSPITAL_LOS_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original local path is used.
DATA_PATH = os.environ.get(
    'HOSPITAL_LOS_CSV',
    '/Users/bradlarson/Desktop/CAS-502/Hospital-LOS.csv',
)
data = pd.read_csv(DATA_PATH)

# Define target and features
# 'patientid' is removed together with the target so it is not used as a feature.
TARGET_COL = 'Stay (in days)'
X = data.drop(columns=[TARGET_COL, 'patientid'])
y = data[TARGET_COL]

# Preprocess data
# Numeric columns are standardised; every remaining column is one-hot encoded.
# Categoricals are chosen as "everything that is not numeric" rather than only
# dtype 'object': ColumnTransformer drops any column not assigned to a
# transformer (remainder='drop' is its default), so bool / category /
# string-dtype columns would otherwise vanish from the model without warning.
# For a frame whose non-numeric columns are all object dtype the selection is
# the same as before.
numeric_cols = X.select_dtypes(include=['number']).columns
categorical_cols = X.select_dtypes(exclude=['number']).columns

preprocessor = ColumnTransformer([
    # handle_unknown='ignore': categories not seen during fit do not raise at
    # predict time.
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', StandardScaler(), numeric_cols)
])

# Split data
# 80/20 hold-out split; the fixed seed keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build pipeline
# Keeping the preprocessor inside the pipeline means it is fitted on the
# training rows only and re-applied automatically at predict time.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Train model
model.fit(X_train, y_train)

# Evaluate model
# All three metrics are computed on the held-out test rows.
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))


MAE: 1.2405153187399511
MSE: 3.4202028539614244
R² Score: 0.9447086063740399


In [2]:
# One example admission record. Built from (column name, value) pairs so the
# insertion order of the keys matches the order they are listed in.
new_patient = dict([
    ('Available Extra Rooms in Hospital', 3),
    ('Department', 'gynecology'),
    ('Ward_Facility_Code', 'C'),
    ('doctor_name', 'Dr. Oliva'),
    ('staff_available', 15),
    ('Age', '31-40'),  # age is given as a bracket label, not a number
    ('gender', 'Female'),
    ('Type of Admission', 'Emergency'),
    ('Severity of Illness', 'Extreme'),
    ('health_conditions', 'diabetes'),
    ('Visitors with Patient', 2),
    ('Insurance', 'yes'),
])


In [3]:
# Wrap the single record in a list so pandas builds a one-row DataFrame whose
# columns are the record's keys.
new_patient_df = pd.DataFrame(data=[new_patient])

# Run the fitted pipeline on that row; the result is indexed at 0 below
# because only one row was passed in.
predicted_stay = model.predict(new_patient_df)

print("Predicted Length of Stay (in days):", predicted_stay[0])


Predicted Length of Stay (in days): 8.032666666666666


In [4]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from model import predict_study_length  # Example function from your model file

# Streamlit dashboard: upload a CSV, score it, and display the results.
# NOTE(review): the title says "Study" while the rest of this notebook predicts
# length of *stay* — confirm the intended wording.
st.title("Length of Study Prediction Dashboard")

uploaded_file = st.file_uploader("Upload a CSV", type=["csv"])
if uploaded_file:
    # Nothing below runs until a file has been provided.
    df = pd.read_csv(uploaded_file)
    st.write(df.head())  # preview of the first rows only

    # predict_study_length is imported from the local `model` module;
    # presumably it returns one value per row of df — TODO confirm.
    predictions = predict_study_length(df)
    df = df.assign(Predictions=predictions)
    st.write(df)

    # Plot the predictions in row order.
    st.line_chart(df['Predictions'])


2025-02-09 16:26:11.244 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [11]:
pip install 

SyntaxError: invalid syntax (3737097518.py, line 1)