In [29]:
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib
import logging
from sklearn.preprocessing import StandardScaler

# Send log records (INFO and above) to a file next to the script.
logging.basicConfig(filename='app.log', level=logging.INFO)

# Integer codes used for the categorical columns of the training set.
GENDER_CODES = {'M': 0, 'F': 1}
FACILITY_CODES = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}

# Read the training set and encode the categorical columns.
training_data_path = 'C:\\Users\\Kevin\\Documents\\DAT158\\training_data.csv'
training_data = pd.read_csv(training_data_path)
training_data['gender'] = training_data['gender'].replace(GENDER_CODES)
training_data['facid'] = training_data['facid'].replace(FACILITY_CODES)

# Show every column when a frame is displayed, then discard incomplete rows.
pd.set_option('display.max_columns', None)
training_data = training_data.dropna()
training_data.head()  # preview only; the result is not used

# Remove the 'vdate' column when present (a missing column is ignored).
training_data = training_data.drop(columns=['vdate'], errors='ignore')

# Target is 'lengthofstay'; every remaining column is used as a feature.
y = training_data['lengthofstay']
X = training_data.drop(columns=['lengthofstay'])

# Hold out 20% of the rows for validation, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train.head()  # preview only; the result is not used

# Fit a scaler on the training features. It is only fitted and saved here;
# the model below is trained on the unscaled features.
scaler = StandardScaler().fit(X_train)

# Train the random forest on the training split.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score on the held-out split (model.score), then log and print the result.
val_score = model.score(X_val, y_val)
logging.info(f'Validation score: {val_score}')
print(val_score)

# Persist the fitted model and scaler.
joblib.dump(model, 'C:\\Users\\Kevin\\Documents\\DAT158\\trained_model.joblib')
joblib.dump(scaler, 'scaler.pkl')
    
    
def predict_length_of_stay(age, gender, diagnoses):
    """Predict a patient's hospital length of stay from the form inputs.

    Args:
        age: Patient age; must be a finite, non-negative number.
        gender: Either "Male" or "Female".
        diagnoses: Comma-separated diagnoses. Only the number of non-empty
            entries is used as a feature; an empty value counts as zero.

    Returns:
        A message containing the prediction rounded to two decimals, or an
        "Error: ..." message when validation, model loading or prediction
        fails. Exceptions are never propagated to the caller.
    """
    try:
        # Validate first so bad form input is reported without loading the
        # model from disk.
        if age is None or isinstance(age, bool):
            raise ValueError("Age must be a non-negative number")
        age_value = float(age)
        if not np.isfinite(age_value) or age_value < 0:
            raise ValueError("Age must be a non-negative number")

        # Same encoding as the training data in this file: M -> 0, F -> 1.
        gender_codes = {"Male": 0, "Female": 1}
        if gender not in gender_codes:
            raise ValueError("Gender must be 'Male' or 'Female'")
        gender_numeric = gender_codes[gender]

        # Count only non-blank entries so "" -> 0 and "a, ,b" -> 2.
        num_diagnoses = sum(
            1 for entry in (diagnoses or "").split(",") if entry.strip()
        )

        # Load the persisted model. No scaler is applied: the model in this
        # file is fitted on unscaled features, and the saved scaler was fitted
        # on the full feature matrix, so it cannot transform a lone age value.
        model = joblib.load('C:\\Users\\Kevin\\Documents\\DAT158\\trained_model.joblib')

        # NOTE(review): the saved model must have been trained on exactly
        # these three features, in this order; otherwise predict() raises and
        # the message is returned to the user below. Confirm against training.
        input_feature = np.array(
            [[age_value, gender_numeric, num_diagnoses]], dtype=float
        )

        # Predict & log
        predicted_length_of_stay = model.predict(input_feature)[0]
        logging.info(f'Input: {[age, gender, diagnoses]}, Prediction: {predicted_length_of_stay}')

        return f"Predicted length of stay: {round(predicted_length_of_stay, 2)} days"
    except Exception as e:
        # UI boundary: report the failure in the output box instead of crashing.
        logging.error(f'Error: {str(e)}, Input: {[age, gender, diagnoses]}')
        return f"Error: {str(e)}"
   
# Gradio interface: three input widgets feeding predict_length_of_stay,
# with the returned message shown in a single text box.
_age_input = gr.Number(label="Age")
_gender_input = gr.Radio(["Male", "Female"], label="Gender")
_diagnoses_input = gr.Textbox(label="Diagnoses (comma-separated)")
_prediction_output = gr.Textbox(label="Predicted length of stay (days)")

iface = gr.Interface(
    fn=predict_length_of_stay,
    inputs=[_age_input, _gender_input, _diagnoses_input],
    outputs=_prediction_output,
    title="Hospital Length of Stay Predictor",
    description="Enter patient data to predict their length of stay in the hospital.",
)

# Start the local web server for the interface.
iface.launch()


0.9204895233115795
Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.


