In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation

data_path = r"C:\Program Files\Ampps\www\Length of Stay prediction\healthcare\surgery_length_of_stay.csv"
df = pd.read_csv(data_path)
df.columns = df.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')


features = ['Gender', 'Type_of_Surgery', 'Chronic_Disease', 'Smoker', 'Recommended_Medication', 'Age']
X = df[features]
y = df['Length_of_Stay_Days']


categorical_cols = ['Gender', 'Type_of_Surgery', 'Chronic_Disease', 'Smoker', 'Recommended_Medication']
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X_encoded = pd.DataFrame(ohe.fit_transform(X[categorical_cols]), columns=ohe.get_feature_names_out(categorical_cols))
X_numeric = X[['Age']].reset_index(drop=True)
X_final = pd.concat([X_numeric, X_encoded], axis=1)


kf = KFold(n_splits=5, shuffle=True, random_state=42)

mae_scores = []
rmse_scores = []
r2_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_final)):
    print(f"----- Fold {fold + 1} -----")
    X_train, X_val = X_final.iloc[train_idx], X_final.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    train_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'learning_rate': 0.03,
        'num_leaves': 64,
        'max_depth': 10,
        'min_data_in_leaf': 30,
        'subsample': 0.9,
        'colsample_bytree': 0.8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
        'seed': 42
    }

    model = lgb.train(
        params,
        train_data,
        num_boost_round=2000,
        valid_sets=[train_data, val_data],
        callbacks=[early_stopping(stopping_rounds=100), log_evaluation(period=100)]
    )

    y_pred = model.predict(X_val, num_iteration=model.best_iteration)
    mae = mean_absolute_error(y_val, y_pred)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    r2 = r2_score(y_val, y_pred)

    print(f"MAE: {mae:.3f} | RMSE: {rmse:.3f} | R²: {r2:.3f}")

    mae_scores.append(mae)
    rmse_scores.append(rmse)
    r2_scores.append(r2)

print("\n=== Cross-Validation Results ===")
print(f"Average MAE: {np.mean(mae_scores):.3f} ± {np.std(mae_scores):.3f}")
print(f"Average RMSE: {np.mean(rmse_scores):.3f} ± {np.std(rmse_scores):.3f}")
print(f"Average R²: {np.mean(r2_scores):.3f} ± {np.std(r2_scores):.3f}")


----- Fold 1 -----
Training until validation scores don't improve for 100 rounds
[100]	training's rmse: 3.02916	valid_1's rmse: 3.20518
Early stopping, best iteration is:
[57]	training's rmse: 3.14009	valid_1's rmse: 3.15522
MAE: 2.666 | RMSE: 3.155 | R²: 0.219
----- Fold 2 -----
Training until validation scores don't improve for 100 rounds
[100]	training's rmse: 2.9641	valid_1's rmse: 3.51165
Early stopping, best iteration is:
[61]	training's rmse: 3.042	valid_1's rmse: 3.4954
MAE: 2.883 | RMSE: 3.495 | R²: 0.237
----- Fold 3 -----
Training until validation scores don't improve for 100 rounds
[100]	training's rmse: 3.06703	valid_1's rmse: 3.10392
[200]	training's rmse: 2.93048	valid_1's rmse: 3.11779
Early stopping, best iteration is:
[111]	training's rmse: 3.05123	valid_1's rmse: 3.09788
MAE: 2.561 | RMSE: 3.098 | R²: 0.283
----- Fold 4 -----
Training until validation scores don't improve for 100 rounds
[100]	training's rmse: 2.95936	valid_1's rmse: 3.49695
Early stopping, best itera

In [2]:
import pandas as pd

data_path = r"C:\Program Files\Ampps\www\Length of Stay prediction\healthcare\surgery_length_of_stay.csv"
df = pd.read_csv(data_path)


print("Unique Surgery Types:")
print(df['Type of Surgery'].unique())

print("\nUnique Recommended Medications:")
print(df['Recommended Medication'].unique())


Unique Surgery Types:
['Spinal Fusion' 'Heart Valve Repair' 'Kidney Transplant'
 'Knee Replacement' 'Liver Transplant' 'Appendectomy' 'Hysterectomy'
 'Lung Resection' 'Cataract Surgery' 'Gallbladder Removal'
 'Hip Replacement' 'Coronary Artery Bypass']

Unique Recommended Medications:
['Albuterol' 'Atorvastatin' 'Pain Management' 'Insulin' 'Budesonide'
 'Paracetamol' 'Furosemide' 'Metformin' 'Lisinopril' 'Amlodipine'
 'Erythropoietin' 'Chemotherapy' 'Ibuprofen' 'Aspirin']


In [3]:
import gradio as gr
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.preprocessing import OneHotEncoder
import itertools


model = lgb.Booster(model_file='model.txt')

categorical_cols = ['Gender', 'Type_of_Surgery', 'Chronic_Disease', 'Smoker', 'Recommended_Medication']

gender_options = ['Male', 'Female']

surgery_options = [
    'Spinal Fusion', 'Heart Valve Repair', 'Kidney Transplant',
    'Knee Replacement', 'Liver Transplant', 'Appendectomy',
    'Hysterectomy', 'Lung Resection', 'Cataract Surgery',
    'Gallbladder Removal', 'Hip Replacement', 'Coronary Artery Bypass'
]

chronic_options = ['Yes', 'No']
smoker_options = ['Yes', 'No']

medication_options = [
    'Albuterol', 'Atorvastatin', 'Pain Management', 'Insulin',
    'Budesonide', 'Paracetamol', 'Furosemide', 'Metformin',
    'Lisinopril', 'Amlodipine', 'Erythropoietin', 'Chemotherapy',
    'Ibuprofen', 'Aspirin'
]


dummy_data = pd.DataFrame(list(itertools.product(
    gender_options,
    surgery_options,
    chronic_options,
    smoker_options,
    medication_options
)), columns=categorical_cols)

ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ohe.fit(dummy_data)

def predict_length_of_stay(Gender, Type_of_Surgery, Chronic_Disease, Smoker, Recommended_Medication, Age):
    input_df = pd.DataFrame([[Gender, Type_of_Surgery, Chronic_Disease, Smoker, Recommended_Medication]],
                            columns=categorical_cols)
    encoded = ohe.transform(input_df)
    encoded_df = pd.DataFrame(encoded, columns=ohe.get_feature_names_out(categorical_cols))
    input_final = pd.concat([pd.DataFrame([Age], columns=['Age']), encoded_df], axis=1)
    prediction = model.predict(input_final)[0]
    return round(prediction, 2)

iface = gr.Interface(
    fn=predict_length_of_stay,
    inputs=[
        gr.Dropdown(choices=gender_options, label="Gender"),
        gr.Dropdown(choices=surgery_options, label="Type of Surgery"),
        gr.Dropdown(choices=chronic_options, label="Chronic Disease"),
        gr.Dropdown(choices=smoker_options, label="Smoker"),
        gr.Dropdown(choices=medication_options, label="Recommended Medication"),
        gr.Number(label="Age", value=50)
    ],
    outputs=gr.Number(label="Predicted Length of Stay (Days)"),
    title="Length of Stay Prediction",
    description="Enter patient details to predict length of hospital stay."
)

iface.launch()


* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.


