# **User Interface to Predict Healthcare Costs**

Import and Load Libraries

In [1]:
import json
import joblib
import pandas as pd
import torch
import numpy as np
!pip install shiny
from shiny import App, ui, render, reactive
import xgboost as xgb
from scipy.sparse import csr_matrix, hstack
import os
!pip install scikit-learn==1.5.2
!pip install scikit-learn==1.6.1
!pip install --upgrade pip
!pip install cudf-cu12 cuml-cu12 --extra-index-url=https://pypi.nvidia.com

# Configure GPU for XGBoost
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Use first GPU
print("XGBoost will use GPU for predictions")

Collecting shiny
  Downloading shiny-1.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting uvicorn>=0.16.0 (from shiny)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette (from shiny)
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Collecting htmltools>=0.6.0 (from shiny)
  Downloading htmltools-0.6.0-py3-none-any.whl.metadata (3.3 kB)
Collecting appdirs>=1.4.4 (from shiny)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting asgiref>=3.5.2 (from shiny)
  Downloading asgiref-3.8.1-py3-none-any.whl.metadata (9.3 kB)
Collecting watchfiles>=0.18.0 (from shiny)
  Downloading watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting questionary>=2.0.0 (from shiny)
  Downloading questionary-2.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting python-multipart>=0.0.7 (from shiny)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading shiny-1.3.

Mount Google Drive

In [2]:
from google.colab import drive

# Mount Google Drive at the path the rest of the notebook reads from.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


Import Files

In [3]:
# Project root on the mounted Google Drive; the mapping, feature-list,
# preprocessing and model paths in this notebook are all joined onto it.
base_path = "/content/drive/My Drive/Capstone Project/"

In [4]:
# Read the per-column option lists that populate the UI dropdowns.
unique_values_path = os.path.join(base_path, "Data/Mappings/unique_values.json")
with open(unique_values_path, "r") as unique_values_file:
    unique_values = json.load(unique_values_file)

In [5]:
def _load_mapping(filename):
    """Load one JSON mapping file from the project's ``Data/Mappings`` folder.

    Args:
        filename: File name inside ``Data/Mappings`` (e.g. ``"age_mapping.json"``).

    Returns:
        The parsed JSON content of the file.
    """
    mapping_path = os.path.join(base_path, "Data/Mappings", filename)
    # Explicit encoding so the result does not depend on the platform default.
    with open(mapping_path, "r", encoding="utf-8") as f:
        return json.load(f)


# Mappings used by the UI dropdowns and by the ordinal feature encoding.
age_mapping = _load_mapping("age_mapping.json")

risk_severity_mapping = _load_mapping("risk_severity_mapping.json")
risk_severity_mapping_reverse = {v: k for k, v in risk_severity_mapping.items()}

facility_mapping = _load_mapping("facility_mapping.json")

illness_mapping = _load_mapping("illness_mapping.json")

In [6]:
# Load the two feature-name lists (stored as JSON): the columns passed to the
# encoder and the columns the scaler is indexed by.
categorical_columns_path = os.path.join(base_path, "Data/Features/categorical_columns.json")
ordinal_columns_path = os.path.join(base_path, "Data/Features/ordinal_columns.json")

with open(categorical_columns_path, "r") as categorical_file:
    cols_to_encode = json.load(categorical_file)

with open(ordinal_columns_path, "r") as ordinal_file:
    cols_to_scale = json.load(ordinal_file)

In [7]:
# Map every feature name to an identifier-safe id: spaces and hyphens are
# replaced with underscores.
feature_ids = {
    column: column.replace(' ', '_').replace('-', '_')
    for column in cols_to_encode + cols_to_scale
}

In [8]:
# Load the fitted preprocessing objects (categorical encoder, scaler, SVD).
# NOTE: joblib.load unpickles arbitrary Python objects, so only load artifacts
# that come from a trusted source.
encoder_path = os.path.join(base_path, "Data/Preprocessing Components/encoder.joblib")
scaler_path = os.path.join(base_path, "Data/Preprocessing Components/scaler.joblib")
svd_path = os.path.join(base_path, "Data/Preprocessing Components/svd.joblib")

encoder = joblib.load(encoder_path)
scaler = joblib.load(scaler_path)
svd = joblib.load(svd_path)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [9]:
# Look up where "Total Charges" sits in the scaled columns and keep its mean and
# scale, which are used later to convert the model output back to dollars.
total_charges_idx = cols_to_scale.index("Total Charges")
total_charges_mean, total_charges_scale = (
    scaler.mean_[total_charges_idx],
    scaler.scale_[total_charges_idx],
)

In [10]:
# Load the trained XGBoost model from disk.
model_path = os.path.join(base_path, "Code/Models/XGBoost/xgb_model.joblib")
xgb_model = joblib.load(model_path)

# Objects exposing get_booster() hand back their underlying booster; anything
# else is used as the booster directly.
booster = xgb_model.get_booster() if hasattr(xgb_model, 'get_booster') else xgb_model

Define User Interface

In [11]:
# Page layout: one row with four input columns, followed by a separate row that
# shows the prediction. (The results row used to be nested inside the inputs row.)
app_ui = ui.page_fluid(
    ui.h1("Healthcare Cost Prediction"),

    # Input form: four equal-width columns.
    ui.row(
        ui.column(
            3,
            ui.h3("Patient Information"),
            ui.input_select("age_group", "Age Group", choices=list(age_mapping.keys())),
            ui.input_select("gender", "Gender", choices={"M": "Male", "F": "Female", "U": "Do Not Wish to Disclose"}),
            ui.input_select("race", "Race", choices=unique_values.get("Race", [])),
            ui.input_select("ethnicity", "Ethnicity", choices=unique_values.get("Ethnicity", [])),
        ),

        ui.column(
            3,
            ui.h3("Hospital & Admission Details"),
            ui.input_select("facility", "Hospital Facility", choices=list(facility_mapping.values())),
            ui.input_select("hospital_county", "Hospital County", choices=unique_values.get("Hospital County", [])),
            ui.input_select("zip_code", "Zip Code (3 digits)", choices=unique_values.get("Zip Code - 3 digits", [])),
            ui.input_select("admission_type", "Type of Admission", choices=unique_values.get("Type of Admission", [])),
            ui.input_select("emergency_dept", "Emergency Department Indicator", choices={"Y": "Yes", "N": "No"}),
            # max is 121 so the value the note below asks for is a valid entry
            # (it was 120, which contradicted the note).
            ui.input_numeric("length_of_stay", "Length of Stay (days)", value=3, min=1, max=121),
            ui.p("Note: For stays longer than 120 days, enter 121", style="color:gray;font-size:small;"),
            ui.input_select("disposition", "Patient Disposition", choices=unique_values.get("Patient Disposition", [])),
        ),

        ui.column(
            3,
            ui.h3("Medical Condition"),
            ui.input_select("mdc_code", "Medical Diagnosis Category", choices=list(illness_mapping.values())),
            ui.input_select("medical_surgical", "Medical or Surgical", choices=unique_values.get("APR Medical Surgical Description", [])),
            ui.input_select("severity", "Severity of Illness", choices=list(risk_severity_mapping.keys())),
            ui.input_select("risk_mortality", "Risk of Mortality", choices=list(risk_severity_mapping.keys())),
        ),

        ui.column(
            3,
            ui.h3("Payment Information"),
            ui.input_select("payment_1", "Payment Type 1", choices=unique_values.get("Payment Typology 1", [])),
            ui.input_select("payment_2", "Payment Type 2", choices=unique_values.get("Payment Typology 2", [])),
            ui.input_action_button("predict", "Predict Cost", class_="btn-primary"),
        ),
    ),

    # Results panel: full-width row below the form.
    ui.row(
        ui.column(
            12,
            ui.h2("Predicted Healthcare Costs"),
            ui.output_text_verbatim("prediction_result"),
            style="background-color:#f8f9fa; padding:20px; font-size:24px; font-weight:bold; text-align:center; border-radius:10px; border: 2px solid #007bff;",
        ),
    ),
)

Define Server

In [12]:
def server(input, output, session):
    """Shiny server logic: build model features from the form and render the prediction.

    Args:
        input: Shiny input object exposing the controls defined in ``app_ui``.
        output: Shiny output object on which ``prediction_result`` is registered.
        session: Shiny session object (not used here).
    """

    # The facility and diagnosis dropdowns list the *values* of their mapping files.
    # Keep value -> key lookups so that a selection the encoder does not recognise
    # can be retried as the mapping key before falling back to a default category.
    # NOTE(review): presumably the mapping keys are the ids/codes seen in training — confirm.
    value_to_key = {
        "Permanent Facility Id": {str(v): str(k) for k, v in facility_mapping.items()},
        "APR MDC Code": {str(v): str(k) for k, v in illness_mapping.items()},
    }

    @reactive.Calc
    def process_input_data():
        """Collect the current form values into a one-row DataFrame."""
        input_data = pd.DataFrame({
            "Age Group": [input.age_group()],
            "Gender": [input.gender()],
            "Race": [input.race()],
            "Ethnicity": [input.ethnicity()],
            "Hospital County": [input.hospital_county()],
            # Value selected in the dropdown; resolved against the encoder's
            # categories (and value_to_key) in preprocess_data().
            "Permanent Facility Id": [input.facility()],
            "Zip Code - 3 digits": [input.zip_code()],
            "Type of Admission": [input.admission_type()],
            "Patient Disposition": [input.disposition()],
            "Length of Stay": [input.length_of_stay()],
            "APR MDC Code": [input.mdc_code()],
            "APR Medical Surgical Description": [input.medical_surgical()],
            "APR Severity of Illness Description": [input.severity()],
            "APR Risk of Mortality": [input.risk_mortality()],
            "Payment Typology 1": [input.payment_1()],
            "Payment Typology 2": [input.payment_2()],
            "Emergency Department Indicator": [input.emergency_dept()]
        })

        # Print the input data for debugging
        print("Input data:")
        print(input_data)

        return input_data

    @reactive.Calc
    def preprocess_data():
        """Turn the raw form row into the feature frame passed to the model.

        Returns:
            A one-row DataFrame holding the scaled ordinal features (without
            "Total Charges") followed by the SVD components of the one-hot block.

        Raises:
            ValueError: if Length of Stay is empty.
            Exception: any preprocessing error is logged and re-raised.
        """
        try:
            # Work on a copy: the frame returned by process_input_data() is cached
            # by the reactive graph and must not be mutated in place.
            input_df = process_input_data().copy()

            # An empty numeric input would otherwise flow through as NaN.
            if input_df["Length of Stay"].isna().any():
                raise ValueError("Length of Stay is required")

            # Create ordinal features dataframe
            # NOTE(review): column names/order must match what the scaler was fitted on — confirm.
            ordinal_df = pd.DataFrame()
            ordinal_df['Age Group'] = input_df['Age Group'].map(age_mapping)
            ordinal_df["Length of Stay"] = input_df["Length of Stay"]
            ordinal_df['Severity of Illness'] = input_df['APR Severity of Illness Description'].map(risk_severity_mapping)
            ordinal_df['Risk of Mortality'] = input_df['APR Risk of Mortality'].map(risk_severity_mapping)
            # Placeholder for the target column so the scaler receives every
            # column it expects; it is dropped again before prediction.
            ordinal_df["Total Charges"] = 0

            # Scale ordinal features
            scaled_ordinal = scaler.transform(ordinal_df)
            scaled_ordinal_df = pd.DataFrame(scaled_ordinal, columns=ordinal_df.columns)

            # Convert to string to match the encoder's expectations
            for col in cols_to_encode:
                input_df[col] = input_df[col].astype(str)

            # Resolve values the encoder does not know.
            for i, col in enumerate(cols_to_encode):
                known_categories = set(encoder.categories_[i])
                selected = input_df[col].iloc[0]
                if selected in known_categories:
                    continue

                # The dropdown may have supplied a display name: retry with its key.
                mapped_key = value_to_key.get(col, {}).get(selected)
                if mapped_key is not None and mapped_key in known_categories:
                    input_df[col] = mapped_key
                    continue

                print(f"Unknown category {selected} in column {col}. Using default value.")
                # Fall back to the first category the encoder knows for this column
                # (not necessarily the most common one).
                input_df[col] = encoder.categories_[i][0]

            # Create sparse matrix for Emergency Department Indicator
            emergency_indicator = 1 if input_df["Emergency Department Indicator"].iloc[0] == "Y" else 0
            sparse_emergency_indicator = csr_matrix([emergency_indicator]).T

            # One-hot encode categorical features
            sparse_encoded_features = encoder.transform(input_df[cols_to_encode])

            # Combine sparse matrices (indicator column first, then one-hot block)
            one_hot_sparse_df = hstack([sparse_emergency_indicator, sparse_encoded_features])
            one_hot_sparse_df = one_hot_sparse_df.tocsr()

            # Apply SVD transformation
            transformed_one_hot = svd.transform(one_hot_sparse_df)

            # Convert to DataFrame
            transformed_one_hot_df = pd.DataFrame(
                transformed_one_hot,
                columns=[f'SVD_Component_{i+1}' for i in range(transformed_one_hot.shape[1])]
            )

            # Combine all features
            final_df = pd.concat([scaled_ordinal_df.drop("Total Charges", axis=1), transformed_one_hot_df], axis=1)

            return final_df
        except Exception as e:
            print(f"Error in preprocessing: {e}")
            # Bare raise keeps the original traceback.
            raise

    @output
    @render.text
    @reactive.event(input.predict)
    def prediction_result():
        """Run the model when "Predict Cost" is clicked and return the display text.

        Returns:
            The formatted dollar prediction, or an error message if anything fails.
        """
        try:
            # Preprocess the data
            processed_data = preprocess_data()

            # hasattr(xgb_model, 'predict') is true for both model kinds, so pick the
            # call style with the same get_booster test used when the model was loaded.
            if hasattr(xgb_model, 'get_booster'):
                # scikit-learn style wrapper: predict() takes the feature frame, not a DMatrix.
                scaled_prediction = float(xgb_model.predict(processed_data)[0])
            else:
                # Native booster: predict() requires a DMatrix.
                dtest = xgb.DMatrix(processed_data)
                scaled_prediction = float(booster.predict(dtest)[0])

            # Unscale the prediction to get the actual dollar amount
            # The StandardScaler formula is: z = (x - mean) / std
            # So to reverse: x = z * std + mean
            unscaled_prediction = scaled_prediction * total_charges_scale + total_charges_mean

            # Print debug info
            print(f"Raw prediction from model: {scaled_prediction}")
            print(f"Unscaled prediction: {unscaled_prediction}")

            # Format prediction for display
            result = f"Predicted Cost: ${unscaled_prediction:,.2f}"
            print(f"Prediction successful: {result}")
            return result
        except Exception as e:
            # Show the failure in the UI instead of leaving the output blank.
            error_message = f"Error during prediction: {str(e)}"
            print(error_message)
            return error_message

Run App

In [13]:
# Fetch the value served at loca.lt/mytunnelpassword (presumably the password
# localtunnel asks visitors for — confirm). Clear this cell's output before sharing.
!curl -s https://loca.lt/mytunnelpassword

35.225.156.6

In [14]:
# Combine the UI definition and the server function into the Shiny application
# object; it is started further down in this cell.
app = App(app_ui, server)

from threading import Thread
import socket
import time
import subprocess

def find_available_port(start_port=8501, max_offset=100):
    """Return the first TCP port at or above ``start_port`` that can be bound.

    Each candidate is probed by binding a throwaway socket on all interfaces;
    the socket is closed again before the port number is returned.

    Args:
        start_port: First port to try.
        max_offset: How many ports above ``start_port`` to try as well
            (the default keeps the previous search width of 100).

    Returns:
        The first port in ``start_port .. start_port + max_offset`` that could be bound.

    Raises:
        RuntimeError: If no port in that range (capped at 65535) is available.
    """
    # Cap at the highest valid TCP port: bind() raises OverflowError, not
    # OSError, for anything above it.
    last_port = min(start_port + max_offset, 65535)
    for port in range(start_port, last_port + 1):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('', port))  # Bind to all interfaces
        except OSError:
            continue
        return port
    raise RuntimeError("Could not find an available port")

# Probe for a free port, starting at 8501, and report which one was chosen.
first_port_to_try = 8501
available_port = find_available_port(first_port_to_try)
print(f"Found available port: {available_port}")

def run_app_in_thread(app, port):
    """Start ``app`` listening on every network interface at ``port``."""
    listen_on_all_interfaces = "0.0.0.0"
    app.run(port=port, host=listen_on_all_interfaces)

# Create a thread for the app
app_thread = Thread(target=run_app_in_thread, args=(app, available_port))
app_thread.daemon = True
app_thread.start()

# Give the app a moment to start
time.sleep(3)

# Install localtunnel
!npm install -g localtunnel

# Now start localtunnel
print("Starting localtunnel...")
!lt --port {available_port}

INFO:     Started server process [3341]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8501 (Press CTRL+C to quit)


Found available port: 8501
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K
added 22 packages in 2s
[1G[0K⠸[1G[0K
[1G[0K⠸[1G[0K3 packages are looking for funding
[1G[0K⠸[1G[0K  run `npm fund` for details
[1G[0K⠸[1G[0K[1mnpm[22m [96mnotice[39m
[1mnpm[22m [96mnotice[39m New [31mmajor[39m version of npm available! [31m10.8.2[39m -> [34m11.2.0[39m
[1mnpm[22m [96mnotice[39m Changelog: [34mhttps://github.com/npm/cli/releases/tag/v11.2.0[39m
[1mnpm[22m [96mnotice[39m To update run: [4mnpm install -g npm@11.2.0[24m
[1mnpm[22m [96mnotice[39m
[1G[0K⠸[1G[0KStarting localtunnel...
your url is: https://loud-pears-deny.loca.lt
INFO:     35.224.32.43:0 - "GET /robots.txt HTTP/1.1" 404 Not Found
INFO:     75.102.217.226:0 - "GET / HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/requirejs-2.3.6/require.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/shiny

INFO:     ('75.102.217.226', 0) - "WebSocket /websocket/" [accepted]
INFO:     connection open


INFO:     75.102.217.226:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     75.102.217.226:0 - "GET /lib/requirejs-2.3.6/require.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/shiny-1.3.0/shiny.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/bootstrap-5.3.1/bootstrap.bundle.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/jquery-3.6.0/jquery-3.6.0.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/bslib-components-0.9.0.9000/components.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/ionrangeslider-2.3.1/js/ion.rangeSlider.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/selectize-0.12.6/js/selectize.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/strftime-0.9.2/strftime-min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/bootstrap-datepicker-1.9.0/js/bootstrap-datepicker.min.js HTTP/1.1" 200 OK
INFO:     75.102.217.226:0 - "GET /lib/selectize-0.12.6/accessibility/js/selectiz

INFO:     connection closed


^C
