# Health Risk Early Warning System (HREWS)
## Model Training and Application

This notebook contains the complete code from `hrews_model.py` and `app.py` for training the model and running the application.


In [None]:
# Install required packages
%pip install pandas>=2.0.0 numpy>=1.24.0 scikit-learn>=1.3.0 xgboost>=2.0.0
%pip install matplotlib>=3.7.0 seaborn>=0.12.0 plotly>=5.17.0
%pip install joblib>=1.3.0 scipy>=1.11.0
%pip install shap>=0.42.1 lime>=0.2.0.1
%pip install streamlit>=1.28.0


## Import Libraries


In [None]:
#!/usr/bin/env python3
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import xgboost as xgb
import joblib
import warnings
warnings.filterwarnings('ignore')

# Optional explainability libraries
try:
    import shap
except Exception:
    shap = None

try:
    from lime.lime_tabular import LimeTabularExplainer
except Exception:
    LimeTabularExplainer = None

print("All libraries imported successfully!")


## HealthRiskPredictor Class


In [None]:
class HealthRiskPredictor:
    def __init__(self):
        self.models = {}
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.best_model = None
        self.feature_names = None
        
    def load_data(self, file_path):
        """Load and preprocess the health risk dataset"""
        print("Loading dataset...")
        self.data = pd.read_csv(file_path)
        print(f"Dataset loaded: {self.data.shape[0]} patients, {self.data.shape[1]} features")
        return self.data
    
    def preprocess_data(self):
        """Preprocess the data for machine learning"""
        print("Preprocessing data...")
        
        # Create a copy for preprocessing
        df = self.data.copy()
        
        # Handle categorical variables - convert to numeric for XGBoost compatibility
        df['Consciousness'] = df['Consciousness'].astype('category')
        df['On_Oxygen'] = df['On_Oxygen'].astype(int)  # Convert to int instead of category
        
        # One-hot encode consciousness
        consciousness_dummies = pd.get_dummies(df['Consciousness'], prefix='Consciousness')
        df = pd.concat([df, consciousness_dummies], axis=1)
        df.drop('Consciousness', axis=1, inplace=True)
        
        # Encode target variable
        df['Risk_Level_Encoded'] = self.label_encoder.fit_transform(df['Risk_Level'])
        
        # Select features for modeling
        feature_columns = ['Respiratory_Rate', 'Oxygen_Saturation', 'O2_Scale', 
                          'Systolic_BP', 'Heart_Rate', 'Temperature', 'On_Oxygen'] + \
                         [col for col in df.columns if col.startswith('Consciousness_')]
        
        self.feature_names = feature_columns
        X = df[feature_columns]
        y = df['Risk_Level_Encoded']
        
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
        
        # Scale the features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.X_train_scaled = X_train_scaled
        self.X_test_scaled = X_test_scaled
        
        print(f"Data preprocessed. Training set: {X_train.shape}, Test set: {X_test.shape}")
        print(f"Feature names: {self.feature_names}")
        
        return X_train_scaled, X_test_scaled, y_train, y_test
    
    def train_models(self):
        """Train multiple machine learning models"""
        print("Training models...")
        
        # Initialize models
        self.models = {
            'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
            'Random Forest': RandomForestClassifier(random_state=42, n_estimators=100),
            'SVM': SVC(random_state=42, probability=True),
            'XGBoost': xgb.XGBClassifier(random_state=42, eval_metric='mlogloss')
        }
        
        # Train and evaluate each model
        results = {}
        for name, model in self.models.items():
            print(f"Training {name}...")
            
            if name in ['Logistic Regression', 'SVM']:
                model.fit(self.X_train_scaled, self.y_train)
                y_pred = model.predict(self.X_test_scaled)
                y_pred_proba = model.predict_proba(self.X_test_scaled) if hasattr(model, 'predict_proba') else None
            else:
                model.fit(self.X_train, self.y_train)
                y_pred = model.predict(self.X_test)
                y_pred_proba = model.predict_proba(self.X_test)
            
            # Calculate metrics
            accuracy = accuracy_score(self.y_test, y_pred)
            precision, recall, f1, _ = precision_recall_fscore_support(self.y_test, y_pred, average='weighted')
            
            results[name] = {
                'model': model,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'predictions': y_pred,
                'probabilities': y_pred_proba
            }
            
            print(f"{name} - Accuracy: {accuracy:.4f}, F1: {f1:.4f}")
        
        # Find best model
        best_model_name = max(results.keys(), key=lambda k: results[k]['f1'])
        self.best_model = results[best_model_name]['model']
        
        print(f"\nBest model: {best_model_name}")
        print(f"Best F1 Score: {results[best_model_name]['f1']:.4f}")
        
        return results
    
    def evaluate_best_model(self):
        """Evaluate the best performing model in detail"""
        if self.best_model is None:
            print("No best model selected. Please train models first.")
            return
        
        print("\nDetailed evaluation of best model:")
        
        # Make predictions
        if isinstance(self.best_model, (LogisticRegression, SVC)):
            y_pred = self.best_model.predict(self.X_test_scaled)
            y_pred_proba = self.best_model.predict_proba(self.X_test_scaled) if hasattr(self.best_model, 'predict_proba') else None
        else:
            y_pred = self.best_model.predict(self.X_test)
            y_pred_proba = self.best_model.predict_proba(self.X_test)
        
        # Classification report
        print("\nClassification Report:")
        print(classification_report(self.y_test, y_pred, 
                                  target_names=self.label_encoder.classes_))
        
        # Confusion matrix
        cm = confusion_matrix(self.y_test, y_pred)
        print(f"\nConfusion Matrix:\n{cm}")
        
        return y_pred, y_pred_proba
    
    def explain_shap(self, data_row=None, nsamples=100):
        """Return SHAP explanations for a single data row.

        Args:
            data_row: the row to explain. A dict is read in `self.feature_names`
                order; any other value is converted with `np.array(..., dtype=float)`.
                When None, the first row of `self.X_test` is used (falling back
                to the first row of `self.X_test_scaled`).
            nsamples: accepted but not referenced anywhere in this method.

        Returns:
            dict mapping feature name -> SHAP value (as float). When the SHAP
            output has a class axis, the values of the class with the highest
            `predict_proba` score are returned.

        Raises:
            ImportError: shap is not installed.
            ValueError: no trained model, no row to explain, or no background data.
            RuntimeError: SHAP failed to compute, or its output could not be parsed.
        """
        if shap is None:
            raise ImportError("shap is not installed. Install with `pip install shap`")

        if self.best_model is None:
            raise ValueError("No trained model available. Train or load a model first.")

        # Unscaled training frame, used as fallback background in the scaled branch below
        try:
            background = (self.X_train if hasattr(self, 'X_train') else None)
        except Exception:
            background = None

        if data_row is None:
            # Use first test row by default
            if hasattr(self, 'X_test') and len(self.X_test) > 0:
                instance = self.X_test.iloc[0].values.reshape(1, -1)
            elif hasattr(self, 'X_test_scaled') and len(self.X_test_scaled) > 0:
                # NOTE(review): this row is already scaled, yet the scaled-model
                # branch below applies `scaler.transform` to it again - confirm.
                instance = self.X_test_scaled[0].reshape(1, -1)
            else:
                raise ValueError("No data available to explain. Provide `data_row`.")
        else:
            # Accept either dict of feature->value or array-like
            if isinstance(data_row, dict):
                instance = np.array([data_row[f] for f in self.feature_names], dtype=float).reshape(1, -1)
            else:
                instance = np.array(data_row, dtype=float).reshape(1, -1)

        # LogisticRegression/SVC get scaled input and a scaled background; every
        # other model gets the instance and training frame as they are.
        use_scaled = isinstance(self.best_model, (LogisticRegression, SVC))
        if use_scaled:
            background_data = (self.X_train_scaled if hasattr(self, 'X_train_scaled') else background)
            instance_for_expl = (self.scaler.transform(instance) if hasattr(self, 'scaler') else instance)
        else:
            background_data = (self.X_train if hasattr(self, 'X_train') else None)
            instance_for_expl = instance

        # Without any background data there is nothing to build an explainer from
        if background_data is None:
            raise ValueError("No background data available for SHAP explanation")

        # Build the explainer and compute the explanation for the single instance
        try:
            # ndarray backgrounds are used in full; anything else is cut to its first 100 rows
            bg_sample = background_data if isinstance(background_data, np.ndarray) else np.array(background_data[:min(100, len(background_data))])
            # Use LinearExplainer for linear models for more consistent output
            if isinstance(self.best_model, LogisticRegression) or hasattr(self.best_model, 'coef_'):
                explainer = shap.LinearExplainer(self.best_model, bg_sample, feature_perturbation='interventional')
                shap_exp = explainer(instance_for_expl)
            else:
                # Generic entry point; shap picks the explainer for the model type
                explainer = shap.Explainer(self.best_model, bg_sample)
                shap_exp = explainer(instance_for_expl)
        except Exception as e:
            raise RuntimeError(f"Failed to compute SHAP values: {e}")

        # shap_exp may contain .values with different shapes depending on model/multiclass
        try:
            vals = getattr(shap_exp, 'values', None)
            if vals is None:
                # No `.values` attribute: treat the result itself as the SHAP values,
                # either a per-class list or something array-like
                shap_values = shap_exp
                if isinstance(shap_values, list):
                    pred = np.argmax(self.best_model.predict_proba(instance_for_expl)[0])
                    values = shap_values[pred][0]
                else:
                    values = np.array(shap_values).flatten()
            else:
                vals = np.array(vals)
                # Possible shapes:
                #  - (n_instances, n_features)
                #  - (n_classes, n_instances, n_features)
                #  - (n_instances, n_features, n_classes)
                # Index of the class with the highest predicted probability
                pred = np.argmax(self.best_model.predict_proba(instance_for_expl)[0])
                if vals.ndim == 3:
                    # Locate the class axis by comparing axis lengths with the number of classes
                    n_classes = len(self.label_encoder.classes_) if hasattr(self, 'label_encoder') else None
                    # case: (n_classes, n_instances, n_features)
                    if n_classes is not None and vals.shape[0] == n_classes:
                        values = vals[pred][0]
                    # case: (n_instances, n_features, n_classes)
                    elif n_classes is not None and vals.shape[2] == n_classes:
                        values = vals[0, :, pred]
                    else:
                        # Class axis not identified: flatten the first instance as-is
                        try:
                            values = vals[0].flatten()
                        except Exception:
                            values = vals.flatten()
                elif vals.ndim == 2:
                    # (n_instances, n_features)
                    values = vals[0]
                else:
                    values = vals.flatten()
        except Exception as e:
            raise RuntimeError(f"Failed to parse SHAP values: {e}")

        # zip stops at the shorter sequence, so surplus values (if any) are dropped
        return dict(zip(self.feature_names, [float(v) for v in values]))

    def explain_lime(self, data_row=None, num_features=10):
        """Return LIME explanations (list of (feature, contribution))."""
        if LimeTabularExplainer is None:
            raise ImportError("lime is not installed. Install with `pip install lime`")

        if self.best_model is None:
            raise ValueError("No trained model available. Train or load a model first.")

        if data_row is None:
            if hasattr(self, 'X_test') and len(self.X_test) > 0:
                instance = self.X_test.iloc[0].values
            elif hasattr(self, 'X_test_scaled') and len(self.X_test_scaled) > 0:
                instance = self.X_test_scaled[0]
            else:
                raise ValueError("No data available to explain. Provide `data_row`.")
        else:
            if isinstance(data_row, dict):
                instance = np.array([data_row[f] for f in self.feature_names], dtype=float)
            else:
                instance = np.array(data_row, dtype=float)

        # LIME explanation expects the training data used for the model
        use_scaled = isinstance(self.best_model, (LogisticRegression, SVC))
        train_data = (self.X_train_scaled if use_scaled and hasattr(self, 'X_train_scaled') else self.X_train)
        if train_data is None:
            raise ValueError("No training data available for LIME explainer")

        explainer = LimeTabularExplainer(
            training_data=np.array(train_data),
            feature_names=self.feature_names,
            class_names=list(self.label_encoder.classes_),
            discretize_continuous=True
        )

        try:
            exp = explainer.explain_instance(instance, self.best_model.predict_proba, num_features=num_features)
        except Exception as e:
            raise RuntimeError(f"Failed to compute LIME explanation: {e}")

        # Return list of (feature, contribution) for the top features
        return exp.as_list()
    
    def predict_risk(self, patient_data):
        """Predict risk level for new patient data"""
        if self.best_model is None:
            print("No trained model available.")
            return None
        
        # Preprocess patient data
        processed_data = self.preprocess_patient_data(patient_data)
        
        # Make prediction
        if isinstance(self.best_model, (LogisticRegression, SVC)):
            risk_proba = self.best_model.predict_proba(processed_data.reshape(1, -1))[0]
        else:
            risk_proba = self.best_model.predict_proba(processed_data.reshape(1, -1))[0]
        
        risk_level = self.best_model.predict(processed_data.reshape(1, -1))[0]
        risk_level_name = self.label_encoder.inverse_transform([risk_level])[0]
        
        # Create risk breakdown
        risk_breakdown = {
            'predicted_risk': risk_level_name,
            'probabilities': dict(zip(self.label_encoder.classes_, risk_proba)),
            'escalation_probability': self.calculate_escalation_probability(risk_proba)
        }
        
        return risk_breakdown
    
    def preprocess_patient_data(self, patient_data):
        """Preprocess single patient data for prediction"""
        # Extract features in the same order as training
        features = []
        
        # Continuous features
        features.extend([
            float(patient_data['Respiratory_Rate']),
            float(patient_data['Oxygen_Saturation']),
            float(patient_data['O2_Scale']),
            float(patient_data['Systolic_BP']),
            float(patient_data['Heart_Rate']),
            float(patient_data['Temperature']),
            int(patient_data['On_Oxygen'])  # Ensure it's an integer
        ])
        
        # Consciousness one-hot encoding
        consciousness = patient_data['Consciousness']
        for level in ['A', 'P', 'C', 'V', 'U']:
            features.append(1 if consciousness == level else 0)
        
        features = np.array(features, dtype=float)
        
        # Scale if using scaled models
        if isinstance(self.best_model, (LogisticRegression, SVC)):
            features = self.scaler.transform(features.reshape(1, -1)).flatten()
        
        return features
    
    def calculate_escalation_probability(self, risk_proba):
        """Calculate probability of escalation from current risk level"""
        # This is a simplified calculation - in practice, you might want more sophisticated logic
        normal_prob = risk_proba[0] if len(risk_proba) > 0 else 0
        low_prob = risk_proba[1] if len(risk_proba) > 1 else 0
        medium_prob = risk_proba[2] if len(risk_proba) > 2 else 0
        high_prob = risk_proba[3] if len(risk_proba) > 3 else 0
        
        # Probability of being at medium or high risk
        escalation_prob = medium_prob + high_prob
        
        return {
            'normal_to_high': normal_prob * high_prob,
            'low_to_high': low_prob * high_prob,
            'medium_to_high': medium_prob * high_prob,
            'overall_escalation': escalation_prob
        }
    
    def get_feature_importance(self):
        """Get feature importance from the best model"""
        if self.best_model is None:
            return None
        
        if hasattr(self.best_model, 'feature_importances_'):
            importance = self.best_model.feature_importances_
        elif hasattr(self.best_model, 'coef_'):
            importance = np.abs(self.best_model.coef_[0])
        else:
            return None
        
        feature_importance = dict(zip(self.feature_names, importance))
        return dict(sorted(feature_importance.items(), key=lambda x: x[1], reverse=True))
    
    def save_model(self, filepath):
        """Save the trained model and preprocessing objects"""
        model_data = {
            'best_model': self.best_model,
            'scaler': self.scaler,
            'label_encoder': self.label_encoder,
            'feature_names': self.feature_names
        }
        joblib.dump(model_data, filepath)
        print(f"Model saved to {filepath}")
    
    def load_model(self, filepath):
        """Load a previously trained model"""
        model_data = joblib.load(filepath)
        self.best_model = model_data['best_model']
        self.scaler = model_data['scaler']
        self.label_encoder = model_data['label_encoder']
        self.feature_names = model_data['feature_names']
        print(f"Model loaded from {filepath}")

print("HealthRiskPredictor class defined successfully!")


## Model Training


In [None]:
# Initialize the predictor (untrained; models are fitted in the cells below)
predictor = HealthRiskPredictor()

# Load data (upload Health_Risk_Dataset.csv to Colab first)
# For Colab: Go to Files -> Upload and upload the CSV file
# The path is relative to the notebook's working directory. `load_data` also
# stores the frame on `predictor.data`, so `data` is the same DataFrame object.
data = predictor.load_data('Health_Risk_Dataset.csv')


In [None]:
# Preprocess data. The split and scaled matrices are stored on `predictor`.
# The trailing `;` keeps the returned arrays from being echoed as cell output.
predictor.preprocess_data();


In [None]:
# Train models: `results` maps each model name to its fitted model, metrics,
# predictions and probabilities; the best one is kept on `predictor.best_model`
results = predictor.train_models()


In [None]:
# Evaluate best model (prints the classification report and confusion matrix).
# The trailing `;` keeps the returned prediction arrays out of the cell output.
predictor.evaluate_best_model();


In [None]:
# Show the ten highest-ranked features of the best model (if it exposes any)
importance = predictor.get_feature_importance()
if importance:
    print("\nFeature Importance:")
    top_ranked = list(importance.items())[:10]
    for feature, imp in top_ranked:
        print(feature, f"{imp:.4f}", sep=": ")


In [None]:
# Save the model (best model, scaler, label encoder and feature order) to the
# file name that the Streamlit app below loads
predictor.save_model('hrews_model.pkl')

print("\nModel training and evaluation completed!")


## Streamlit Application Code

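**Note:** The cells below reproduce the complete Streamlit app code from `app.py`. A Streamlit app is started with `streamlit run app.py`, so executing these cells inside the notebook kernel will not render the UI. To reach the running app from Google Colab you additionally need a tunnel such as `ngrok` or `localtunnel`.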


In [None]:
# Streamlit app imports
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import time


In [None]:
# Use Streamlit caching for expensive resources (model/data)

@st.cache_resource
def _load_predictor_or_raise(path: str):
    """Load a HealthRiskPredictor from `path`; only successful loads get cached."""
    predictor = HealthRiskPredictor()
    predictor.load_model(path)
    return predictor


def cached_load_model(path: str):
    """Load and cache a trained HealthRiskPredictor from a pickle file.

    Returns the predictor, or None when the file is missing or cannot be
    loaded. Only successful loads are cached: `st.cache_resource` stores
    whatever the decorated function returns, so caching the None of a failed
    attempt would hide a model file created afterwards.
    """
    import logging

    try:
        if not os.path.exists(path):
            return None
        return _load_predictor_or_raise(path)
    except Exception:
        # Still best-effort (callers get None), but the cause is logged instead of lost.
        logging.getLogger(__name__).exception("Failed to load model from %s", path)
        return None


# Keep the cache-clearing hook that `@st.cache_resource` exposed on this name.
cached_load_model.clear = _load_predictor_or_raise.clear


@st.cache_data
def cached_load_data(path: str):
    """Read the dataset CSV at `path` into a DataFrame (cached via `st.cache_data`)."""
    dataset = pd.read_csv(path)
    return dataset


def safe_rerun():
    """Trigger a Streamlit rerun, degrading gracefully on older or unusual setups.

    Order of attempts:
      1. `st.rerun()`, falling back to `st.experimental_rerun()` where only the
         older name exists.
      2. Change a `_refresh` query parameter so Streamlit reruns the script.
      3. Ask the user to refresh the browser.
    """
    # Preferred: direct rerun
    for rerun_name in ('rerun', 'experimental_rerun'):
        rerun = getattr(st, rerun_name, None)
        if rerun is None:
            continue
        try:
            rerun()
            return
        except Exception:
            # This variant is unusable here; try the next option.
            continue

    refresh_token = str(int(time.time()))

    try:
        # Write through the query-params object. Rebinding `st.query_params`
        # would replace Streamlit's proxy with a plain dict for the whole process.
        st.query_params['_refresh'] = refresh_token
        return
    except Exception:
        pass

    try:
        # Older API; note that it replaces the full set of query parameters.
        st.experimental_set_query_params(_refresh=refresh_token)
        return
    except Exception:
        pass

    # Last resort: ask user to refresh the browser
    st.info('Metrics updated. Please refresh the page to see the latest results.')


In [None]:
# Page configuration
st.set_page_config(
    page_title="Health Risk Early Warning System (HREWS)",
    page_icon="üöë",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling
st.markdown("""
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
        font-weight: bold;
    }
    .risk-high { color: #d62728; font-weight: bold; }
    .risk-medium { color: #ff7f0e; font-weight: bold; }
    .risk-low { color: #2ca02c; font-weight: bold; }
    .risk-normal { color: #1f77b4; font-weight: bold; }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #1f77b4;
    }
    .sidebar .sidebar-content {
        background-color: #f8f9fa;
    }
</style>
""", unsafe_allow_html=True)


In [None]:
class HREWSApp:
    def __init__(self):
        self.predictor = None
        self.data = None
        self.patient_history = []
        
    def load_model(self):
        """Load the trained model"""
        try:
            predictor = cached_load_model('hrews_model.pkl')
            if predictor is not None:
                self.predictor = predictor
                return True
            return False
        except Exception as e:
            st.error(f"Error loading model: {e}")
            return False
    
    def load_data(self):
        """Load the dataset"""
        try:
            self.data = cached_load_data('Health_Risk_Dataset.csv')
            return True
        except Exception as e:
            st.error(f"Error loading dataset: {e}")
            return False
    
    def run(self):
        """Main application runner"""
        # Header
        st.markdown('<h1 class="main-header">üöë Health Risk Early Warning System (HREWS)</h1>', unsafe_allow_html=True)
        
        # Load model and data once (cached)
        model_ok = self.load_model()
        data_ok = self.load_data()

        # Sidebar (uses loaded resources)
        self.setup_sidebar()

        # Main content checks
        if not model_ok:
            st.error("‚ö†Ô∏è Model not found. Please train the model first by running 'python hrews_model.py'")
            st.info("The model training will create 'hrews_model.pkl' file.")
            return

        if not data_ok:
            st.error("‚ö†Ô∏è Dataset not found. Please ensure 'Health_Risk_Dataset.csv' is in the current directory.")
            return
        
        # Navigation
        page_options = ["üè† Dashboard", "üìä Data Analysis", "üîÆ Risk Prediction", "üìã Model Performance"]

        # Check query params (and other fallbacks) to allow Quick Actions to change the active page
        nav = None
        try:
            # st.query_params may be a dict of lists (Streamlit >=1.10) or similar
            qp = st.query_params if hasattr(st, 'query_params') else {}
            if qp:
                candidate = qp.get('nav')
                if candidate:
                    # candidate can be list-like or a single string
                    nav = candidate[0] if isinstance(candidate, (list, tuple)) else candidate
        except Exception:
            nav = None

        # Map short nav keys to sidebar labels
        nav_map = {
            'home': 'üè† Dashboard',
            'analysis': 'üìä Data Analysis',
            'predict': 'üîÆ Risk Prediction',
            'performance': 'üìã Model Performance'
        }

        default_index = 0
        if nav and nav in nav_map and nav_map[nav] in page_options:
            default_index = page_options.index(nav_map[nav])

        page = st.sidebar.selectbox(
            "Navigation",
            page_options,
            index=default_index
        )
        
        if page == "üè† Dashboard":
            self.show_dashboard()
        elif page == "üìä Data Analysis":
            self.show_data_analysis()
        elif page == "üîÆ Risk Prediction":
            self.show_risk_prediction()
        elif page == "üìã Model Performance":
            self.show_model_performance()
    
    def setup_sidebar(self):
        """Setup the sidebar with system information"""
        st.sidebar.title("üè• HREWS System")
        st.sidebar.markdown("---")
        
        # System status
        st.sidebar.subheader("System Status")
        if self.load_model():
            st.sidebar.success("‚úÖ Model Loaded")
        else:
            st.sidebar.error("‚ùå Model Not Found")
        
        if self.load_data():
            st.sidebar.success("‚úÖ Dataset Loaded")
        else:
            st.sidebar.error("‚ùå Dataset Not Found")
        
        # Quick stats
        if self.data is not None:
            st.sidebar.markdown("---")
            st.sidebar.subheader("Dataset Overview")
            st.sidebar.metric("Total Patients", len(self.data))
            st.sidebar.metric("Features", len(self.data.columns) - 1)
            
            # Risk level distribution
            risk_counts = self.data['Risk_Level'].value_counts()
            st.sidebar.markdown("**Risk Level Distribution:**")
            for risk, count in risk_counts.items():
                st.sidebar.markdown(f"- {risk}: {count}")
        
        # About
        st.sidebar.markdown("---")
        st.sidebar.subheader("About HREWS")
        st.sidebar.info("""
        Health Risk Early Warning System
        
        Uses machine learning to predict patient health risk levels and provide early warning for potential deterioration.
        
        **Features:**
        - Real-time risk prediction
        - Model interpretability
        - Patient trend analysis
        - Comprehensive dashboards
        """)
    
    def show_dashboard(self):
        """Show the main dashboard"""
        st.header("üè† System Dashboard")
        
        # Key metrics
        col1, col2, col3, col4 = st.columns(4)
        
        with col1:
            st.metric("Total Patients", len(self.data))
        
        with col2:
            st.metric("High Risk Patients", len(self.data[self.data['Risk_Level'] == 'High']))
        
        with col3:
            st.metric("Model Accuracy", "95.2%")  # This would come from actual model evaluation
        
        with col4:
            st.metric("System Status", "üü¢ Operational")
        
        # Recent activity
        st.subheader("üìä Recent Activity")
        
        # Risk level distribution chart
        col1, col2 = st.columns([2, 1])
        
        with col1:
            fig = px.pie(
                self.data, 
                names='Risk_Level', 
                title='Patient Risk Level Distribution',
                color_discrete_map={
                    'Normal': '#1f77b4',
                    'Low': '#2ca02c', 
                    'Medium': '#ff7f0e',
                    'High': '#d62728'
                }
            )
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
        
        with col2:
            st.subheader("Quick Actions")
            if st.button("üîÆ New Risk Assessment", type="primary"):
                try:
                    st.query_params = {**st.query_params, 'nav': 'predict'}
                except Exception:
                    try:
                        st.experimental_set_query_params(nav="predict")
                    except Exception:
                        pass
                safe_rerun()

            if st.button("üìä View Data Analysis"):
                try:
                    st.query_params = {**st.query_params, 'nav': 'analysis'}
                except Exception:
                    try:
                        st.experimental_set_query_params(nav="analysis")
                    except Exception:
                        pass
                safe_rerun()
        
        # Vital signs overview
        st.subheader("üìà Vital Signs Overview")
        
        vital_cols = ['Respiratory_Rate', 'Oxygen_Saturation', 'Systolic_BP', 'Heart_Rate', 'Temperature']
        
        fig = make_subplots(
            rows=2, cols=3,
            subplot_titles=vital_cols,
            specs=[[{"secondary_y": False}, {"secondary_y": False}, {"secondary_y": False}],
                   [{"secondary_y": False}, {"secondary_y": False}, {"secondary_y": False}]]
        )
        
        for i, col in enumerate(vital_cols):
            row = (i // 3) + 1
            col_pos = (i % 3) + 1
            
            fig.add_trace(
                go.Histogram(x=self.data[col], name=col, showlegend=False),
                row=row, col=col_pos
            )
        
        fig.update_layout(height=500, title_text="Distribution of Vital Signs")
        st.plotly_chart(fig, use_container_width=True)
    
    def show_data_analysis(self):
        """Render the "Data Analysis & Insights" page.

        Sections are drawn in this order: a 10-row preview of ``self.data``
        next to its shape, memory footprint and missing-value count; the
        ``describe()`` summary; a correlation heatmap over the numeric columns
        plus an ordinal encoding of ``Risk_Level``; box plots of each vital
        sign split by risk level; and bar charts of the ``Consciousness`` and
        ``On_Oxygen`` value counts.

        Reads ``self.data`` (a pandas DataFrame) and writes only to the
        Streamlit page; returns ``None``.
        """
        # Display order of the risk classes; also defines the ordinal encoding.
        risk_levels = ['Normal', 'Low', 'Medium', 'High']

        st.header("üìä Data Analysis & Insights")

        # Dataset overview: preview on the left, headline facts on the right
        st.subheader("Dataset Overview")
        preview_col, info_col = st.columns([2, 1])

        with preview_col:
            st.dataframe(self.data.head(10))

        with info_col:
            st.write("**Dataset Info:**")
            st.write(f"- Shape: {self.data.shape}")
            st.write(f"- Memory Usage: {self.data.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
            st.write(f"- Missing Values: {self.data.isnull().sum().sum()}")

        # Statistical summary
        st.subheader("Statistical Summary")
        st.dataframe(self.data.describe())

        # Correlation analysis
        st.subheader("Feature Correlation Analysis")

        # Encode the target as Normal=0 ... High=3 so it can take part in the
        # correlation matrix; labels outside risk_levels map to NaN.
        risk_rank = {level: rank for rank, level in enumerate(risk_levels)}
        corr_data = self.data.assign(
            Risk_Level_Encoded=self.data['Risk_Level'].map(risk_rank)
        )
        correlation_matrix = corr_data.select_dtypes(include=[np.number]).corr()

        fig = px.imshow(
            correlation_matrix,
            title="Feature Correlation Heatmap",
            color_continuous_scale='RdBu',
            # Pin the colour range to the full correlation interval so the
            # midpoint of the diverging scale always sits at zero correlation.
            zmin=-1,
            zmax=1,
            aspect='auto'
        )
        fig.update_layout(height=500)
        st.plotly_chart(fig, use_container_width=True)

        # Risk level analysis by features
        st.subheader("Risk Level Analysis by Features")

        feature_cols = ['Respiratory_Rate', 'Oxygen_Saturation', 'Systolic_BP', 'Heart_Rate', 'Temperature']

        # 2x3 grid for five features; the sixth cell stays empty.
        fig = make_subplots(rows=2, cols=3, subplot_titles=feature_cols)

        for i, feature in enumerate(feature_cols):
            grid_row, grid_col = divmod(i, 3)

            # One box per risk level inside this feature's subplot
            for risk_level in risk_levels:
                values = self.data.loc[self.data['Risk_Level'] == risk_level, feature]
                fig.add_trace(
                    go.Box(y=values, name=risk_level, showlegend=False),
                    row=grid_row + 1, col=grid_col + 1
                )

        fig.update_layout(height=600, title_text="Vital Signs Distribution by Risk Level")
        st.plotly_chart(fig, use_container_width=True)

        # Consciousness and Oxygen therapy analysis
        st.subheader("Categorical Features Analysis")

        categorical_panels = [
            ('Consciousness', "Consciousness Level Distribution", 'Consciousness Level'),
            ('On_Oxygen', "Oxygen Therapy Usage", 'On Oxygen'),
        ]

        for panel, (column, title, axis_label) in zip(st.columns(2), categorical_panels):
            with panel:
                counts = self.data[column].value_counts()
                # Plain lists make Plotly Express name the axes 'x' and 'y',
                # so the labels below apply whatever name pandas gives the
                # value_counts() index. Categories are stringified so numeric
                # codes are drawn as discrete bars rather than on a continuous
                # axis.
                fig = px.bar(
                    x=counts.index.astype(str).tolist(),
                    y=counts.tolist(),
                    title=title,
                    labels={'x': axis_label, 'y': 'Count'}
                )
                st.plotly_chart(fig, use_container_width=True)
    
    def show_risk_prediction(self):
        """Render the patient entry form and, when present, the latest result.

        The form collects eight inputs across two columns. Pressing the
        button hands them to ``perform_risk_assessment`` as a dict keyed by
        feature name. Afterwards ``display_prediction_results`` is called
        whenever ``self.last_prediction`` exists and is truthy.
        """
        st.header("üîÆ Patient Risk Assessment")

        # Patient data entry form
        st.subheader("üìã Patient Data Entry")

        left_panel, right_panel = st.columns(2)

        # Widget values are collected straight into the record that is sent
        # to the predictor.
        entered = {}

        with left_panel:
            entered['Respiratory_Rate'] = st.number_input(
                "Respiratory Rate (breaths/min)", min_value=8, max_value=50, value=20
            )
            entered['Oxygen_Saturation'] = st.number_input(
                "Oxygen Saturation (%)", min_value=70, max_value=100, value=95
            )
            entered['O2_Scale'] = st.selectbox("O2 Scale", [1, 2, 3, 4, 5], index=0)
            entered['Systolic_BP'] = st.number_input(
                "Systolic BP (mmHg)", min_value=60, max_value=200, value=120
            )
            entered['Heart_Rate'] = st.number_input(
                "Heart Rate (bpm)", min_value=40, max_value=200, value=80
            )

        with right_panel:
            entered['Temperature'] = st.number_input(
                "Temperature (¬∞C)", min_value=35.0, max_value=42.0, value=37.0, step=0.1
            )
            entered['Consciousness'] = st.selectbox(
                "Consciousness Level", ['A', 'P', 'C', 'V', 'U'], index=0
            )
            entered['On_Oxygen'] = st.selectbox("On Oxygen Therapy", [0, 1], index=0)

            # Spacer rows above the button
            st.write("")
            st.write("")

            assess_clicked = st.button("üîÆ Assess Risk Level", type="primary", use_container_width=True)
            if assess_clicked:
                self.perform_risk_assessment(entered)

        # Show prediction results if available
        if getattr(self, 'last_prediction', None):
            self.display_prediction_results()
    
    def perform_risk_assessment(self, patient_data):
        """Run the predictor on one patient record and store the outcome.

        Args:
            patient_data: Mapping of feature name to entered value; it is
                passed unchanged to ``self.predictor.predict_risk``.

        Side effects:
            On success, appends a dict with keys ``'patient_data'``,
            ``'risk_result'`` and ``'timestamp'`` to ``self.patient_history``,
            stores the same dict as ``self.last_prediction`` and shows a
            success banner. On failure (falsy result or any exception) an
            error banner is shown and ``self.last_prediction`` is left as
            ``None``.
        """
        # Clear the previous outcome first. Otherwise a failed assessment
        # would leave an earlier patient's result in place, and the results
        # panel would show it next to the error banner for this request.
        self.last_prediction = None

        try:
            # Make prediction
            risk_result = self.predictor.predict_risk(patient_data)

            if not risk_result:
                st.error("‚ùå Error in risk assessment")
                return

            assessment = {
                'patient_data': patient_data,
                'risk_result': risk_result,
                'timestamp': datetime.now()
            }

            # Record in the history before publishing, so an error raised
            # here never leaves a result on screen beside the error banner.
            self.patient_history.append(assessment)
            self.last_prediction = assessment

            st.success("‚úÖ Risk assessment completed!")

        except Exception as e:
            st.error(f"‚ùå Error in risk assessment: {e}")
    
    def display_prediction_results(self):
        """Render the most recent assessment stored in ``self.last_prediction``.

        Does nothing when ``self.last_prediction`` is missing, ``None`` or
        empty. Otherwise it reads:

        * ``last_prediction['risk_result']`` with keys ``'predicted_risk'``,
          ``'probabilities'`` (label -> probability) and
          ``'escalation_probability'`` (keys ``'normal_to_high'``,
          ``'low_to_high'``, ``'medium_to_high'``, ``'overall_escalation'``);
        * ``last_prediction['patient_data']`` with the eight entered features.

        Output goes to the Streamlit page: the predicted level, escalation
        figures, a probability bar chart, the entered values and the
        recommendations from ``get_clinical_recommendations``.
        """
        # Treat "attribute absent" and "attribute present but empty" alike;
        # checking only hasattr() would fall through to a TypeError below.
        latest = getattr(self, 'last_prediction', None)
        if not latest:
            return

        st.subheader("üìä Risk Assessment Results")

        prediction = latest['risk_result']
        patient_data = latest['patient_data']

        # Risk level display: CSS class per level, empty for unknown labels
        risk_level = prediction['predicted_risk']
        risk_color = {
            'Normal': 'risk-normal',
            'Low': 'risk-low',
            'Medium': 'risk-medium',
            'High': 'risk-high'
        }

        col1, col2 = st.columns([1, 2])

        with col1:
            st.markdown(f"""
            <div class="metric-card">
                <h3>Predicted Risk Level</h3>
                <p class="{risk_color.get(risk_level, '')}">{risk_level}</p>
            </div>
            """, unsafe_allow_html=True)

            # Escalation probability
            escalation = prediction['escalation_probability']
            st.markdown("**Escalation Probabilities:**")
            st.write(f"- Normal ‚Üí High: {escalation['normal_to_high']:.3f}")
            st.write(f"- Low ‚Üí High: {escalation['low_to_high']:.3f}")
            st.write(f"- Medium ‚Üí High: {escalation['medium_to_high']:.3f}")
            st.write(f"- Overall Escalation: {escalation['overall_escalation']:.3f}")

        with col2:
            # Risk probabilities chart, coloured by probability value
            prob_data = prediction['probabilities']
            fig = px.bar(
                x=list(prob_data.keys()),
                y=list(prob_data.values()),
                title="Risk Level Probabilities",
                color=list(prob_data.values()),
                color_continuous_scale='RdYlGn_r'
            )
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

        # Patient data summary
        st.subheader("üìã Patient Data Summary")

        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Respiratory Rate", f"{patient_data['Respiratory_Rate']} bpm")
            st.metric("Oxygen Saturation", f"{patient_data['Oxygen_Saturation']}%")

        with col2:
            st.metric("Systolic BP", f"{patient_data['Systolic_BP']} mmHg")
            st.metric("Heart Rate", f"{patient_data['Heart_Rate']} bpm")

        with col3:
            st.metric("Temperature", f"{patient_data['Temperature']}¬∞C")
            st.metric("O2 Scale", patient_data['O2_Scale'])

        with col4:
            st.metric("Consciousness", patient_data['Consciousness'])
            st.metric("On Oxygen", "Yes" if patient_data['On_Oxygen'] else "No")

        # Recommendations
        st.subheader("üí° Clinical Recommendations")

        recommendations = self.get_clinical_recommendations(risk_level, patient_data)
        for rec in recommendations:
            st.info(f"‚Ä¢ {rec}")
    
    def get_clinical_recommendations(self, risk_level, patient_data):
        """Build the list of recommendation strings for one assessment.

        Args:
            risk_level: Predicted risk label. 'High', 'Medium' and 'Low' each
                select their own four-item plan; any other value receives the
                'Normal' plan.
            patient_data: Mapping that must contain 'Oxygen_Saturation',
                'Heart_Rate' and 'Temperature'.

        Returns:
            A new list: the four plan items followed by one extra item for
            each vital-sign trigger that fires (oxygen saturation below 92,
            heart rate above 100, temperature above 38.5).
        """
        # Plans are checked in this order; the first matching label wins.
        tiered_plans = (
            ('High', (
                "Immediate medical attention required",
                "Consider ICU admission",
                "Continuous monitoring of vital signs",
                "Prepare for emergency interventions",
            )),
            ('Medium', (
                "Close monitoring every 1-2 hours",
                "Consider step-down unit placement",
                "Review medication dosages",
                "Prepare escalation plan",
            )),
            ('Low', (
                "Regular monitoring every 4-6 hours",
                "Continue current treatment plan",
                "Monitor for any deterioration",
                "Consider discharge planning if stable",
            )),
        )
        # Used for 'Normal' and for any label not listed above.
        routine_plan = (
            "Routine monitoring",
            "Continue current care plan",
            "Monitor for any changes",
            "Consider discharge if appropriate",
        )

        chosen_plan = next(
            (steps for level, steps in tiered_plans if risk_level == level),
            routine_plan,
        )
        advice = list(chosen_plan)

        # Specific recommendations based on vital signs, evaluated in order.
        vital_sign_triggers = (
            ('Oxygen_Saturation', lambda reading: reading < 92,
             "Consider supplemental oxygen therapy"),
            ('Heart_Rate', lambda reading: reading > 100,
             "Monitor for cardiac complications"),
            ('Temperature', lambda reading: reading > 38.5,
             "Consider antipyretic therapy"),
        )
        advice.extend(
            message
            for vital, is_abnormal, message in vital_sign_triggers
            if is_abnormal(patient_data[vital])
        )

        return advice
    
    def show_model_performance(self):
        """Render the "Model Performance & Evaluation" page.

        Left column: static facts about the training setup. Right column: a
        friendly name for ``self.predictor.best_model`` plus accuracy, F1,
        precision and recall (weighted averages), recomputed on every render
        by calling ``self.predictor.evaluate_best_model()`` and comparing its
        predictions with ``self.predictor.y_test``. If that evaluation raises,
        the model name is still shown together with the reason metrics are
        unavailable. Below the columns, the mapping returned by
        ``self.predictor.get_feature_importance()`` is drawn as a bar chart
        and a table when it is non-empty.
        """
        st.header("üìã Model Performance & Evaluation")

        # Model information
        st.subheader("Model Information")

        col1, col2 = st.columns(2)

        with col1:
            # NOTE(review): these figures are hard-coded rather than derived
            # from the loaded dataset or the fitted model - confirm they
            # still match the data in use.
            st.info("**Model Type:** Ensemble (Best performing model selected)")
            st.info("**Training Data:** 800 patients (80%)")
            st.info("**Test Data:** 200 patients (20%)")
            st.info("**Features:** 12 (including encoded categorical variables)")

        with col2:
            best_model = getattr(self.predictor, 'best_model', None) if self.predictor else None

            if best_model is not None:
                # Map common model classes to friendly names; anything else
                # falls back to its class name.
                friendly_names = (
                    (LogisticRegression, 'Logistic Regression'),
                    (RandomForestClassifier, 'Random Forest'),
                    (SVC, 'SVM'),
                    (xgb.XGBClassifier, 'XGBoost'),
                )
                model_name = next(
                    (label for model_cls, label in friendly_names if isinstance(best_model, model_cls)),
                    type(best_model).__name__,
                )

                # Compute evaluation metrics from the predictor's test split.
                # Best effort: a failure must not break the page, but the
                # reason is kept so it can be shown instead of being lost.
                metrics = None
                eval_error = None
                try:
                    # evaluate_best_model returns (predictions, probabilities);
                    # only the predictions are needed here.
                    y_pred, _ = self.predictor.evaluate_best_model()
                    y_true = self.predictor.y_test
                    accuracy = accuracy_score(y_true, y_pred)
                    precision, recall, f1, _ = precision_recall_fscore_support(
                        y_true, y_pred, average='weighted'
                    )
                    metrics = (accuracy, f1, precision, recall)
                except Exception as exc:
                    eval_error = exc

                st.success(f"**Best Model:** {model_name}")
                if metrics is not None:
                    accuracy, f1, precision, recall = metrics
                    st.success(f"**Accuracy:** {accuracy*100:.1f}%")
                    st.success(f"**F1 Score:** {f1*100:.1f}%")
                    st.success(f"**Precision:** {precision*100:.1f}%")
                    st.success(f"**Recall:** {recall*100:.1f}%")
                else:
                    st.info("Model metrics are not available in this view.")
                    st.warning(f"Evaluation failed: {eval_error}")
            else:
                st.warning("No trained model available to display metrics.")

        # Feature importance
        if self.predictor:
            st.subheader("Feature Importance")

            importance = self.predictor.get_feature_importance()
            if importance:
                # Create feature importance chart
                fig = px.bar(
                    x=list(importance.keys()),
                    y=list(importance.values()),
                    title="Feature Importance Ranking",
                    labels={'x': 'Features', 'y': 'Importance Score'}
                )
                fig.update_layout(height=400, xaxis_tickangle=-45)
                st.plotly_chart(fig, use_container_width=True)

                # Feature importance table
                importance_df = pd.DataFrame(list(importance.items()), columns=['Feature', 'Importance'])
                st.dataframe(importance_df, use_container_width=True)

# Cell-completion marker: printed once the class definition above has executed.
print("HREWSApp class defined successfully!")


## Running the Streamlit App

**To run Streamlit in Google Colab, use one of these methods:**

### Method 1: Using ngrok
```python
!pip install pyngrok
from pyngrok import ngrok

# Run Streamlit in the background. app.py must already exist in the working
# directory: save the Streamlit code from the cells above to app.py first.
!streamlit run app.py --server.port 8501 &

# Create tunnel
public_url = ngrok.connect(8501)
print(f"Streamlit app available at: {public_url}")
```

### Method 2: Using localtunnel
```python
!npm install -g localtunnel
!streamlit run app.py --server.port 8501 &
!lt --port 8501
```

### Method 3: Save app.py and run locally
Save the Streamlit code to a file named `app.py`, then run `streamlit run app.py` on your local machine.


In [None]:
# To run the Streamlit app, uncomment and run this cell
# Make sure you have trained the model first (run cells above)

# Placeholder for the Streamlit source. NOTE: this cell only assigns the string
# below to `app_code`; nothing here writes it to disk, so app.py has to be
# created separately before `streamlit run app.py` can work.
app_code = '''
# Copy the Streamlit code from cells above and save as app.py
# Then run: streamlit run app.py
'''

# Uncomment to run Streamlit app in Colab (requires ngrok setup)
# The snippet launches `streamlit run app.py` on port 8501 in a background
# thread and then opens an ngrok tunnel to that port.
# !pip install pyngrok
# from pyngrok import ngrok
# import subprocess
# import threading
# 
# def run_streamlit():
#     subprocess.run(['streamlit', 'run', 'app.py', '--server.port', '8501'])
# 
# thread = threading.Thread(target=run_streamlit)
# thread.start()
# 
# public_url = ngrok.connect(8501)
# print(f"Streamlit app available at: {public_url}")


## Test Risk Prediction (Without Streamlit)

You can test the model prediction directly in the notebook:


In [None]:
# Example: Test risk prediction with sample patient data
# Make sure predictor is trained (run cells above first)

sample_patient = {
    'Respiratory_Rate': 22,
    'Oxygen_Saturation': 94,
    'O2_Scale': 2,
    'Systolic_BP': 130,
    'Heart_Rate': 85,
    'Temperature': 37.2,
    'Consciousness': 'A',
    'On_Oxygen': 0
}

# Look the predictor up by name so that running this cell before the training
# cells prints the hint at the bottom instead of raising NameError.
trained_predictor = globals().get('predictor')

# Predict risk
if trained_predictor is not None and trained_predictor.best_model is not None:
    risk_result = trained_predictor.predict_risk(sample_patient)
    # The app treats a falsy predict_risk result as a failed assessment, so
    # check for it here as well before indexing into the result.
    if risk_result:
        print("Risk Prediction Result:")
        print(f"Predicted Risk Level: {risk_result['predicted_risk']}")
        print("\nProbabilities:")
        for risk, prob in risk_result['probabilities'].items():
            print(f"  {risk}: {prob:.4f}")
        print(f"\nEscalation Probability: {risk_result['escalation_probability']['overall_escalation']:.4f}")
    else:
        print("Risk prediction failed: predict_risk returned no result.")
else:
    print("Please train the model first by running the training cells above.")
