In [1]:
"""
Patient Risk Prediction using SHAP Values

This script calculates the risk score for a patient based on their medical data
and provides insights into the contributing factors using SHAP values.

Usage:

1. Import necessary libraries:
   - numpy
   - pandas
   - joblib
   - shap

2. Load the trained prediction model using joblib:
   ```python
   model = joblib.load("path_to_your_model.joblib")
   ```



### Additional Notes

- Ensure that you have installed the required libraries (`numpy`, `pandas`, `joblib`, and `shap`) in your Python environment.

"""

'\nPatient Risk Prediction using SHAP Values\n\nThis script calculates the risk score for a patient based on their medical data\nand provides insights into the contributing factors using SHAP values.\n\nUsage:\n\n1. Import necessary libraries:\n   - numpy\n   - pandas\n   - joblib\n   - shap\n\n2. Load the trained prediction model using joblib:\n   ```python\n   model = joblib.load("path_to_your_model.joblib")\n\n\n\n### Additional Notes\n\n- Ensure that you have installed the required libraries (`numpy`, `pandas`, `joblib`, and `shap`) in your Python environment.\n\n'

In [2]:
#####################################################################################
#### Edit the following dictionary to input the patient's data for risk prediction###
#####################################################################################
# NOTE: prepare_data() recomputes 'urg_data_hora_admissao_mes' (from the admission
# timestamp) and 'DIAS PRE OP_label' (from 'DIAS PRE OP'), so the values entered for
# those two keys below are overwritten before the model sees them.
patient1 = {
    # Admission timestamp; prepare_data() parses it with format '%Y-%m-%d %H:%M:%S'.
    'urg data hora admissao': '2023-05-01 11:30:00',
    'IDADE': 105,
    # Cast to int by prepare_data() and used to derive 'DIAS PRE OP_label'.
    'DIAS PRE OP': 3,
    # Overwritten by prepare_data() with the month of the admission timestamp.
    'urg_data_hora_admissao_mes': 4,
    'COD GDH': 308,
    'ORDEM PBASE DESC': 1,
    # Must be one of the keys of anestesia_mapping in prepare_data().
    'ANESTESIA': 'GERAL - BALANCEADA',
    # Overwritten by prepare_data() with the bin index of 'DIAS PRE OP'.
    'DIAS PRE OP_label': 4,
    'COD ESPEC DEST': 33031,
    # prepare_data() maps 'MALE' -> 2 and 'FEMALE' -> 1.
    'SEXO': 'FEMALE',
    # Must be one of the keys of tipo_anestesia_mapping in prepare_data().
    'TIPO ANESTESIA': 'GERAL',
}

In [3]:
import numpy as np
import pandas as pd
from datetime import datetime
import joblib  # Import joblib for saving/loading the model
import shap

def prepare_data(df):
    """Encode raw patient records into the numeric features the model expects.

    The input frame is left untouched: all work happens on a copy, so the
    function can be called repeatedly on the same raw frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw records with at least the columns 'urg data hora admissao'
        (string formatted as '%Y-%m-%d %H:%M:%S'), 'DIAS PRE OP', 'SEXO',
        'ANESTESIA', 'TIPO ANESTESIA', 'COD GDH', 'ORDEM PBASE DESC' and
        'COD ESPEC DEST'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows where:
        * 'urg data hora admissao' is an integer built from the digits
          MMDDhhmmss of the admission timestamp (the year is not encoded);
        * 'urg_data_hora_admissao' holds the parsed timestamp and
          'urg_data_hora_admissao_int' the same integer encoding;
        * 'SEXO', 'ANESTESIA' and 'TIPO ANESTESIA' are replaced by their
          numeric codes (values missing from the lookup tables become NaN);
        * 'DIAS PRE OP_label' is the bin index (1-4) of 'DIAS PRE OP';
        * 'urg_data_hora_admissao_mes' is the admission month;
        * the code columns are cast to int.

    Raises
    ------
    ValueError
        If the timestamp does not match the expected format, or if
        'DIAS PRE OP' falls outside [0, 1000) so that no bin applies and the
        integer cast of the label fails.
    """
    # Work on a copy so the caller's raw frame keeps its original strings.
    df = df.copy()

    # Parse the admission timestamp.
    admitted_at = pd.to_datetime(df['urg data hora admissao'], format='%Y-%m-%d %H:%M:%S')
    df['urg_data_hora_admissao'] = admitted_at

    # Integer encoding MMDDhhmmss; arithmetic equivalent of formatting each
    # component with two digits, concatenating, and parsing as int.
    parts = admitted_at.dt
    df['urg_data_hora_admissao_int'] = (
        parts.month.astype('int64') * 10**8
        + parts.day.astype('int64') * 10**6
        + parts.hour.astype('int64') * 10**4
        + parts.minute.astype('int64') * 10**2
        + parts.second.astype('int64')
    )

    # The model feature keeps the original column name but holds the encoding.
    df['urg data hora admissao'] = df['urg_data_hora_admissao_int']

    # Pre-operative days as integer.
    df["DIAS PRE OP"] = df["DIAS PRE OP"].astype(int)

    # Sex code.
    df['SEXO'] = df['SEXO'].map({'MALE': 2, 'FEMALE': 1})

    # Anesthesia code.
    anestesia_mapping = {
        'GERAL - BALANCEADA': 1,
        'SUB ARACNOIDEA': 2,
        'GERAL': 3,
        'LOCO-REGIONAL': 4,
        'COMBINADA': 5,
        'PLEXO BRAQUIAL': 6,
        'SEDACAO': 7,
        'LOCAL': 8,
        'SEQUENCIAL': 9,
        'GERAL - INTRAVENOSA': 10,
        'BLOQUEIO NERVO PERIF': 11,
        'GERAL PLEXO BRAQUIAL': 12,
        'LOCO-REG. C/SEDACAO': 13,
        'SED/NL-ANALGESIA': 14,
        'EPIDURAL': 15,
        'GERAL + OUTRA A ESPE': 16
    }
    df['ANESTESIA'] = df['ANESTESIA'].map(anestesia_mapping)

    # Anesthesia type code.
    tipo_anestesia_mapping = {
        'GERAL': 1,
        'LOCO-REGIONAL': 2,
        'COMBINADA': 3,
        'SEDACAO': 4,
        'LOCAL': 5,
        'LOCO-REG. C/SEDACAO': 6,
        'SED/NL-ANALGESIA': 7,
        'GERAL-REGIONAL': 8
    }
    df['TIPO ANESTESIA'] = df['TIPO ANESTESIA'].map(tipo_anestesia_mapping)

    # Bin pre-operative days into left-closed intervals:
    # [0, 1) -> 1, [1, 4) -> 2, [4, 6) -> 3, [6, 1000) -> 4.
    limits = [0, 1, 4, 6, 1000]
    labels = [1, 2, 3, 4]
    df["DIAS PRE OP_label"] = pd.cut(df["DIAS PRE OP"], bins=limits, labels=labels, right=False)
    df["DIAS PRE OP_label"] = df["DIAS PRE OP_label"].astype(int)

    # Admission month, always derived from the parsed timestamp.
    df['urg_data_hora_admissao_mes'] = admitted_at.dt.month.astype(int)

    # Cast the remaining code columns to integers.
    df['COD GDH'] = df['COD GDH'].astype(int)
    df['ORDEM PBASE DESC'] = df['ORDEM PBASE DESC'].astype(int)
    df['COD ESPEC DEST'] = df['COD ESPEC DEST'].astype(int)

    return df

# Dictionary to map variable names from Portuguese to English.
# Keys are the model's feature column names; values are the labels that
# calculate_patient_risk() shows in the 'Feature' column of its SHAP table.
variable_mapping = {
    'urg data hora admissao': 'Emergency Admission Date Time',
    'IDADE': 'Age',
    'DIAS PRE OP': 'Pre OP Days',
    'urg_data_hora_admissao_mes': 'Emergency Admission Month',
    'COD GDH': 'DRG Code',
    'ORDEM PBASE DESC': 'Base Procedure Order Description',
    'ANESTESIA': 'Anesthesia',
    'TIPO ANESTESIA': 'Anesthesia Type',
    'DIAS PRE OP_label': 'Pre OP Days Label',
    'COD ESPEC DEST': 'Destination Specialty Code',
    'SEXO': 'Gender'
}


def load_shap_values(file_path):
    """Read SHAP values stored as CSV and return them as a DataFrame.

    `file_path` is handed straight to `pandas.read_csv`.
    """
    shap_frame = pd.read_csv(file_path)
    return shap_frame

def calculate_patient_risk(patient_data, model, feature_names, explainer):
    """Calculate the risk score and per-feature SHAP values for one patient.

    Parameters
    ----------
    patient_data : dict
        Raw patient record; it is wrapped in a one-row DataFrame and passed
        through prepare_data().
    model : object
        Fitted classifier exposing `predict_proba`.
    feature_names : list of str
        Column names to feed to the model, in the order the model expects.
    explainer : object
        SHAP explainer exposing `shap_values`.

    Returns
    -------
    tuple
        `(risk_score, shap_contributions)` where `risk_score` is the
        probability at class index 1 for the patient and `shap_contributions`
        is a DataFrame with columns 'Feature' and 'SHAP Value', sorted by
        signed SHAP value in descending order.
    """
    # Build the one-row frame and encode it into model features.
    patient_df = prepare_data(pd.DataFrame([patient_data]))

    # Select only the features used by the model, in the correct order.
    patient_features = patient_df[feature_names]

    # Probability of class index 1 for the single row.
    risk_score = model.predict_proba(patient_features)[0][1]

    # SHAP values for the patient.
    shap_values = explainer.shap_values(patient_features)

    if isinstance(shap_values, list):
        # Older shap API: one (samples, features) array per class.
        shap_values_positive = np.asarray(shap_values[1])
    else:
        shap_values_positive = np.asarray(shap_values)
        if shap_values_positive.ndim == 3:
            # Newer shap API: a single (samples, features, classes) array.
            # Keep class index 1, or the only output if there is just one.
            class_index = 1 if shap_values_positive.shape[-1] > 1 else 0
            shap_values_positive = shap_values_positive[:, :, class_index]

    shap_contributions = pd.DataFrame({
        'Feature': feature_names,
        'SHAP Value': shap_values_positive.flatten()
    })

    # Replace feature names with their English equivalents; a feature without
    # a translation keeps its original name instead of turning into NaN.
    shap_contributions['Feature'] = shap_contributions['Feature'].map(
        lambda name: variable_mapping.get(name, name)
    )

    # Largest positive contribution first, most negative last.
    shap_contributions = shap_contributions.sort_values(by='SHAP Value', ascending=False)

    return risk_score, shap_contributions

# Feature names, in the column order used to build the model input
feature_names = [
    'urg data hora admissao', 'IDADE', 'DIAS PRE OP',
    'urg_data_hora_admissao_mes', 'COD GDH', 'ORDEM PBASE DESC',
    'ANESTESIA', 'DIAS PRE OP_label', 'COD ESPEC DEST', 'SEXO',
    'TIPO ANESTESIA']

# Loading the model from a file
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
model = joblib.load("OrthoMortPred_model.joblib")

# Create a SHAP explainer object for the model
explainer = shap.TreeExplainer(model)


risk_score, shap_contributions = calculate_patient_risk(patient1, model, feature_names, explainer)

# Print the results
print(f"Risk Score: {risk_score:.4f}, value >0.5 represents High risk of death")
print("Top Contributing Factors (SHAP values):")
print(shap_contributions)



Risk Score: 0.9997, value >0.5 represents High risk of death
Top Contributing Factors (SHAP values):
                             Feature  SHAP Value
1                                Age    7.967469
9                             Gender    0.633194
7                  Pre OP Days Label    0.579084
4                           DRG Code    0.451451
5   Base Procedure Order Description    0.116784
10                   Anesthesia Type    0.057398
3          Emergency Admission Month   -0.001187
6                         Anesthesia   -0.150029
8         Destination Specialty Code   -0.164729
2                        Pre OP Days   -0.742001
0      Emergency Admission Date Time   -0.904295


