<a href="https://colab.research.google.com/github/cbonnin88/Hospital_Admissions/blob/main/readmission_risk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [49]:
from google.cloud import bigquery
from google.colab import auth
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,roc_auc_score
import plotly.graph_objects as go
import plotly.figure_factory as ff
from sklearn.metrics import confusion_matrix
import plotly.express as px

# **Pulling the modeled data from BigQuery**

In [27]:
auth.authenticate_user()

In [28]:
client = bigquery.Client(project='france-healthcare-analytics')
print(f"BigQuery client project set to: {client.project}")

BigQuery client project set to: france-healthcare-analytics


In [29]:
query = """
SELECT
    age,
    gender,
    dept_code,
    icd10_code,
    cost_euro,
    CASE
      WHEN days_since_prev_discharge <= 30 THEN 1
      ELSE 0
    END AS is_readmitted
  FROM `france-healthcare-analytics.dbt_france_healthcare.fct_patient_pathway`
  WHERE cost_euro IS NOT NULL
"""

In [30]:
df_health = client.query(query).to_dataframe()

In [31]:
display(df_health.head())

Unnamed: 0,age,gender,dept_code,icd10_code,cost_euro,is_readmitted
0,18,U,94,J45.9,4181.65,0
1,18,F,93,J44.0,9942.6,0
2,18,M,92,J45.9,10000.51,0
3,18,F,92,J44.0,8844.55,0
4,18,F,92,I10,13306.32,1


# **Preprocessing**
- Converting categorical strings to numbers (One-Hot Encoding)

In [32]:
df_ml = pd.get_dummies(df_health,columns=['gender','dept_code','icd10_code'],drop_first=True)

# **Splitting the Data**

In [34]:
X = df_ml.drop('is_readmitted', axis=1)
y = df_ml['is_readmitted']
X_train, X_test, y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=42)

# **Training the Model**

- Using the **class_weight='balanced'** because readmissions are usually fewer than normal stays

In [35]:
model = RandomForestClassifier(n_estimators=100,max_depth=10, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# **Evaluate**

In [36]:
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

         0.0       0.90      0.70      0.79     21668
         1.0       0.10      0.30      0.15      2332

    accuracy                           0.66     24000
   macro avg       0.50      0.50      0.47     24000
weighted avg       0.82      0.66      0.73     24000



# **Visualizing Feature Importance**

In [38]:
# Get importance Levels
importances = model.feature_importances_
feature_names = X.columns
feature_imortance_df = pd.DataFrame({'feature':feature_names,'importance':importances}).sort_values('importance',ascending=False).head(10)

In [39]:
fig_importance = go.Figure(
    go.Bar(
        x=feature_imortance_df['importance'],
        y=feature_imortance_df['feature'],
        orientation='h',
        marker_color='teal'
    )
)
fig_importance.update_layout(title='Top 10 Predictors od Hospital Readmission', xaxis_title='Importance Score',yaxis_title='Feature')
fig_importance.show()

# **Confusion Matrix Heatmap**

In [41]:
cm = confusion_matrix(y_test,y_pred)

In [42]:
# Define Labels
x = ['Predicted: No','Predicted: Yes']
y= ['Actual: No','Actual: Yes']

In [44]:
fig_cm = ff.create_annotated_heatmap(
    z=cm,
    x=x,
    y=y,
    annotation_text=cm,
    colorscale='Viridis'
)

fig_cm.update_layout(
    title='Confusion Matrix: Readmission Risk Model',
    xaxis_title='Predicted Class',
    yaxis_title='Acutal Class'
)

fig_cm.show()

# **Classification Report Heatmap**

In [45]:
# Generating the report as a diction
report = classification_report(y_test,y_pred,output_dict=True)

In [46]:
# Convert to DataFrame and clean it up for plotting
# Removing accuracy because its a single value, and weighteed avg for clarity

report_df = pd.DataFrame(report).transpose()
report_df = report_df.iloc[:2, :3] # Only take classes 0 and 1, and metrics P, R, F1

In [51]:
fig_report = px.imshow(
    report_df,
    labels=dict(x='Metric',y='Class',color='Score'),
    x=['Precision','Recall','F1-Score'],
    y=['No Readmission','Readmission'],
    text_auto='.2f',
    aspect= 'auto',
    color_continuous_scale='Viridis_r'
)

fig_report.update_layout(title='Classification Metrics Heatmap')
fig_report.show()