In [3]:
# This is a simplified example dataset. The original dataset from the EHR was messy, so I created this clean, synthetic version for demonstration purposes.
import pandas as pd
import numpy as np

# Build a synthetic patient table.
# The legacy global NumPy RNG is seeded so every draw below is reproducible.
# Columns are drawn in the order they are added to `columns`; reordering them
# would change the generated values.
np.random.seed(42)
n_patients = 100

columns = {}
columns['Patient_ID'] = range(1, n_patients + 1)
columns['Age'] = np.random.randint(18, 90, size=n_patients)  # upper bound is exclusive
columns['Gender'] = np.random.choice(['Male', 'Female'], size=n_patients)
# Number of chronic conditions
columns['Comorbidities'] = np.random.randint(0, 5, size=n_patients)
# Length of hospital stay in days
columns['Length_of_Stay'] = np.random.randint(1, 15, size=n_patients)
# Score for discharge readiness
columns['Discharge_Planning_Score'] = np.random.uniform(0, 1, size=n_patients)
# Whether follow-up was scheduled
columns['Follow_Up_Scheduled'] = np.random.choice([0, 1], size=n_patients)
# Number of prior readmissions
columns['Previous_Readmissions'] = np.random.randint(0, 4, size=n_patients)
# Target variable: 1 is drawn with probability 0.3, 0 with probability 0.7
columns['Readmitted'] = np.random.choice([0, 1], size=n_patients, p=[0.7, 0.3])

data = pd.DataFrame(columns)

# Preview the first five rows
data.head()


Unnamed: 0,Patient_ID,Age,Gender,Comorbidities,Length_of_Stay,Discharge_Planning_Score,Follow_Up_Scheduled,Previous_Readmissions,Readmitted
0,1,69,Female,0,14,0.501516,1,2,1
1,2,32,Female,3,5,0.798295,0,1,0
2,3,89,Male,0,4,0.649964,1,0,0
3,4,78,Male,0,8,0.701967,1,2,1
4,5,38,Male,1,8,0.795793,1,2,0


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Separate predictors from the target. Patient_ID is dropped so the row
# identifier is not fed to the model as a feature.
X = data.drop(['Patient_ID', 'Readmitted'], axis=1)
y = data['Readmitted']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): the split is not stratified, so the share of readmitted
# patients in the small test fold can differ from the training fold. Consider
# stratify=y if the evaluation numbers need to be more stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing for categorical variables.
# handle_unknown='ignore' encodes a category that was not present at fit time
# as an all-zero row instead of raising ValueError during predict. Encodings of
# categories seen during fit are unchanged.
categorical_features = ['Gender']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# One-hot encode the categorical columns and pass every other column through
# untouched.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features)],
    remainder='passthrough'  # Leave other columns as is
)

# Chain preprocessing and the classifier so the encoder is fitted only on the
# training data and the same transformation is applied at predict time.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = pipeline.predict(X_test)

# Per-class precision / recall / F1 on the test set
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.71      0.86      0.77        14
           1       0.33      0.17      0.22         6

    accuracy                           0.65        20
   macro avg       0.52      0.51      0.50        20
weighted avg       0.59      0.65      0.61        20

