In [None]:
# Import Dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
import xgboost as xgb
import joblib
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load Dataset
df = pd.read_csv('D:\ARCAP REIT\Task\T -1 Prediction System\Data\hospital_readmissions.csv')  # Replace with your CSV file path
print("Shape:", df.shape)
print(df.head())

Shape: (30000, 12)
   patient_id  age  gender blood_pressure  cholesterol   bmi diabetes  \
0           1   74   Other         130/72          240  31.5      Yes   
1           2   46  Female         120/92          292  36.3       No   
2           3   89   Other         135/78          153  30.3       No   
3           4   84  Female         123/80          153  31.5       No   
4           5   32   Other         135/84          205  18.4       No   

  hypertension  medication_count  length_of_stay discharge_destination  \
0           No                 5               1      Nursing_Facility   
1           No                 4               3      Nursing_Facility   
2          Yes                 1               1                  Home   
3          Yes                 3              10                  Home   
4          Yes                 6               4      Nursing_Facility   

  readmitted_30_days  
0                Yes  
1                 No  
2                 No  
3    

In [5]:
# Data Preprocessing

# Handle blood pressure (split into systolic & diastolic)
df[['systolic_bp', 'diastolic_bp']] = df['blood_pressure'].str.split('/', expand=True).astype(float)
df.drop('blood_pressure', axis=1, inplace=True)

In [6]:
# Convert categorical columns
cat_cols = ['gender', 'diabetes', 'hypertension', 'discharge_destination', 'readmitted_30_days']
for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

In [None]:
# Drop patient_id (not predictive)
df.drop('patient_id', axis=1, inplace=True)

In [9]:
# Feature / Target Split
X = df.drop('readmitted_30_days', axis=1)
y = df['readmitted_30_days']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train XGBoost Model
xgb_model = xgb.XGBClassifier(
    objective='binary:logistic',
    eval_metric='auc',
    random_state=42,
    learning_rate=0.1,
    n_estimators=150,
    max_depth=5
)

xgb_model.fit(X_train, y_train)

# Evaluate Model
y_pred = xgb_model.predict(X_test)
y_proba = xgb_model.predict_proba(X_test)[:, 1]

print("\nModel Evaluation Results:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))



Model Evaluation Results:
Accuracy: 0.8758333333333334
ROC-AUC Score: 0.5571705073292375

Classification Report:
               precision    recall  f1-score   support

           0       0.88      1.00      0.93      5265
           1       0.00      0.00      0.00       735

    accuracy                           0.88      6000
   macro avg       0.44      0.50      0.47      6000
weighted avg       0.77      0.88      0.82      6000

Confusion Matrix:
 [[5255   10]
 [ 735    0]]


In [11]:
# Save Model for Deployment
joblib.dump(xgb_model, "patient_readmission_model.pkl")
print("\nModel saved as 'patient_readmission_model.pkl'")



Model saved as 'patient_readmission_model.pkl'
