In [1]:
# Cell 1: Import libraries and load Phase 1 outputs
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)

print("=" * 80)
print("üîß PHASE 2: FEATURE ENGINEERING FOR READMISSION PREDICTION")
print("=" * 80)

# Data path
DATA_PATH = "/mnt/d/Datasets/mimic-iii-demo/mimic-iii-clinical-database-demo-1.4/"

# Load tables
print("\nüìÇ Loading MIMIC-III tables...")
patients = pd.read_csv(DATA_PATH + "PATIENTS.csv")
admissions = pd.read_csv(DATA_PATH + "admissions_processed.csv")  # From Phase 1
diagnoses = pd.read_csv(DATA_PATH + "DIAGNOSES_ICD.csv")
icustays = pd.read_csv(DATA_PATH + "ICUSTAYS.csv")
procedures = pd.read_csv(DATA_PATH + "PROCEDURES_ICD.csv")

# Convert date columns
patients['dob'] = pd.to_datetime(patients['dob'])
patients['dod'] = pd.to_datetime(patients['dod'])
admissions['admittime'] = pd.to_datetime(admissions['admittime'])
admissions['dischtime'] = pd.to_datetime(admissions['dischtime'])

print(f"‚úÖ Loaded:")
print(f"   - Patients: {len(patients)}")
print(f"   - Admissions: {len(admissions)}")
print(f"   - Diagnoses: {len(diagnoses)}")
print(f"   - ICU Stays: {len(icustays)}")
print(f"   - Procedures: {len(procedures)}")
print(f"\n   Target: {admissions['readmitted_30'].sum()} readmissions (positive class)")


üîß PHASE 2: FEATURE ENGINEERING FOR READMISSION PREDICTION

üìÇ Loading MIMIC-III tables...
‚úÖ Loaded:
   - Patients: 100
   - Admissions: 129
   - Diagnoses: 1761
   - ICU Stays: 136
   - Procedures: 506

   Target: 11 readmissions (positive class)


In [4]:
# Cell 2: Feature Engineering - Age at Admission (SIMPLIFIED for MIMIC-III)
print("\n" + "=" * 80)
print("üë§ FEATURE 1: AGE AT ADMISSION")
print("=" * 80)

# Merge patients with admissions
df = admissions.merge(patients[['subject_id', 'dob', 'gender']], on='subject_id', how='left')

# Extract years for calculation
df['dob_year'] = df['dob'].dt.year
df['admit_year'] = df['admittime'].dt.year

# MIMIC-III anonymization: Patients >89 have DOB year > 2100
print("\nüîç Checking for anonymized DOB (year 2200+)...")
anonymized_mask = df['dob_year'] > 2100
anonymized_count = anonymized_mask.sum()
print(f"   Found {anonymized_count} admissions with anonymized DOB (patients >89 years old)")

# Calculate age using simple year difference (handles anonymization automatically)
def calculate_age_simple(row):
    """Calculate age from year difference, handling MIMIC-III anonymization"""
    dob_year = row['dob_year']
    admit_year = row['admit_year']
    
    # If DOB year is > 2100, it's shifted by 300 years
    if dob_year > 2100:
        # Shift back 300 years
        actual_birth_year = dob_year - 300
        age = admit_year - actual_birth_year
    else:
        # Normal calculation
        age = admit_year - dob_year
    
    # Age should be positive and reasonable (<120)
    if age < 0 or age > 120:
        return np.nan
    
    return age

print("\n‚è≥ Calculating ages (handling anonymization)...")
df['age_years'] = df.apply(calculate_age_simple, axis=1)

# Check for any NaN values
nan_count = df['age_years'].isna().sum()
if nan_count > 0:
    print(f"‚ö†Ô∏è Warning: {nan_count} ages could not be calculated")
    # Fill with median
    df['age_years'] = df['age_years'].fillna(df['age_years'].median())

print("\n1Ô∏è‚É£ Age statistics:")
print(df['age_years'].describe())

print("\n2Ô∏è‚É£ Age distribution by readmission status:")
age_by_readmit = df.groupby('readmitted_30')['age_years'].agg(['mean', 'median', 'min', 'max', 'std'])
print(age_by_readmit)

print("\n3Ô∏è‚É£ Gender distribution:")
print(df['gender'].value_counts())
print("\n   Gender by readmission:")
gender_readmit = pd.crosstab(df['gender'], df['readmitted_30'], margins=True)
print(gender_readmit)
print("\n   Readmission rate by gender:")
gender_rate = df.groupby('gender')['readmitted_30'].mean() * 100
print(gender_rate)

# Final age feature (already cleaned)
df['age_years_cleaned'] = df['age_years']

print("\n4Ô∏è‚É£ Age distribution:")
age_bins = [0, 40, 60, 75, 200]
age_labels = ['<40', '40-60', '60-75', '>75']
df['age_group'] = pd.cut(df['age_years_cleaned'], bins=age_bins, labels=age_labels)
print(df['age_group'].value_counts().sort_index())

print("\n5Ô∏è‚É£ Readmission rate by age group:")
age_readmit_rate = df.groupby('age_group', observed=True)['readmitted_30'].agg(['sum', 'count', 'mean'])
age_readmit_rate['readmit_rate_%'] = age_readmit_rate['mean'] * 100
print(age_readmit_rate)

# Drop temporary columns
df = df.drop(['dob_year', 'admit_year', 'age_group'], axis=1)

print("\n‚úÖ Feature created: age_years_cleaned")



üë§ FEATURE 1: AGE AT ADMISSION

üîç Checking for anonymized DOB (year 2200+)...
   Found 37 admissions with anonymized DOB (patients >89 years old)

‚è≥ Calculating ages (handling anonymization)...

1Ô∏è‚É£ Age statistics:
count    129.000000
mean      73.558140
std       11.834739
min       27.000000
25%       71.000000
50%       76.000000
75%       80.000000
max       89.000000
Name: age_years, dtype: float64

2Ô∏è‚É£ Age distribution by readmission status:
                    mean  median   min   max        std
readmitted_30                                          
0              73.779661    76.0  27.0  89.0  11.869040
1              71.181818    76.0  53.0  88.0  11.737276

3Ô∏è‚É£ Gender distribution:
gender
M    70
F    59
Name: count, dtype: int64

   Gender by readmission:
readmitted_30    0   1  All
gender                     
F               57   2   59
M               61   9   70
All            118  11  129

   Readmission rate by gender:
gender
F     3.389831
M    12.

In [5]:
# Cell 3: Feature Engineering - Admission History
print("\n" + "=" * 80)
print("üìÖ FEATURE 2: ADMISSION HISTORY")
print("=" * 80)

# Sort by patient and time
df = df.sort_values(['subject_id', 'admittime']).reset_index(drop=True)

# Count previous admissions for each patient
df['previous_admissions'] = df.groupby('subject_id').cumcount()

# Days since last admission (for patients with prior admits)
df['days_since_last_admit'] = df.groupby('subject_id')['admittime'].diff().dt.days

# Fill NaN (first admission) with -1
df['days_since_last_admit'] = df['days_since_last_admit'].fillna(-1)

print("\n1Ô∏è‚É£ Previous admissions statistics:")
print(df['previous_admissions'].describe())

print("\n2Ô∏è‚É£ Distribution of previous admissions:")
print(df['previous_admissions'].value_counts().sort_index())

print("\n3Ô∏è‚É£ Readmission rate by admission history:")
readmit_by_history = df.groupby('previous_admissions')['readmitted_30'].agg(['sum', 'count', 'mean'])
readmit_by_history['readmit_rate_%'] = readmit_by_history['mean'] * 100
print(readmit_by_history)

print("\n4Ô∏è‚É£ Days since last admission (for repeat patients):")
print(df[df['days_since_last_admit'] > 0]['days_since_last_admit'].describe())

print("\n‚úÖ Features created: previous_admissions, days_since_last_admit")



üìÖ FEATURE 2: ADMISSION HISTORY

1Ô∏è‚É£ Previous admissions statistics:
count    129.000000
mean       0.945736
std        2.681855
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max       14.000000
Name: previous_admissions, dtype: float64

2Ô∏è‚É£ Distribution of previous admissions:
previous_admissions
0     100
1      14
2       3
3       1
4       1
5       1
6       1
7       1
8       1
9       1
10      1
11      1
12      1
13      1
14      1
Name: count, dtype: int64

3Ô∏è‚É£ Readmission rate by admission history:
                     sum  count      mean  readmit_rate_%
previous_admissions                                      
0                      8    100  0.080000        8.000000
1                      2     14  0.142857       14.285714
2                      0      3  0.000000        0.000000
3                      0      1  0.000000        0.000000
4                      0      1  0.000000        0.000000
5                      0  

In [6]:
# Cell 4: Feature Engineering - Diagnoses & Comorbidity
print("\n" + "=" * 80)
print("ü©∫ FEATURE 3: DIAGNOSIS COMPLEXITY & CHARLSON COMORBIDITY INDEX")
print("=" * 80)

# Count diagnoses per admission
diag_counts = diagnoses.groupby('hadm_id').size().reset_index(name='diagnosis_count')
df = df.merge(diag_counts, on='hadm_id', how='left')
df['diagnosis_count'] = df['diagnosis_count'].fillna(0).astype(int)

print("\n1Ô∏è‚É£ Diagnosis count statistics:")
print(df['diagnosis_count'].describe())

print("\n2Ô∏è‚É£ Diagnosis count by readmission status:")
print(df.groupby('readmitted_30')['diagnosis_count'].agg(['mean', 'median', 'min', 'max']))

# Charlson Comorbidity Index (CCI) - Simplified ICD-9 mapping
print("\n3Ô∏è‚É£ Calculating Charlson Comorbidity Index...")

# Charlson category ICD-9 prefixes (simplified for demo)
charlson_map = {
    # Myocardial infarction
    '410': 1, '412': 1,
    # Congestive heart failure
    '428': 1,
    # Peripheral vascular disease
    '443': 1, '441': 1,
    # Cerebrovascular disease
    '430': 1, '431': 1, '432': 1, '433': 1, '434': 1, '435': 1, '436': 1,
    # Dementia
    '290': 1,
    # Chronic pulmonary disease
    '490': 1, '491': 1, '492': 1, '493': 1, '494': 1, '495': 1, '496': 1,
    # Rheumatologic disease
    '710': 1, '714': 1,
    # Peptic ulcer disease
    '531': 1, '532': 1, '533': 1, '534': 1,
    # Mild liver disease
    '571': 1,
    # Diabetes without complications
    '250': 1,
    # Diabetes with complications
    '2504': 2, '2505': 2, '2506': 2, '2507': 2,
    # Hemiplegia or paraplegia
    '342': 2, '344': 2,
    # Renal disease
    '582': 2, '583': 2, '585': 2, '586': 2,
    # Cancer (any malignancy)
    '14': 2, '15': 2, '16': 2, '17': 2, '18': 2, '19': 2, '20': 2,
    # Moderate or severe liver disease
    '5722': 3, '5723': 3, '5724': 3,
    # Metastatic solid tumor
    '196': 6, '197': 6, '198': 6, '199': 6,
    # AIDS
    '042': 6, '043': 6, '044': 6
}

def calculate_charlson(hadm_id):
    """Calculate Charlson score for an admission"""
    patient_diags = diagnoses[diagnoses['hadm_id'] == hadm_id]['icd9_code'].astype(str)
    score = 0
    for diag in patient_diags:
        # Check prefixes
        for prefix, weight in charlson_map.items():
            if diag.startswith(prefix):
                score += weight
                break  # Only count once per diagnosis
    return score

# Calculate CCI for each admission (this may take 10-20 seconds)
print("   Computing Charlson scores for 129 admissions...")
df['charlson_score'] = df['hadm_id'].apply(calculate_charlson)

print("\n4Ô∏è‚É£ Charlson Comorbidity Index statistics:")
print(df['charlson_score'].describe())

print("\n5Ô∏è‚É£ Charlson score by readmission status:")
print(df.groupby('readmitted_30')['charlson_score'].agg(['mean', 'median', 'max']))

print("\n6Ô∏è‚É£ Charlson score distribution:")
print(df['charlson_score'].value_counts().sort_index())

print("\n‚úÖ Features created: diagnosis_count, charlson_score")



ü©∫ FEATURE 3: DIAGNOSIS COMPLEXITY & CHARLSON COMORBIDITY INDEX

1Ô∏è‚É£ Diagnosis count statistics:
count    129.000000
mean      13.651163
std        6.465837
min        3.000000
25%        9.000000
50%       12.000000
75%       17.000000
max       37.000000
Name: diagnosis_count, dtype: float64

2Ô∏è‚É£ Diagnosis count by readmission status:
                    mean  median  min  max
readmitted_30                             
0              13.677966    12.5    3   37
1              13.363636    12.0    4   24

3Ô∏è‚É£ Calculating Charlson Comorbidity Index...
   Computing Charlson scores for 129 admissions...

4Ô∏è‚É£ Charlson Comorbidity Index statistics:
count    129.000000
mean       3.294574
std        2.857017
min        0.000000
25%        1.000000
50%        2.000000
75%        4.000000
max       14.000000
Name: charlson_score, dtype: float64

5Ô∏è‚É£ Charlson score by readmission status:
                   mean  median  max
readmitted_30                       
0         

In [7]:
# Cell 5: Feature Engineering - Procedures
print("\n" + "=" * 80)
print("üî¨ FEATURE 4: PROCEDURE COUNTS")
print("=" * 80)

# Count procedures per admission
proc_counts = procedures.groupby('hadm_id').size().reset_index(name='procedure_count')
df = df.merge(proc_counts, on='hadm_id', how='left')
df['procedure_count'] = df['procedure_count'].fillna(0).astype(int)

print("\n1Ô∏è‚É£ Procedure count statistics:")
print(df['procedure_count'].describe())

print("\n2Ô∏è‚É£ Procedure count by readmission status:")
print(df.groupby('readmitted_30')['procedure_count'].agg(['mean', 'median', 'min', 'max']))

print("\n3Ô∏è‚É£ Patients with no procedures:")
print(f"   Count: {(df['procedure_count'] == 0).sum()} admissions ({(df['procedure_count'] == 0).mean()*100:.1f}%)")

print("\n‚úÖ Feature created: procedure_count")



üî¨ FEATURE 4: PROCEDURE COUNTS

1Ô∏è‚É£ Procedure count statistics:
count    129.000000
mean       3.922481
std        3.954057
min        0.000000
25%        1.000000
50%        3.000000
75%        5.000000
max       22.000000
Name: procedure_count, dtype: float64

2Ô∏è‚É£ Procedure count by readmission status:
                   mean  median  min  max
readmitted_30                            
0              3.974576     3.0    0   22
1              3.363636     3.0    0    9

3Ô∏è‚É£ Patients with no procedures:
   Count: 16 admissions (12.4%)

‚úÖ Feature created: procedure_count


In [8]:
# Cell 6: Feature Engineering - ICU Stays
print("\n" + "=" * 80)
print("üè• FEATURE 5: ICU UTILIZATION")
print("=" * 80)

# Aggregate ICU data per admission
icu_agg = icustays.groupby('hadm_id').agg({
    'icustay_id': 'count',  # Number of ICU stays
    'los': 'sum'  # Total ICU length of stay
}).reset_index()
icu_agg.columns = ['hadm_id', 'icu_stay_count', 'icu_los_days']

df = df.merge(icu_agg, on='hadm_id', how='left')
df['icu_stay_count'] = df['icu_stay_count'].fillna(0).astype(int)
df['icu_los_days'] = df['icu_los_days'].fillna(0)

print("\n1Ô∏è‚É£ ICU stay count statistics:")
print(df['icu_stay_count'].describe())

print("\n2Ô∏è‚É£ ICU LOS statistics:")
print(df['icu_los_days'].describe())

print("\n3Ô∏è‚É£ ICU metrics by readmission status:")
print(df.groupby('readmitted_30')[['icu_stay_count', 'icu_los_days']].mean())

print("\n4Ô∏è‚É£ Admissions without ICU:")
print(f"   Count: {(df['icu_stay_count'] == 0).sum()} admissions ({(df['icu_stay_count'] == 0).mean()*100:.1f}%)")

print("\n‚úÖ Features created: icu_stay_count, icu_los_days")



üè• FEATURE 5: ICU UTILIZATION

1Ô∏è‚É£ ICU stay count statistics:
count    129.000000
mean       1.054264
std        0.259509
min        1.000000
25%        1.000000
50%        1.000000
75%        1.000000
max        3.000000
Name: icu_stay_count, dtype: float64

2Ô∏è‚É£ ICU LOS statistics:
count    129.000000
mean       4.694063
std        6.485314
min        0.105900
25%        1.293800
50%        2.255900
75%        4.617900
max       35.406500
Name: icu_los_days, dtype: float64

3Ô∏è‚É£ ICU metrics by readmission status:
               icu_stay_count  icu_los_days
readmitted_30                              
0                    1.059322      4.859655
1                    1.000000      2.917709

4Ô∏è‚É£ Admissions without ICU:
   Count: 0 admissions (0.0%)

‚úÖ Features created: icu_stay_count, icu_los_days


In [9]:
# Cell 7: Feature Engineering - Categorical Encoding
print("\n" + "=" * 80)
print("üè∑Ô∏è FEATURE 6: CATEGORICAL VARIABLE ENCODING")
print("=" * 80)

# 1. Admission Type (one-hot encoding)
print("\n1Ô∏è‚É£ Encoding admission_type:")
print(df['admission_type'].value_counts())
admission_dummies = pd.get_dummies(df['admission_type'], prefix='admit_type', drop_first=True)
df = pd.concat([df, admission_dummies], axis=1)
print(f"   Created columns: {list(admission_dummies.columns)}")

# 2. Insurance Type (one-hot encoding)
print("\n2Ô∏è‚É£ Encoding insurance:")
print(df['insurance'].value_counts())
insurance_dummies = pd.get_dummies(df['insurance'], prefix='insurance', drop_first=True)
df = pd.concat([df, insurance_dummies], axis=1)
print(f"   Created columns: {list(insurance_dummies.columns)}")

# 3. Gender (binary encoding)
print("\n3Ô∏è‚É£ Encoding gender:")
df['gender_M'] = (df['gender'] == 'M').astype(int)
print(f"   Created: gender_M (Male=1, Female=0)")

# 4. Hospital Expire Flag (already binary)
print("\n4Ô∏è‚É£ Hospital expire flag:")
print(df['hospital_expire_flag'].value_counts())

print("\n‚úÖ Categorical features encoded")



üè∑Ô∏è FEATURE 6: CATEGORICAL VARIABLE ENCODING

1Ô∏è‚É£ Encoding admission_type:
admission_type
EMERGENCY    119
ELECTIVE       8
URGENT         2
Name: count, dtype: int64
   Created columns: ['admit_type_EMERGENCY', 'admit_type_URGENT']

2Ô∏è‚É£ Encoding insurance:
insurance
Medicare      98
Private       24
Medicaid       6
Government     1
Name: count, dtype: int64
   Created columns: ['insurance_Medicaid', 'insurance_Medicare', 'insurance_Private']

3Ô∏è‚É£ Encoding gender:
   Created: gender_M (Male=1, Female=0)

4Ô∏è‚É£ Hospital expire flag:
hospital_expire_flag
0    89
1    40
Name: count, dtype: int64

‚úÖ Categorical features encoded


In [10]:
# Cell 8: Assemble Final Feature Matrix
print("\n" + "=" * 80)
print("üìä CREATING FINAL TRAINING DATASET")
print("=" * 80)

# Select final features for modeling
feature_columns = [
    # Demographics
    'age_years_cleaned',
    'gender_M',
    
    # Admission history
    'previous_admissions',
    'days_since_last_admit',
    
    # Clinical complexity
    'diagnosis_count',
    'charlson_score',
    'procedure_count',
    
    # Hospital utilization
    'los_days',
    'icu_stay_count',
    'icu_los_days',
    
    # Admission type (one-hot encoded)
    'admit_type_EMERGENCY',
    'admit_type_URGENT',
    
    # Insurance (one-hot encoded)
    'insurance_Medicare',
    'insurance_Private',
    
    # Mortality indicator
    'hospital_expire_flag'
]

# Target variable
target_column = 'readmitted_30'

# Check all features exist
missing_features = [f for f in feature_columns if f not in df.columns]
if missing_features:
    print(f"‚ö†Ô∏è Missing features: {missing_features}")
else:
    print("‚úÖ All features present")

# Create feature matrix X and target y
X = df[feature_columns].copy()
y = df[target_column].copy()

print(f"\n1Ô∏è‚É£ Feature matrix shape: {X.shape}")
print(f"   - Samples (admissions): {X.shape[0]}")
print(f"   - Features: {X.shape[1]}")

print(f"\n2Ô∏è‚É£ Target distribution:")
print(y.value_counts())
print(f"   - Class balance: {y.mean()*100:.1f}% positive class")

print("\n3Ô∏è‚É£ Feature summary:")
print(X.describe().T)

print("\n4Ô∏è‚É£ Missing values check:")
missing = X.isnull().sum()
if missing.sum() > 0:
    print(missing[missing > 0])
else:
    print("   ‚úÖ No missing values")

print("\n5Ô∏è‚É£ Feature list:")
for i, col in enumerate(X.columns, 1):
    print(f"   {i:2d}. {col}")



üìä CREATING FINAL TRAINING DATASET
‚úÖ All features present

1Ô∏è‚É£ Feature matrix shape: (129, 15)
   - Samples (admissions): 129
   - Features: 15

2Ô∏è‚É£ Target distribution:
readmitted_30
0    118
1     11
Name: count, dtype: int64
   - Class balance: 8.5% positive class

3Ô∏è‚É£ Feature summary:
                       count       mean        std        min        25%  \
age_years_cleaned      129.0  73.558140  11.834739  27.000000  71.000000   
gender_M               129.0   0.542636   0.500121   0.000000   0.000000   
previous_admissions    129.0   0.945736   2.681855   0.000000   0.000000   
days_since_last_admit  129.0  17.271318  52.435340  -1.000000  -1.000000   
diagnosis_count        129.0  13.651163   6.465837   3.000000   9.000000   
charlson_score         129.0   3.294574   2.857017   0.000000   1.000000   
procedure_count        129.0   3.922481   3.954057   0.000000   1.000000   
los_days               129.0   9.332332  12.720450   0.038194   3.314583   
icu_stay_

In [11]:
# Cell 9: Save final dataset for Phase 3 (Model Training)
print("\n" + "=" * 80)
print("üíæ SAVING ENGINEERED DATASET")
print("=" * 80)

# Combine features and target
final_df = X.copy()
final_df['readmitted_30'] = y
final_df['hadm_id'] = df['hadm_id']  # Keep admission ID for reference

# Save to CSV
output_path = DATA_PATH + 'mimic_features_engineered.csv'
final_df.to_csv(output_path, index=False)

print(f"\n‚úÖ Saved engineered dataset to:")
print(f"   {output_path}")
print(f"\n   Shape: {final_df.shape}")
print(f"   Features: {X.shape[1]}")
print(f"   Target: readmitted_30 (0/1)")

# Also save feature names for later use
feature_names = list(X.columns)
import json
feature_path = DATA_PATH + 'feature_names.json'
with open(feature_path, 'w') as f:
    json.dump(feature_names, f, indent=2)
print(f"\n‚úÖ Saved feature names to: {feature_path}")

print("\n" + "=" * 80)
print("üéâ PHASE 2 COMPLETE: FEATURE ENGINEERING DONE")
print("=" * 80)
print("\nüìä Summary:")
print(f"   ‚Ä¢ Total samples: {len(final_df)}")
print(f"   ‚Ä¢ Features: {len(feature_names)}")
print(f"   ‚Ä¢ Positive cases: {y.sum()}")
print(f"   ‚Ä¢ Negative cases: {(y == 0).sum()}")
print(f"   ‚Ä¢ Readmission rate: {y.mean()*100:.1f}%")
print("\nüéØ Ready for Phase 3: XGBoost Model Training")



üíæ SAVING ENGINEERED DATASET

‚úÖ Saved engineered dataset to:
   /mnt/d/Datasets/mimic-iii-demo/mimic-iii-clinical-database-demo-1.4/mimic_features_engineered.csv

   Shape: (129, 17)
   Features: 15
   Target: readmitted_30 (0/1)

‚úÖ Saved feature names to: /mnt/d/Datasets/mimic-iii-demo/mimic-iii-clinical-database-demo-1.4/feature_names.json

üéâ PHASE 2 COMPLETE: FEATURE ENGINEERING DONE

üìä Summary:
   ‚Ä¢ Total samples: 129
   ‚Ä¢ Features: 15
   ‚Ä¢ Positive cases: 11
   ‚Ä¢ Negative cases: 118
   ‚Ä¢ Readmission rate: 8.5%

üéØ Ready for Phase 3: XGBoost Model Training
