In [55]:
import pandas as pd

In [56]:
df = pd.read_excel('../../data_set/monthlyInvestigation/cleaned_monthly_investigations.xlsx')

In [66]:
df.sort_values(['Subject_ID', 'Month'], inplace=True)

# Create the target variable by shifting Hb column backward by 1 month for each patient
df['Next_Hb'] = df.groupby('Subject_ID')['Hb (g/dL)'].shift(-1)

# Define 3-class risk label:
# 0 = Safe (10 ≤ Hb ≤ 12)
# 1 = Risk (Hb < 10)
# 2 = Risk (Hb > 12)
# df['Risk_Label'] = df['Next_Hb'].apply(
#     lambda x: 1 if x < 10 else (2 if x > 12 else 0)
# )
df['Risk_Label'] = df['Next_Hb'].apply(lambda x: 1 if x < 10 or x > 12 else 0)


df['Hb_diff'] = df['Hb (g/dL)'] - df.groupby('Subject_ID')['Hb (g/dL)'].shift(1)

df = df.dropna(subset=['Next_Hb', 'Risk_Label'])


feature_cols = [
    'Albumin (g/L)', 'BU - post HD', 'BU - pre HD', 'S Ca (mmol/L)',
    'SCR- post HD (µmol/L)', 'SCR- pre HD (µmol/L)',
    'Serum K Post-HD (mmol/L)', 'Serum K Pre-HD (mmol/L)',
    'Serum Na Pre-HD (mmol/L)', 'UA (mg/dL)', 'Hb_diff', 'Hb (g/dL)'
]

from sklearn.model_selection import GroupShuffleSplit

gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(df, groups=df['Subject_ID']))

X_train = df.iloc[train_idx][feature_cols]
y_train = df.iloc[train_idx]['Risk_Label']

X_test = df.iloc[test_idx][feature_cols]
y_test = df.iloc[test_idx]['Risk_Label']

In [67]:
from xgboost import XGBClassifier
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import classification_report, confusion_matrix

# Compute sample weights for class imbalance
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train)

# XGBoost classifier with manually specified hyperparameters
xgb_model = XGBClassifier(
    objective='multi:softmax',
    num_class=3,
    use_label_encoder=False,
    eval_metric='mlogloss',
    learning_rate=0.01,
    max_depth=5,
    n_estimators=100,
    subsample=1.0,
    colsample_bytree=1.0,
    random_state=42
)

# Train model
xgb_model.fit(X_train, y_train, sample_weight=sample_weights)

# Predict
y_pred = xgb_model.predict(X_test)

# Evaluation
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Confusion Matrix:
 [[10  6]
 [27 25]]

Classification Report:
               precision    recall  f1-score   support

           0       0.27      0.62      0.38        16
           1       0.81      0.48      0.60        52

    accuracy                           0.51        68
   macro avg       0.54      0.55      0.49        68
weighted avg       0.68      0.51      0.55        68

