In [43]:
import pandas as pd
from src.config import INTERIM_DATA_DIR
# Read the interim hidden-CKD tabular CSVs: the train split first, then the test split.
train_df = pd.read_csv(
    filepath_or_buffer=INTERIM_DATA_DIR / "tabular_data/hiddenckd_train.csv"
)
test_df = pd.read_csv(
    filepath_or_buffer=INTERIM_DATA_DIR / "tabular_data/hiddenckd_test.csv"
)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd

# -----------------------------
# 1. Define Bayesian NN (MC Dropout)
# -----------------------------
class BayesianNN(nn.Module):
    """Two-hidden-layer MLP classifier with dropout after each hidden activation.

    While the module is in training mode the dropout layer is stochastic, so
    repeated forward passes on the same input give different logits; the
    MC-dropout sampling code in this notebook relies on that.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        dropout_p: Drop probability of the single dropout layer, which is
            applied after each hidden activation.
    """

    def __init__(self, input_dim, hidden_dim=64, dropout_p=0.5):
        super().__init__()
        # Layers are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=2)  # binary CKD vs no CKD
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Return the raw (un-normalised) two-class logits for batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            # linear -> ReLU -> dropout, once per hidden layer
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)

# -----------------------------
# 2. Training loop
# -----------------------------
def train_bnn(model, train_loader, epochs=500, lr=1e-3):
    """Train ``model`` with Adam and cross-entropy loss, printing progress.

    Args:
        model: ``nn.Module`` mapping a feature batch to class logits.
        train_loader: Re-iterable object yielding ``(features, labels)``
            batches; it is iterated once per epoch. It does not need to
            implement ``__len__``.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.

    Returns:
        None. The mean per-batch loss is printed on every 10th epoch
        (starting with epoch 0), and ``model`` is left in training mode.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        # Count batches while iterating instead of calling len(train_loader):
        # that works for loaders without __len__ and cannot divide by zero.
        n_batches = 0
        for xb, yb in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_batches += 1
        if epoch % 10 == 0:
            # An epoch that saw no batches reports nan rather than crashing.
            mean_loss = total_loss / n_batches if n_batches else float("nan")
            print(f"Epoch {epoch}, Loss: {mean_loss:.4f}")

# -----------------------------
# 3. Predict with entropy-based uncertainty
# -----------------------------
def predictive_entropy(probs):
    """Compute the entropy (natural log) of categorical probability distributions.

    Args:
        probs: Array-like of probabilities whose last axis indexes the
            classes, e.g. shape ``(n_classes,)`` or ``(batch, n_classes)``.

    Returns:
        The entropy of each distribution: a scalar for 1-D input, otherwise
        an array with the class axis reduced away.
    """
    probs = np.asarray(probs)
    # The 1e-12 offset keeps log() finite when a probability is exactly zero.
    # Reducing over the last axis (instead of a fixed axis 1) makes a single
    # distribution and stacked batches work the same way as a 2-D batch.
    return -(probs * np.log(probs + 1e-12)).sum(axis=-1)

def predict_with_uncertainty(
    model, xb,
    n_samples=50,
    entropy_threshold=0.5,
    ckd_threshold=0.3
):
    """Label each row of ``xb`` from MC-dropout mean probabilities and entropy.

    The model is run ``n_samples`` times with its dropout layers active; the
    softmax outputs are averaged and the entropy of that mean distribution
    is used as the uncertainty score.

    Args:
        model: Classifier returning 2-D logits ``(rows, classes)``; class
            index 1 is treated as the CKD class.
        xb: Input tensor accepted by ``model``.
        n_samples: Number of stochastic forward passes (must be >= 1).
        entropy_threshold: Rows whose entropy is strictly above this value
            are labelled "Uncertain".
        ckd_threshold: Among the remaining rows, a mean class-1 probability
            strictly above this value gives "Likely CKD risk".

    Returns:
        Tuple ``(results, mean_probs, entropies)``: a list of label strings,
        the mean probabilities as a NumPy array ``(rows, classes)``, and the
        per-row entropies as a NumPy array.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    dropout_types = (
        torch.nn.Dropout,
        torch.nn.Dropout2d,
        torch.nn.Dropout3d,
        torch.nn.AlphaDropout,
    )
    was_training = model.training

    # Enable dropout during inference, but only dropout: every other layer is
    # kept in eval mode so sampling cannot change layers whose behaviour
    # depends on training mode (e.g. batch-norm running statistics).
    model.eval()
    for module in model.modules():
        if isinstance(module, dropout_types):
            module.train()

    try:
        with torch.no_grad():
            preds = np.stack([
                F.softmax(model(xb), dim=-1).cpu().numpy()
                for _ in range(n_samples)
            ])
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    mean_probs = preds.mean(axis=0)
    # Entropy of the mean distribution; 1e-12 keeps log() finite at p == 0.
    entropies = -(mean_probs * np.log(mean_probs + 1e-12)).sum(axis=1)

    results = [
        "Uncertain" if e > entropy_threshold
        else "Likely CKD risk" if p[1] > ckd_threshold
        else "Likely no CKD risk"
        for p, e in zip(mean_probs, entropies)
    ]

    return results, mean_probs, entropies


# -----------------------------
# 4. Example usage
# -----------------------------
# Training data
# Seed torch's global RNG so that weight initialisation, batch shuffling and
# the dropout masks are repeatable on a fresh "Restart & Run All".
torch.manual_seed(42)

train_features = train_df.drop("ckd_status", axis=1)
X_train = torch.tensor(train_features.values, dtype=torch.float32)
y_train = torch.tensor(train_df["ckd_status"].values, dtype=torch.long)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)

model = BayesianNN(input_dim=X_train.shape[1])
train_bnn(model, train_loader)

# Test data
# Select the training feature columns by name so the test matrix has the same
# column order as X_train even if the two CSVs order their columns differently.
X_test = torch.tensor(test_df[list(train_features.columns)].values, dtype=torch.float32)
results, mean_probs, entropies = predict_with_uncertainty(model, X_test)

# Build dataframe
df_results = pd.DataFrame({
    "prediction": results,
    "prob_no_ckd": mean_probs[:, 0],
    "prob_ckd": mean_probs[:, 1],
    "entropy": entropies,
    "actual": test_df["ckd_status"].values
})

Epoch 0, Loss: 0.6305
Epoch 10, Loss: 0.3842
Epoch 20, Loss: 0.3348
Epoch 30, Loss: 0.3250
Epoch 40, Loss: 0.3004
Epoch 50, Loss: 0.2809
Epoch 60, Loss: 0.2763
Epoch 70, Loss: 0.2586
Epoch 80, Loss: 0.2360
Epoch 90, Loss: 0.2327
Epoch 100, Loss: 0.2286
Epoch 110, Loss: 0.2134
Epoch 120, Loss: 0.2094
Epoch 130, Loss: 0.2254
Epoch 140, Loss: 0.1988
Epoch 150, Loss: 0.1910
Epoch 160, Loss: 0.2056
Epoch 170, Loss: 0.1958
Epoch 180, Loss: 0.1733
Epoch 190, Loss: 0.1737
Epoch 200, Loss: 0.1850
Epoch 210, Loss: 0.1752
Epoch 220, Loss: 0.1879
Epoch 230, Loss: 0.1902
Epoch 240, Loss: 0.1723
Epoch 250, Loss: 0.2063
Epoch 260, Loss: 0.1557
Epoch 270, Loss: 0.1681
Epoch 280, Loss: 0.1582
Epoch 290, Loss: 0.1450


In [45]:
# Display the per-sample prediction table built in the previous cell.
df_results

Unnamed: 0,prediction,prob_no_ckd,prob_ckd,entropy,actual
0,Likely no CKD risk,0.999173,0.000827,0.006697,0
1,Uncertain,0.755451,0.244549,0.556267,1
2,Uncertain,0.616688,0.383312,0.665662,1
3,Uncertain,0.473994,0.526006,0.691794,1
4,Uncertain,0.385877,0.614123,0.666868,1
...,...,...,...,...,...
75,Uncertain,0.459746,0.540254,0.689903,1
76,Uncertain,0.502842,0.497158,0.693131,0
77,Uncertain,0.791668,0.208332,0.511738,0
78,Likely no CKD risk,0.923432,0.076568,0.270307,1


In [46]:
# Among the rows labelled "Likely no CKD risk", the fraction whose actual label is 0.
lnr = df_results.loc[df_results["prediction"].eq("Likely no CKD risk")]
len(lnr.loc[lnr["actual"].eq(0)]) / len(lnr)

0.4864864864864865

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd

# -----------------------------
# 1. Define Bayesian NN (MC Dropout)
# -----------------------------
class BayesianNN(nn.Module):
    """MLP with two hidden layers, ReLU activations and a shared dropout layer.

    The network outputs two logits per sample. Dropout follows each hidden
    activation, so forward passes are stochastic whenever the dropout layer
    is in training mode.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        dropout_p: Drop probability used by the dropout layer.
    """

    def __init__(self, input_dim, hidden_dim=64, dropout_p=0.5):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=2)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Map a feature batch ``x`` to raw two-class logits."""
        first_hidden = self.dropout(F.relu(self.fc1(x)))
        second_hidden = self.dropout(F.relu(self.fc2(first_hidden)))
        return self.fc3(second_hidden)

# -----------------------------
# 2. Training loop
# -----------------------------
def train_bnn(model, train_loader, epochs=350, lr=1e-3):
    """Fit ``model`` with Adam on cross-entropy loss and print the loss periodically.

    Args:
        model: ``nn.Module`` producing class logits for a feature batch.
        train_loader: Re-iterable object yielding ``(features, labels)``
            batches, iterated once per epoch. ``__len__`` is not required.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.

    Returns:
        None. Every 10th epoch (starting at 0) the mean per-batch loss is
        printed. ``model`` is left in training mode.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        # Batches are counted as they are consumed rather than via
        # len(train_loader), so length-less loaders work and an empty loader
        # cannot cause a division by zero.
        n_batches = 0
        for xb, yb in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_batches += 1
        if epoch % 10 == 0:
            mean_loss = total_loss / n_batches if n_batches else float("nan")
            print(f"Epoch {epoch}, Loss: {mean_loss:.4f}")

# -----------------------------
# 3. Predict with ensemble + MC dropout
# -----------------------------
def predict_ensemble(models, xb, n_samples=50):
    """Average MC-dropout predictions over an ensemble of models.

    Each model is run ``n_samples`` times on ``xb`` with its dropout layers
    active. All softmax outputs are averaged over models and samples, and
    the entropy of that mean distribution is returned as the uncertainty.

    Args:
        models: Iterable of trained models returning 2-D logits
            ``(rows, classes)``.
        xb: Input tensor accepted by every model.
        n_samples: Stochastic forward passes per model (must be >= 1).

    Returns:
        Tuple ``(mean_probs, entropies)`` of NumPy arrays: the mean
        probabilities ``(rows, classes)`` and the per-row entropy.

    Raises:
        ValueError: If ``models`` is empty or ``n_samples`` is smaller than 1.
    """
    models = list(models)
    if not models:
        raise ValueError("models must contain at least one model")
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    dropout_types = (nn.Dropout, nn.Dropout2d, nn.Dropout3d, nn.AlphaDropout)
    all_preds = []

    for model in models:
        was_training = model.training
        # Enable dropout only: everything else stays in eval mode so sampling
        # cannot change layers whose behaviour depends on training mode
        # (e.g. batch-norm running statistics).
        model.eval()
        for module in model.modules():
            if isinstance(module, dropout_types):
                module.train()
        try:
            with torch.no_grad():
                preds = np.stack([
                    F.softmax(model(xb), dim=-1).cpu().numpy()
                    for _ in range(n_samples)
                ])  # shape: [samples, batch, 2]
        finally:
            # Return each model in the mode the caller had it in.
            model.train(was_training)
        all_preds.append(preds)

    # Combine ensemble: shape -> [models, samples, batch, 2]
    all_preds = np.stack(all_preds)

    # Mean over models and MC samples
    mean_probs = all_preds.mean(axis=(0, 1))

    # Entropy of mean distribution; 1e-12 keeps log() finite at p == 0.
    entropies = -(mean_probs * np.log(mean_probs + 1e-12)).sum(axis=1)

    return mean_probs, entropies

def classify(mean_probs, entropies, entropy_threshold=0.5, ckd_threshold=0.3):
    """Turn mean probabilities and entropies into one text label per row.

    Args:
        mean_probs: Iterable of per-row class probabilities; index 1 is
            compared against ``ckd_threshold``.
        entropies: Iterable of per-row entropies, paired with ``mean_probs``.
        entropy_threshold: Entropy strictly above this gives "Uncertain".
        ckd_threshold: For the remaining rows, an index-1 probability
            strictly above this gives "Likely CKD risk".

    Returns:
        List of label strings, one per paired row.
    """
    def label_for(class_probs, entropy):
        # The uncertainty check takes precedence over the probability check.
        if entropy > entropy_threshold:
            return "Uncertain"
        if class_probs[1] > ckd_threshold:
            return "Likely CKD risk"
        return "Likely no CKD risk"

    return [label_for(p, e) for p, e in zip(mean_probs, entropies)]

# -----------------------------
# 4. Train ensemble of 10 models
# -----------------------------
train_features = train_df.drop("ckd_status", axis=1)
X_train = torch.tensor(train_features.values, dtype=torch.float32)
y_train = torch.tensor(train_df["ckd_status"].values, dtype=torch.long)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)

ensemble_size = 10
models = []

for i in range(ensemble_size):
    print(f"\nTraining model {i+1}/{ensemble_size}")
    # A fixed but distinct seed per member keeps the run repeatable on a
    # fresh kernel while still giving every member its own initialisation,
    # batch order and dropout masks.
    torch.manual_seed(42 + i)
    model = BayesianNN(input_dim=X_train.shape[1])
    train_bnn(model, train_loader)
    models.append(model)

# -----------------------------
# 5. Predict on test set
# -----------------------------
# Select the training feature columns by name so the test matrix has the same
# column order as X_train even if the two CSVs order their columns differently.
X_test = torch.tensor(test_df[list(train_features.columns)].values, dtype=torch.float32)

mean_probs, entropies = predict_ensemble(models, X_test)
results = classify(mean_probs, entropies)

df_results = pd.DataFrame({
    "prediction": results,
    "prob_no_ckd": mean_probs[:, 0],
    "prob_ckd": mean_probs[:, 1],
    "entropy": entropies,
    "actual": test_df["ckd_status"].values
})



Training model 1/10
Epoch 0, Loss: 0.6328
Epoch 10, Loss: 0.3775
Epoch 20, Loss: 0.3521
Epoch 30, Loss: 0.3323
Epoch 40, Loss: 0.3186
Epoch 50, Loss: 0.2978
Epoch 60, Loss: 0.2798
Epoch 70, Loss: 0.2755
Epoch 80, Loss: 0.2518
Epoch 90, Loss: 0.2360
Epoch 100, Loss: 0.2363
Epoch 110, Loss: 0.2301
Epoch 120, Loss: 0.2139
Epoch 130, Loss: 0.2132
Epoch 140, Loss: 0.2119
Epoch 150, Loss: 0.1871
Epoch 160, Loss: 0.2120
Epoch 170, Loss: 0.1967
Epoch 180, Loss: 0.1910
Epoch 190, Loss: 0.1755
Epoch 200, Loss: 0.1909
Epoch 210, Loss: 0.1996
Epoch 220, Loss: 0.1786
Epoch 230, Loss: 0.1681
Epoch 240, Loss: 0.1904

Training model 2/10
Epoch 0, Loss: 0.6337
Epoch 10, Loss: 0.3795
Epoch 20, Loss: 0.3455
Epoch 30, Loss: 0.3276
Epoch 40, Loss: 0.3140
Epoch 50, Loss: 0.3211
Epoch 60, Loss: 0.2749
Epoch 70, Loss: 0.2734
Epoch 80, Loss: 0.2573
Epoch 90, Loss: 0.2410
Epoch 100, Loss: 0.2602
Epoch 110, Loss: 0.2224
Epoch 120, Loss: 0.2121
Epoch 130, Loss: 0.2335
Epoch 140, Loss: 0.2174
Epoch 150, Loss: 0.1

In [48]:
# Share of the ensemble's "Likely no CKD risk" rows whose actual label is 0.
lnr = df_results.loc[df_results["prediction"].eq("Likely no CKD risk")]
len(lnr.loc[lnr["actual"].eq(0)]) / len(lnr)

0.4594594594594595