In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel

# --------------------------
# Basic MLP for Toy Dataset
# --------------------------
class MLP(nn.Module):
    """Two-layer perceptron: Linear(m -> h), ReLU, Linear(h -> n).

    Args:
        n: output width (number of classes).
        m: input feature count.
        h: hidden layer width.
    """

    def __init__(self, n=2, m=10, h=4):
        super().__init__()
        self.fc1 = nn.Linear(in_features=m, out_features=h)
        self.fc2 = nn.Linear(in_features=h, out_features=n)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.fc2(activated)
        return logits

# --------------------------
# BERT-based Classifier (Optional)
# --------------------------
class BERT(nn.Module):
    """Sequence classifier: a pretrained encoder plus a linear head on the first token.

    Args:
        model_name: identifier passed to ``AutoModel.from_pretrained``.
        num_classes: output width of the classification head.
    """

    def __init__(self, model_name, num_classes=2):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.classifier = nn.Linear(self.model.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return class logits computed from the first token's final hidden state.

        Args:
            input_ids: token id tensor handed to the encoder.
            attention_mask: mask tensor handed to the encoder.
        """
        # Pass by keyword: encoder forward() signatures differ between
        # architectures, so relying on positional order can bind the mask to
        # the wrong parameter.
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)
        pooled = output.last_hidden_state[:, 0, :]  # first position (CLS token for BERT-style encoders)
        return self.classifier(pooled)

# --------------------------
# Deep Ensemble Wrapper
# --------------------------
class DeepEnsemble(nn.Module):
    """Deep ensemble of independently initialised classifiers.

    Each member is trained on its own with a clean loss plus an FGSM-style
    adversarial loss. At inference time the spread of the members' softmax
    outputs is reported as an uncertainty signal.

    Args:
        num_models: number of ensemble members.
        model_factory: optional zero-argument callable returning a fresh
            ``nn.Module`` that maps a batch to class logits. Defaults to
            ``MLP(n=2, m=10, h=4)``.
    """

    def __init__(self, num_models, model_factory=None):
        super().__init__()
        build = model_factory if model_factory is not None else (lambda: MLP(n=2, m=10, h=4))
        # nn.ModuleList (rather than a plain list) registers the members as
        # sub-modules, so parameters(), state_dict() and .to(device) see them.
        self.models = nn.ModuleList([build() for _ in range(num_models)])

    def fit(self, train_data, val_data, epochs, lr=1e-4, epsilon=1e-3):
        """Train every member independently with clean + adversarial loss.

        Args:
            train_data: iterable of ``(x, y)`` batches; ``x`` is a floating
                point input tensor and ``y`` holds the class indices.
            val_data: iterable of ``(x, y)`` batches used only for logging;
                may be empty or ``None``.
            epochs: number of passes over ``train_data`` per member.
            lr: Adam learning rate.
            epsilon: FGSM step size used to build the adversarial batch.

        Returns:
            None. Prints one validation-loss line per member and epoch.
        """
        criterion = nn.CrossEntropyLoss()
        val_batches = () if val_data is None else val_data

        for model in self.models:
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)

            for epoch in range(epochs):
                # Validation below switches to eval mode, so train mode has to
                # be re-entered at the start of every epoch.
                model.train()
                for x, y in train_data:
                    # Work on a private copy so the caller's batch is never
                    # flagged requires_grad or left holding a stale .grad.
                    x_clean = x.detach().clone().requires_grad_(True)

                    # Standard loss
                    loss = criterion(model(x_clean), y)
                    # Input gradient only. retain_graph keeps the clean-loss
                    # graph alive for the combined backward pass below; a
                    # plain loss.backward() here would free it.
                    (grad_x,) = torch.autograd.grad(loss, x_clean, retain_graph=True)

                    # Adversarial loss (FGSM-style); the perturbed batch is
                    # detached so it acts as a constant input.
                    x_adv = (x_clean + epsilon * grad_x.sign()).detach()
                    loss_adv = criterion(model(x_adv), y)

                    # Total loss: a single backward pass over both terms.
                    optimizer.zero_grad()
                    total_loss = loss + loss_adv
                    total_loss.backward()
                    optimizer.step()

                # Optional: validation logging
                model.eval()
                val_loss = []
                with torch.no_grad():
                    for x, y in val_batches:
                        val_loss.append(criterion(model(x), y).item())
                if val_loss:
                    print(f"Val Loss: {sum(val_loss)/len(val_loss):.4f}")
                else:
                    # Avoid dividing by zero when there is nothing to validate on.
                    print("Val Loss: n/a (no validation batches)")

    def inference(self, x, return_aleatoric=False):
        """Run every member on ``x`` and summarise their softmax outputs.

        Members are switched to eval mode and evaluated without gradients.

        Args:
            x: input batch accepted by the members.
            return_aleatoric: when True, append the aleatoric estimate to the
                returned tuple.

        Returns:
            ``(predictions, mean, var)`` where ``predictions`` is the list of
            per-member softmax outputs, ``mean`` is their average and ``var``
            is their variance across members (epistemic uncertainty). With
            ``return_aleatoric=True`` a fourth element is appended: the
            member-averaged ``p * (1 - p)``.
        """
        predictions = []
        with torch.no_grad():
            for model in self.models:
                model.eval()
                predictions.append(torch.softmax(model(x), dim=-1))

        stack = torch.stack(predictions, dim=0)  # members stacked along a new leading axis
        mean = torch.mean(stack, dim=0)          # Ensemble mean
        # The unbiased estimator divides by (num_models - 1) and yields NaN for
        # a single member; fall back to the population variance in that case.
        var = torch.var(stack, dim=0, unbiased=stack.shape[0] > 1)  # Epistemic uncertainty

        if not return_aleatoric:
            return predictions, mean, var

        aleatoric = torch.mean(stack * (1 - stack), dim=0)  # Aleatoric uncertainty
        return predictions, mean, var, aleatoric


In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# === Generate dummy classification data ===
# Seed every random source (data generation, split, DataLoader shuffling,
# weight initialisation and the query point) so a fresh kernel reproduces the
# same run.
SEED = 0
torch.manual_seed(SEED)

X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=SEED)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# NumPy arrays -> tensors: float32 features and int64 class indices.
X_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# The held-out 20% split doubles as the validation set for logging.
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
val_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=64)

# === Train the ensemble ===
ensemble = DeepEnsemble(num_models=5)
ensemble.fit(train_loader, val_loader, epochs=3)

# === Inference on single example ===
x = torch.rand(1, 10)
_, mean, var = ensemble.inference(x)

print("Mean prediction:", mean)
print("Uncertainty (variance):", var)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel

# --- MLP for Regression with Uncertainty ---
class MLP(nn.Module):
    """Regression MLP with two heads: a predictive mean and a positive variance.

    Args:
        n: output width of each head.
        m: input feature count.
        h: hidden layer width.
    """

    def __init__(self, n=1, m=10, h=4):
        super().__init__()
        self.fc1 = nn.Linear(in_features=m, out_features=h)
        self.relu = nn.ReLU()
        self.fc_mean = nn.Linear(in_features=h, out_features=n)
        self.fc_var = nn.Linear(in_features=h, out_features=n)

    def forward(self, x):
        """Return ``(mu, var)`` for the batch ``x``."""
        hidden = self.relu(self.fc1(x))
        raw_var = self.fc_var(hidden)
        # softplus is positive; the 1e-6 offset keeps the variance away from zero.
        variance = F.softplus(raw_var) + 1e-6
        return self.fc_mean(hidden), variance

# --- Optional BERT Wrapper (Unused Here) ---
class BERT(nn.Module):
    """Sequence classifier: a pretrained encoder plus a linear head on the first token.

    Args:
        model_name: identifier passed to ``AutoModel.from_pretrained``.
        num_classes: output width of the classification head.
    """

    def __init__(self, model_name, num_classes=2):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.classifier = nn.Linear(self.model.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return class logits computed from the first token's final hidden state.

        Args:
            input_ids: token id tensor handed to the encoder.
            attention_mask: mask tensor handed to the encoder.
        """
        # Pass by keyword: encoder forward() signatures differ between
        # architectures, so relying on positional order can bind the mask to
        # the wrong parameter.
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)
        pooled = output.last_hidden_state[:, 0, :]  # first position of the sequence
        return self.classifier(pooled)

# --- Deep Ensemble for Regression with Uncertainty ---
class DeepEnsemble(nn.Module):
    """Deep ensemble of Gaussian-output regressors.

    Every member maps a batch to ``(mu, var)``. Members are trained
    independently with a Gaussian negative log-likelihood, and their outputs
    are combined into a mean prediction plus epistemic and aleatoric
    uncertainty estimates.

    Args:
        num_models: number of ensemble members.
        model_factory: optional zero-argument callable returning a fresh
            ``nn.Module`` whose forward pass returns ``(mu, var)``. Defaults
            to ``MLP(n=1, m=10, h=4)``.
    """

    def __init__(self, num_models, model_factory=None):
        super().__init__()
        build = model_factory if model_factory is not None else (lambda: MLP(n=1, m=10, h=4))
        # nn.ModuleList (rather than a plain list) registers the members as
        # sub-modules, so parameters(), state_dict() and .to(device) see them.
        self.models = nn.ModuleList([build() for _ in range(num_models)])

    @staticmethod
    def GaussianNLLLoss(mu, y, var):
        """Element-wise Gaussian negative log-likelihood, constant term omitted.

        Computes ``log(var) / 2 + (y - mu) ** 2 / (2 * var)`` with no
        reduction. ``fit`` does not call this helper; it uses
        ``nn.GaussianNLLLoss`` instead.
        """
        return torch.log(var) / 2 + ((y - mu) ** 2) / (2 * var)

    @staticmethod
    def _batch_nll(model, criterion, x, y):
        """Evaluate ``criterion`` for one ``(x, y)`` batch under ``model``."""
        mu, var = model(x)
        # Give the target the prediction's dtype and shape. A bare squeeze()
        # would also drop the batch axis when a batch holds a single sample.
        target = y.to(mu.dtype).reshape(mu.shape)
        return criterion(mu, target, var)

    def fit(self, train_data, val_data, epochs, lr=1e-4):
        """Train every member independently with a Gaussian NLL objective.

        Args:
            train_data: iterable of ``(x, y)`` batches; ``y`` must hold one
                target per predicted mean.
            val_data: iterable of ``(x, y)`` batches used only for logging;
                may be empty or ``None``.
            epochs: number of passes over ``train_data`` per member.
            lr: Adam learning rate.

        Returns:
            None. Prints one validation-loss line per member and epoch.
        """
        criterion = nn.GaussianNLLLoss(reduction='mean')
        val_batches = () if val_data is None else val_data

        for model_idx, model in enumerate(self.models):
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)

            for epoch in range(epochs):
                # Validation below switches to eval mode, so train mode has to
                # be re-entered at the start of every epoch.
                model.train()
                for x, y in train_data:
                    optimizer.zero_grad()
                    loss = self._batch_nll(model, criterion, x, y)
                    loss.backward()
                    optimizer.step()

                # Optional: val loss logging
                model.eval()
                val_loss = []
                with torch.no_grad():
                    for x, y in val_batches:
                        val_loss.append(self._batch_nll(model, criterion, x, y).item())
                if val_loss:
                    print(f"Model {model_idx+1} | Epoch {epoch+1} | Val Loss: {sum(val_loss)/len(val_loss):.4f}")
                else:
                    # Avoid dividing by zero when there is nothing to validate on.
                    print(f"Model {model_idx+1} | Epoch {epoch+1} | Val Loss: n/a (no validation batches)")

    def inference(self, x):
        """Run every member on ``x`` and combine their Gaussian outputs.

        Members are switched to eval mode and evaluated without gradients.

        Returns:
            ``(mean_pred, epistemic, aleatoric)``: the average of the members'
            means, the variance of those means across members, and the
            average of the members' predicted variances.
        """
        mus, vars_ = [], []
        with torch.no_grad():
            for model in self.models:
                model.eval()
                mu, var = model(x)
                mus.append(mu)
                vars_.append(var)

        mu_stack = torch.stack(mus)      # members stacked along a new leading axis
        var_stack = torch.stack(vars_)

        mean_pred = mu_stack.mean(dim=0)
        # The unbiased estimator divides by (num_models - 1) and yields NaN for
        # a single member; fall back to the population variance in that case.
        epistemic = mu_stack.var(dim=0, unbiased=mu_stack.shape[0] > 1)
        aleatoric = var_stack.mean(dim=0)

        return mean_pred, epistemic, aleatoric


In [None]:
# Train the five-member regression ensemble, then query one random 10-feature point.
# NOTE(review): train_loader / val_loader are the loaders built in the
# classification cell, so the targets here are 0/1 class labels — confirm this
# is the intended data for the Gaussian regression ensemble.
ensemble = DeepEnsemble(num_models=5)
ensemble.fit(train_data=train_loader, val_data=val_loader, epochs=3)
mean, epistemic, aleatoric = ensemble.inference(x=torch.rand(1, 10))