In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

# import matplotlib.pyplot as plt
import numpy as np

# from scipy import stats
from tqdm import tqdm

# from matplotlib.animation import FuncAnimation
from deslib.des.meta_des import METADES
from sklearn.model_selection import train_test_split
from skorch import NeuralNetClassifier

In [2]:
# print the version of DESLIB
import deslib

print(deslib.__version__)

0.4.dev


In [3]:
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the transformation and load the MNIST dataset
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Load MNIST dataset
mnist_train = datasets.MNIST(
    root="./data", train=True, download=True, transform=transform
)
mnist_test = datasets.MNIST(
    root="./data", train=False, download=True, transform=transform
)

In [74]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)  # 10 output classes for MNIST digits

    def forward(self, x):
        if x.dim() > 2:
            x = x.view(-1, 28 * 28)  # Flatten the image
        x = x.float()  # Convert to float
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [75]:
# Prepare the data loaders
def prepare_loaders(batch_size=64):
    # Get indices of training samples based on the label
    train_indices_01 = [
        i for i, label in enumerate(mnist_train.targets) if label == 0 or label == 1
    ]
    train_indices_23 = [
        i for i, label in enumerate(mnist_train.targets) if label == 2 or label == 3
    ]
    train_indices_45 = [
        i for i, label in enumerate(mnist_train.targets) if label == 4 or label == 5
    ]
    train_indices_67 = [
        i for i, label in enumerate(mnist_train.targets) if label == 6 or label == 7
    ]
    train_indices_89 = [
        i for i, label in enumerate(mnist_train.targets) if label == 8 or label == 9
    ]

    # Create Subsets
    subset_train_01 = Subset(mnist_train, train_indices_01)
    subset_train_23 = Subset(mnist_train, train_indices_23)
    subset_train_45 = Subset(mnist_train, train_indices_45)
    subset_train_67 = Subset(mnist_train, train_indices_67)
    subset_train_89 = Subset(mnist_train, train_indices_89)

    # Create DataLoader for all subsets
    loader_train_01 = DataLoader(subset_train_01, batch_size=64, shuffle=True)
    loader_train_23 = DataLoader(subset_train_23, batch_size=64, shuffle=True)
    loader_train_45 = DataLoader(subset_train_45, batch_size=64, shuffle=True)
    loader_train_67 = DataLoader(subset_train_67, batch_size=64, shuffle=True)
    loader_train_89 = DataLoader(subset_train_89, batch_size=64, shuffle=True)

    return (
        loader_train_01,
        loader_train_23,
        loader_train_45,
        loader_train_67,
        loader_train_89,
    )


def prepare_des_loaders(batch_size=64):
    train_indices, val_indices = train_test_split(
        list(range(len(mnist_train))), test_size=0.2, random_state=42
    )

    train_indices_01 = [
        i
        for i in train_indices
        if mnist_train.targets[i] == 0 or mnist_train.targets[i] == 1
    ]
    train_indices_23 = [
        i
        for i in train_indices
        if mnist_train.targets[i] == 2 or mnist_train.targets[i] == 3
    ]
    train_indices_45 = [
        i
        for i in train_indices
        if mnist_train.targets[i] == 4 or mnist_train.targets[i] == 5
    ]
    train_indices_67 = [
        i
        for i in train_indices
        if mnist_train.targets[i] == 6 or mnist_train.targets[i] == 7
    ]
    train_indices_89 = [
        i
        for i in train_indices
        if mnist_train.targets[i] == 8 or mnist_train.targets[i] == 9
    ]

    subset_train_01 = Subset(mnist_train, train_indices_01)
    subset_train_23 = Subset(mnist_train, train_indices_23)
    subset_train_45 = Subset(mnist_train, train_indices_45)
    subset_train_67 = Subset(mnist_train, train_indices_67)
    subset_train_89 = Subset(mnist_train, train_indices_89)

    subset_val = Subset(mnist_train, val_indices)

    loader_train_01 = DataLoader(subset_train_01, batch_size=64, shuffle=True)
    loader_train_23 = DataLoader(subset_train_23, batch_size=64, shuffle=True)
    loader_train_45 = DataLoader(subset_train_45, batch_size=64, shuffle=True)
    loader_train_67 = DataLoader(subset_train_67, batch_size=64, shuffle=True)
    loader_train_89 = DataLoader(subset_train_89, batch_size=64, shuffle=True)

    loader_val = DataLoader(subset_val, batch_size=64, shuffle=True)

    return (
        loader_train_01,
        loader_train_23,
        loader_train_45,
        loader_train_67,
        loader_train_89,
        loader_val,
    )

In [76]:
loader_01, loader_23, loader_45, loader_67, loader_89 = prepare_loaders()

In [77]:
(
    loader_train_01,
    loader_train_23,
    loader_train_45,
    loader_train_67,
    loader_train_89,
    loader_val,
) = prepare_des_loaders()

In [78]:
test_loader = DataLoader(mnist_test, batch_size=64, shuffle=True)

In [81]:
def train_single_model(model, loaders, optimizer, criterion, epochs=5):
    model.train()
    for loader in tqdm(loaders):
        for epoch in range(epochs):
            for batch_idx, (data, target) in enumerate(loader):
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()


def train_skorch_model(model, loaders):
    for loader in loaders:
        dataset = loader.dataset.dataset
        indices = loader.dataset.indices

        # Ensure X is a float
        X = dataset.data[indices].float()

        # Ensure y is a long integer
        y = dataset.targets[indices].long()

        # partial_fit

        model.partial_fit(X, y)


def test_model(model, loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    return correct / len(loader.dataset)


def train_des_ensemble(models, loader):
    original_dataset = loader.dataset.dataset
    indices = loader.dataset.indices

    # Reshape X to be 2-dimensional and convert to float
    X = original_dataset.data[indices].float()
    X = X.view(X.size(0), -1)  # Flatten the images into vectors

    # Ensure y is a long integer
    y = original_dataset.targets[indices].long()

    # Initialize and fit the METADES ensemble
    des = METADES(models)
    des.fit(X, y.long())  # y is also converted to long if necessary

    return des

In [82]:
# Define the model, optimizer and loss function
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
train_single_model(
    model, [loader_01, loader_23, loader_45, loader_67, loader_89], optimizer, criterion
)

100%|██████████| 5/5 [01:41<00:00, 20.39s/it]


In [83]:
# test the model on the test set
test_model(model, test_loader)

0.1849

In [98]:
# define 5 classifiers for the DES ensemble
model_1 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)
model_2 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_3 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)
model_4 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)
model_5 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_6 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_7 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_8 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_9 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.001,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_10 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.01,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

model_11 = NeuralNetClassifier(
    Net,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=1,
    device=device,
    iterator_train__shuffle=False,
    batch_size=64,
    train_split=None,
)

models = [
    model_1,
    model_2,
    model_3,
    model_4,
    model_5,
    model_6,
    model_7,
    model_8,
    model_9,
    model_10,
    model_11,
]

# train the models
for model in models:
    train_skorch_model(
        model,
        [
            loader_train_01,
            loader_train_23,
            loader_train_45,
            loader_train_67,
            loader_train_89,
        ],
    )

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m0.1134[0m  0.4669
      2        [36m0.0081[0m  0.3605
      3        [36m0.0078[0m  0.4092
      4        [36m0.0005[0m  0.4187
      5        0.0024  0.4433
      6        0.6019  0.3978
      7        0.0633  0.4647
      8        0.0375  0.6283
      9        0.0247  0.5482
     10        0.0115  0.6464
     11        0.7939  0.5751
     12        0.0194  0.5962
     13        0.0099  0.6108
     14        0.0069  0.5482
     15        0.0046  0.5909
     16        3.4281  0.5956
     17        0.7024  0.6982
     18        0.0178  0.6821
     19        0.0042  0.6230
     20        0.0015  0.8138
     21        3.1160  0.6969
     22        2.0720  0.6284
     23        1.7628  0.8373
     24        1.5431  0.9602
     25        1.3808  0.7269
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m0.1096[0m  0.8085
      2        [36m0.0267[0m  0.8452
      3        

In [99]:
# predict on the test set with model_1
y_pred = model_1.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_1: ", accuracy)

y_pred = model_2.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_2: ", accuracy)

y_pred = model_3.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_3: ", accuracy)

y_pred = model_4.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_4: ", accuracy)

y_pred = model_5.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_5: ", accuracy)

y_pred = model_6.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_6: ", accuracy)

y_pred = model_7.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_7: ", accuracy)

y_pred = model_8.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of model_8: ", accuracy)

y_pred = model_9.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)

print("Accuracy of model_9: ", accuracy)

y_pred = model_10.predict(test_loader.dataset.data.float())

accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)

print("Accuracy of model_10: ", accuracy)

Accuracy of model_1:  0.0974
Accuracy of model_2:  0.1949
Accuracy of model_3:  0.1009
Accuracy of model_4:  0.1028
Accuracy of model_5:  0.1964
Accuracy of model_6:  0.0974
Accuracy of model_7:  0.1951
Accuracy of model_8:  0.195
Accuracy of model_9:  0.1963
Accuracy of model_10:  0.0974


In [100]:
# now lets use them as an ensemble
def predict_ensemble(models, loader):
    # get the pred_proba for each model and average, then take the argmax
    pred_proba = []
    for model in models:
        pred_proba.append(model.predict_proba(loader.dataset.data.float()))
    pred_proba = np.array(pred_proba)
    pred_proba = np.mean(pred_proba, axis=0)
    return np.argmax(pred_proba, axis=1)


y_pred = predict_ensemble(models, test_loader)
accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of vanilla ensemble: ", accuracy)

Accuracy of vanilla ensemble:  0.1956


In [101]:
# now lets try the DES
meta = METADES(models)
# case to double
X = loader_val.dataset.dataset.data.float()
X = X.view(X.size(0), -1).float()  # Flatten the images into vectors
y = loader_val.dataset.dataset.targets.long()
meta.fit(X, y)

In [102]:
# make predictions
X_test = test_loader.dataset.data.float()
X_test = X_test.view(X_test.size(0), -1).float()  # Flatten the images into vectors
y_pred = meta.predict(X_test)
accuracy = np.sum(y_pred == test_loader.dataset.targets.numpy()) / len(
    test_loader.dataset.targets.numpy()
)
print("Accuracy of DES: ", accuracy)

Accuracy of DES:  0.2928
