<a href="https://colab.research.google.com/github/akhileshgiriboyina/IOCN/blob/main/ensemblemethods2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ENSEMBLE OF IOC-NN --> **BOOSTING + GATING**

--> **SETUP**

Each IOC-NN is trained individually. The first
model is trained on the whole data, and the consecutive models are trained with
exaggerated data on the samples on which the previous model performs poorly. For
bootstrapping, we use a simple re-weighting mechanism as in [10]. A gating network is then trained over the ensemble of IOC-NNs. The weights of the individual
networks are frozen while training the gating network.

-->**Training Boosted ensembles** 

The lower training accuracy of IOC-NNs makes them
suitable for boosting (while the training accuracy saturates in non-convex counterparts).
For bootstrapping, we use a simple re-weighting mechanism as in [10]. We train three
experts for each experiment. The gating network is a regular neural network, which is
a shallow version of the actual experts. We train an MLP with only one hidden layer,
a four-layer fully convolutional network, and a DenseNet with two dense-blocks as
the gate for the three respective architectures. We report the accuracy of the ensemble
trained in this fashion as well as the accuracy if we would have used an oracle instead
of the gating network.


In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision import models
from torch.utils.data import Dataset
from sklearn.neural_network import MLPClassifier
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load and preprocess the CIFAR-10 dataset
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=ToTensor())

# Define the dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the number of individual models (IOC-NNs) in the ensemble
num_ensembles = 3

# Create a list to store the outputs of individual models
model_outputs = []

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 93232434.35it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# # Define the MLP model architecture
# class MLP(nn.Module):
#     def __init__(self):
#         super(MLP, self).__init__()
#         self.flatten = nn.Flatten()
#         self.fc1 = nn.Linear(32 * 32 * 3, 128)
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(128, 64)
#         self.fc3 = nn.Linear(64, 10)
    
#     def forward(self, x):
#         x = self.flatten(x)
#         x = self.relu(self.fc1(x))
#         x = self.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [4]:
class MLP(nn.Module):
    def __init__(self, InputDim, OutputDim):
        super(MLP, self).__init__()
        self.InputDim = InputDim
        self.OutputDim = OutputDim
        self.Linear1 = nn.Linear(InputDim, 800)
        self.Linear2 = nn.Linear(800, 800)
        self.Linear3 = nn.Linear(800, 800)
        self.Linear4 = nn.Linear(800, self.OutputDim)
        self.ActFunc = nn.functional.relu
        self.batch = nn.BatchNorm1d(800)
        # self.SftMax = nn.functional.softmax

    def forward(self, x):
        x = self.ActFunc(self.batch(self.Linear1(x)))
        x = self.ActFunc(self.batch(self.Linear2(x)))
        x = self.ActFunc(self.batch(self.Linear3(x)))

        output = self.Linear4(x)
        
        return output
    
    # def InitWeights(self):
    #     torch.nn.init.uniform_(self.Linear1.weight,-0.5, 0.5)
    #     torch.nn.init.uniform_(self.Linear2.weight,-0.5, 0.5)

In [5]:
# Create an instance of the MLP model
normal_MLP_model = MLP(3072, 10)

# Define the loss function and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(normal_MLP_model.parameters(), lr=0.0001)

In [6]:
def train(model,loss_func,optimizer):
  # Training loop
  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      for images, labels in train_dataloader:
          images = images.reshape(images.shape[0],-1)

          # Zero the gradients
          optimizer.zero_grad()

          # Forward pass
          outputs = model(images)
          loss = loss_func(outputs, labels)

          # Backward pass and optimization
          loss.backward()
          optimizer.step()

      # Print the loss after each epoch
      print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

In [7]:
def evaluate(model):
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
      for images, labels in test_dataloader:
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  # Calculate the accuracy
  accuracy = correct / total
  print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

In [69]:
train(normal_MLP_model,loss_func,optimizer)

Epoch [1/10], Loss: 1.3432490825653076


KeyboardInterrupt: ignored

In [None]:
evaluate(normal_MLP_model)


Test Accuracy: 45.08%


In [8]:
class WeightedDataset(Dataset):
    def __init__(self, data, targets, weights):
        self.data = data
        self.targets = targets
        self.weights = weights

    def __getitem__(self, index):
        image, target, weight = self.data[index], self.targets[index], self.weights[index]
        return image, target, weight

    def __len__(self):
        return len(self.data)

In [9]:
# Create a list to store the outputs of each model
model_outputs = []

# Create lists to store the accuracy and loss of each model
model_accuracies = []
model_losses = []
ensembles = []

# Define the number of ensemble models
num_models = 3

# Initialize the sample weights for AdaBoost
sample_weights = np.ones(len(train_dataset)) / len(train_dataset)
model_errors = []
# final_sample_wts = [[]]
final_outputs = []

# Train the ensemble models
for model_num in range(num_models):
    print("\nTraining model\t",model_num+1)

    # Create an instance of the MLP model
    model = MLP(3072,10).to(device)
    # model = MLP(3072,10)
    # Define the loss function and optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    

    # Define the batch size
    batch_size = 32  # Replace with your desired batch size

    # Convert sample_weights list to a NumPy array
    sample_weights = np.array(sample_weights)
    # print(len(sample_weights))
    # Convert train_dataset.targets list to a NumPy array
    train_targets = np.array(train_dataset.targets)

    # Convert NumPy arrays to PyTorch tensors
    train_data_tensor = torch.from_numpy(train_dataset.data)
    train_targets_tensor = torch.from_numpy(train_targets)
    sample_weights_tensor = torch.from_numpy(sample_weights)

    # Move tensors to the desired device
    # train_data_tensor = train_data_tensor.to(device)
    # train_targets_tensor = train_targets_tensor.to(device)
    # sample_weights_tensor = sample_weights_tensor.to(device)

    # Reset the training dataset with updated sample weights
    train_dataset_weighted = WeightedDataset(train_data_tensor, train_targets_tensor, sample_weights_tensor)

    # print(len(train_dataset_weighted))

    # Create a new data loader with updated sample weights
    train_dataloader = torch.utils.data.DataLoader(train_dataset_weighted, batch_size=batch_size, shuffle=True)

    # print(len(train_dataloader))

    # Training loop
    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Create a list to store the predictions of the current model
        model_predictions = []


        for images, labels, weights in train_dataloader:
            # Convert the input tensor to the same data type as the model's weight tensors
            images = images.reshape(images.shape[0],-1)
            images = images.to(device,dtype=torch.float)

            labels = labels.to(device)
            weights = weights.to(device)
            
            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = (weights * loss_func(outputs, labels)).mean()

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # print(len(outputs.data))

            # Store the predictions of the current model
            _, predictions = torch.max(outputs.data, 1)
            model_predictions.extend(predictions.tolist())

            # Compute accuracy and update metrics
            running_loss += loss.item()
            total += labels.size(0)
            correct += (predictions == labels).sum().item()

        # Calculate accuracy and loss for the current epoch
        epoch_loss = running_loss / len(train_dataloader)
        epoch_accuracy = correct / total

        # Print epoch-level accuracy and loss
        print("Epoch:\t",epoch+1, "\t\tAccuracy:\t",f"{epoch_accuracy:.4f}")

    # Store the accuracy and loss of the current model
    model_accuracies.append(epoch_accuracy)
    model_losses.append(epoch_loss)

    # Update the weights based on the weighted error
    # print(model_predictions[6:])
    # print(train_dataset.targets[6:])
    # print(np.array(model_predictions).shape)
    # print(train_dataset_weighted.targets.numpy().shape)
    weighted_error = np.sum(sample_weights * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
    model_errors.append(weighted_error)
    weights_update = np.log((1 - weighted_error) / (weighted_error + 1e-10)) / 2
    sample_weights *= np.exp(weights_update * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
    sample_weights /= (np.sum(sample_weights) + 1e-10)
    final_sample_wts = sample_weights

    # Store the predictions of the current model in the list of model outputs
    model_outputs.append(model_predictions)
    final_outputs.append(model_outputs)
    ensembles.append(model)



Training model	 1
Epoch:	 1 		Accuracy:	 0.4297
Epoch:	 2 		Accuracy:	 0.5095
Epoch:	 3 		Accuracy:	 0.5480
Epoch:	 4 		Accuracy:	 0.5834


KeyboardInterrupt: ignored

In [28]:
# print(final_sample_wts)
print(len(model_outputs[2]))
print(model_errors)

50000
[0.89978, 0.8971047846955986, 0.8978786673431151]


In [1]:
print(model_outputs[0])

NameError: ignored

In [17]:
errors = np.array(model_errors)
res = []
for i in errors:
  res.append(0.5*np.log((1 - i)/i))
res

[-1.0973912597334377, -1.0827307644029056, -1.0869366513286594]

In [31]:
class GatingNetwork(nn.Module):
    def __init__(self, input_size, num_models):
        super(GatingNetwork, self).__init__()
        self.fc = nn.Linear(input_size, num_models)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc(x)
        x = self.softmax(x)
        return x

# Example usage
input_size = num_models * 2  # Assuming each model's prediction and error/significance are concatenated
num_models = 3  # Number of ensemble models
# gating_model = GatingNetwork(input_size, num_models)

In [34]:
significance_tensor = torch.tensor(res)
predictions_tensor = torch.tensor(model_outputs)
gating_output = torch.sign(torch.sum(significance_tensor.unsqueeze(1) * predictions_tensor, dim=0))

In [36]:
go_list = gating_output.tolist()
print(go_list)

[]


In [None]:
# Evaluate the ensemble on the testing data
ensemble_predictions = np.argmax(gating_network.predict(model_outputs), axis=1)
ensemble_accuracy = np.mean(ensemble_predictions == y_train)

print("Ensemble accuracy:", ensemble_accuracy)