# ENSEMBLE OF IOC-NN --> **BOOSTING + GATING**

--> **SETUP**

Each IOC-NN is trained individually. The first
model is trained on the whole data, and the consecutive models are trained with
exaggerated data on the samples on which the previous model performs poorly. For
bootstrapping, we use a simple re-weighting mechanism as in [10]. A gating network is then trained over the ensemble of IOC-NNs. The weights of the individual
networks are frozen while training the gating network.

-->**Training Boosted ensembles** 

The lower training accuracy of IOC-NNs makes them
suitable for boosting (while the training accuracy saturates in non-convex counterparts).
For bootstrapping, we use a simple re-weighting mechanism as in [10]. We train three
experts for each experiment. The gating network is a regular neural network, which is
a shallow version of the actual experts. We train an MLP with only one hidden layer,
a four-layer fully convolutional network, and a DenseNet with two dense-blocks as
the gate for the three respective architectures. We report the accuracy of the ensemble
trained in this fashion as well as the accuracy if we would have used an oracle instead
of the gating network.


In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision import models
from torch.utils.data import Dataset
from sklearn.neural_network import MLPClassifier
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Load and preprocess the CIFAR-10 dataset
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=ToTensor())

# Define the dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the number of individual models (IOC-NNs) in the ensemble
num_ensembles = 3

# Create a list to store the outputs of individual models
model_outputs = []

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:10<00:00, 15753414.23it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# # Define the MLP model architecture
# class MLP(nn.Module):
#     def __init__(self):
#         super(MLP, self).__init__()
#         self.flatten = nn.Flatten()
#         self.fc1 = nn.Linear(32 * 32 * 3, 128)
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(128, 64)
#         self.fc3 = nn.Linear(64, 10)
    
#     def forward(self, x):
#         x = self.flatten(x)
#         x = self.relu(self.fc1(x))
#         x = self.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [3]:
class MLP(nn.Module):
    def __init__(self, InputDim, OutputDim):
        super(MLP, self).__init__()
        self.InputDim = InputDim
        self.OutputDim = OutputDim
        self.Linear1 = nn.Linear(InputDim, 800)
        self.Linear2 = nn.Linear(800, 800)
        self.Linear3 = nn.Linear(800, 800)
        self.Linear4 = nn.Linear(800, self.OutputDim)
        self.ActFunc = nn.functional.relu
        self.batch = nn.BatchNorm1d(800)
        # self.SftMax = nn.functional.softmax

    def forward(self, x):
        x = self.ActFunc(self.batch(self.Linear1(x)))
        x = self.ActFunc(self.batch(self.Linear2(x)))
        x = self.ActFunc(self.batch(self.Linear3(x)))

        output = self.Linear4(x)
        
        return output
    
    # def InitWeights(self):
    #     torch.nn.init.uniform_(self.Linear1.weight,-0.5, 0.5)
    #     torch.nn.init.uniform_(self.Linear2.weight,-0.5, 0.5)

In [4]:
# Create an instance of the MLP model
normal_MLP_model = MLP(3072, 10)

# Define the loss function and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(normal_MLP_model.parameters(), lr=0.0001)

In [None]:
def train(model,loss_func,optimizer):
  # Training loop
  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      for images, labels in train_dataloader:
          images = images.reshape(images.shape[0],-1)

          # Zero the gradients
          optimizer.zero_grad()

          # Forward pass
          outputs = model(images)
          loss = loss_func(outputs, labels)

          # Backward pass and optimization
          loss.backward()
          optimizer.step()

      # Print the loss after each epoch
      print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

In [None]:
def evaluate(model):
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
      for images, labels in test_dataloader:
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  # Calculate the accuracy
  accuracy = correct / total
  print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

In [None]:
train(normal_MLP_model,loss_func,optimizer)

In [None]:
evaluate(normal_MLP_model)


Test Accuracy: 45.08%


In [5]:
class WeightedDataset(Dataset):
    def __init__(self, data, targets, weights):
        self.data = data
        self.targets = targets
        self.weights = weights

    def __getitem__(self, index):
        image, target, weight = self.data[index], self.targets[index], self.weights[index]
        return image, target, weight

    def __len__(self):
        return len(self.data)

In [6]:
def trainEnsemble(model,iocnn = False):
  # Create a list to store the outputs of each model
  model_outputs = []

  # Create lists to store the accuracy and loss of each model
  model_accuracies = []
  model_losses = []
  ensembles = []

  # Define the number of ensemble models
  num_models = 3

  # Initialize the sample weights for AdaBoost
  sample_weights = np.ones(len(train_dataset)) / len(train_dataset)
  model_errors = []
  # final_sample_wts = [[]]
  final_outputs = []

  # Train the ensemble models
  for model_num in range(num_models):
      print("\nTraining model\t",model_num+1)

      # Create an instance of the MLP model
      # model = MLP(3072,10).to(device)
      # model = MLP(3072,10)
      # Define the loss function and optimizer
      loss_func = nn.CrossEntropyLoss()
      optimizer = optim.Adam(model.parameters(), lr=0.0001)

      
      final_acc = 0
      # Define the batch size
      batch_size = 32  # Replace with your desired batch size

      # Convert sample_weights list to a NumPy array
      sample_weights = np.array(sample_weights)
      # print(len(sample_weights))
      # Convert train_dataset.targets list to a NumPy array
      train_targets = np.array(train_dataset.targets)

      # Convert NumPy arrays to PyTorch tensors
      train_data_tensor = torch.from_numpy(train_dataset.data)
      train_targets_tensor = torch.from_numpy(train_targets)
      sample_weights_tensor = torch.from_numpy(sample_weights)

      # Move tensors to the desired device
      # train_data_tensor = train_data_tensor.to(device)
      # train_targets_tensor = train_targets_tensor.to(device)
      # sample_weights_tensor = sample_weights_tensor.to(device)

      # Reset the training dataset with updated sample weights
      train_dataset_weighted = WeightedDataset(train_data_tensor, train_targets_tensor, sample_weights_tensor)

      # print(len(train_dataset_weighted))

      # Create a new data loader with updated sample weights
      train_dataloader = torch.utils.data.DataLoader(train_dataset_weighted, batch_size=batch_size, shuffle=True)

      # print(len(train_dataloader))

      # Training loop
      num_epochs = 5
      for epoch in range(num_epochs):
          model.train()
          running_loss = 0.0
          correct = 0
          total = 0

          # Create a list to store the predictions of the current model
          model_predictions = []


          for images, labels, weights in train_dataloader:
              # Convert the input tensor to the same data type as the model's weight tensors
              # if reshape == True: 
              images = images.reshape(images.shape[0],-1)    
              
              images = images.to(device,dtype=torch.float)

              labels = labels.to(device)
              weights = weights.to(device)
              
              # Zero the gradients
              optimizer.zero_grad()

              # Forward pass
              outputs = model(images)
              # if reshape == False:
              #   outputs = outputs.reshape(outputs.shape[0],-1)
              loss = (weights * loss_func(outputs, labels)).mean()

              # Backward pass and optimization
              loss.backward()
              optimizer.step()

              # print(len(outputs.data))

              # Store the predictions of the current model
              _, predictions = torch.max(outputs.data, 1)
              model_predictions.extend(predictions.tolist())

              # Compute accuracy and update metrics
              running_loss += loss.item()
              total += labels.size(0)
              correct += (predictions == labels).sum().item()

              if iocnn == True:
                  
                for name, param in model.named_parameters():
                  # if "Linear1" in name or "bias" in name:
                  if "Linear1" in name or "bias" in name or "batch" in name:
                      # print(name)
                      continue
                      
              
                  tmpParam = param.data
                  NewTmpParam = torch.exp(tmpParam - 5)
                  tmpParam = torch.where(tmpParam<0, NewTmpParam, tmpParam)
                  param.data = tmpParam

          # Calculate accuracy and loss for the current epoch
          epoch_loss = running_loss / len(train_dataloader)
          epoch_accuracy = correct / total

          # Print epoch-level accuracy and loss
          print("Epoch:\t",epoch+1, "\t\tAccuracy:\t",f"{epoch_accuracy:.4f}")

      # Store the accuracy and loss of the current model
      model_accuracies.append(epoch_accuracy)
      model_losses.append(epoch_loss)
      weighted_error = np.sum(sample_weights * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
      model_errors.append(weighted_error)
      weights_update = np.log((1 - weighted_error) / (weighted_error + 1e-10)) / 2
      sample_weights *= np.exp(weights_update * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
      sample_weights /= (np.sum(sample_weights) + 1e-10)
      final_sample_wts = sample_weights

      # Store the predictions of the current model in the list of model outputs
      model_outputs.append(model_predictions)
      final_outputs.append(model_outputs)
      ensembles.append(model)

  return ensembles, model_outputs,model_predictions,model_accuracies,model_errors


In [7]:
model = MLP(3072,10).to(device)
ensemblesMLP, model_outputs,model_predictions,model_accuracies,model_errors = trainEnsemble(model)


Training model	 1
Epoch:	 1 		Accuracy:	 0.4295
Epoch:	 2 		Accuracy:	 0.5046
Epoch:	 3 		Accuracy:	 0.5487
Epoch:	 4 		Accuracy:	 0.5826
Epoch:	 5 		Accuracy:	 0.6116

Training model	 2
Epoch:	 1 		Accuracy:	 0.6350
Epoch:	 2 		Accuracy:	 0.6642
Epoch:	 3 		Accuracy:	 0.6887
Epoch:	 4 		Accuracy:	 0.7129
Epoch:	 5 		Accuracy:	 0.7351

Training model	 3
Epoch:	 1 		Accuracy:	 0.7580
Epoch:	 2 		Accuracy:	 0.7781
Epoch:	 3 		Accuracy:	 0.7988
Epoch:	 4 		Accuracy:	 0.8139
Epoch:	 5 		Accuracy:	 0.8263


In [8]:
# print(final_sample_wts)
print(len(model_outputs[2]))
print(model_errors)

50000
[0.899, 0.8999327999419794, 0.9023416223864285]


In [9]:
print(model_outputs[0])

[0, 8, 8, 4, 8, 0, 1, 5, 7, 9, 5, 7, 6, 8, 5, 1, 6, 8, 0, 4, 8, 3, 9, 3, 4, 6, 2, 1, 6, 3, 5, 2, 4, 6, 0, 2, 6, 6, 8, 8, 8, 1, 7, 6, 5, 4, 0, 9, 8, 3, 9, 7, 5, 0, 3, 2, 7, 6, 5, 3, 0, 7, 7, 2, 9, 0, 9, 3, 7, 8, 8, 5, 7, 0, 8, 1, 7, 0, 4, 9, 0, 6, 6, 8, 6, 2, 1, 2, 3, 8, 6, 9, 5, 2, 5, 2, 8, 9, 6, 0, 6, 6, 4, 5, 8, 2, 6, 0, 5, 8, 2, 5, 7, 1, 7, 3, 4, 5, 0, 1, 5, 6, 4, 6, 7, 5, 9, 2, 1, 3, 2, 0, 7, 0, 6, 6, 6, 1, 7, 9, 5, 8, 5, 2, 7, 5, 1, 8, 8, 0, 6, 7, 6, 8, 9, 2, 0, 9, 9, 0, 3, 8, 0, 7, 8, 2, 7, 8, 0, 3, 3, 0, 0, 2, 6, 8, 6, 2, 3, 5, 9, 3, 6, 7, 0, 1, 5, 5, 0, 9, 0, 7, 3, 8, 2, 6, 0, 1, 8, 3, 7, 7, 5, 2, 8, 7, 4, 1, 7, 9, 0, 8, 8, 0, 7, 6, 9, 4, 1, 5, 6, 7, 8, 3, 8, 2, 4, 4, 0, 1, 6, 6, 5, 7, 9, 7, 6, 6, 5, 6, 5, 1, 8, 2, 0, 3, 9, 1, 7, 6, 8, 1, 8, 3, 5, 4, 8, 0, 6, 4, 5, 0, 8, 0, 0, 7, 8, 9, 3, 8, 1, 6, 1, 3, 3, 1, 6, 7, 4, 6, 0, 5, 0, 1, 5, 4, 7, 6, 6, 3, 7, 0, 7, 4, 5, 9, 9, 8, 8, 8, 4, 2, 1, 2, 2, 1, 6, 4, 4, 0, 4, 8, 0, 6, 5, 1, 8, 8, 7, 7, 3, 8, 3, 9, 9, 1, 8, 5, 1, 7, 0, 1, 6, 

In [10]:
errors = np.array(model_errors)
res = []
for i in errors:
  res.append(0.5*np.log((1 - i)/i))
print(res)
total_accuracy = sum(model_accuracies)
normalized_accuracies = [accuracy / total_accuracy for accuracy in model_accuracies]
print(normalized_accuracies)

[-1.0930812588151806, -1.098239066464233, -1.111758870625194]
[0.28144357414378673, 0.33829742192605405, 0.38025900393015916]


In [11]:
def evaluate(test_dataloader,ensembles,res,model_outputs,iocnn = False):
  Y_test_cap = []
  for images,labels in test_dataloader:
    # if reshape == True:
    images = images.reshape(images.shape[0],-1)
    images = images.to(device,dtype=torch.float)
    labels = labels.to(device)  
    total_output = torch.zeros(images.shape[0], 10).to(device)
    for i in range(0,3):
      output = ensembles[i](images)
      # if reshape == False:
      #   output = output.reshape(reshape.shape[0],-1)
      #   total_output = total_output.reshape(images.shape[0],-1)
      # print(total_output.shape)
      # print(output.shape)
      total_output += output
    total_output /= 3
    loss = loss_func(total_output, labels)
    # Backward pass and optimization
    loss.backward()
    optimizer.step()
    Y_test_cap.extend((torch.max(total_output.data, 1)[1]).tolist())
  return Y_test_cap

In [12]:
Y_test_cap = evaluate(test_dataloader,ensemblesMLP,res,model_outputs)
Y_test_targets = []
for _, target in test_dataset:
    Y_test_targets.append(target)
print(len(Y_test_cap))
print(len(Y_test_targets))

10000
10000


In [13]:
# print(Y_test_targets)
# Y_test_cap = np.array(Y_test_cap)
# Y_test_targets = np.array(Y_test_targets)
# print("\nAccuracy = ",np.mean(Y_test_targets == Y_test_cap))

print(accuracy_score(Y_test_targets, Y_test_cap))

0.1355


In [14]:
class AllConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AllConvNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),

            nn.Conv2d(96, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),

            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, num_classes, kernel_size=1, stride=1, padding=0),
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return x


In [15]:
class MLP_IOCN(nn.Module):
    def __init__(self, InputDim, OutputDim):
        super(MLP_IOCN, self).__init__()
        # self.InputDim = InputDim
        self.OutputDim = OutputDim
        self.Linear1 = nn.Linear(InputDim, 800)
        self.Linear2 = nn.Linear(800, 800)
        self.Linear3 = nn.Linear(800, self.OutputDim)
        # self.Linear4 = nn.Linear(800, self.OutputDim)
        self.ActFunc = nn.functional.elu
        self.batch = nn.BatchNorm1d(800)
        self.SftMax = nn.functional.softmax

    def forward(self, x):
        x = self.ActFunc(self.batch(self.Linear1(x)))
        x = self.ActFunc(self.batch(self.Linear2(x)))
        # x = self.ActFunc(self.batch(self.Linear3(x)))

        # output = self.SftMax(self.Linear4(x))
        
        return self.SftMax(self.Linear3(x))
    
    # def InitWeights(self):
    #     torch.nn.init.uniform_(self.Linear1.weight,-0.5, 0.5)
    #     torch.nn.init.uniform_(self.Linear2.weight,-0.5, 0.5)

In [17]:
import warnings
warnings.filterwarnings("ignore")
model = MLP_IOCN(3072,10).to(device)
ensemblesIOCNN, model_outputs,model_predictions,model_accuracies,model_errors = trainEnsemble(model,True)


Training model	 1
Epoch:	 1 		Accuracy:	 0.3276
Epoch:	 2 		Accuracy:	 0.3906
Epoch:	 3 		Accuracy:	 0.4116
Epoch:	 4 		Accuracy:	 0.4241
Epoch:	 5 		Accuracy:	 0.4383

Training model	 2
Epoch:	 1 		Accuracy:	 0.4544
Epoch:	 2 		Accuracy:	 0.4621
Epoch:	 3 		Accuracy:	 0.4690
Epoch:	 4 		Accuracy:	 0.4763
Epoch:	 5 		Accuracy:	 0.4834

Training model	 3
Epoch:	 1 		Accuracy:	 0.4912
Epoch:	 2 		Accuracy:	 0.4971
Epoch:	 3 		Accuracy:	 0.5019
Epoch:	 4 		Accuracy:	 0.5054
Epoch:	 5 		Accuracy:	 0.5098


In [18]:
Y_test_cap = evaluate(test_dataloader,ensemblesIOCNN,res,model_outputs)
Y_test_targets = []
for _, target in test_dataset:
    Y_test_targets.append(target)
print(Y_test_cap)
print(Y_test_targets)
print(accuracy_score(Y_test_targets, Y_test_cap))

[9, 8, 7, 8, 5, 9, 9, 6, 5, 7, 4, 8, 9, 3, 7, 6, 8, 6, 8, 5, 8, 0, 3, 4, 1, 4, 4, 8, 9, 6, 6, 2, 5, 7, 8, 9, 2, 7, 0, 5, 4, 6, 6, 9, 1, 8, 9, 4, 6, 6, 7, 7, 9, 6, 7, 2, 9, 3, 2, 0, 8, 9, 3, 9, 4, 6, 7, 5, 4, 7, 8, 9, 2, 2, 8, 4, 7, 3, 3, 8, 2, 1, 6, 8, 5, 7, 3, 0, 8, 7, 2, 3, 8, 4, 6, 6, 9, 2, 1, 8, 0, 5, 3, 7, 9, 8, 4, 6, 9, 9, 8, 5, 8, 2, 8, 4, 5, 4, 3, 8, 8, 1, 6, 8, 8, 4, 4, 9, 1, 2, 8, 7, 4, 8, 8, 2, 8, 2, 6, 7, 2, 3, 8, 3, 1, 2, 3, 3, 5, 3, 4, 7, 3, 3, 9, 3, 5, 7, 9, 4, 5, 3, 6, 6, 5, 8, 7, 6, 8, 8, 2, 8, 2, 3, 6, 8, 4, 2, 7, 2, 8, 8, 9, 2, 8, 5, 6, 7, 1, 7, 8, 9, 1, 3, 4, 6, 8, 8, 8, 5, 7, 8, 4, 1, 4, 5, 3, 8, 8, 1, 7, 0, 5, 7, 7, 8, 8, 8, 0, 5, 7, 3, 7, 4, 7, 7, 8, 8, 8, 4, 9, 7, 3, 9, 8, 4, 6, 8, 8, 9, 9, 7, 2, 8, 7, 9, 8, 8, 6, 3, 2, 9, 9, 6, 6, 2, 3, 1, 7, 1, 3, 7, 7, 6, 2, 8, 8, 6, 8, 5, 2, 5, 3, 3, 8, 5, 8, 0, 7, 1, 7, 9, 1, 8, 8, 2, 9, 0, 2, 8, 5, 2, 6, 6, 4, 2, 9, 8, 5, 3, 9, 8, 6, 3, 4, 8, 2, 2, 7, 6, 7, 4, 2, 8, 4, 7, 4, 4, 9, 6, 5, 9, 9, 9, 9, 6, 4, 4, 7, 7, 9, 7, 4, 

In [19]:
# def train2(model):
#   # Create a list to store the outputs of each model
#   model_outputs = []
#   # Create lists to store the accuracy and loss of each model
#   model_accuracies = []
#   model_losses = []
#   ensembles = []
#   # Define the number of ensemble models
#   num_models = 3
#   # Initialize the sample weights for AdaBoost
#   sample_weights = np.ones(len(train_dataset)) / len(train_dataset)
#   model_errors = []
#   # final_sample_wts = [[]]
#   final_outputs = []
#   # Train the ensemble models
#   for model_num in range(num_models):
#       print("\nTraining model\t",model_num+1)
#       loss_func = nn.CrossEntropyLoss()
#       optimizer = optim.Adam(model.parameters(), lr=0.0001)
#       final_acc = 0
#       # Define the batch size
#       batch_size = 32  # Replace with your desired batch size
#       # Convert sample_weights list to a NumPy array
#       sample_weights = np.array(sample_weights)
#       # print(len(sample_weights))
#       # Convert train_dataset.targets list to a NumPy array
#       train_targets = np.array(train_dataset.targets)
#       # Convert NumPy arrays to PyTorch tensors
#       train_data_tensor = torch.from_numpy(train_dataset.data)
#       train_targets_tensor = torch.from_numpy(train_targets)
#       sample_weights_tensor = torch.from_numpy(sample_weights)
#       # Move tensors to the desired device
#       # train_data_tensor = train_data_tensor.to(device)
#       # train_targets_tensor = train_targets_tensor.to(device)
#       # sample_weights_tensor = sample_weights_tensor.to(device)
#       # Reset the training dataset with updated sample weights
#       train_dataset_weighted = WeightedDataset(train_data_tensor, train_targets_tensor, sample_weights_tensor)
#       # print(len(train_dataset_weighted))
#       # Create a new data loader with updated sample weights
#       train_dataloader = torch.utils.data.DataLoader(train_dataset_weighted, batch_size=batch_size, shuffle=True)
#       num_epochs = 5
#       for epoch in range(num_epochs):
#           model.train()
#           running_loss = 0.0
#           correct = 0
#           total = 0
#           # Create a list to store the predictions of the current model
#           model_predictions = []
#           for images, labels, weights in train_dataloader:
#               # Convert the input tensor to the same data type as the model's weight tensors
#               # if reshape == True: 
#               # images = images.reshape(images.shape[0],-1)    
#               images = images.to(device,dtype=torch.float)
#               labels = labels.to(device)
#               weights = weights.to(device)
#               # Zero the gradients
#               optimizer.zero_grad()
#               # Forward pass
#               outputs = model(images)
              
#               # if reshape == False:
#               outputs = outputs.reshape(outputs.shape[0],-1)
#               loss = (weights * loss_func(outputs, labels)).mean()
#               # Backward pass and optimization
#               loss.backward()
#               optimizer.step()
#                # Store the predictions of the current model
#               _, predictions = torch.max(outputs.data, 1)
#               model_predictions.extend(predictions.tolist())
#               # Compute accuracy and update metrics
#               running_loss += loss.item()
#               total += labels.size(0)
#               correct += (predictions == labels).sum().item()
#               if iocnn == True:
#                 for name, param in model.named_parameters():
#                   # if "Linear1" in name or "bias" in name:
#                   if "Linear1" in name or "bias" in name or "batch" in name:
#                       # print(name)
#                       continue
#                   tmpParam = param.data
#                   NewTmpParam = torch.exp(tmpParam - 5)
#                   tmpParam = torch.where(tmpParam<0, NewTmpParam, tmpParam)
#                   param.data = tmpParam
#           # Calculate accuracy and loss for the current epoch
#           epoch_loss = running_loss / len(train_dataloader)
#           epoch_accuracy = correct / total
#           # Print epoch-level accuracy and loss
#           print("Epoch:\t",epoch+1, "\t\tAccuracy:\t",f"{epoch_accuracy:.4f}")
#       # Store the accuracy and loss of the current model
#       model_accuracies.append(epoch_accuracy)
#       model_losses.append(epoch_loss)
#       weighted_error = np.sum(sample_weights * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
#       model_errors.append(weighted_error)
#       weights_update = np.log((1 - weighted_error) / (weighted_error + 1e-10)) / 2
#       sample_weights *= np.exp(weights_update * np.not_equal(np.array(model_predictions), train_dataset_weighted.targets.numpy()))
#       sample_weights /= (np.sum(sample_weights) + 1e-10)
#       final_sample_wts = sample_weights
#       # Store the predictions of the current model in the list of model outputs
#       model_outputs.append(model_predictions)
#       final_outputs.append(model_outputs)
#       ensembles.append(model)
#   return ensembles, model_outputs,model_predictions,model_accuracies,model_errors
