In [None]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import os
import matplotlib.pyplot as plt
import torch.nn.functional as F
%matplotlib inline
import random
import math
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [None]:
import torchvision
import torchvision.transforms as transforms

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print('Device available now:', device)

Device available now: cuda


In [None]:
# Image preprocessing pipeline: tensor conversion, 28-pixel centre crop, then
# normalisation with mean 0.5 and std 0.5.
# NOTE(review): nothing else in the visible notebook uses this pipeline (the data
# below is tabular) — confirm whether it is still needed.
_image_steps = [
    transforms.ToTensor(),
    transforms.CenterCrop(28),
    transforms.Normalize((0.5), (0.5)),
]
image_transforms = transforms.Compose(_image_steps)

In [None]:
# Optimisation schedule: mini-batch size and number of passes over the training set.
batchsize = 32
epochs = 100

# Network shape: hidden-layer width and number of output classes.
fc_size = 128
no_classes = 2

# Image-style dimensions. They are handed to the model constructors, which accept
# but do not read them.
rowsize = colsize = 28
inchannels = 3

In [None]:
# Download the UCI Spambase table; read_csv is given explicit integer column
# names 0..57.
download_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
cols = np.arange(58)
df = pd.read_csv(download_url, names=cols)
pd.set_option("display.max.rows", None)

# Dense array view of the table: every column except the last is a feature.
data = df.to_numpy()
num = data.shape[0]
feat = len(data[0]) - 1

In [None]:
# Feature matrix (all columns but the last) and the label column.
# data[:, -1] is a view into `data`; take a real copy so that randomising the
# labels below does not silently overwrite the loaded dataset.
X = data[:, 0:feat]
label = data[:, -1].copy()

# Independent backups of the original features and labels. Plain assignment
# would only alias the same arrays, so the backup labels would be randomised too.
Xcopy = X.copy()
labelcopy = label.copy()

# print(label)
# Replace every label with a fair coin flip.
# NOTE(review): presumably deliberate (a random-label memorisation experiment, in
# line with the "Generalization Gap" cells below) — confirm. No seed is set for
# `random`, so the drawn labels differ between runs.
for i in range(len(label)):
  label[i] = random.choice([0,1])

# 80/20 split with a fixed split seed, then convert everything to float32 tensors.
X_train, X_test, y_train, y_test = train_test_split(X, label, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [None]:
# Sanity check: size of the training-label tensor produced by the 80/20 split.
print(y_train.shape)

torch.Size([3680])


In [None]:
class IOC_MLP(nn.Module):
  """Fully connected classifier with three ELU hidden layers.

  Every hidden linear layer is followed by ELU and then 1-D batch
  normalisation; the final linear layer produces `no_classes` raw scores.
  The input width is read from the notebook-level `feat`. `rowsize`,
  `colsize` and `inchannels` are accepted but never used.
  """

  def __init__(self,rowsize,colsize,fc_size,inchannels,no_classes):
    super().__init__()
    # Layers are created in the order fc1, bn1, fc2, bn2, fc3, bn3, fc4 so that
    # parameter order and random initialisation are unchanged.
    self.fc1 = nn.Linear(in_features=feat, out_features=fc_size)
    self.batchnorm1 = nn.BatchNorm1d(num_features=fc_size)
    self.fc2 = nn.Linear(in_features=fc_size, out_features=fc_size)
    self.batchnorm2 = nn.BatchNorm1d(num_features=fc_size)
    self.fc3 = nn.Linear(in_features=fc_size, out_features=fc_size)
    self.batchnorm3 = nn.BatchNorm1d(num_features=fc_size)
    self.fc4 = nn.Linear(in_features=fc_size, out_features=no_classes)


  def forward(self, x):
    """Reshape `x` to rows of `feat` values and return the per-class scores."""
    hidden = torch.flatten(x).view(-1, feat)
    hidden = self.batchnorm1(F.elu(self.fc1(hidden)))
    hidden = self.batchnorm2(F.elu(self.fc2(hidden)))
    hidden = self.batchnorm3(F.elu(self.fc3(hidden)))
    return self.fc4(hidden)

In [None]:
class NN(nn.Module):
  """Fully connected classifier with three ReLU hidden layers.

  Every hidden linear layer is followed by ReLU and then 1-D batch
  normalisation; the final linear layer produces `no_classes` raw scores.
  The input width is read from the notebook-level `feat`. `rowsize`,
  `colsize` and `inchannels` are accepted but never used.
  """

  def __init__(self,rowsize,colsize,fc_size,inchannels,no_classes):
    super().__init__()
    # Layers are created in the order fc1, bn1, fc2, bn2, fc3, bn3, fc4 so that
    # parameter order and random initialisation are unchanged.
    self.fc1 = nn.Linear(in_features=feat, out_features=fc_size)
    self.batchnorm1 = nn.BatchNorm1d(num_features=fc_size)
    self.fc2 = nn.Linear(in_features=fc_size, out_features=fc_size)
    self.batchnorm2 = nn.BatchNorm1d(num_features=fc_size)
    self.fc3 = nn.Linear(in_features=fc_size, out_features=fc_size)
    self.batchnorm3 = nn.BatchNorm1d(num_features=fc_size)
    self.fc4 = nn.Linear(in_features=fc_size, out_features=no_classes)


  def forward(self, x):
    """Reshape `x` to rows of `feat` values and return the per-class scores."""
    hidden = torch.flatten(x).view(-1, feat)
    hidden = self.batchnorm1(F.relu(self.fc1(hidden)))
    hidden = self.batchnorm2(F.relu(self.fc2(hidden)))
    hidden = self.batchnorm3(F.relu(self.fc3(hidden)))
    return self.fc4(hidden)

In [None]:
# One instance of each architecture, moved to the selected device.
# The IOC model is built first so random initialisation order is unchanged.
ioc_model = IOC_MLP(
    rowsize=rowsize,
    colsize=colsize,
    fc_size=fc_size,
    inchannels=inchannels,
    no_classes=no_classes,
).to(device)
nn_model = NN(
    rowsize=rowsize,
    colsize=colsize,
    fc_size=fc_size,
    inchannels=inchannels,
    no_classes=no_classes,
).to(device)


In [None]:
# ioc_optimizer = torch.optim.Adam(ioc_model.parameters(),lr= 0.0001)
# criterion = nn.CrossEntropyLoss().to(device)
# nn_optimizer = torch.optim.Adam(nn_model.parameters(),lr= 0.0001)

In [None]:
# Lookup from integer class id to its two-element one-hot vector.
a = torch.tensor([1.0, 0.0])
b = torch.tensor([0.0, 1.0])
m = {0: a, 1: b}

In [None]:
# train_ioc = []
# # train_nn = []
# test_ioc = 0
# test_nn = 0

In [None]:
# # def training(model,traindata,testdata)
# # print("Choose the weight Variation number, (1 for clipping negative weight to zero , 2 for taking absolute of weights , 3 for exponentiation of weights")
# # a = input()
# for i in range(epochs):
#   Loss = 0
#   num_correct=0
#   tot = 0
#   count=0
#   for d in range(int(len(X_train)/batchsize)):
#     # iterating through every batch
#     ls = np.arange(count,count+batchsize,1)
#     ls = torch.from_numpy(ls)
#     x_data = X_train[ls].to(device)
#     labels = y_train[ls].to(device)
#     ioc_optimizer.zero_grad()
#     outputs = ioc_model(x_data).to(device)
#     _, pred = torch.max(outputs.data, 1)
#     num_correct += (pred == labels).sum().item()
#     tot += labels.size(0)
#     onehot_labels = torch.zeros((batchsize,2))
#     for i in range(batchsize):
#       onehot_labels[i] = m[int(labels[i])]                                         
#     onehot_labels = onehot_labels.to(device)
#     loss = criterion(outputs, onehot_labels)
#     loss.backward()
#     ioc_optimizer.step()
#     Loss += loss
#     count+=batchsize
#     for w in ioc_model.parameters():
#       p = w
#       torch.where(w > 0, w, torch.exp(p))
#   train_ioc.append(num_correct/tot)
#   print("Train Accuracy for IOC: ",num_correct/tot)
#   print("Loss: ",loss.item())

In [None]:
# with torch.no_grad():
#     correct = 0
#     total = 0
#     count=0
#     for d in range(int(len(X_test)/batchsize)):
#         ls = np.arange(count,count+batchsize,1)
#         ls = torch.from_numpy(ls)
#         x_data = X_test[ls].to(device)
#         labels = y_test[ls].to(device)
#         outputs = ioc_model(x_data)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#         count+=batchsize
#     test_ioc = 100*correct/total
#     print("Test Accuracy on absolute weights For IOC Model : ",100 * correct / total)



Test Accuracy on absolute weights For IOC Model :  48.88392857142857


In [None]:
# # print("Generalization Gap for NN Model",(train_nn[len(train_nn)-1]*100)-test_nn)
# print("Generalization Gap for IOC Model",(train_ioc[len(train_ioc)-1]*100)-test_ioc)

Generalization Gap for NN Model 51.76145186335404
Generalization Gap for IOC Model 32.828027950310556


# Bagging


In [None]:
# Lookup from integer class id to its two-element one-hot vector.
a = torch.tensor([1.0, 0.0])
b = torch.tensor([0.0, 1.0])
m = {0: a, 1: b}

In [None]:
def train_IOC(model,ioc_optimizer,criterion):
  """Train `model` for `epochs` epochs and enforce positive parameters after each step.

  Reads the notebook globals `X_train`, `y_train`, `batchsize`, `epochs` and
  `device`, and appends one training accuracy per epoch to the global list
  `train_ioc`. Batches are taken in order, without shuffling; a trailing
  incomplete batch is skipped.

  Args:
    model: network to train; called on a batch of inputs and expected to
      return one score per class.
    ioc_optimizer: optimizer bound to `model`'s parameters.
    criterion: loss called as `criterion(outputs, onehot_targets)`.

  Returns:
    None.
  """
  n_batches = len(X_train) // batchsize
  model.train()
  for epoch in range(epochs):
    num_correct = 0
    tot = 0
    for batch_idx in range(n_batches):
      start = batch_idx * batchsize
      x_data = X_train[start:start + batchsize].to(device)
      labels = y_train[start:start + batchsize].to(device).long()

      ioc_optimizer.zero_grad()
      outputs = model(x_data)
      pred = outputs.detach().argmax(dim=1)
      num_correct += (pred == labels).sum().item()
      tot += labels.size(0)

      # One-hot targets built in a single vectorised call; the width follows the
      # model output rather than a hard-coded 2.
      onehot_labels = F.one_hot(labels, num_classes=outputs.size(1)).to(outputs.dtype)
      loss = criterion(outputs, onehot_labels)
      loss.backward()
      ioc_optimizer.step()

      # Weight constraint: every non-positive entry is replaced by exp(entry).
      # The original computed torch.where(...) and threw the result away, so
      # the constraint never took effect. Write it back in place, outside autograd.
      # NOTE(review): applied to every parameter, exactly as the original loop
      # iterates — confirm whether biases, BatchNorm parameters or the first
      # layer should be exempt.
      with torch.no_grad():
        for w in model.parameters():
          w.copy_(torch.where(w > 0, w, torch.exp(w)))

    train_ioc.append(num_correct/tot)

In [None]:
def train_NN(model,nn_optimizer,criterion):
  """Train the unconstrained baseline `model` for `epochs` epochs.

  Reads the notebook globals `X_train`, `y_train`, `batchsize`, `epochs` and
  `device`, and appends one training accuracy per epoch to the global list
  `train_nn`. Batches are taken in order, without shuffling; a trailing
  incomplete batch is skipped.

  Args:
    model: network to train; called on a batch of inputs and expected to
      return one score per class.
    nn_optimizer: optimizer bound to `model`'s parameters.
    criterion: loss called as `criterion(outputs, onehot_targets)`.

  Returns:
    None.
  """
  n_batches = len(X_train) // batchsize
  model.train()
  for epoch in range(epochs):
    num_correct = 0
    tot = 0
    for batch_idx in range(n_batches):
      start = batch_idx * batchsize
      x_data = X_train[start:start + batchsize].to(device)
      labels = y_train[start:start + batchsize].to(device).long()

      nn_optimizer.zero_grad()
      outputs = model(x_data)
      pred = outputs.detach().argmax(dim=1)
      num_correct += (pred == labels).sum().item()
      tot += labels.size(0)

      # One-hot targets built in a single vectorised call; the width follows the
      # model output rather than a hard-coded 2.
      onehot_labels = F.one_hot(labels, num_classes=outputs.size(1)).to(outputs.dtype)
      loss = criterion(outputs, onehot_labels)
      loss.backward()
      nn_optimizer.step()
      # The original ended each step with a torch.where(...) whose result was
      # discarded. It had no effect on the parameters, so it is removed here.

    train_nn.append(num_correct/tot)

In [None]:
# Per-epoch training accuracies; train_IOC and train_NN append to these lists.
train_ioc, train_nn = [], []
# Test-accuracy placeholders, initialised to 0.
test_ioc = test_nn = 0

In [None]:
def bagging_ioc(num_estimators):
  """Train `num_estimators` IOC_MLP models and report their majority-vote test accuracy.

  Each model is built fresh, trained with `train_IOC` (Adam, lr=0.0001,
  cross-entropy) and then evaluated on the global `X_test` / `y_test`. For
  every test sample the ensemble predicts 0 only when strictly more members
  vote 0 than vote non-zero; ties go to 1. The accuracy (percent) is printed
  and stored in the global `test_ioc`.

  NOTE(review): `train_IOC` reads the full global training set, so every
  member sees the same data and they differ only by random initialisation —
  no bootstrap resampling happens here. Confirm whether that is intended.

  Args:
    num_estimators: number of ensemble members; must be at least 1.

  Raises:
    ValueError: if `num_estimators` is smaller than 1.
  """
  # Without this the assignment below would create a local and leave the
  # notebook-level `test_ioc` at its initial value.
  global test_ioc
  if num_estimators < 1:
    raise ValueError("num_estimators must be at least 1")

  models = []
  for _ in range(num_estimators):
    ioc_model = IOC_MLP(rowsize,colsize,fc_size,inchannels,no_classes).to(device)
    ioc_optimizer = torch.optim.Adam(ioc_model.parameters(),lr= 0.0001)
    criterion = nn.CrossEntropyLoss().to(device)

    train_IOC(ioc_model,ioc_optimizer,criterion)

    models.append(ioc_model)

  # Evaluation mode makes BatchNorm use its running statistics instead of the
  # statistics of each test batch, and stops it updating them during testing.
  for member in models:
    member.eval()

  # Get voting of outputs from all estimators
  correct = 0
  total = 0
  with torch.no_grad():
    # Step through the whole test set, including the final partial batch.
    for start in range(0, len(X_test), batchsize):
      x_data = X_test[start:start + batchsize].to(device)
      labels = y_test[start:start + batchsize].to(device)

      member_preds = torch.stack([member(x_data).argmax(dim=1) for member in models])
      one_count = (member_preds != 0).sum(dim=0)
      # zero_count > one_count  <=>  len(models) > 2 * one_count, so predict 1 otherwise.
      fpredicted = (2 * one_count >= len(models)).to(labels.dtype)

      total += labels.size(0)
      correct += (fpredicted == labels).sum().item()

  test_ioc = 100*correct/total
  print("Test Accuracy For IOC Model : ",100 * correct / total)


In [None]:
def bagging_nn(num_estimators):
  """Train `num_estimators` NN models and report their majority-vote test accuracy.

  Each model is built fresh, trained with `train_NN` (Adam, lr=0.0001,
  cross-entropy) and then evaluated on the global `X_test` / `y_test`. For
  every test sample the ensemble predicts 0 only when strictly more members
  vote 0 than vote non-zero; ties go to 1. The accuracy (percent) is printed
  and stored in the global `test_nn`.

  NOTE(review): `train_NN` reads the full global training set, so every
  member sees the same data and they differ only by random initialisation —
  no bootstrap resampling happens here. Confirm whether that is intended.

  Args:
    num_estimators: number of ensemble members; must be at least 1.

  Raises:
    ValueError: if `num_estimators` is smaller than 1.
  """
  # Without this the assignment below would create a local and leave the
  # notebook-level `test_nn` at its initial value.
  global test_nn
  if num_estimators < 1:
    raise ValueError("num_estimators must be at least 1")

  models = []
  for _ in range(num_estimators):
    nn_model = NN(rowsize,colsize,fc_size,inchannels,no_classes).to(device)
    nn_optimizer = torch.optim.Adam(nn_model.parameters(),lr= 0.0001)
    criterion = nn.CrossEntropyLoss().to(device)

    # Train the model that was just created. The original passed the global
    # `ioc_model` here, while the optimizer was bound to `nn_model`, so every
    # ensemble member stayed at its random initialisation.
    train_NN(nn_model,nn_optimizer,criterion)

    models.append(nn_model)

  # Evaluation mode makes BatchNorm use its running statistics instead of the
  # statistics of each test batch, and stops it updating them during testing.
  for member in models:
    member.eval()

  # Get voting of outputs from all estimators
  correct = 0
  total = 0
  with torch.no_grad():
    # Step through the whole test set, including the final partial batch.
    for start in range(0, len(X_test), batchsize):
      x_data = X_test[start:start + batchsize].to(device)
      labels = y_test[start:start + batchsize].to(device)

      member_preds = torch.stack([member(x_data).argmax(dim=1) for member in models])
      one_count = (member_preds != 0).sum(dim=0)
      # zero_count > one_count  <=>  len(models) > 2 * one_count, so predict 1 otherwise.
      fpredicted = (2 * one_count >= len(models)).to(labels.dtype)

      total += labels.size(0)
      correct += (fpredicted == labels).sum().item()

  test_nn = 100*correct/total
  print("Test Accuracy For NN Model : ",100 * correct / total)


In [None]:
# Train a five-member IOC_MLP ensemble and print its majority-vote test accuracy.
bagging_ioc(5)

Test Accuracy For IOC Model :  53.544107142251142


In [None]:
# Lookup from integer class id to its two-element one-hot vector.
a = torch.tensor([1.0, 0.0])
b = torch.tensor([0.0, 1.0])
m = {0: a, 1: b}

In [None]:
# Train a five-member NN ensemble and print its majority-vote test accuracy.
bagging_nn(5)

Test Accuracy For NN Model :  52.566964285714285
