#Challenge 1

##Method 2

###Train and Test functions

In [1]:
import torch.nn as nn
def train(model, device, train_loader, optimizer, epoch, display=True):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
    if display:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))
 
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += nn.functional.cross_entropy(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

#MODEL 1

###Model 1 with Relu Activation( 3 convolutional layers and 2 fully connected layers)

In [None]:
#mycode
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)
        self.relu4 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)

        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)

        x = self.relu2(x)

        x = self.pool2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.dropout1(x)

        x = self.relu3(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.relu4(x)

        x = self.fc2(x)

        return x



####Training and Testing on Model 1 with no enhancements

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset

  
from torchvision import datasets, transforms
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))

transform_val = transforms.Compose([transforms.ToTensor(), normalize]) #careful to keep this one same
transform_train = transforms.Compose([transforms.ToTensor(), normalize]) 

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

##### Cifar Data
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)
    
#We need two copies of this due to weird dataset api 
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)
    

accs = []

for seed in range(25):
  prng = RandomState(seed)
  random_permute = prng.permutation(np.arange(0, 1000))
  classes =  prng.permutation(np.arange(0,10))
  indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
  indx_val = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])


  train_data = Subset(cifar_data, indx_train)
  val_data = Subset(cifar_data_val, indx_val)

  print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))
  
  train_loader = torch.utils.data.DataLoader(train_data,
                                             batch_size=128, 
                                             shuffle=True)

  val_loader = torch.utils.data.DataLoader(val_data,
                                           batch_size=128, 
                                           shuffle=False)
  

  model = Net()
  model.to(device)
  optimizer = torch.optim.SGD(model.parameters(),lr=0.01, momentum=0.9,
                              weight_decay=0.0005)
  for epoch in range(100):
    train(model, device, train_loader, optimizer, epoch, display=epoch%5==0)
    
  accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over 25 instances: %.2f +- %.2f'%(accs.mean(),accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.1371, Accuracy: 324/400 (81.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.4942, Accuracy: 259/400 (64.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.6266, Accuracy: 310/400 (77.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.1889, Accuracy: 337/400 (84.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9293, Accuracy: 292/400 (73.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2.6803, Accuracy: 263/400 (65.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.0965, Accuracy: 348/400 (87.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7138, Accuracy: 327/400 (81.75%)

Num Samples For Trai

###Ensemble learning, Data augumentation, learning rate schduelling with SGD optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5475, Accuracy: 294/400 (73.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7004, Accuracy: 243/400 (60.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7242, Accuracy: 239/400 (59.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7059, Accuracy: 218/400 (54.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7607, Accuracy: 254/400 (63.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.8124, Accuracy: 227/400 (56.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7256, Accuracy: 271/400 (67.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5466, Accuracy: 309/400 (77.25%)

Num Samples For Trai

####Traning and testing on Model 1 with ADADELTA optimizer and Relu activation

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(25):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3938, Accuracy: 333/400 (83.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5977, Accuracy: 278/400 (69.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5230, Accuracy: 310/400 (77.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3967, Accuracy: 332/400 (83.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6035, Accuracy: 285/400 (71.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6468, Accuracy: 275/400 (68.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3041, Accuracy: 350/400 (87.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3329, Accuracy: 348/400 (87.00%)

Num Samples For Trai


####Training and Testing on Model 1 with Adam optimizer with Relu activation

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5817, Accuracy: 299/400 (74.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6933, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7586, Accuracy: 213/400 (53.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7476, Accuracy: 202/400 (50.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.9050, Accuracy: 229/400 (57.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.2641, Accuracy: 184/400 (46.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6950, Accuracy: 199/400 (49.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.5089, Accuracy: 249/400 (62.25%)

Num Samples For Trai

### Model 1 with Mish activation(3 Convolutional layers and 2 fully connected layers)

In [None]:
#mycode
import torch
import torch.nn as nn

class NetMish(nn.Module):
    def __init__(self):
        super(NetMish, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.mish = nn.Mish()

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)
        self.relu4 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)

        x = self.mish(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)

        x = self.mish(x)

        x = self.pool2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.dropout1(x)

        x = self.mish(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.mish(x)

        x = self.fc2(x)

        return x



####Training and Testing on Model 1 with Mish Activation and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(25):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = NetMish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29725198.54it/s]


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400





Test set: Average loss: 0.3881, Accuracy: 338/400 (84.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5952, Accuracy: 287/400 (71.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5243, Accuracy: 309/400 (77.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3719, Accuracy: 346/400 (86.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6153, Accuracy: 291/400 (72.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6881, Accuracy: 268/400 (67.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3278, Accuracy: 353/400 (88.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3503, Accuracy: 344/400 (86.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4529, Accuracy: 325/400 (81.25%)

Num Samples For Training 50 Num Sam

####Training and Testing on Model 1 with Mish Activation and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = NetMish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 3396.5344, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 616.5764, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1241.6182, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 191.9719, Accuracy: 197/400 (49.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 949.0598, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2101.5803, Accuracy: 209/400 (52.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1883.2829, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 829.1774, Accuracy: 200/400 (50.00%)



###Model 1 with Silu activation( 3 Convolutional layers and 2 fully connected layers)

In [None]:
#mycode
import torch
import torch.nn as nn

class NetSilu(nn.Module):
    def __init__(self):
        super(NetSilu, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.silu = nn.SiLU()

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)

        x = self.silu(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)

        x = self.silu(x)

        x = self.pool2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.dropout1(x)

        x = self.silu(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.silu(x)

        x = self.fc2(x)

        return x



####Training and Testing on Model 1 with Silu activation and ADAM optimizer

In [None]:

from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms


# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(25):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = NetSilu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4333, Accuracy: 331/400 (82.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6471, Accuracy: 286/400 (71.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5153, Accuracy: 321/400 (80.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5329, Accuracy: 334/400 (83.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7256, Accuracy: 301/400 (75.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.8136, Accuracy: 285/400 (71.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2977, Accuracy: 352/400 (88.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4113, Accuracy: 346/400 (86.50%)

Num Samples For Trai

#####MODEL 1 performs better with the combination of silu activation and adam optimizer than silu activatino and Adadelta optimizer

####Training and testing on Model 1 with Silu activation and ADADELTA optimizer

In [None]:

from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms


# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(25):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = NetSilu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))


Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400





Test set: Average loss: 1.9204, Accuracy: 310/400 (77.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9807, Accuracy: 216/400 (54.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.8796, Accuracy: 219/400 (54.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9328, Accuracy: 197/400 (49.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2.0567, Accuracy: 224/400 (56.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9608, Accuracy: 234/400 (58.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9281, Accuracy: 261/400 (65.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9333, Accuracy: 209/400 (52.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.9700, Accuracy: 230/400 (57.50%)

Num Samples For Training 50 Num Sam

#MODEL 2

###Model 2 with Relu activation(2 Convolution layers and 2 fully connected layers)

In [None]:
import torch
import torch.nn as nn

class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()

        # Define the layers of the network
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)
        self.relu3 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        # Pass input through the layers of the network
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.relu3(x)

        x = self.fc2(x)

        return x


####Training and Testing on Model 2 with ADADELTA optimizer and relu activation

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net1()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4247, Accuracy: 326/400 (81.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6047, Accuracy: 270/400 (67.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5436, Accuracy: 297/400 (74.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3511, Accuracy: 342/400 (85.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6740, Accuracy: 292/400 (73.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6225, Accuracy: 275/400 (68.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3607, Accuracy: 340/400 (85.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4090, Accuracy: 340/400 (85.00%)

Num Samples For Trai

####Traing and Testing on Model 2 with ADAM optimizer and relu activation function.

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net1()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6654, Accuracy: 312/400 (78.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6903, Accuracy: 212/400 (53.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6881, Accuracy: 202/400 (50.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.8516, Accuracy: 221/400 (55.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6829, Accuracy: 223/400 (55.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 5.6308, Accuracy: 222/400 (55.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.3401, Accuracy: 197/400 (49.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6594, Accuracy: 234/400 (58.50%)

Num Samples For Trai

###Model 2 with Mish activation function(2 convolution layers and 2 fully connected layers

In [None]:
import torch
import torch.nn as nn

class Net1Mish(nn.Module):
    def __init__(self):
        super(Net1Mish, self).__init__()

        # Define the layers of the network
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.mish=nn.Mish()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        # Pass input through the layers of the network
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.mish(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.mish(x)
        x = self.pool2(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.mish(x)

        x = self.fc2(x)

        return x


####Training and Testing on Model 2 with Mish Activation function and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net1Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3959, Accuracy: 340/400 (85.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6071, Accuracy: 282/400 (70.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5528, Accuracy: 295/400 (73.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3440, Accuracy: 341/400 (85.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6692, Accuracy: 293/400 (73.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6957, Accuracy: 269/400 (67.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3497, Accuracy: 343/400 (85.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3593, Accuracy: 358/400 (89.50%)

Num Samples For Trai

####Training and Testing on Model 2 with Mish activation and Adam optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 5
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net1Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 56.2973, Accuracy: 309/400 (77.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.5246, Accuracy: 223/400 (55.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2.0930, Accuracy: 263/400 (65.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2.1609, Accuracy: 239/400 (59.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 19.2038, Accuracy: 264/400 (66.00%)

Acc over 5 instances: 64.90 +- 7.28


#Model 3

###Model 3 with Relu activation( 4 Convolutional layers and 2 fully connected layers)

In [None]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()

        # Define the layers of the network
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.relu4 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=2048, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)
        self.relu5 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        # Pass input through the layers of the network
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu3(x)
        x = self.pool2(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu4(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.relu5(x)

        x = self.fc2(x)

        return x


####Training and Testing on Model 3 with ADADELTA optimizer but using MultiStepLR learning scheduler

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models =25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4019, Accuracy: 329/400 (82.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5386, Accuracy: 287/400 (71.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5227, Accuracy: 303/400 (75.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4902, Accuracy: 337/400 (84.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6570, Accuracy: 296/400 (74.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6674, Accuracy: 287/400 (71.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3946, Accuracy: 333/400 (83.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3012, Accuracy: 357/400 (89.25%)

Num Samples For Trai

####Training and Testing on Model 3 with ADADELTA optimizer but using StepLR learning scheduler

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=7, gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4785, Accuracy: 328/400 (82.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6810, Accuracy: 251/400 (62.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6731, Accuracy: 264/400 (66.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5788, Accuracy: 299/400 (74.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6658, Accuracy: 262/400 (65.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6773, Accuracy: 261/400 (65.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4926, Accuracy: 314/400 (78.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4628, Accuracy: 325/400 (81.25%)

Num Samples For Trai

####Training and Testing on Model 3 with ADADELTA optimizer but using CosineAnnealingLR learning scheduler

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3566, Accuracy: 348/400 (87.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6307, Accuracy: 287/400 (71.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5720, Accuracy: 294/400 (73.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5111, Accuracy: 333/400 (83.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.8407, Accuracy: 283/400 (70.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.0208, Accuracy: 278/400 (69.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2692, Accuracy: 356/400 (89.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3177, Accuracy: 351/400 (87.75%)

Num Samples For Trai

####Training and Testing on Model 3 with relu activation and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.2279, Accuracy: 265/400 (66.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6940, Accuracy: 201/400 (50.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7832, Accuracy: 214/400 (53.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6934, Accuracy: 199/400 (49.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.9502, Accuracy: 212/400 (53.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 35.6296, Accuracy: 244/400 (61.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7584, Accuracy: 203/400 (50.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6808, Accuracy: 240/400 (60.00%)

Num Samples For Tra

###Model 3 with Mish activation(4 Convolutional layers and 2 fully connected layers)

In [None]:
class Net2Mish(nn.Module):
    def __init__(self):
        super(Net2Mish, self).__init__()

        # Define the layers of the network
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.mish=nn.Mish()

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=2048, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        # Pass input through the layers of the network
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.mish(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.mish(x)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.mish(x)
        x = self.pool2(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.mish(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.mish(x)

        x = self.fc2(x)

        return x


####Training and Testing on Model 3 with Mish activation funcition and ADADELTA optimizer


In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4130, Accuracy: 335/400 (83.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5958, Accuracy: 282/400 (70.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5197, Accuracy: 308/400 (77.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5578, Accuracy: 326/400 (81.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7329, Accuracy: 295/400 (73.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6859, Accuracy: 277/400 (69.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3457, Accuracy: 339/400 (84.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3634, Accuracy: 342/400 (85.50%)

Num Samples For Trai

####Training and Testing on Model 3 with Mish activation funcition and ADAM optimizer


In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.4539, Accuracy: 312/400 (78.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.6917, Accuracy: 264/400 (66.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 4.0553, Accuracy: 227/400 (56.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 3.4943, Accuracy: 210/400 (52.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 5.4746, Accuracy: 261/400 (65.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 67.2904, Accuracy: 228/400 (57.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 13.8919, Accuracy: 270/400 (67.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 5.4781, Accuracy: 302/400 (75.50%)

Num Samples For Tr

###Model 3 with Silu activation(4 Convolutional layers and 2 fully connected layers)

In [None]:
class Net2silu(nn.Module):
    def __init__(self):
        super(Net2silu, self).__init__()

        # Define the layers of the network
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.Silu=nn.SiLU()

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=2048, out_features=512)
        self.dropout1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        # Pass input through the layers of the network
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.Silu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.Silu(x)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.Silu(x)
        x = self.pool2(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.Silu(x)
        x = self.pool3(x)

        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.Silu(x)

        x = self.fc2(x)

        return x


####Training and Testing on Model 3 with SiLU activation and ADADELTA Optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2silu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3863, Accuracy: 334/400 (83.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6250, Accuracy: 281/400 (70.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5652, Accuracy: 310/400 (77.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3941, Accuracy: 338/400 (84.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6686, Accuracy: 297/400 (74.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.8231, Accuracy: 264/400 (66.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2839, Accuracy: 352/400 (88.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3351, Accuracy: 357/400 (89.25%)

Num Samples For Trai

####Training and Testing on Model 3 with SiLU activation and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net2silu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)


    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()


    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 28.7559, Accuracy: 262/400 (65.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.3476, Accuracy: 248/400 (62.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 3.2422, Accuracy: 247/400 (61.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7641, Accuracy: 246/400 (61.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 9.7574, Accuracy: 246/400 (61.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 37.6347, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.3624, Accuracy: 216/400 (54.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 2.8884, Accuracy: 306/400 (76.50%)

Num Samples For Tr

#MODEL 4

###Model 4 with Relu activation (6 Convolutional layers and 4 fully connected layers)

In [None]:
import torch.nn as nn

class Net4(nn.Module):
    def __init__(self):
        super(Net4, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(1024 * 4 * 4, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 2048)
        self.fc4 = nn.Linear(2048, 10)

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.pool(x)
        x = self.relu(self.conv5(x))
        x = self.relu(self.conv6(x))
        x = self.pool(x)
        x = x.view(-1, 1024 * 4 * 4)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.dropout(self.relu(self.fc3(x)))
        x = self.fc4(x)
        return x


####Training and Testing on Model 4 with Relu activation and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net4()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6973, Accuracy: 221/400 (55.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6989, Accuracy: 208/400 (52.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7027, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6975, Accuracy: 201/400 (50.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7002, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6987, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6943, Accuracy: 210/400 (52.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6907, Accuracy: 200/400 (50.00%)

Num Samples For Trai

####Training and Testing on Model 4 with Relu activation function and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net4()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1605318.8800, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 333917.0707, Accuracy: 199/400 (49.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 598912.6600, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 745.3771, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 716.9166, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 10028698.9600, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 986936.0200, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 11374.8113, Accuracy: 20

###Model 4 with Mish Activation Function(6 convolutional layers and 4 fully connected layers)

In [None]:
import torch.nn as nn

class Net4Mish(nn.Module):
    def __init__(self):
        super(Net4Mish, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(1024 * 4 * 4, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 2048)
        self.fc4 = nn.Linear(2048, 10)

        self.mish = nn.Mish()
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.mish(self.conv1(x))
        x = self.mish(self.conv2(x))
        x = self.pool(x)
        x = self.mish(self.conv3(x))
        x = self.mish(self.conv4(x))
        x = self.pool(x)
        x = self.mish(self.conv5(x))
        x = self.mish(self.conv6(x))
        x = self.pool(x)
        x = x.view(-1, 1024 * 4 * 4)
        x = self.dropout(self.mish(self.fc1(x)))
        x = self.dropout(self.mish(self.fc2(x)))
        x = self.dropout(self.mish(self.fc3(x)))
        x = self.fc4(x)
        return x


####Training and Testing on Model 4 with Mish activation and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net4Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7469, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7386, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7327, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7402, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7405, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7534, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7486, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7488, Accuracy: 200/400 (50.00%)

Num Samples For Trai

####Training and Testing on Model 4 with Mish Activation and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net4Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7030, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 3.3157, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 334201651.2000, Accuracy: 195/400 (48.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7048, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 258805539471.3600, Accuracy: 251/400 (62.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 112945346851.8400, Accuracy: 215/400 (53.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1516212.1200, Accuracy: 200/400 (50.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7834, Accuracy: 20

#MODEL 5

###Model 5 with Relu activation and batch normalization (6 Convolutional layers and 4 fully connected layers)

In [2]:
import torch.nn as nn

class Net5(nn.Module):
    def __init__(self):
        super(Net5, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(1024)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(1024)
        self.fc1 = nn.Linear(1024 * 4 * 4, 4096)
        self.bn_fc1 = nn.BatchNorm1d(4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.bn_fc2 = nn.BatchNorm1d(4096)
        self.fc3 = nn.Linear(4096, 2048)
        self.bn_fc3 = nn.BatchNorm1d(2048)
        self.fc4 = nn.Linear(2048, 10)

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.pool(x)
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.relu(self.bn6(self.conv6(x)))
        x = self.pool(x)
        x = x.view(-1, 1024 * 4 * 4)
        x = self.dropout(self.relu(self.bn_fc1(self.fc1(x))))
        x = self.dropout(self.relu(self.bn_fc2(self.fc2(x))))
        x = self.dropout(self.relu(self.bn_fc3(self.fc3(x))))
        x = self.fc4(x)
        return x


####Training and Testing on Model 5 with Relu activation function ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6061, Accuracy: 344/400 (86.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.3362, Accuracy: 260/400 (65.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.2076, Accuracy: 293/400 (73.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.7661, Accuracy: 336/400 (84.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.1792, Accuracy: 296/400 (74.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.2936, Accuracy: 286/400 (71.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4473, Accuracy: 348/400 (87.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5026, Accuracy: 350/400 (87.50%)

Num Samples For Trai

####Training and Testing on Model 5 with relu activation and ADAM optimizer

In [4]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms
import torch

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 76390697.25it/s] 


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400





Test set: Average loss: 1.2871, Accuracy: 311/400 (77.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.8071, Accuracy: 221/400 (55.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 4.9114, Accuracy: 241/400 (60.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.0987, Accuracy: 297/400 (74.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.7489, Accuracy: 251/400 (62.75%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 5.9624, Accuracy: 244/400 (61.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 1.5565, Accuracy: 306/400 (76.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6669, Accuracy: 333/400 (83.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 3.0127, Accuracy: 308/400 (77.00%)

Num Samples For Training 50 Num Sam

###Model 5 with Mish activation (6 convolutional layers and 4 fully connected layers with batch Normalization)

In [None]:
import torch.nn as nn

class Net5Mish(nn.Module):
    def __init__(self):
        super(Net5Mish, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(1024)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(1024)
        self.fc1 = nn.Linear(1024 * 4 * 4, 4096)
        self.bn_fc1 = nn.BatchNorm1d(4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.bn_fc2 = nn.BatchNorm1d(4096)
        self.fc3 = nn.Linear(4096, 2048)
        self.bn_fc3 = nn.BatchNorm1d(2048)
        self.fc4 = nn.Linear(2048, 10)

        self.mish = nn.Mish()
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.mish(self.bn1(self.conv1(x)))
        x = self.mish(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = self.mish(self.bn3(self.conv3(x)))
        x = self.mish(self.bn4(self.conv4(x)))
        x = self.pool(x)
        x = self.mish(self.bn5(self.conv5(x)))
        x = self.mish(self.bn6(self.conv6(x)))
        x = self.pool(x)
        x = x.view(-1, 1024 * 4 * 4)
        x = self.dropout(self.mish(self.bn_fc1(self.fc1(x))))
        x = self.dropout(self.mish(self.bn_fc2(self.fc2(x))))
        x = self.dropout(self.mish(self.bn_fc3(self.fc3(x))))
        x = self.fc4(x)
        return x


####Training and Testing on Model 5 with Mish activation function and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

####Training and Testing on Model 5 with Mish activation and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5Mish()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

###Model 5 on SiLU activation function

In [None]:
import torch.nn as nn

class Net5Silu(nn.Module):
    def __init__(self):
        super(Net5Silu, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(1024)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(1024)
        self.fc1 = nn.Linear(1024 * 4 * 4, 4096)
        self.bn_fc1 = nn.BatchNorm1d(4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.bn_fc2 = nn.BatchNorm1d(4096)
        self.fc3 = nn.Linear(4096, 2048)
        self.bn_fc3 = nn.BatchNorm1d(2048)
        self.fc4 = nn.Linear(2048, 10)

        self.silu = nn.SiLU()
        self.dropout = nn.Dropout(p=0.5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.silu(self.bn1(self.conv1(x)))
        x = self.silu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = self.silu(self.bn3(self.conv3(x)))
        x = self.silu(self.bn4(self.conv4(x)))
        x = self.pool(x)
        x = self.silu(self.bn5(self.conv5(x)))
        x = self.silu(self.bn6(self.conv6(x)))
        x = self.pool(x)
        x = x.view(-1, 1024 * 4 * 4)
        x = self.dropout(self.silu(self.bn_fc1(self.fc1(x))))
        x = self.dropout(self.silu(self.bn_fc2(self.fc2(x))))
        x = self.dropout(self.silu(self.bn_fc3(self.fc3(x))))
        x = self.fc4(x)
        return x


####Training on Model 5 with SiLU activation function and ADADELTA optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5Silu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))

####Training and testing on model 5 with SiLU activation function and ADAM optimizer

In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms

# Data augmentation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# CIFAR-10 data
cifar_train = datasets.CIFAR10(root='.', train=True, transform=train_transform, download=True)
cifar_val = datasets.CIFAR10(root='.', train=True, transform=val_transform, download=True)

# Ensemble learning
num_models = 25
accs = []

for seed in range(num_models):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 1000))
    classes = prng.permutation(np.arange(0, 10))
    indx_train = np.concatenate([np.where(np.array(cifar_train.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_val.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_train, indx_train)
    val_data = Subset(cifar_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)

    model = Net5Silu()
    model.to(device)

    # Learning rate scheduling
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 70, 90], gamma=0.1)

    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch, display=epoch % 5 == 0)
        scheduler.step()

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over %d instances: %.2f +- %.2f'%(num_models, accs.mean(), accs.std()))