# SETUP

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
#Hyperparams
num_classes = 10
num_epochs = 5
batch_size = 72
learning_rate = 0.001

In [3]:
# Get gpu, mps or cpu device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")


Using cuda device


In [4]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize((128,128)),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize((128,128)),
            transforms.RandomCrop(120, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((128,128)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader


# dataset
train_loader, valid_loader = get_train_valid_loader(data_dir = './data',                                      batch_size = 64,
                       augment = True,random_seed = 123)

test_loader = get_test_loader(data_dir = './data',
                              batch_size = 72)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 98123110.84it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


# Creating Model

In [5]:
from torch.nn import Sequential, Linear, MSELoss

In [6]:
class Alex(nn.Module):
    def __init__(self, num_classes=10):
        super(Alex, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(1024, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [7]:
def train(num_classes, num_epochs, batch_size, lr, train_loader, valid_loader):
  model = Alex().to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr = lr)# train model

  total_step = len(train_loader)

  for epoch in range(num_epochs):
      for i, (images, labels) in enumerate(train_loader):
          # Move tensors to the configured device
          images = images.to(device)
          labels = labels.to(device)

          # Forward pass
          outputs = model(images)
          loss = criterion(outputs, labels)

          # Backward and optimize
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

      #print(epoch)
      print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

  # Validation
  with torch.no_grad():
      correct = 0
      total = 0
      for images, labels in valid_loader:
          images = images.to(device)
          labels = labels.to(device)
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
          del images, labels, outputs

      print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))
      return 100 * correct / total


# PSO

In [8]:
def objective_function(params, test):
    lr, batch_size = params
    num_epochs = 5
    return -train(10, num_epochs, batch_size, lr, train_loader, test) # Negative because PSO minimizes

In [9]:
def particle_swarm_optimization(num_dimensions, num_particles, max_iter, test, i_min = -10, i_max = 10, w = 0.5, c1 = 0.25, c2 = 0.75, bounds=None):
    particles = [{'position': np.random.uniform(0, 1, size=num_dimensions),
                  'velocity': np.random.uniform(-1, 1, size=num_dimensions),
                  'pbest': float('inf'),
                  'pbest_position': None} for _ in range(num_particles)]

    # init global best
    gbest_value = float('inf')
    gbest_position = None

    for _ in range(max_iter):
        for particle in particles:
            position = particle['position']
            velocity = particle['velocity']

            # Calculate the current value
            current_value = objective_function(position, test)

            # Update personal best
            if current_value < particle['pbest']:
                particle['pbest'] = current_value
                particle['pbest_position'] = position.copy()

            # Update global best
            if current_value < gbest_value:
                gbest_value = current_value
                gbest_position = position.copy()

            # Update particle's velocity and position
            for i in range(num_dimensions):
                r1, r2 = np.random.uniform(), np.random.uniform()
                velocity[i] = w * velocity[i] + c1*r1 * (particle['pbest_position'][i] - position[i]) + c2*r2 * (gbest_position[i] - position[i])
                position[i] += velocity[i]

                # legalize the values to the provided bounds
                if bounds is not None:
                    position[i] = np.clip(position[i], bounds[i][0], bounds[i][1])

    return gbest_position, gbest_value


In [10]:
bounds = [(0.0001, 0.001), (32, 64)] #lr, batch_size

In [None]:
best_hyperparameters, best_accuracy = particle_swarm_optimization(num_dimensions=2,
                                                                   num_particles=3,
                                                                   max_iter=50,
                                                                   test = test_loader,
                                                                   bounds=bounds
                                                                   )

print(f"Optimal Learning Rate: {best_hyperparameters[0]}, Optimal Batch Size: {int(best_hyperparameters[1])}")
print(f"Best Accuracy: {best_accuracy:.2f}%")


Epoch [1/5], Step [704/704], Loss: 2.4085
Epoch [2/5], Step [704/704], Loss: 2.3377
Epoch [3/5], Step [704/704], Loss: 2.3039
Epoch [4/5], Step [704/704], Loss: 2.2441
Epoch [5/5], Step [704/704], Loss: 2.3700
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [1/5], Step [704/704], Loss: 2.3757
Epoch [2/5], Step [704/704], Loss: 2.3450
Epoch [3/5], Step [704/704], Loss: 2.3338
Epoch [4/5], Step [704/704], Loss: 2.6485
Epoch [5/5], Step [704/704], Loss: 2.5264
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [1/5], Step [704/704], Loss: 2.3940
Epoch [2/5], Step [704/704], Loss: 2.4440
Epoch [3/5], Step [704/704], Loss: 2.3890
Epoch [4/5], Step [704/704], Loss: 2.4197
Epoch [5/5], Step [704/704], Loss: 2.4149
Accuracy of the network on the 5000 validation images: 10.0 %
Epoch [1/5], Step [704/704], Loss: 1.4109
Epoch [2/5], Step [704/704], Loss: 1.1413
Epoch [3/5], Step [704/704], Loss: 1.1753
Epoch [4/5], Step [704/704], Loss: 1.4465
Epoch [5/5], Ste

In [None]:
def evaluate_accuracy(params):
  lr, batch_size = params

  model = model().to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr = lr)# train model

  total_step = len(train_loader)

  for epoch in range(num_epochs):
      for i, (images, labels) in enumerate(train_loader):
          # Move tensors to the configured device
          images = images.to(device)
          labels = labels.to(device)

          # Forward pass
          outputs = model(images)
          loss = criterion(outputs, labels)

          # Backward and optimize
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

      print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

      # Validation
      with torch.no_grad():
          correct = 0
          total = 0
          for images, labels in valid_loader:
              images = images.to(device)
              labels = labels.to(device)
              outputs = model(images)
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()
              del images, labels, outputs

          print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))
          return 100 * correct / total

