In [None]:
#tune own neural net for the work

In [1]:
import numpy as np
from math import gamma
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt


In [2]:
# Choose the compute backend: prefer CUDA, then Apple MPS, and fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    """Build CIFAR10 training and validation loaders from the training split.

    Both loaders read the same 50k-image CIFAR10 training set but sample from
    disjoint index subsets, so the validation images are never used for
    gradient updates.

    Args:
        data_dir: Directory where CIFAR10 is stored / downloaded to.
        batch_size: Number of images per batch for both loaders.
        augment: If True, apply random crop (with padding) and horizontal
            flip to the training images before resizing.
        random_seed: Seed passed to ``np.random.seed`` before shuffling the
            indices. Note that this seeds NumPy's *global* generator.
        valid_size: Fraction of the training split held out for validation;
            must lie in [0, 1].
        shuffle: Whether to shuffle the indices before splitting.

    Returns:
        Tuple ``(train_loader, valid_loader)``.

    Raises:
        ValueError: If ``valid_size`` is outside [0, 1].
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(
            "valid_size must be in the range [0, 1], got {}".format(valid_size))

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )
    # Every pipeline must end at 227x227: the network's first fully connected
    # layer is sized for that input resolution.
    resize = transforms.Resize((227, 227))

    # define transforms
    valid_transform = transforms.Compose([
        resize,
        transforms.ToTensor(),
        normalize,
    ])
    if augment:
        # Augment at the native 32x32 resolution, then resize like the
        # validation pipeline. Without the resize the training images would
        # stay 32x32 and not match the validation/test input size.
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            resize,
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            resize,
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset (twice, so each subset gets its own transform)
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices are held out for validation; the rest train.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    """Build a loader over the CIFAR10 test split.

    Args:
        data_dir: Directory where CIFAR10 is stored / downloaded to.
        batch_size: Number of images per batch.
        shuffle: Whether the loader shuffles the test images each pass.

    Returns:
        A ``torch.utils.data.DataLoader`` over the resized, normalized
        CIFAR10 test images.
    """
    # Use the same normalization constants as get_train_valid_loader so the
    # test inputs are scaled exactly like the data the model was trained on.
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader


# CIFAR10 dataset
# Training/validation loaders share the CIFAR10 training split (no augmentation).
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data',
    batch_size=64,
    augment=False,
    random_seed=1,
)

# Held-out CIFAR10 test split.
test_loader = get_test_loader(data_dir='./data', batch_size=64)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43637515.68it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


For this assignment you will use a search algorithm, such as particle swarm optimization (PSO) or CSO, to tune the hyperparameters of a PyTorch neural network design, such as AlexNet, to create a data application for the CIFAR10 data set that yields good accuracy on the test set. For CIFAR10, good accuracy on the test set is over 84%.

Alternatively, if you prefer to work with EMNIST (https://www.nist.gov/itl/products-and-services/emnist-dataset) you should aim at an accuracy over 90%.

Reference for EMNIST: https://arxiv.org/pdf/1702.05373.pdf

## PSO



In [4]:
import math

In [5]:
def particle_swarm_optimization(num_dimensions, num_particles, max_iter, i_min=-10,i_max=10,bounds=None,w=0.5,c1=0.25,c2=0.75,objective=None):
    """Minimize an objective with particle swarm optimization.

    Each iteration visits every particle in turn: the particle's current
    position is scored, its personal best and the global best are updated,
    and the particle is then moved immediately (asynchronous update), so
    later particles in the same iteration already see the new global best.

    Args:
        num_dimensions: Number of coordinates per particle.
        num_particles: Number of particles in the swarm.
        max_iter: Number of full passes over the swarm.
        i_min, i_max: Initialization range used for every coordinate when
            ``bounds`` is None.
        bounds: Optional sequence of ``(low, high)`` pairs, one per
            dimension. Used both for initialization and to clip positions
            after every move.
        w: Inertia weight applied to the previous velocity.
        c1: Weight of the pull towards the particle's personal best.
        c2: Weight of the pull towards the global best.
        objective: Callable taking a position (list of floats) and returning
            a scalar to minimize. When None, the AlexNet training fitness of
            this notebook is used.

    Returns:
        Tuple ``(gbest_position, gbest_value)``. If nothing was evaluated
        (``max_iter`` or ``num_particles`` is 0) this is ``(None, inf)``.
    """
    if objective is None:
        objective = _alexnet_fitness

    def _initial_position():
        # Uniform start inside the per-dimension bounds when given, otherwise
        # inside the shared [i_min, i_max] range.
        if bounds is None:
            return [np.random.uniform(i_min, i_max) for _ in range(num_dimensions)]
        return [np.random.uniform(bounds[d][0], bounds[d][1]) for d in range(num_dimensions)]

    # Initialize the particles as a list of dictionaries
    particles = [{'position': _initial_position(),
                  'velocity': [np.random.uniform(-1, 1) for _ in range(num_dimensions)],
                  'pbest': float('inf'),
                  'pbest_position': None}
                 for _ in range(num_particles)]

    # Initialize global best
    gbest_value = float('inf')
    gbest_position = None

    for _ in range(max_iter):
        for particle in particles:
            position = particle['position']
            velocity = particle['velocity']

            # Coerce to a plain float so bests are not tensors, and treat a
            # diverged (NaN) score as the worst possible value.
            current_value = float(objective(position))
            if math.isnan(current_value):
                current_value = float('inf')

            # Update personal best (always record the first evaluation so the
            # velocity update below has a position to pull towards)
            if particle['pbest_position'] is None or current_value < particle['pbest']:
                particle['pbest'] = current_value
                particle['pbest_position'] = position.copy()

            # Update global best
            if gbest_position is None or current_value < gbest_value:
                gbest_value = current_value
                gbest_position = position.copy()

            # Update particle's velocity and position
            for i in range(num_dimensions):
                r1, r2 = np.random.uniform(), np.random.uniform()
                velocity[i] = (w * velocity[i]
                               + c1 * r1 * (particle['pbest_position'][i] - position[i])
                               + c2 * r2 * (gbest_position[i] - position[i]))
                position[i] += velocity[i]
                # legalize the values to the provided bounds
                if bounds is not None:
                    position[i] = np.clip(position[i], bounds[i][0], bounds[i][1])

    return gbest_position, gbest_value


def _decode_position(position):
    """Map a raw 3-d particle position to (conv channels, fc neurons, weight decay)."""
    convs = math.ceil(position[0])
    # Snap the neuron count down to a multiple of 20.
    neurs = math.ceil(((position[1]) // 20) * 20)
    # The third coordinate is searched in units of 1e-4.
    wd = math.ceil(position[2]) / 10000
    return convs, neurs, wd


def _alexnet_fitness(position):
    """Train a MyAlexNet configured from `position` and return stepper's result."""
    convs, neurs, wd = _decode_position(position)
    num_epochs = 10

    model = MyAlexNet(convs, neurs).to(device)
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=.001, weight_decay=wd, momentum=0.9)
    return stepper(num_epochs, train_loader, model, criterion, optimizer, len(train_loader))

## AlexNet

In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm

In [7]:
class MyAlexNet(nn.Module):
    """AlexNet-style classifier with a tunable first conv width and FC width.

    Args:
        conv: Number of output channels of the first convolution.
        neur: Width of the two hidden fully connected layers.
        num_classes: Number of output logits.

    The first fully connected layer expects 9216 (= 256 * 6 * 6) flattened
    features, which is what the convolutional stack produces for 227x227
    inputs.
    """

    def __init__(self, conv, neur, num_classes=10):
        super().__init__()

        def conv_stage(in_channels, out_channels, pool, **conv_kwargs):
            # Conv -> BatchNorm -> ReLU, optionally followed by a 3x3/stride-2 max pool.
            modules = [
                nn.Conv2d(in_channels, out_channels, **conv_kwargs),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pool:
                modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
            return nn.Sequential(*modules)

        # Convolutional feature extractor
        self.layer1 = conv_stage(3, conv, True, kernel_size=11, stride=4, padding=0)
        self.layer2 = conv_stage(conv, 256, True, kernel_size=5, stride=1, padding=2)
        self.layer3 = conv_stage(256, 384, False, kernel_size=3, stride=1, padding=1)
        self.layer4 = conv_stage(384, 384, False, kernel_size=3, stride=1, padding=1)
        self.layer5 = conv_stage(384, 256, True, kernel_size=3, stride=1, padding=1)

        # Classifier head: two dropout-regularized hidden layers, then logits
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(9216, neur), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(neur, neur), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(neur, num_classes))

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Flatten everything but the batch dimension before the FC head.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

In [8]:
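# Tune the number of convolution channels and neurons; search over a range of values.
## Create a wrapper function: the input is a choice of hyperparameters, the output is accuracy on test.
# h(0): number of convolution channels in the first layer
# h(1): number of neurons in the fully connected layers
# h(2): weight decay (the search passes ceil(h(2))/10000 as weight_decay; the learning rate is fixed at 0.001)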

In [9]:
def stepper(num_epochs,trainloader,model,criterion,optimizer,total_step,validloader=None):
    """Train `model` for `num_epochs` epochs, validating after each epoch.

    Args:
        num_epochs: Number of passes over `trainloader`.
        trainloader: Iterable of ``(images, labels)`` training batches.
        model: Network to train; batches are moved to the device its
            parameters live on.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        total_step: Number of batches per epoch; only used in the progress
            message.
        validloader: Iterable of ``(images, labels)`` validation batches.
            Defaults to the notebook-level ``valid_loader``.

    Returns:
        The (detached) loss tensor of the last training batch processed, or
        None if no training batch was seen.
    """
    if validloader is None:
        validloader = valid_loader  # notebook-level validation loader

    # Follow the model rather than a global so CPU and GPU models both work.
    run_device = next(model.parameters()).device
    was_training = model.training
    loss = None

    for epoch in range(num_epochs):
        # Training phase: dropout active, batch norm uses batch statistics.
        model.train()
        step = 0
        for step, (images, labels) in enumerate(trainloader, start=1):
            # Move tensors to the model's device
            images = images.to(run_device)
            labels = labels.to(run_device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if loss is not None:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                          .format(epoch+1, num_epochs, step, total_step, loss.item()))

        # Validation phase: switch to eval mode so dropout is disabled and
        # batch norm uses its running statistics.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in validloader:
                images = images.to(run_device)
                labels = labels.to(run_device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total:
            # Report the number of images actually evaluated.
            print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total))

    # Leave the model in the mode the caller handed it over in.
    model.train(was_training)
    return loss if loss is None else loss.detach()

In [14]:
# Search configuration
num_classes = 10
num_dimensions = 3   # one coordinate per tuned hyperparameter
num_particles = 5
max_iter = 50
learning_rate = 0.005
# Per-dimension (low, high) search bounds, in the order: conv, neur, wd
bounds_p = [
    (75, 150),
    (500, 8000),
    (10, 50),
]

# Run PSO

In [15]:
# Scratch check of the weight-decay scaling: the lower wd bound in bounds_p (10) divided by 10000.
10/ 10000


0.001

## My model consistently reached about 81% on the validation data in the final epochs, but did not get much higher than this

In [16]:
# Run the swarm search over (conv, neur, wd) within the configured bounds.
best_position, best_value = particle_swarm_optimization(
    num_dimensions,
    num_particles,
    max_iter,
    bounds=bounds_p,
)

Epoch [1/10], Step [704/704], Loss: 0.8932
Accuracy of the network on the 5000 validation images: 59.26 %
Epoch [2/10], Step [704/704], Loss: 0.5975
Accuracy of the network on the 5000 validation images: 67.3 %
Epoch [3/10], Step [704/704], Loss: 0.6206
Accuracy of the network on the 5000 validation images: 69.74 %
Epoch [4/10], Step [704/704], Loss: 0.3599
Accuracy of the network on the 5000 validation images: 73.24 %
Epoch [5/10], Step [704/704], Loss: 0.5620
Accuracy of the network on the 5000 validation images: 75.3 %
Epoch [6/10], Step [704/704], Loss: 0.7047
Accuracy of the network on the 5000 validation images: 76.62 %
Epoch [7/10], Step [704/704], Loss: 0.3734
Accuracy of the network on the 5000 validation images: 78.32 %
Epoch [8/10], Step [704/704], Loss: 1.2914
Accuracy of the network on the 5000 validation images: 79.26 %
Epoch [9/10], Step [704/704], Loss: 0.7850
Accuracy of the network on the 5000 validation images: 78.54 %
Epoch [10/10], Step [704/704], Loss: 0.8601
Accu