In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

In [None]:
# Choose the compute device for training: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [None]:


class AlexNet(nn.Module):
    """AlexNet-style CNN with batch normalisation after every convolution.

    Five convolutional stages (``layer1`` .. ``layer5``) are followed by three
    fully connected stages (``fc``, ``fc1``, ``fc2``). The first linear layer
    takes ``256 * 6 * 6`` flattened features, so the last convolutional stage
    must emit a 256-channel 6x6 map.

    Args:
        num_classes: Number of output logits produced by ``fc2``.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        def conv_stage(in_channels, out_channels, kernel, stride, padding, pool):
            # Conv -> BatchNorm -> ReLU, optionally followed by a 3x3/stride-2 max pool.
            modules = [
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel, stride=stride, padding=padding),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pool:
                modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
            return nn.Sequential(*modules)

        # Convolutional feature extractor.
        self.layer1 = conv_stage(3, 96, kernel=11, stride=4, padding=0, pool=True)
        self.layer2 = conv_stage(96, 256, kernel=5, stride=1, padding=2, pool=True)
        self.layer3 = conv_stage(256, 384, kernel=3, stride=1, padding=1, pool=False)
        self.layer4 = conv_stage(384, 384, kernel=3, stride=1, padding=1, pool=False)
        self.layer5 = conv_stage(384, 256, kernel=3, stride=1, padding=1, pool=True)

        # Classifier head: two dropout-regularised hidden layers, then the logits.
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse everything except the batch dimension before the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc1(self.fc(flat))
        return self.fc2(hidden)

def get_data_loaders(batch_size, augment=False):
    """Create the CIFAR-10 training and test data loaders.

    Both pipelines end with a 227x227 centre crop, tensor conversion and
    per-channel normalisation. The dataset is downloaded into ``./data`` when
    it is not already there.

    Args:
        batch_size: Number of samples per batch for both loaders.
        augment: When true the training pipeline starts with
            ``RandomResizedCrop(227)``; otherwise it starts with ``Resize(256)``.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.

    NOTE(review): ``RandomHorizontalFlip`` is part of the training pipeline
    even when ``augment`` is false - confirm that this is intended.
    """
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # The only step that depends on `augment` is the first resize/crop.
    if augment:
        first_train_step = transforms.RandomResizedCrop(227)
    else:
        first_train_step = transforms.Resize(256)

    train_pipeline = transforms.Compose([
        first_train_step,
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(227),
        transforms.ToTensor(),
        channel_norm,
    ])
    eval_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(227),
        transforms.ToTensor(),
        channel_norm,
    ])

    root = './data'
    cifar_train = datasets.CIFAR10(root=root, train=True, download=True, transform=train_pipeline)
    cifar_test = datasets.CIFAR10(root=root, train=False, download=True, transform=eval_pipeline)

    loader_cls = torch.utils.data.DataLoader
    return (
        loader_cls(cifar_train, batch_size=batch_size, shuffle=True),
        loader_cls(cifar_test, batch_size=batch_size, shuffle=False),
    )

In [None]:


def train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, scheduler, device, num_epochs, batch_size, learning_rate, target_accuracy=83.9):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Args:
        model: Network to optimise; called as ``model(images)``.
        train_loader: Sized iterable of ``(images, labels)`` training batches.
        test_loader: Sized iterable of ``(images, labels)`` evaluation batches.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        scheduler: Scheduler stepped once per epoch with the epoch's accuracy;
            its ``optimizer.param_groups[0]['lr']`` is read as the current
            learning rate.
        device: Device the batches are moved to.
        num_epochs: Maximum number of epochs to run.
        batch_size: Only used in the progress message.
        learning_rate: Initial learning rate; reported until the scheduler
            changes it.
        target_accuracy: Training stops early once an epoch's accuracy (in
            percent) exceeds this value.

    Returns:
        ``(best_accuracy, new_lr)``: the highest per-epoch accuracy in percent
        and the learning rate in effect after the last scheduler step.
    """
    best_accuracy = 0
    # Tracks the learning rate currently in effect. The previous code never
    # updated this value, so callers always got the initial rate back even
    # after the scheduler had lowered it.
    new_lr = learning_rate
    num_train_batches = len(train_loader)
    num_test_batches = len(test_loader)

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        for step, (images, labels) in enumerate(train_loader, start=1):
            images, labels = images.to(device), labels.to(device)

            if step % 100 == 0:
                print('train '+str(step)+'/'+str(num_train_batches))

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f'Epoch {epoch+1}, Loss: {running_loss/max(num_train_batches, 1)}')

        # ---- evaluation pass ----
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for step, (images, labels) in enumerate(test_loader, start=1):
                if step % 100 == 0:
                    print('test '+str(step)+'/'+str(num_test_batches))

                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100 * correct / total if total else 0.0

        # Report the rate that was actually used during this epoch.
        print(f'Accuracy on test set: {accuracy}%, Learining rate: {new_lr}, Batch size: {batch_size}')
        if accuracy > best_accuracy:
            best_accuracy = accuracy

        scheduler.step(accuracy)

        # Pick up any change the scheduler just made so it is both printed and returned.
        new_lr = scheduler.optimizer.param_groups[0]['lr']
        print(f'Learning rate: {new_lr}')

        # Good enough: stop spending epochs on this configuration.
        if accuracy > target_accuracy:
            break

    return best_accuracy, new_lr




In [None]:
def obj_function(hyperparams, num_epochs=10):
    """Objective for the hyperparameter search: train AlexNet and score it.

    Args:
        hyperparams: Two-element sequence ``(batch_size, learning_rate)``.
            ``batch_size`` is truncated to an ``int`` before use.
        num_epochs: Maximum number of training epochs for this evaluation.

    Returns:
        ``(-accuracy, new_lr)``: the negated best accuracy (so that a
        minimiser maximises accuracy) and the learning rate reported by
        ``train_and_evaluate``.
    """
    batch_size, learning_rate = hyperparams
    batch_size = int(batch_size)
    print(f'batch_size: {batch_size}, learning_rate: {learning_rate}')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader, test_loader = get_data_loaders(batch_size)
    model = AlexNet(num_classes=10).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.005)
    criterion = nn.CrossEntropyLoss()
    # `verbose=True` is intentionally not passed: the argument is deprecated
    # and newer PyTorch releases reject it. The training loop already prints
    # the learning rate after every epoch.
    # NOTE(review): with patience=10 the scheduler needs more than ten
    # non-improving epochs before it lowers the rate, so it cannot trigger
    # within the default 10 epochs - confirm the intended patience.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.1, patience=10)

    accuracy, new_lr = train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, scheduler, device, num_epochs, batch_size, learning_rate)

    return -accuracy, new_lr  # Negative accuracy for minimization

In [None]:
# PSO algorithm

def particle_swarm_optimization(num_dimensions, num_particles, max_iter, bounds, obj_function, w=0.5, c1=0.25, c2=0.75 , i_min=-10,i_max=10, early_stop_threshold=83.9, lr_index=1):
    """Minimise ``obj_function`` with a particle swarm.

    Args:
        num_dimensions: Length of every position vector.
        num_particles: Number of particles in the swarm.
        max_iter: Maximum number of sweeps over the whole swarm.
        bounds: Per-dimension ``(low, high)`` pairs, or ``None``. When given,
            initial positions are drawn inside them and every move is clipped
            back into them.
        obj_function: Called with a position list. It may return either a
            scalar value, or a ``(value, new_lr)`` pair; in the latter case
            ``new_lr`` is written back into the particle's position at
            ``lr_index`` before the personal/global bests are updated.
        w: Inertia weight applied to the previous velocity.
        c1: Weight of the pull towards the particle's own best position.
        c2: Weight of the pull towards the swarm's best position.
        i_min: Lower end of the initial position range when ``bounds`` is None.
        i_max: Upper end of the initial position range when ``bounds`` is None.
        early_stop_threshold: The whole search ends as soon as an evaluation
            satisfies ``abs(value) > early_stop_threshold``. Pass ``None`` to
            disable early stopping.
        lr_index: Position index that receives ``new_lr``; ignored when it is
            ``None`` or outside ``range(num_dimensions)``.

    Returns:
        ``(gbest_position, gbest_value)``: a copy of the best position seen
        and its objective value. ``gbest_position`` is ``None`` when nothing
        was evaluated or no evaluation compared below infinity.
    """
    def _initial_position():
        # Draw a starting point inside the bounds, or inside [i_min, i_max] without bounds.
        if bounds is None:
            return [np.random.uniform(i_min, i_max) for _ in range(num_dimensions)]
        return [np.random.uniform(bounds[i][0], bounds[i][1]) for i in range(num_dimensions)]

    # One dictionary per particle holding its state and personal best.
    # NOTE(review): velocities start in [-1, 1] whatever the width of each
    # dimension's bounds - confirm that scale is intended for narrow dimensions.
    particles = [{'position': _initial_position(),
                  'velocity': [np.random.uniform(-1, 1) for _ in range(num_dimensions)],
                  'pbest': float('inf'),
                  'pbest_position': None}
                 for _ in range(num_particles)]

    # Initialize global best
    gbest_value = float('inf')
    gbest_position = None

    for _ in range(max_iter):
        for particle in particles:
            position = particle['position']
            velocity = particle['velocity']

            print('position:',position)

            # Calculate the current value; accept both scalar and
            # (value, new_lr) results so the optimiser is not tied to one objective.
            result = obj_function(position)
            if isinstance(result, (tuple, list)):
                current_value, new_lr = result
                if lr_index is not None and 0 <= lr_index < num_dimensions:
                    position[lr_index] = new_lr
            else:
                current_value = result

            # Update personal best
            if current_value < particle['pbest']:
                particle['pbest'] = current_value
                particle['pbest_position'] = position.copy()

            # Update global best
            if current_value < gbest_value:
                gbest_value = current_value
                gbest_position = position.copy()

            # A good-enough result ends the whole search. A plain `break`
            # here would only leave the inner loop and the next sweep would
            # start evaluating particles again.
            if early_stop_threshold is not None and abs(current_value) > early_stop_threshold:
                return gbest_position, gbest_value

            # A best position can still be missing when the objective has only
            # returned non-finite values; fall back to the current position so
            # the attraction term is zero instead of raising a TypeError.
            pbest_position = particle['pbest_position'] if particle['pbest_position'] is not None else position
            swarm_best = gbest_position if gbest_position is not None else position

            # Update particle's velocity and position
            for i in range(num_dimensions):
                r1, r2 = np.random.uniform(), np.random.uniform()
                velocity[i] = (w * velocity[i]
                               + c1*r1 * (pbest_position[i] - position[i])
                               + c2*r2 * (swarm_best[i] - position[i]))
                position[i] += velocity[i]
                # legalize the values to the provided bounds; float() keeps
                # positions as plain Python floats rather than NumPy scalars
                if bounds is not None:
                    position[i] = float(np.clip(position[i], bounds[i][0], bounds[i][1]))

    return gbest_position, gbest_value



In [None]:
# Ask PyTorch to release its unused cached CUDA memory before the search is launched.
torch.cuda.empty_cache()

In [None]:
# PSO hyperparameters setup
SEED = 42  # fixed so the swarm initialisation and the training runs are repeatable
np.random.seed(SEED)
torch.manual_seed(SEED)

bounds = [(32, 45), (0.001, 0.01)]  # Search ranges for (batch_size, learning_rate)
num_dimensions = len(bounds)  # one search dimension per bounded hyperparameter
num_particles = 10
max_iter = 1

torch.cuda.empty_cache()

# Run PSO
best_hyperparams, best_value = particle_swarm_optimization(num_dimensions, num_particles, max_iter, bounds, obj_function)

print("Best Hyperparameters:", best_hyperparams)

# obj_function returns the negated accuracy, so flip the sign back for display.
print("Best Validation Accuracy:", -best_value)

position: [43.74036540768514, 0.008260047204433463]
batch_size: 43, learning_rate: 0.008260047204433463
Files already downloaded and verified
Files already downloaded and verified
train 100/1163
train 200/1163
train 300/1163
train 400/1163
train 500/1163
train 600/1163
train 700/1163
train 800/1163
train 900/1163
train 1000/1163
train 1100/1163
Epoch 1, Loss: 1.4850094857035478
test 100/233
test 200/233
Accuracy on test set: 55.44%, Learining rate: 0.008260047204433463, Batch size: 43
Learning rate: 0.008260047204433463
train 100/1163
train 200/1163
train 300/1163
train 400/1163
train 500/1163
train 600/1163
train 700/1163
train 800/1163
train 900/1163
train 1000/1163
train 1100/1163
Epoch 2, Loss: 1.0848590817209574
test 100/233
test 200/233
Accuracy on test set: 66.14%, Learining rate: 0.008260047204433463, Batch size: 43
Learning rate: 0.008260047204433463
train 100/1163
train 200/1163
train 300/1163
train 400/1163
train 500/1163
train 600/1163
train 700/1163
train 800/1163
train 90

# batch size: 42
# learning rate: 0.002131872066721196
# Accuracy: 81.36


# 3. Posted on GitHub at: https://brukeamare.github.io/AdvancedAML/Amare_HW2.html