## COGS 181 Project: Convolutional Neural Network on the CIFAR-10 dataset

Calixto Calangi


In [2]:
# Necessary code
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Types of implementations

In this project, I will be testing several different implementations and recording the best hyperparameters for each.

### In the first test, I will compare two architectures: a custom CNN (based on what we covered in our COGS 181 course) vs. an architecture based on AlexNet

After that, I will tinker with my CNN and make a series of changes, comparing the results against the baseline recorded with my original CNN (the one compared against the AlexNet architecture).

### Then, I will be looking at (for my own CNN):

#### 1.) Varying layers
#### 2.) Different optimization methods (Adam vs. stochastic gradient descent)
#### 3.) Different pooling functions (average pooling, max pooling, stochastic pooling)
#### 4.) Different activation functions (ReLU, Sigmoid)


### Preparing Dataset: transforming

In [4]:
# Transform pipeline (same setup as HW4): convert each image to a tensor,
# then normalize every RGB channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Loader settings shared by the train and test splits, named once so the two
# loaders cannot drift apart.
BATCH_SIZE = 4
NUM_WORKERS = 2

# Training split: downloaded to ./data on first use, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=NUM_WORKERS)

# Test split: same transform, fixed order so evaluation is repeatable.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=NUM_WORKERS)

# Human-readable class labels. `classes` is defined exactly once here; an
# earlier duplicate with abbreviated names ('plane', 'car') was dead code
# because it was immediately overwritten by this tuple.
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

### Choosing the device

In [6]:
# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


## Test 1: comparing my CNN to AlexNet architecture

In this section, I will be looking at the two architectures (my own and one based on AlexNet).

In [7]:
### Architecture 1: my own CNN
class Net(nn.Module):
    """Small CNN: two conv -> ReLU -> average-pool stages, then three linear layers.

    Both convolutions use 3x3 kernels with padding=1, so they keep the spatial
    size; each ``AvgPool2d(2)`` halves it. ``fc1`` expects 1280 input features,
    i.e. 20 channels x 8 x 8, which matches 3x32x32 inputs such as CIFAR-10
    images. The final layer produces 10 outputs (one per class).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 20 channels, then 2x2 average pooling.
        self.conv1 = nn.Sequential(
          nn.Conv2d(3, 20, 3, stride=1, padding=1),
          nn.ReLU(),
          nn.AvgPool2d(2)
        )
        # Stage 2: 20 -> 20 channels, then 2x2 average pooling.
        self.conv2 = nn.Sequential(
          nn.Conv2d(20,20,3, stride=1, padding=1),
          nn.ReLU(),
          nn.AvgPool2d(2)
        )
        # Classifier head: 1280 -> 1280 -> 100 -> 10.
        self.fc1= nn.Linear(1280,1280)
        self.fc2= nn.Linear(1280,100)
        self.relu= nn.ReLU()  # stateless, so it is reused after fc1 and fc2
        self.fc3 = nn.Linear(100,10)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: image batch; the flattened feature map after the two conv
               stages must have 1280 values per sample (e.g. N x 3 x 32 x 32).

        Returns:
            Tensor of shape (N, 10) with unnormalized class scores (logits),
            suitable for ``nn.CrossEntropyLoss``.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = torch.flatten(x,1)  # keep the batch dimension, flatten the rest
        x = self.relu(self.fc1(x))
        # ReLU between fc2 and fc3: without a nonlinearity the two linear
        # layers would compose into a single linear map, making fc2 pointless.
        x = self.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Build the network and move its parameters to the selected device in one
# step (`Module.to` returns the module itself).
net = Net().to(device)
net  # last expression of the cell, so the notebook displays the layer summary

Net(
  (conv1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (conv2): Sequential(
    (0): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (fc1): Linear(in_features=1280, out_features=1280, bias=True)
  (fc2): Linear(in_features=1280, out_features=100, bias=True)
  (relu): ReLU()
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)

In [12]:
### Architecture 2: AlexNet based CNN
# torchvision ships an AlexNet implementation, so it is used as the reference
# architecture to compare against my own CNN.
from torchvision import datasets, transforms, models

# Load AlexNet with ImageNet-pretrained weights. `pretrained=True` is
# deprecated since torchvision 0.13 in favour of the `weights` argument;
# fall back to the old keyword when the weights enum does not exist.
try:
    alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
except AttributeError:  # torchvision < 0.13 has no AlexNet_Weights
    alexnet = models.alexnet(pretrained=True)

# Replace classifier[6] with a fresh linear layer of the same input width and
# 10 outputs, one per CIFAR-10 class.
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 10)

# NOTE(review): the `transform` defined earlier feeds 32x32 images. With
# AlexNet's 11x11/stride-4 first convolution and three 3x3/stride-2 max-pools,
# a 32x32 input shrinks to 1x1 before the last pool, which cannot be applied.
# A resize step (e.g. to 224x224) is needed before training/evaluating this
# model -- TODO confirm and add a dedicated transform/loader for AlexNet.

# Reuse the `device` chosen earlier in the notebook rather than redefining it
# here with a different device string.
alexnet.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

#### Optimizer and Loss function:
To stay consistent with the AlexNet setup, which uses cross-entropy loss and stochastic gradient descent, I will use the same loss function and optimizer for both architectures. The learning rate is also the same for both (0.001).

In [11]:
##### loss, optimizer
# SGD hyper-parameters, named once so both architectures are trained under
# identical settings.
LEARNING_RATE = 0.001
MOMENTUM = 0.9

# Cross-entropy loss has no per-model state, so one instance serves both models.
loss_func = nn.CrossEntropyLoss()

# One optimizer per model: an optimizer only updates the parameters it was
# constructed with, so `opt` alone would never train AlexNet.
opt = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
alexnet_opt = optim.SGD(alexnet.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

#### Training, report best hyper-parameters

In [None]:
####

#### Plot training curve

In [None]:
####

#### Test accuracy

In [None]:
####

## Test 2: Varying layers in my CNN

In [None]:
####

## Test 3: Different pooling functions

In [None]:
####

## Test 4: Different activation functions

In [None]:
####