## Convolutional Neural Networks (CNNs)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### Convolution operator - OOP way

**OOP - Based (torcn.nn)**
- in_channels : number of channels in input
- out_channels : number of output channels produced by convolution
- kernel_size : size of convolution kernel
- stride : stride of convolution. Default 1
- padding : padding of convolution. Zero padding

In [2]:
# Create 10 random images of shape (1, 28, 28)
images = torch.rand(10, 1, 28, 28)

# Build 6 conv. filters
conv_filters = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1)

# Convolve the image with the filters 
output_feature = conv_filters(images)
print(output_feature.shape)

torch.Size([10, 6, 28, 28])


### Convolution operator - Functional way

**Functional - Based (torcn.nn.functional)**
- input : input tensor of shape (batch,in_channels,Height,Width)
- weights : filter of shape
- stride : the stride of the convolution kernel. Can be a single number or a tuple (sH,sW)
- padding : implicit zero padding on both sides of the input. Default 0

In [4]:
# Create 10 random images
image = torch.rand(10, 1, 28, 28)

# Create 6 filters
filters = torch.rand(6, 1, 3, 3)

# Convolve the image with the filters
output_feature = F.conv2d(image, filters, stride=1, padding=1)
print(output_feature.shape)

torch.Size([10, 6, 28, 28])


### Max-pooling operator

In [6]:
im = torch.tensor([[[[ 8.,  1.,  2.,  5.,  3.,  1.],
                    [ 6.,  0.,  0., -5.,  7.,  9.],
                    [ 1.,  9., -1., -2.,  2.,  6.],
                    [ 0.,  4.,  2., -3.,  4.,  3.],
                    [ 2., -1.,  4., -1., -2.,  3.],
                    [ 2., -4.,  5.,  9., -7.,  8.]]]])

# Build a pooling operator with size `2`.
max_pooling = torch.nn.MaxPool2d(2)

# Apply the pooling operator
output_feature = max_pooling(im) # OOP - BASED

# Use pooling operator in the image
output_feature_F = F.max_pool2d(im,2) # Functional - BASED

# print the results of both cases
print(output_feature)
print(output_feature_F)

tensor([[[[8., 5., 9.],
          [9., 2., 6.],
          [2., 9., 8.]]]])
tensor([[[[8., 5., 9.],
          [9., 2., 6.],
          [2., 9., 8.]]]])


### Average-pooling operator

In [8]:
# Build a pooling operator with size `2`.
avg_pooling = torch.nn.AvgPool2d(2)

# Apply the pooling operator
output_feature = avg_pooling(im)

# Use pooling operator in the image
output_feature_F = F.avg_pool2d(im, 2)

# print the results of both cases
print("OOP - Based")
print(output_feature)
print("\nFunctional - Based")
print(output_feature_F)

OOP - Based
tensor([[[[ 3.7500,  0.5000,  5.0000],
          [ 3.5000, -1.0000,  3.7500],
          [-0.2500,  4.2500,  0.5000]]]])

Functional - Based
tensor([[[[ 3.7500,  0.5000,  5.0000],
          [ 3.5000, -1.0000,  3.7500],
          [-0.2500,  4.2500,  0.5000]]]])


### CNN - init method

- You are going to build your first convolutional neural network. You're going to use the MNIST dataset as the dataset, which is made of handwritten digits from 0 to 9. 
- The convolutional neural network is going to have 2 convolutional layers, each followed by a ReLU nonlinearity, and a fully connected layer. 
- Each pooling layer halves both the height and the width of the image, so by using 2 pooling layers, the height and width are 1/4 of the original sizes. MNIST images have shape (1, 28, 28)

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)
        
        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
        
        # Instantiate a max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Instantiate a fully connected layer
        self.fc = nn.Linear(7 * 7 * 10, 10)

### CNN - forward()

In [12]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
		
        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
        
        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)
        
        # Instantiate a max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Instantiate a fully connected layer
        self.fc = nn.Linear(7 * 7 * 10, 10)

    def forward(self, x):

        # Apply conv followd by relu, then in next line pool
        x = self.relu(self.conv1(x))
        x = self.pool(x)

        # Apply conv followd by relu, then in next line pool
        x = self.relu(self.conv2(x))
        x = self.pool(x)

        # Prepare the image for the fully connected layer
        x = x.view(-1, 7 * 7 * 10)

        # Apply the fully connected layer and return the result
        return self.fc(x)

### Train CNN

In [20]:
import torchvision
import torch.utils.data
import torchvision.transforms as transforms
import torch.optim as optim

**Preapare Data**

In [16]:
# Transform the data to torch tensors and normalize it 
transform = transforms.Compose([transforms.ToTensor(),
								transforms.Normalize((0.1307), ((0.3081)))])

# Prepare training set and testing set
trainset = torchvision.datasets.CIFAR10('Data', train=True, 
									  download=True, transform=transform)
testset = torchvision.datasets.CIFAR10('Data', train=False,
			   download=True, transform=transform)

# Prepare training loader and testing loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                          shuffle=False, num_workers=0) 

Files already downloaded and verified
Files already downloaded and verified


In [27]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
		
        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
        
        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        
        # Instantiate a max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Instantiate a fully connected layer
        self.fc = nn.Linear(128 * 4 * 4, 10)

    def forward(self, x):

        # Apply conv followd by relu, then in next line pool
        x = self.relu(self.conv1(x))
        x = self.pool(x)

        # Apply conv followd by relu, then in next line pool
        x = self.relu(self.conv2(x))
        x = self.pool(x)

        x = self.relu(self.conv3(x))
        x = self.pool(x)

        # Prepare the image for the fully connected layer
        x = x.view(-1, 128 * 4 * 4)

        # Apply the fully connected layer and return the result
        return self.fc(x)

In [77]:
# Instantiate the Adam optimizer and Cross-Entropy loss function
model = Net(num_classes=10)   
optimizer = optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()


num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()

        # Compute the forward pass
        outputs = model(inputs)
            
        # Compute the loss function
        loss = criterion(outputs,labels)
            
        # Compute the gradients
        loss.backward()
        
        # Update the weights
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        running_loss += loss.item()
        if i % 500 == 499:
            accuracy = 100 * correct / total 
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 2000:.3f}, Accuracy: {accuracy}')
            running_loss = 0.0
            correct = 0
            total = 0

Epoch 1, Batch 500, Loss: 0.436, Accuracy: 37.13125
Epoch 1, Batch 1000, Loss: 0.357, Accuracy: 48.61875
Epoch 1, Batch 1500, Loss: 0.324, Accuracy: 54.38125
Epoch 2, Batch 500, Loss: 0.292, Accuracy: 59.04375
Epoch 2, Batch 1000, Loss: 0.276, Accuracy: 61.03125
Epoch 2, Batch 1500, Loss: 0.266, Accuracy: 63.09375
Epoch 3, Batch 500, Loss: 0.244, Accuracy: 65.6875
Epoch 3, Batch 1000, Loss: 0.237, Accuracy: 67.175
Epoch 3, Batch 1500, Loss: 0.233, Accuracy: 67.5875
Epoch 4, Batch 500, Loss: 0.216, Accuracy: 70.2
Epoch 4, Batch 1000, Loss: 0.215, Accuracy: 70.9
Epoch 4, Batch 1500, Loss: 0.211, Accuracy: 70.83125
Epoch 5, Batch 500, Loss: 0.198, Accuracy: 72.825
Epoch 5, Batch 1000, Loss: 0.197, Accuracy: 72.9375
Epoch 5, Batch 1500, Loss: 0.191, Accuracy: 73.625
Epoch 6, Batch 500, Loss: 0.179, Accuracy: 75.5625
Epoch 6, Batch 1000, Loss: 0.181, Accuracy: 75.39375
Epoch 6, Batch 1500, Loss: 0.179, Accuracy: 75.25625
Epoch 7, Batch 500, Loss: 0.164, Accuracy: 77.85625
Epoch 7, Batch 100

In [78]:
total = 0
correct = 0
for i, data in enumerate(testloader, 0):
    images, labels = data
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total}%')    

Accuracy of the network on the test images: 74.18%
