## HW4 CS 454 CAN SÖLÖMBAZ 

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
from matplotlib import cm as cm
import pandas as pd
import numpy as np

In [2]:
label=[i for i in range(10)]
def conf_matrix(y_pred,y_true,label):
    labels=label
    cm = confusion_matrix(y_true, y_pred, labels=label)
    print(cm)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    plt.title('Confusion matrix')
    fig.colorbar(cax)
    ax.set_xticks(labels)
    ax.set_yticks(labels)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

In [3]:
## This structure is very similar to LeNet-5. Only difference is size of the image which is 1x28x28 in our case
## Structure of CNN as follows: 1 Convnet-1 Maxpool-1 Convnet-1 Maxpool-1 Convnet-Flatten-2 FulCon-Softmax

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # ====== ENCODER PART ======       
        # MNIST image is 1x28x28 (CxHxW)
        # Pytorch expects input data as BxCxHxW 
        # B: Batch size
        # C: number of channels gray scale images have 1 channel
        # W: width of the image 
        # H: height of the image
        
        # W after conv2d  [(W - Kernelw + 2*padding)/stride] + 1
        
        # after convolution we'll have Bx6 24x24 feature maps
        self.conv1 = nn.Conv2d(in_channels=1,
                                out_channels=6,
                                kernel_size=5,
                                stride=1,
                                padding=0)
        
        # after maxpool we'll have Bx6 12x12 feature maps
        self.maxpool1 = nn.MaxPool2d(2,2)


        
        # after convolution we'll have Bx16 8x8 feature maps 
        self.conv2= nn.Conv2d(in_channels=6,
                                out_channels=16,
                                kernel_size=5,
                                stride=1,
                                padding=0
                                )

        # after maxpool we'll have Bx16 4x4 feature maps
        self.maxpool2 = nn.MaxPool2d(2,2)
        
        self.conv3= nn.Conv2d(in_channels=16,
                                out_channels=120,
                                kernel_size=4,
                                stride=1,
                                padding=0
                                )
        
        # first fully connected layer from 120 input features to 84 hidden units
        self.fc1 = nn.Linear(in_features=120,
                                out_features=84)
       
        # second fully connected layer from 84 input features to 10 outputs
        self.fc2 = nn.Linear(in_features=84,
                                out_features=10)
                           


    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = F.relu(self.conv3(x))
        x = torch.flatten(x,1) # flatten feature maps, Bx (CxHxW)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
#         x = x.view(-1,64,4,4) # reshape back to feature map format

        return x        


In [4]:
class Can_CNN(nn.Module):
    def __init__(self):
        super(Can_CNN, self).__init__()
        
        # W after conv2d  [(W - Kernelw + 2*padding)/stride] + 1
        
        # after convolution we'll have Bx32 28x28 feature maps
        self.conv1 = nn.Conv2d(in_channels=1,
                                out_channels=32,
                                kernel_size=5,
                                stride=1,
                                padding=2)

        # after avgpool we'll have Bx32 14x14 feature maps
        self.maxpool1 = nn.MaxPool2d(2,2)

        
        # after convolution we'll have Bx64 14x14 feature maps 
        self.conv2= nn.Conv2d(in_channels=32,
                                out_channels=64,
                                kernel_size=5,
                                stride=1,
                                padding=2
                                )    
        
        # after avgpool we'll have Bx64 7x7 feature maps
        self.maxpool2 = nn.MaxPool2d(2,2)
               
        
        # first fully connected layer from 64*7*7 input features to 128 hidden units
        self.fc1 = nn.Linear(in_features=64*7*7,
                                out_features=128)
       
        # second fully connected layer from 128 input features to 10 outputs
        self.fc2 = nn.Linear(in_features=128,
                                out_features=10)
        
                           


    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = torch.flatten(x,1) # flatten feature maps, Bx (CxHxW)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
#         x = x.view(-1,64,4,4) # reshape back to feature map format

        return x        

In [7]:
def to_img(x):
    x = 0.5 * (x + 1)   # from [-1, 1] range to [0, 1] range
    x = x.clamp(0, 1)   # assign less than 0 to 0, bigger than 1 to 1
    x = x.view(x.size(0), 1, 28, 28) # B, C, H, W format for MNIST
    return x

num_epochs = 20
batch_size = 128
learning_rate = 1e-3
n_batches = 60000 // batch_size

# normalize each image and set the pixel values between -1 and 1
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

# prepare data loader
trainset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

# determine where to run the code
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

# create an CNN network instance
net = CNN().to(device)
net2 = Can_CNN().to(device)
# print(net)  # display the architecture
loss_function = nn.NLLLoss().to(device)
optimizer1 = torch.optim.Adam(net.parameters(), lr=learning_rate,
                             weight_decay=1e-5)
optimizer2 = torch.optim.Adam(net2.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

In [8]:
def train(net, loader, loss_func, optimizer):
    net.train()                           # put model in train mode
    total_loss = torch.zeros(1).to(device)
    
    for img, target in loader:                  # next batch
        img = Variable(img).to(device)     # convert to Variable to calculate gradient and move to gpu
        target = Variable(target).to(device)
        output = net(img).to(device)          # feed forward
        loss = loss_func(output, target)      # calculate loss         
        optimizer.zero_grad()               # clear previous gradients 
        loss.backward()                     # calculate new gradients
        optimizer.step()                    # update weights 
        total_loss += loss                  # accumulate loss  
            
    return img, output, total_loss

In [9]:
for epoch in range(num_epochs):
    img, output, loss = train(net, dataloader, loss_function, optimizer1)
               
    # log
    print('epoch [{}/{}], loss:{:.4f}'
            .format(epoch+1, num_epochs, loss.item()/n_batches))

num_of_epoch=[i+1 for i in range(num_epochs)]
plt.xticks(num_of_epoch)
plt.xlabel("number of epoch")
plt.ylabel("error")
plt.title("Epoch vs Loss")
plt.plot(num_of_epoch, errors) 

# save the model
torch.save(net.state_dict(), './CNN.pth')

epoch [1/20], loss:0.3713
epoch [2/20], loss:0.0875
epoch [3/20], loss:0.0624
epoch [4/20], loss:0.0503


KeyboardInterrupt: 

In [7]:
testset = MNIST('./data', transform=img_transform, download=True, train=False)
trainset = MNIST('./data', transform=img_transform, download=True)
test_loader = DataLoader(testset, batch_size=10000, shuffle=True)
train_loader = DataLoader(trainset, batch_size=60000, shuffle=True)

In [10]:
def results(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum')  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            confusion_matrix=conf_matrix(pred,target,label)
            
    return confusion_matrix

In [53]:
results(net, device, train_loader)
results(net, device, test_loader)

tensor([1, 8, 4,  ..., 7, 5, 6])
60000
tensor([[1],
        [8],
        [4],
        ...,
        [7],
        [5],
        [6]])
60000


TypeError: 'NoneType' object is not callable