# Home-made CNN

## The code that produces the .csv file is at the very bottom.

In [1]:
import torch
import numpy as np

from torchvision.datasets import ImageFolder
from torchvision import transforms
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.autograd import Variable
import PIL.Image as Image
import os 
from torch.optim import lr_scheduler

In [2]:
normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

train_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.RandomRotation(45),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
        ])

valid_transform = transforms.Compose([
            transforms.Resize((256,256)),
            transforms.RandomRotation(45),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])


train_dataset = ImageFolder(
        root='./bird_dataset/train_images/', 
        transform=train_transform
    )

valid_dataset = ImageFolder(
        root='./bird_dataset/val_images/', 
        transform=valid_transform
    )


In [3]:
print(len(train_dataset))
print(len(valid_dataset))

1082
103


In [4]:
len(train_dataset.classes)

20

In [5]:
batch_size = 8
num_workers = 1  #1 if cuda
pin_memory = True  #true if cuda
shuffle = True

train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=pin_memory,
    )

valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=pin_memory,
    )

In [6]:
#credit for this architecture 
# https://heartbeat.fritz.ai/basics-of-image-classification-with-pytorch-2f8973c51864

class Unit(nn.Module):
    def __init__(self,in_channels,out_channels):
        super(Unit,self).__init__()
        

        self.conv = nn.Conv2d(in_channels=in_channels,kernel_size=3,out_channels=out_channels,stride=1,padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self,input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)

        return output

class SimpleNet(nn.Module):
    def __init__(self,num_classes=20):
        super(SimpleNet,self).__init__()

        
        self.unit1 = Unit(in_channels=3,out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)
        
        self.net = nn.Sequential(self.unit1, self.unit2, self.unit3, self.pool1, self.unit4, self.unit5, self.unit6
                                 ,self.unit7, self.pool2, self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
                                 self.unit12, self.unit13, self.unit14, self.avgpool)
        
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(128*8*8,1000)
        self.fc2 = nn.Linear(1000, num_classes)
        
    def forward(self, input):
        output = self.net(input)
        #print(output.shape)  useful for debugging and finding the right size
        output = output.view(-1,128*8*8)
        output = self.drop_out(output)
        output = self.fc1(output)
        output = self.fc2(output)
        return output

In [7]:
cuda_avail = torch.cuda.is_available()

model = SimpleNet(num_classes=20)

if cuda_avail:
    model.cuda()

optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
loss_fn = nn.CrossEntropyLoss()

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#inspired from
# https://heartbeat.fritz.ai/basics-of-image-classification-with-pytorch-2f8973c51864


def save_models(epoch):
    torch.save(model.state_dict(), "CNN_{}.model".format(epoch))
    print("Checkpoint saved")

def test():
    model.eval()
    test_acc = 0.0
    for data in valid_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        test_acc += (prediction == labels).sum().item()
        
    test_acc = test_acc/103
    return test_acc


best_acc_train = 0.0
best_acc_test = 0.0

def train(num_epochs):
    global best_acc_train
    global best_acc_test
    for epoch in range(num_epochs):
        model.train()
        train_acc = 0.0
        train_loss = 0.0
        for data in train_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()  #Clear all accumulated gradients
            outputs = model(images)
            loss = loss_fn(outputs,labels)
            loss.backward()

            
            optimizer.step()  #Adjust parameters according to the computed gradients

            train_loss += loss.cpu().item() * images.size(0)
            _, prediction = torch.max(outputs.data, 1)
            
            train_acc += (prediction == labels).sum().item()

        train_acc = train_acc / 1082
        train_loss = train_loss / 1082


        test_acc = test()

        
        if (test_acc >= best_acc_test) and (train_acc >= best_acc_train):
            save_models(epoch)
            best_acc_test = test_acc
            best_acc_train = train_acc


        # Print the metrics
        print("Epoch {}, Train Accuracy: {} , TrainLoss: {} , Test Accuracy: {}".format(epoch, train_acc, train_loss,test_acc))


In [9]:
train(5)

Checkpoint saved
Epoch 0, Train Accuracy: 0.08502772643253234 , TrainLoss: 2.9217026374697026 , Test Accuracy: 0.05825242718446602
Checkpoint saved
Epoch 1, Train Accuracy: 0.13955637707948243 , TrainLoss: 2.7727735188002947 , Test Accuracy: 0.1553398058252427
Epoch 2, Train Accuracy: 0.18299445471349354 , TrainLoss: 2.5872170299346697 , Test Accuracy: 0.13592233009708737
Checkpoint saved
Epoch 3, Train Accuracy: 0.2255083179297597 , TrainLoss: 2.455256350159425 , Test Accuracy: 0.1941747572815534
Checkpoint saved
Epoch 4, Train Accuracy: 0.2689463955637708 , TrainLoss: 2.321520590297396 , Test Accuracy: 0.23300970873786409


In [10]:
train(20)

Checkpoint saved
Epoch 0, Train Accuracy: 0.29852125693160814 , TrainLoss: 2.2076543217005 , Test Accuracy: 0.2815533980582524
Checkpoint saved
Epoch 1, Train Accuracy: 0.32532347504621073 , TrainLoss: 2.081925305773723 , Test Accuracy: 0.3786407766990291
Epoch 2, Train Accuracy: 0.3410351201478743 , TrainLoss: 2.0333803568221285 , Test Accuracy: 0.2815533980582524
Epoch 3, Train Accuracy: 0.3576709796672828 , TrainLoss: 1.9161328524626557 , Test Accuracy: 0.33980582524271846
Epoch 4, Train Accuracy: 0.39371534195933455 , TrainLoss: 1.8801093264560382 , Test Accuracy: 0.33980582524271846
Checkpoint saved
Epoch 5, Train Accuracy: 0.40388170055452866 , TrainLoss: 1.8087816542486166 , Test Accuracy: 0.4174757281553398
Epoch 6, Train Accuracy: 0.42791127541589646 , TrainLoss: 1.7655716418338572 , Test Accuracy: 0.3592233009708738
Epoch 7, Train Accuracy: 0.42791127541589646 , TrainLoss: 1.7691678306752343 , Test Accuracy: 0.4077669902912621
Checkpoint saved
Epoch 8, Train Accuracy: 0.45748

In [12]:
for g in optimizer.param_groups:
    g['lr'] = 0.0001    #lowering the learning rate
train(10)

Epoch 0, Train Accuracy: 0.5859519408502772 , TrainLoss: 1.2850846603044521 , Test Accuracy: 0.4854368932038835
Epoch 1, Train Accuracy: 0.6035120147874307 , TrainLoss: 1.21521439790285 , Test Accuracy: 0.47572815533980584
Checkpoint saved
Epoch 2, Train Accuracy: 0.6090573012939002 , TrainLoss: 1.19355831613382 , Test Accuracy: 0.5339805825242718
Epoch 3, Train Accuracy: 0.6053604436229205 , TrainLoss: 1.1694013252716629 , Test Accuracy: 0.47572815533980584
Checkpoint saved
Epoch 4, Train Accuracy: 0.6284658040665434 , TrainLoss: 1.1434117868515128 , Test Accuracy: 0.5339805825242718
Epoch 5, Train Accuracy: 0.6127541589648798 , TrainLoss: 1.1410721926063354 , Test Accuracy: 0.5339805825242718
Epoch 6, Train Accuracy: 0.634011090573013 , TrainLoss: 1.1377606297156286 , Test Accuracy: 0.5242718446601942
Epoch 7, Train Accuracy: 0.6303142329020333 , TrainLoss: 1.1244565750887126 , Test Accuracy: 0.5242718446601942
Epoch 8, Train Accuracy: 0.6182994454713494 , TrainLoss: 1.14183354487921

In [13]:
train(40)

Epoch 0, Train Accuracy: 0.6164510166358595 , TrainLoss: 1.1416488271103329 , Test Accuracy: 0.5631067961165048
Epoch 1, Train Accuracy: 0.6349353049907579 , TrainLoss: 1.1056541277167977 , Test Accuracy: 0.5048543689320388
Epoch 2, Train Accuracy: 0.6312384473197782 , TrainLoss: 1.096075559720094 , Test Accuracy: 0.5242718446601942
Checkpoint saved
Epoch 3, Train Accuracy: 0.6478743068391867 , TrainLoss: 1.1021574898700397 , Test Accuracy: 0.5436893203883495
Epoch 4, Train Accuracy: 0.6423290203327172 , TrainLoss: 1.0750381939953224 , Test Accuracy: 0.47572815533980584
Epoch 5, Train Accuracy: 0.6515711645101664 , TrainLoss: 1.0886062958764942 , Test Accuracy: 0.5048543689320388
Epoch 6, Train Accuracy: 0.6626617375231053 , TrainLoss: 1.0652820385317705 , Test Accuracy: 0.5145631067961165
Epoch 7, Train Accuracy: 0.6691312384473198 , TrainLoss: 1.0574364556402465 , Test Accuracy: 0.5048543689320388
Epoch 8, Train Accuracy: 0.6423290203327172 , TrainLoss: 1.0934649121959636 , Test Accu

In [14]:
for g in optimizer.param_groups:
    g['lr'] = 0.00001    #lowering the learning rate again
train(10)

Epoch 0, Train Accuracy: 0.6940850277264325 , TrainLoss: 0.8980850942033496 , Test Accuracy: 0.5825242718446602
Epoch 1, Train Accuracy: 0.7181146025878004 , TrainLoss: 0.8766235091108933 , Test Accuracy: 0.5631067961165048
Epoch 2, Train Accuracy: 0.7301293900184843 , TrainLoss: 0.8415324348618936 , Test Accuracy: 0.5728155339805825
Checkpoint saved
Epoch 3, Train Accuracy: 0.722735674676525 , TrainLoss: 0.845726874310957 , Test Accuracy: 0.5825242718446602
Epoch 4, Train Accuracy: 0.7125693160813309 , TrainLoss: 0.8605740090174507 , Test Accuracy: 0.5631067961165048
Epoch 5, Train Accuracy: 0.7134935304990758 , TrainLoss: 0.8515933562559033 , Test Accuracy: 0.5825242718446602
Epoch 6, Train Accuracy: 0.7134935304990758 , TrainLoss: 0.8482062946626308 , Test Accuracy: 0.5728155339805825
Epoch 7, Train Accuracy: 0.7144177449168208 , TrainLoss: 0.879456793094078 , Test Accuracy: 0.5533980582524272
Epoch 8, Train Accuracy: 0.7282809611829945 , TrainLoss: 0.8686375981558273 , Test Accurac

In [15]:
#At this point, after 85 epochs, validation accuracy is not improving anymore.
#We stop training and load the best model so far

model.load_state_dict(torch.load('./CNN_3.model'))

In [15]:
correct = 0
total = 0
with torch.no_grad():
    for data in train_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 9 %


In [16]:
test_dir = './bird_dataset/test_images/mistery_category'
output_file = open("test.csv", "w")
output_file.write("Id,Category\n")

def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')

for f in os.listdir(test_dir):
    if 'jpg' in f:
        data = valid_transform(pil_loader(test_dir + '/' + f))
        data = data.view(1, data.size(0), data.size(1), data.size(2))
        if cuda_avail:
            data = data.cuda()
        model.eval()
        output = model(data)
        pred = output.data.max(1, keepdim=True)[1]
        output_file.write("%s,%d\n" % (f[:-4], pred))

output_file.close()