# Training CNN

In [1]:
import torch
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import numpy as np
import os

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.autograd import Variable
from torch import optim

from skimage import io, transform
from random import randint
import shutil

import os, uuid, glob, warnings
import numpy as np

from PIL import Image

warnings.filterwarnings("ignore")

plt.ion()

In [2]:
transform = transforms.Compose([
        transforms.RandomSizedCrop(224),
        transforms.ToTensor(), 
        transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))])

PARAMETERS FOR FLICKER LOGOS DATASET 27

In [3]:
ORIGINAL_DATA_DIR = '../data/fl27/original'
CROPPED_DATA_DIR = '../data/fl27/images'
ORIGINAL_ANNOTATION = '../data/fl27/annotation.txt'
CROPPED_ANNOTATION = '../data/fl27/crop_annotation.txt'

TRAIN_SET = '../annotations/trainset.txt'
TEST_SET = '../annotations/testset.txt'

PARAMETERS FOR FLICKER LOGOS DATASET 32

In [4]:
ORIGINAL_DATA_DIR = '../data/fl32/originals'
CROPPED_DATA_DIR = '../data/fl32/images'
ORIGINAL_ANNOTATION = '../data/fl32/annotation.txt'
CROPPED_ANNOTATION = '../data/fl32/crop_annotation.txt'


TRAIN_SET = '../annotations/trainset32.txt'
TEST_SET = '../annotations/testset32.txt'

In [5]:
def read_from_annotation(path):
    file = open(path, "r")
    content = file.readlines()
    new = [x.split(" ")[:-1] for x in content]
    return new
def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)

In [6]:
class MyDataset(Dataset):
    def __init__(self, txt_file, root,transform=None):
        self.txt_file = txt_file
        self.root = root
        self.transform = transform

    def __len__(self):
        return self.txt_file.shape[0]

    def __getitem__(self, id):
        img_name = os.path.join(self.root, self.txt_file[id][0])
        img = Image.open(img_name)
        if self.transform is not None:
            img = self.transform(img)
        logo = int(self.txt_file[id][1])
        return img,logo

Labels for FL 27

In [8]:
LABELS = ['Adidas', 'Apple', 'BMW', 'Citroen', 'Cocacola', 'DHL', 'Fedex', 'Ferrari', 
          'Ford', 'Google', 'Heineken', 'HP', 'Intel', 'McDonalds', 'Mini', 'Nbc', 'Nike', 'Pepsi', 
          'Porsche', 'Puma', 'RedBull', 'Sprite', 'Starbucks', 'Texaco', 'Unicef', 'Vodafone', 'Yahoo']

In [7]:
LABELS = ['ferrari', 'ups', 'cocacola', 'guiness', 'adidas', 'aldi', 'texaco', 'nvidia', 'rittersport', 
          'paulaner', 'dhl', 'bmw', 'fosters', 'milka', 'starbucks', 'pepsi', 'singha', 'apple', 'fedex',
          'carlsberg', 'hp', 'chimay', 'google', 'tsingtao', 'corona', 'ford', 'esso', 'shell', 'stellaartois', 
          'becks', 'heineken', 'erdinger','nologo']
print(LABELS[32])
print(len(LABELS))

nologo
33


In [8]:
def prepare_num_dataset(annotation_path, set_path):
    arr = read_from_annotation(set_path)
    out = []
    for item in arr:
        tmp = [item[0], LABELS.index(item[1].split('\n')[0])]
        out.append(tmp)
    out = np.array(out)
    return out

train_data = prepare_num_dataset(CROPPED_ANNOTATION, TRAIN_SET)
test_data = prepare_num_dataset(CROPPED_ANNOTATION, TEST_SET)
trainset = MyDataset(train_data, CROPPED_DATA_DIR,transform)
testset = MyDataset(test_data, CROPPED_DATA_DIR,transform)
print("Train size {} items and test size {} items".format(len(trainset), len(testset)))



batch_size = 70


train_loader = torch.utils.data.DataLoader(dataset=trainset,
                                           batch_size=batch_size,
                                            shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=testset,
                                            batch_size=batch_size,
                                            shuffle=False)
print('Batch size: {}'.format(len(test_loader)))

Train size 8404 items and test size 1000 items
Batch size: 15


## Training process of CNN

In [9]:
num_epochs = 500
learning_rate = 0.001
momentum = 0.9

n_classes = len(LABELS)
n_classes

33

In [10]:
class CNN(nn.Module):
    def __init__(self, n_classes):
        super(CNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, n_classes),
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
    
cnn = CNN(n_classes)

In [11]:
cnn.cuda()

CNN (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU (inplace)
    (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU (inplace)
    (5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU (inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU (inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU (inplace)
    (12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (classifier): Sequential (
    (0): Dropout (p = 0.5)
    (1): Linear (9216 -> 4096)
    (2): ReLU (inplace)
    (3): Dropout (p = 0.5)
    (4): Linear (4096 -> 4096)
    (5): ReLU (inplace)
    (6): Linear (4096 -> 33)
  )
)

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(cnn.parameters(), lr = learning_rate, momentum=momentum)
for epoch in range(num_epochs):
    cnn.train()
    for i, (images, labels) in enumerate(train_loader):
        images = to_var(images)
        labels = to_var(labels)
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    if (epoch+1)%10 == 0:
        print ('Epoch [%d/%d] Loss: %.4f'
            %(epoch+1, num_epochs, loss.data[0]))
#         torch.save(cnn.state_dict(), 'cnn-fl22-vol3-2nd-{}.pt'.format(epoch+1))

Epoch [10/500] Loss: 3.1264
Epoch [20/500] Loss: 1.6110
Epoch [30/500] Loss: 2.2609
Epoch [40/500] Loss: 1.9291
Epoch [50/500] Loss: 1.9732
Epoch [60/500] Loss: 0.7036
Epoch [70/500] Loss: 1.1888
Epoch [80/500] Loss: 2.9764
Epoch [90/500] Loss: 1.1771
Epoch [100/500] Loss: 0.4418
Epoch [110/500] Loss: 0.0063
Epoch [120/500] Loss: 0.0538
Epoch [130/500] Loss: 0.8441
Epoch [140/500] Loss: 1.8677
Epoch [150/500] Loss: 0.0279
Epoch [160/500] Loss: 2.1514
Epoch [170/500] Loss: 0.7830
Epoch [180/500] Loss: 0.0278
Epoch [190/500] Loss: 0.8292
Epoch [200/500] Loss: 0.1652
Epoch [210/500] Loss: 0.7431
Epoch [220/500] Loss: 0.0810
Epoch [230/500] Loss: 0.4506
Epoch [240/500] Loss: 0.0820
Epoch [250/500] Loss: 0.4758
Epoch [260/500] Loss: 0.0018
Epoch [270/500] Loss: 0.1659
Epoch [280/500] Loss: 1.1016
Epoch [290/500] Loss: 1.6601
Epoch [300/500] Loss: 0.0156
Epoch [310/500] Loss: 0.0038
Epoch [320/500] Loss: 0.0099
Epoch [330/500] Loss: 1.7033
Epoch [340/500] Loss: 0.8691
Epoch [350/500] Loss: 0

In [18]:
torch.save(cnn.state_dict(), 'model-lr.pt')

In [19]:
import timeit
start_time = timeit.default_timer()
cnn = CNN(n_classes)
cnn.load_state_dict(torch.load('model-lr.pt'))
cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

elapsed = timeit.default_timer() - start_time
print('Test Accuracy of the model on the test images: %d %%' % (100 * correct / total))
print('Time for running: %d' % (elapsed))


Test Accuracy of the model on the test images: 0 %
Time for running: 55


In [None]:
loss_file = 'cnn-vol-2-loss.txt' 
def read_from_loss(file):
    with open (file, "r") as f:
        content = f.readlines()
        content = [x.split(' ') for x in content]
        content = [[x[i].split('\n')[0] for i in range(len(x)) if i%2!=0] for x in content]
        content = [[x[0]+x[1] , x[2]] for x in content]
    data = []
    for item in content:
        tmp = item[0].split(',')
        num = ""
        for t in tmp:
            num += (t.split('/')[0].split('[')[1]) + '.'
        iteration = float(num[:-1])
        data.append([iteration, float(item[1])])
    data = np.array(data)
    plt.plot(data[: , 0], data[: , 1])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.show()
read_from_loss(loss_file)

# FUTURE TRAINING PROCESS

In [None]:
batch_size = 100
mnist = Mnist(batch_size)

n_epochs = 30


for epoch in range(n_epochs):
    capsule_net.train()
    train_loss = 0
    for batch_id, (data, target) in enumerate(mnist.train_loader):

        target = torch.sparse.torch.eye(10).index_select(dim=0, index=target)
        data, target = Variable(data), Variable(target)

        if USE_CUDA:
            data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output, reconstructions, masked = capsule_net(data)
        loss = capsule_net.loss(data, output, target, reconstructions)
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        
        if batch_id % 100 == 0:
            print "train accuracy:", sum(np.argmax(masked.data.cpu().numpy(), 1) == 
                                   np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size)
        
    print train_loss / len(mnist.train_loader)
        
    capsule_net.eval()
    test_loss = 0
    for batch_id, (data, target) in enumerate(mnist.test_loader):

        target = torch.sparse.torch.eye(10).index_select(dim=0, index=target)
        data, target = Variable(data), Variable(target)

        if USE_CUDA:
            data, target = data.cuda(), target.cuda()

        output, reconstructions, masked = capsule_net(data)
        loss = capsule_net.loss(data, output, target, reconstructions)

        test_loss += loss.data[0]
        
        if batch_id % 100 == 0:
            print "test accuracy:", sum(np.argmax(masked.data.cpu().numpy(), 1) == 
                                   np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size)
    
    print test_loss / len(mnist.test_loader)

# Sample testing of the image

In [None]:
import selectivesearch
from skimage.transform import resize
from PIL import Image
from torchvision import transforms 

In [None]:
TEST_IMAGES_FOR_SEGMENTATION = '../data/fl27/segmentation/original'
SEGMENTED_RESIZED_IMAGES = '../data/fl27/segmentation/segmented'

In [None]:
test_images = glob.glob(os.path.join(TEST_IMAGES_FOR_SEGMENTATION,'*.jpg'))

In [None]:
for img in test_images:
    im = io.imread(img)
    img_lbl,regions = selectivesearch.selective_search(im, scale=500, sigma=0.9, min_size=1000)

In [None]:
filtered_regions = []
for item in regions:
    tmp = item['rect']
    if tmp not in filtered_regions:
        filtered_regions.append(tmp)

In [None]:
print(filtered_regions)

In [None]:
for item in filtered_regions:
    rect = item
    x1 = rect[0]
    y1 = rect[1]
    x2 = rect[2]
    y2 = rect[3]
    
    if x1>x2:
        tmp = x1
        x1 = x2
        x2 = tmp
        
    if y1>y2:
        tmp = y1
        y1 = y2
        y2 = tmp
    
    new_img = im[x1:x2, y1:y2]
    resized = resize(new_img,(224,224))
    new_f_name = os.path.join(SEGMENTED_RESIZED_IMAGES,uuid.uuid4().hex+'.jpg')
    
    io.imsave(new_f_name, resized)

In [None]:
def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)

def load_image(image, transform=None):
    
    if transform is not None:
        image = transform(image).unsqueeze(0)
    
    return image

In [None]:
transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.485, 0.456, 0.406), 
                         (0.229, 0.224, 0.225))])

In [None]:
test_cnn = CNN(n_classes)
test_cnn.eval()
test_cnn.load_state_dict(torch.load('cnn-vol-2.pt'))

In [None]:
test_cnn.cuda()

In [None]:
test_image = io.imread(os.path.join(SEGMENTED_RESIZED_IMAGES,'991d371972114c9da5552238e61fa50f.jpg'))
image = load_image(test_image, transform)
image_tensor = to_var(image, volatile=True)

In [None]:
output = test_cnn(image_tensor)
_, predicted = torch.max(output.data, 1)
print(LABELS[int(predicted)])