In [3]:
#Note: this code is written as an assignment in a Helsinki Uni course on Deep Learning and is heavily influenced by
#starter code provided by the lecturers Hande Celikkanat and Roman Yangarber

import os
import pandas as pd
import numpy as np


In [4]:
# Build a multi-hot label table: one row per image number (1-based) and one
# column per annotation file (column name = file name up to the first '.').
annotations = os.listdir('./annotations')
images = os.listdir('./images')
print(len(images))
df = pd.DataFrame(0, index=np.arange(1, len(images) + 1), columns=[x.split('.')[0] for x in annotations])
print(df.shape)
for tag in annotations:
    label = tag.split('.')[0]
    with open(f'./annotations/{tag}') as f:
        # Each non-blank line holds the number of an image carrying this label;
        # int() tolerates the trailing newline, blank lines are skipped.
        img_numbers = [int(line) for line in f if line.strip()]
    if img_numbers:
        # One .loc assignment instead of chained indexing (df[col][row] = 1):
        # the chained form can write to a temporary copy and be silently lost.
        # Raises KeyError if a file references an image number that is not in
        # the index (i.e. outside 1..len(images)).
        df.loc[img_numbers, label] = 1

20000
(20000, 14)


In [5]:
import torch
import torch.optim as optim
import torch.utils.data
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F

In [6]:
from PIL import Image
from torchvision.io import read_image
from torch.utils.data import Dataset

In [7]:
# Directory (relative to the notebook) that holds the image files.
DATA_DIR = "images"

In [8]:
# Train and test pipelines are currently identical: a single grayscale step.
# Each name gets its own Compose/Grayscale instance.
train_transform, test_transform = (
    transforms.Compose([transforms.Grayscale()]) for _ in ("train", "test")
)

In [90]:
#this code is an edited version of the code found at https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

class CustomImageDataset(Dataset):
    """Dataset pairing ``im<N>.jpg`` image files with rows of a label table.

    The row at position ``idx`` of ``annotations_df`` is paired with the file
    ``im<idx + 1>.jpg`` inside ``img_dir``, so the rows are assumed to be
    ordered by 1-based image number.

    Args:
        annotations_df: table with one row per image and one numeric column
            per label.
        img_dir: directory containing the image files.
        transform: optional callable applied to the grayscale image tensor.
        target_transform: optional callable applied to the label tensor.
    """

    def __init__(self, annotations_df, img_dir, transform=None, target_transform=None):
        self.img_labels = annotations_df
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for positional index ``idx``.

        The image is converted to a tensor and reduced to grayscale before the
        optional ``transform`` runs; the label row is returned as float32.
        """
        img_path = os.path.join(self.img_dir, f"im{idx + 1}.jpg")
        to_grayscale_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Grayscale()
            #transforms.RandomResizedCrop(256)
        ])
        # The context manager closes the file handle deterministically.
        with Image.open(img_path) as image:
            # Normalise the mode first so files that are already grayscale (or
            # CMYK etc.) reach Grayscale() with the 3 channels it handles in
            # every torchvision version.
            img_tensor = to_grayscale_tensor(image.convert("RGB"))
        # Explicit float32 conversion, independent of the frame's dtype.
        # NOTE(review): float targets are what BCE-style multi-label losses
        # take - confirm against the loss used for training.
        label = torch.tensor(self.img_labels.iloc[idx].to_numpy(dtype=np.float32))
        if self.transform:
            img_tensor = self.transform(img_tensor)
        if self.target_transform:
            label = self.target_transform(label)
        return img_tensor, label


In [91]:
# Full dataset: the label table built above plus the image directory;
# no extra image/label transforms are passed.
customImageDataset = CustomImageDataset(annotations_df=df, img_dir=DATA_DIR)

In [92]:
# Shuffled mini-batches of 50 samples drawn from the whole dataset.
train_loader = torch.utils.data.DataLoader(
    customImageDataset,
    batch_size=50,
    shuffle=True,
)

In [93]:
# Number of output logits of the classifier.
# NOTE(review): presumably one per label column of the annotation table - confirm.
NUM_CLASSES = 14

In [102]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel square images.

    Two conv -> ReLU -> 2x2 max-pool -> batch-norm stages (16 then 32 channels)
    feed a two-layer fully connected head that emits ``num_classes`` raw
    logits (no softmax/sigmoid is applied here).

    Args:
        num_classes: number of output logits.
        input_size: height and width of the input images in pixels. The two
            poolings shrink each side by a factor of 4, so the default of 128
            gives the 32 * 32 * 32 flattened features of the original design.
    """

    def __init__(self, num_classes=NUM_CLASSES, input_size=128):
        super().__init__()
        self.dropout = nn.Dropout(0.05)
        self.features = nn.Sequential(
          nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.BatchNorm2d(16),
          nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.BatchNorm2d(32)
        )

        # Spatial side length after the two stride-2 poolings.
        pooled_size = input_size // 4
        self.classify = nn.Sequential(
          nn.Linear(32 * pooled_size * pooled_size, 60),
          nn.Linear(60, num_classes)
        )

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` image batch to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32 * 32 * 32), a wrong input size now fails loudly in the
        # first Linear layer instead of silently changing the batch size.
        x = torch.flatten(x, start_dim=1)
        # Non-linearity (and the previously unused dropout) between the two
        # Linear layers; without it the head collapses to a single affine map.
        # The layers are indexed rather than restructured so parameter names
        # in the state dict stay the same.
        x = self.dropout(torch.relu(self.classify[0](x)))
        x = self.classify[1](x)
        return x

In [103]:
# Build the network once and print its layer layout for inspection.
model = CNN(num_classes=NUM_CLASSES)
print(model)

CNN(
  (dropout): Dropout(p=0.05, inplace=False)
  (features): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (classify): Sequential(
    (0): Linear(in_features=32768, out_features=60, bias=True)
    (1): Linear(in_features=60, out_features=14, bias=True)
  )
)


In [104]:
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [105]:
# Create a fresh network with default arguments and move it to the chosen device.
model = CNN()
model = model.to(device)

In [106]:
# The targets are multi-hot float vectors (nothing restricts an image to a
# single label), so every logit is scored as an independent binary decision.
# CrossEntropyLoss models exactly one class per sample and would either reject
# float targets or treat them as a probability distribution over classes.
# BCEWithLogitsLoss applies the sigmoid internally, so the model keeps
# returning raw logits.
loss_function = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.01)

In [None]:
# Train for 5 epochs, printing the running mean batch loss of the current epoch.
# Accuracy tracking is not implemented here.
model.train()  # put Dropout / BatchNorm layers into training mode
for epoch in range(5):
    train_loss = 0
    for batch_num, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run.
        result = model(data)
        loss = loss_function(result, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        print('Training: Epoch %d - Batch %d/%d: Loss: %.4f ' %
              (epoch, batch_num, len(train_loader), train_loss / (batch_num + 1)))