In [1]:
import os
from collections import defaultdict
from os import listdir

import numpy as np
import torch
import torchvision
from PIL import Image
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

# Data loading

In [2]:
# Directory holding the input images; get_dataloader reads them as
# '<IMAGE_PATH>/im<image id>.jpg'.
IMAGE_PATH = '../data/images'
# Directory holding the annotation files; the part of each file name before
# the '.' is used as a class name.
LABEL_PATH = '../data/annotations'

In [3]:
def number_of_classes():
    """Count the entries found in ``LABEL_PATH``.

    Every entry of that directory is treated as the annotation file of one
    class, so the count is the number of classes.
    """
    annotation_files = listdir(LABEL_PATH)
    return len(annotation_files)

In [4]:
def get_class_map():
    """Map every class name to a stable integer index.

    Class names are the annotation file names in ``LABEL_PATH`` with the
    extension removed. ``os.listdir`` returns entries in arbitrary order, so
    the names are sorted first; otherwise the index assigned to a class could
    differ between runs or machines.

    Returns:
        dict: ``{class_name: index}`` where indices run from 0 upwards in
        sorted order of the file names.

    Raises:
        ValueError: If a file name does not contain exactly one ``'.'``.
    """
    ret = {}

    for i, fname in enumerate(sorted(listdir(LABEL_PATH))):
        img_class, _ = fname.split('.')
        ret[img_class] = i

    return ret

In [5]:
def get_dataloader(bs=256, train_fr=.6, max_images_per_class=1e9):
    """Build train/validation/test dataloaders from the annotated images.

    Every file in ``LABEL_PATH`` names one class and lists, one per line, the
    ids of the images carrying that label. An image id may appear in several
    annotation files, so each target is a multi-hot vector with one slot per
    class. Images are read from ``IMAGE_PATH/im<id>.jpg``, converted to RGB
    and flattened into a 1-D pixel vector.

    Prints one progress line per class while reading.

    Args:
        bs: Batch size used by all three dataloaders.
        train_fr: Fraction of the samples placed in the training split; the
            remainder is divided equally between test and validation.
        max_images_per_class: Maximum number of image ids read from each
            annotation file.

    Returns:
        Tuple ``(train_dataloader, valid_dataloader, test_dataloader)``.

    Raises:
        ValueError: If an annotation file name does not contain exactly one
            ``'.'``, or if there are too few samples to split.
    """
    # mapping from class names to integers
    class_map = get_class_map()
    n_classes = len(class_map)

    # image id -> multi-hot label vector (created as all zeros on first access)
    img_to_class = defaultdict(lambda: np.zeros(n_classes))

    # image id -> flattened RGB pixel vector
    img_to_data = {}

    # listdir order is arbitrary; sorting keeps the sample order, and therefore
    # the seeded split below, reproducible
    for fname in sorted(listdir(LABEL_PATH)):
        img_class, _ = fname.split('.')
        print(f'Reading class: {img_class}')
        img_class_id = class_map[img_class]

        # get image ids from annotation file
        with open(f'{LABEL_PATH}/{fname}', 'r') as fh:
            img_ids = fh.read().splitlines()

        for i, img_id in enumerate(img_ids):
            # check the limit *before* reading, so exactly
            # max_images_per_class ids are used (not one more)
            if i >= max_images_per_class:
                break

            # decode each image only once even when it carries several labels
            if img_id not in img_to_data:
                with Image.open(f'{IMAGE_PATH}/im{img_id}.jpg') as img:
                    # some images are black-and-white so convert to rgb
                    img_to_data[img_id] = np.asarray(img.convert('RGB')).flatten()

            # switch on this class in the image's label vector
            img_to_class[img_id][img_class_id] = 1

    # collect samples in first-seen order
    X = []
    y = []
    for img_id in img_to_class.keys():
        X.append(img_to_data[img_id])
        y.append(img_to_class[img_id])

    X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, train_size=train_fr, random_state=42)
    X_test, X_valid, y_test, y_valid = train_test_split(X_tmp, y_tmp, train_size=.5, test_size=.5, random_state=42)

    def make_loader(features, targets):
        # Stack into one ndarray first: building a tensor straight from a
        # list of ndarrays goes element by element and is very slow.
        dataset = TensorDataset(
            torch.tensor(np.stack(features), dtype=torch.float),
            torch.tensor(np.stack(targets), dtype=torch.float))
        return DataLoader(dataset, batch_size=bs)

    train_dataloader = make_loader(X_train, y_train)
    valid_dataloader = make_loader(X_valid, y_valid)
    test_dataloader = make_loader(X_test, y_test)

    return train_dataloader, valid_dataloader, test_dataloader

# Models

In [6]:
class TwoLayerModel(nn.Module):
    """Fully connected network with two hidden layers and batch normalisation.

    The forward pass is
    ``bn0 -> input_layer -> ReLU -> bn1 -> hidden1 -> ReLU -> bn2 -> hidden2``
    and returns the output of the last linear layer without any activation.

    Args:
        n_input: Number of features of each input vector.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        n_classes: Number of outputs.
        bs: Optional batch size, stored as ``self.bs`` and not used by the
            forward pass.
    """

    def __init__(self, n_input, n_hidden1, n_hidden2, n_classes, bs=None):
        super().__init__()
        # `bs` used to be read from a notebook-level global, which raised
        # NameError whenever that global was not defined. It is now an
        # explicit optional argument; the attribute is kept for compatibility.
        self.bs = bs
        self.input_layer = nn.Linear(n_input, n_hidden1)
        self.hidden1 = nn.Linear(n_hidden1, n_hidden2)
        self.hidden2 = nn.Linear(n_hidden2, n_classes)
        self.relu = nn.ReLU()
        self.bn0 = nn.BatchNorm1d(n_input)
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)

    def forward(self, x):
        """Return the un-activated outputs for a batch ``x`` of input vectors."""
        x = self.bn0(x)
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.bn1(x)
        x = self.hidden1(x)
        x = self.relu(x)
        x = self.bn2(x)
        x = self.hidden2(x)

        return x

In [7]:
class OneLayerModel(nn.Module):
    """Fully connected network with a single hidden layer.

    Batch normalisation is applied to the raw input and again to the hidden
    activations. The output of the final linear layer is returned without any
    activation.
    """

    def __init__(self, n_input, n_hidden, n_classes):
        super().__init__()

        # Attribute names and creation order are kept as they were, so
        # parameter names and initialisation order stay the same.
        self.input_layer = nn.Linear(in_features=n_input, out_features=n_hidden)
        self.hidden = nn.Linear(in_features=n_hidden, out_features=n_classes)
        self.relu = nn.ReLU()
        self.bn0 = nn.BatchNorm1d(num_features=n_input)
        self.bn1 = nn.BatchNorm1d(num_features=n_hidden)

    def forward(self, x):
        """Run ``bn0 -> input_layer -> ReLU -> bn1 -> hidden`` on batch ``x``."""
        hidden_act = self.relu(self.input_layer(self.bn0(x)))
        return self.hidden(self.bn1(hidden_act))

In [8]:
def train(dataloader, model, optimizer, criterion, device, n_epochs=50, losses=None):
    """Train ``model`` for ``n_epochs`` passes over ``dataloader``.

    Puts the model in training mode, then for every batch moves the inputs
    and targets to ``device``, runs a forward pass, back-propagates the loss
    and takes one optimizer step. After each epoch the loss of the last batch
    is printed.

    Args:
        dataloader: Iterable yielding ``(X, y)`` tensor pairs.
        model: Module being optimised; expected to already live on ``device``.
        optimizer: Optimizer constructed over ``model``'s parameters.
        criterion: Callable ``criterion(y_pred, y)`` returning a scalar loss.
        device: Device the batches are moved to.
        n_epochs: Number of passes over ``dataloader``.
        losses: Optional list that receives one detached loss tensor per
            batch. A fresh list is used when omitted; a mutable default would
            be shared by, and grow across, every call.

    Returns:
        None. Per-batch losses are only available through ``losses``.
    """
    if losses is None:
        losses = []

    model.train()

    # Defined up front so the epoch summary still prints for an empty dataloader.
    loss = float('nan')

    for epoch in range(n_epochs):

        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()

            # detach so the stored history does not keep autograd graph
            # references alive for every batch
            losses.append(loss.detach())

        print(f'Epoch: {epoch}, loss: {loss}')

# Training

In [9]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
use_cuda = torch.cuda.is_available()

device = torch.device('cuda') if use_cuda else torch.device('cpu')

# Hyperparameters
lr = 0.1
n_epochs = 10
bs = 256

n_classes = len(get_class_map())

# The first argument is the length of one flattened input vector.
# NOTE(review): 128*128*3 assumes every image is 128x128 RGB — confirm against the data.
model = TwoLayerModel(128*128*3, 128, 64, n_classes).to(device)
# Alternative architecture with a single hidden layer:
#model = OneLayerModel(128*128*3, 128, n_classes).to(device)

# The models return un-activated outputs, so use the loss that takes logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

## Create and save dataloaders

In [10]:
# Building the dataloaders decodes every image, so it is only done when the
# cached files are missing. Delete the files to force a rebuild.
DATALOADER_PATHS = (
    '../data/train_dataloader.dat',
    '../data/valid_dataloader.dat',
    '../data/test_dataloader.dat',
)

if not all(os.path.exists(path) for path in DATALOADER_PATHS):
    train_dataloader, valid_dataloader, test_dataloader = get_dataloader(bs=bs)
    torch.save(train_dataloader, '../data/train_dataloader.dat')
    torch.save(valid_dataloader, '../data/valid_dataloader.dat')
    torch.save(test_dataloader, '../data/test_dataloader.dat')

## Load dataloaders from disk

In [11]:
# Restore the three dataloaders from the files written by the
# "Create and save dataloaders" cell.
# NOTE(review): torch.load unpickles these files, and unpickling can execute
# arbitrary code — only load files you created yourself.
# NOTE(review): recent PyTorch releases presumably default to
# weights_only=True, which would reject pickled DataLoader objects — confirm
# against the installed version.
train_dataloader = torch.load('../data/train_dataloader.dat')
valid_dataloader = torch.load('../data/valid_dataloader.dat')
test_dataloader = torch.load('../data/test_dataloader.dat')

## Do the actual training

In [12]:
# Take the epoch count from the configuration cell rather than repeating the
# literal, so changing `n_epochs` there actually takes effect.
train(train_dataloader, model, optimizer, criterion, device, n_epochs=n_epochs)

Epoch: 0, loss: 0.659910261631012
Epoch: 1, loss: 0.6206590533256531
Epoch: 2, loss: 0.5837750434875488
Epoch: 3, loss: 0.5470588207244873
Epoch: 4, loss: 0.5079249739646912
Epoch: 5, loss: 0.4676307737827301
Epoch: 6, loss: 0.4319877326488495
Epoch: 7, loss: 0.39483389258384705
Epoch: 8, loss: 0.3577764332294464
Epoch: 9, loss: 0.34109529852867126
