In [1]:
import numpy as np
import torch

from torch import nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

from os import listdir
from PIL import Image

In [2]:
IMAGE_PATH = '../data/images'
LABEL_PATH = '../data/annotations'

In [3]:
def get_class_map():
    ret = {}

    i = 0
    for fname in listdir(LABEL_PATH):
        img_class, _ = fname.split('.')
        ret[img_class] = i
        i += 1

    return ret

In [4]:
def get_dataloader(bs=64):
    data = []
    X = []
    y = []

    # mapping from class names to integers
    class_map = get_class_map()

    # loop through all the annotations
    for fname in listdir(LABEL_PATH):
        img_class, _ = fname.split('.')
    
        # open the annotation
        with open(f'{LABEL_PATH}/{fname}', 'r') as fh:

            # get image ids from annotation file
            img_ids = fh.read().splitlines()

            # gather the images with labels
            for img_id in img_ids:
                img_path = f'{IMAGE_PATH}/im{img_id}.jpg'
                img = Image.open(img_path)
                img_data = np.asarray(img)
                
                # skip black-and-white images
                if not len(img_data.shape) == 3:
                    continue

                img_data = img_data.flatten().astype(np.float32)

                data.append([img_data, class_map[img_class]])

    return DataLoader(data, batch_size=bs, shuffle=True)

In [5]:
class TwoLayerModel(nn.Module):
    def __init__(self, n_input, n_hidden1, n_hidden2, n_classes):
        super().__init__()

        self.input_layer = nn.Linear(n_input, n_hidden1)
        self.hidden1 = nn.Linear(n_hidden1, n_hidden2)
        self.hidden2 = nn.Linear(n_hidden2, n_classes)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.hidden1(x)
        x = self.relu(x)
        x = self.hidden2(x)
        x = self.softmax(x)

        return x

In [6]:
def train(dataloader, model, optimizer, criterion, device, n_epochs=50, losses=[]):

    model.train()

    for epoch in range(n_epochs):
        
        for i, batch in enumerate(dataloader):
            X, y = batch
            X = X.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step
            
            losses.append(loss)

        print(f'Epoch: {epoch}, loss: {loss}')

In [14]:
use_cuda = True

device = torch.device('cuda') if use_cuda else torch.device('cpu')

lr = 0.05
n_epochs = 5
bs = 256

n_classes = len(get_class_map().keys())

model = TwoLayerModel(128*128*3, 1024, 512, n_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [10]:
dataloader = get_dataloader(bs)

In [15]:
train(dataloader, model, optimizer, criterion, device, n_epochs)

Epoch: 0, loss: 2.6201508045196533
Epoch: 1, loss: 2.5878045558929443
Epoch: 2, loss: 2.6083316802978516
Epoch: 3, loss: 2.6293861865997314
Epoch: 4, loss: 2.6258599758148193
Epoch: 5, loss: 2.6057941913604736
Epoch: 6, loss: 2.658468008041382
Epoch: 7, loss: 2.6089224815368652
Epoch: 8, loss: 2.6375796794891357
Epoch: 9, loss: 2.593463659286499
