In [13]:
import numpy as np
import torch

from torch import nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

import torchvision
import matplotlib.pyplot as plt

from os import listdir
from PIL import Image

In [80]:
IMAGE_PATH = '../data/images'
NUMBER_OF_IMAGES = 20000
LABEL_PATH = '../data/annotations'
NUMBER_OF_CLASSES = 14

In [81]:
def get_class_map():
    classnametoint = {}
    inttoclassname = {}

    i = 0
    label_files = sorted(listdir(LABEL_PATH))
    for fname in label_files:
        img_class, _ = fname.split('.')
        classnametoint[img_class] = i
        inttoclassname[i] = img_class
        i += 1

    return classnametoint, inttoclassname

In [82]:
# Parameters:
#   bs: batch size (default 64)
#   samples_to_print: number of samples to print when getting dataloader (default 0)

def get_dataloader(bs=64, samples_to_print=0):
    data = []
    # 2d array where each row is an image and each column is a boolean indicating if label is active. 
    # Row number is image number. Row 0 will not be used since there is no image number 0.
    class_array = np.zeros((NUMBER_OF_IMAGES + 1, NUMBER_OF_CLASSES), dtype=bool)
                
    # mapping from class names to integers
    class_map, class_int_to_string = get_class_map()

    # loop through all the annotations
    label_files = sorted(listdir(LABEL_PATH))
    for fname in label_files:
        img_class, _ = fname.split('.')
        class_int = class_map[img_class]
        
        # open the annotation
        with open(f'{LABEL_PATH}/{fname}', 'r') as fh:

            # get image ids from annotation file
            img_ids = fh.read().splitlines()
            
            # For the image, set the bool corresponding to the class to True
            for im_id in img_ids:
                class_array[int(im_id)][class_int] = True
    
    # Now we have a complete array of image labels in class_array
    print(class_array[0:20,])
    # Let's iterate through the images and attach the labels vector to each image
    
    nblackwhite = 0
    skippedids = []
    for img_id in range(1, NUMBER_OF_IMAGES + 1):
        img_path = f'{IMAGE_PATH}/im{img_id}.jpg'
        img = Image.open(img_path)
        img_data = np.asarray(img)

        # skip black-and-white images
        if not len(img_data.shape) == 3:
            nblackwhite += 1
            skippedids.append(img_id)
            continue

        img_data = img_data.flatten().astype(np.float32)

        data.append([img_data, class_array[img_id]])

    print(f'Skipped {nblackwhite} images that were black and white.')
    
    # Print some samples of the data items if samples_to_print is set
    if (samples_to_print > 0):
        print(f'\nFirst {samples_to_print} data items and their labels:')
        for i in range(samples_to_print + 1):
            print(f'\nImage number {i+1}')
            print(data[i])
            print('   Labels in text:')
            print('      ', end = ' ')
            for lab in range(NUMBER_OF_CLASSES):
                if(data[i][1][lab]):
                    print(class_int_to_string[lab], end = ' ')
            print('')
        
    return DataLoader(data, batch_size=bs, shuffle=True)

In [7]:
class TwoLayerModel(nn.Module):
    def __init__(self, n_input, n_hidden1, n_hidden2, n_classes):
        super().__init__()

        self.input_layer = nn.Linear(n_input, n_hidden1)
        self.hidden1 = nn.Linear(n_hidden1, n_hidden2)
        self.hidden2 = nn.Linear(n_hidden2, n_classes)
        self.relu = nn.ReLU()
        #self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.hidden1(x)
        x = self.relu(x)
        x = self.hidden2(x)
        #x = self.softmax(x)

        return x

In [8]:
def train(dataloader, model, optimizer, criterion, device, n_epochs=50, losses=[]):

    model.train()

    for epoch in range(n_epochs):
        
        for i, batch in enumerate(dataloader):
            X, y = batch
            X = X.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step
            
            losses.append(loss)

        print(f'Epoch: {epoch}, loss: {loss}')

In [9]:
# NOT WORKING YET
# Visualize some samples 
# Taken from transfer learning tutorial https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
def imshow(inp, title=None):
    """Imshow for Tensor."""
    #inp = inp.numpy().transpose((1, 2, 0))
    #mean = np.array([0.485, 0.456, 0.406])
    #std = np.array([0.229, 0.224, 0.225])
    #inp = std * inp + mean
    #inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

ntoshow = 1
dataloaderforvisu = get_dataloader(ntoshow)
#class_map_forvisu = get_class_map()
_, class_names = get_class_map()
    
# Get a batch of training data
inputs, classes = next(iter(dataloaderforvisu))

inputs = inputs.reshape((ntoshow, 128, 128, 3))
print("Inputs shape")
print(inputs.shape)
print('Inputs:')
print(inputs)

print("Array data type")
print(inputs.dtype)

print('Classes:')
print(classes)
print("Classes shape")
print(classes.shape)


# Make a grid from batch
out = torchvision.utils.make_grid(inputs)


imshow(out)
# TODO: show multiple labels per image
#imshow(out, title=[class_names[x] for x in classes])

NameError: name 'collections' is not defined

In [11]:
use_cuda = True

device = torch.device('cuda') if use_cuda else torch.device('cpu')

lr = 0.05
n_epochs = 5
bs = 256
class_map, _ = get_class_map()
n_classes = len(class_map.keys())

model = TwoLayerModel(128*128*3, 1024, 512, n_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [83]:
dataloader = get_dataloader(bs, samples_to_print=30)

[[False False False False False False False False False False False False
  False False]
 [False False False False False  True False False False  True  True False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False False False False False  True False  True False False
  False False]
 [False False False False False False False  True False  True False False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False False False  True False False False  True False False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False False False False False False False False False False
  False False]
 [False False False F

       False, False, False, False, False])]
   Labels in text:
       

Image number 29
[array([136., 160., 196., ...,  27.,  39.,  29.], dtype=float32), array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False])]
   Labels in text:
       clouds 

Image number 30
[array([49.,  0.,  2., ..., 51.,  1.,  2.], dtype=float32), array([False, False, False, False, False,  True, False, False, False,
        True, False, False, False, False])]
   Labels in text:
       female people 

Image number 31
[array([ 0.,  0.,  0., ..., 14., 10.,  7.], dtype=float32), array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False])]
   Labels in text:
       clouds 


In [None]:
train(dataloader, model, optimizer, criterion, device, n_epochs)