In [1]:
import random, math
import numpy as np
import torch

from torch import nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision.datasets import ImageFolder

import sys, shutil
from os import listdir, mkdir
from PIL import Image
from collections import Counter

from IPython.display import display, clear_output

In [2]:
ORIG_IMAGE_PATH = '../data/images'
ORIG_LABEL_PATH = '../data/annotations'
LABEL_PATH = '../data/annotations2'
IMAGE_PATH = '../data/images2'
RANDOM_SEED = 1234

In [3]:
def get_class_map():
    ret = {}

    i = 0
    for fname in listdir(ORIG_LABEL_PATH):
        img_class, _ = fname.split('.')
        ret[img_class] = i
        i += 1

    return ret

In [4]:
def train_valid_test_split(train_fr=.7, valid_fr=.3, test_fr=0):
    """
    Creates subfolders train, test, validation. 
    Splits annotation files into train, validation and test sets.
    """
    random.seed(RANDOM_SEED)
    
    try:
        mkdir(f'{LABEL_PATH}')
        mkdir(f'{LABEL_PATH}/train')
        mkdir(f'{LABEL_PATH}/test')
        mkdir(f'{LABEL_PATH}/validation')
    except:
        pass
      
    # Make the split
    for fname in listdir(ORIG_LABEL_PATH):
        with open(f'{ORIG_LABEL_PATH}/{fname}', 'r') as fh:
            img_ids = fh.read().splitlines()
            random.shuffle(img_ids)
            split1, split2 = math.ceil(test_fr * len(img_ids)), math.ceil(valid_fr * len(img_ids))
            test_ids, valid_ids, train_ids = img_ids[:split1], img_ids[split1:split2], img_ids[split2:]
            with open(f'{LABEL_PATH}/test/{fname}', 'w') as outfile:
                outfile.write('\n'.join(test_ids))
            with open(f'{LABEL_PATH}/validation/{fname}', 'w') as outfile:
                outfile.write('\n'.join(valid_ids))
            with open(f'{LABEL_PATH}/train/{fname}', 'w') as outfile:
                outfile.writelines('\n'.join(train_ids))
            print(f'Wrote {len(train_ids)}, {len(valid_ids)}, {len(test_ids)} lines (of {len(img_ids)}) in {fname} (train, validation, test)')

# Need to run only once. 
if False:
    train_valid_test_split()

In [5]:
def copy_images_to_subfolders():
    """
    Copy images to subfolders by label
    """
    log = []
    # Loop through each set
    for setname in listdir(LABEL_PATH):
        display(f'Processing set: {setname}')
        try:
            mkdir(f'{IMAGE_PATH}/{setname}')
        except:
            pass
        # Loop through each label
        for fname in listdir(f'{LABEL_PATH}/{setname}'):
            with open(f'{LABEL_PATH}/{setname}/{fname}', 'r') as fh:
                img_class, _ = fname.split('.')
                clear_output(wait=True)
                display(f'Processing class: {img_class}')
                try:
                    mkdir(f'{IMAGE_PATH}/{setname}/{img_class}')
                except:
                    pass
                with open(f'{LABEL_PATH}/{setname}/{fname}', 'r') as fh:
                    destdir = f'{IMAGE_PATH}/{setname}/{img_class}'
                    # Loop through each image
                    for idx in fh.read().splitlines():
                        shutil.copy(f'{ORIG_IMAGE_PATH}/im{idx}.jpg', destdir)
                    log.append(f'Copied {len(listdir(destdir))} files in {destdir}')
        
    print('\n'.join(log))
    print("\nDone")


# No need to run multiple times
if False:
    try:
        mkdir(f'{IMAGE_PATH}')
    except:
        pass
    copy_images_to_subfolders()

In [6]:
# We probably won't need this

def get_dataset(set_path, max_items, image_path=IMAGE_PATH):
    data = []

    # mapping from class names to integers
    class_map = get_class_map()

    # loop through all the annotations
    for fname in listdir(set_path):

        img_class, _ = fname.split('.')
        #print(f'Reading class: {img_class}')
        clear_output(wait=True)
        display(f'Reading set: {set_path}, class: {img_class}')

        # open the annotation
        with open(f'{set_path}/{fname}', 'r') as fh:

            # get image ids from annotation file
            img_ids = fh.read().splitlines()

            # gather the images with labels
            i = 0
            for img_id in img_ids:
                img_path = f'{image_path}/im{img_id}.jpg'
                #print(img_path)
                img = Image.open(img_path)
                img_data = np.asarray(img)

                # skip black-and-white images
                if not len(img_data.shape) == 3:
                    continue

                img_data = img_data.flatten().astype(np.float32)

                data.append([img_data, class_map[img_class]])

                if i > max_items: break
                i += 1
                
    return data

In [7]:
def compute_batch_statistics(bs, max_items, root_path):
    """
    We need to run this only once with given dataset
    From https://forums.fast.ai/t/image-normalization-in-pytorch/7534/7?u=laochanlam
    """
    
    print(f'\nProcessing folder: {root_path}')
    
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    dataset = ImageFolder(root=root_path, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=False, num_workers=4)

    pop_mean = []
    pop_std = []
    for i, data in enumerate(dataloader):
        numpy_image = data[0].numpy().reshape(len(data[0]), 3, 128, 128)
        pop_mean.append(np.mean(numpy_image, axis=(0,2,3)))
        pop_std.append(np.std(numpy_image, axis=(0,2,3), ddof=1)) # ddof=1  => Sample standard deviation

    mean, std = np.array(pop_mean).mean(axis=0), np.array(pop_std).mean(axis=0)
    
    print(f'Batch mean {mean}, std {std}')
    return mean, std

# Default values (calculated from train and validation data)
MEAN_tr = (0.43805328, 0.40177783, 0.36756444)
STD_tr = (0.3032908, 0.28911096, 0.2931179)
MEAN_val = (0.4391351, 0.3997592, 0.36547714)
STD_val = (0.30170158, 0.2876057, 0.29113483)

# No need to run multiple times
if False:
    MEAN_tr, STD_tr = compute_batch_statistics(bs=64, max_items=256, root_path=f'{IMAGE_PATH}/train/')
    MEAN_val, STD_val = compute_batch_statistics(bs=64, max_items=256, root_path=f'{IMAGE_PATH}/validation/')
    

In [8]:
class TwoLayerModel(nn.Module):
    def __init__(self, n_input, n_hidden1, n_hidden2, n_classes):
        super().__init__()

        self.input_layer = nn.Linear(n_input, n_hidden1)
        self.hidden1 = nn.Linear(n_hidden1, n_hidden2)
        self.hidden2 = nn.Linear(n_hidden2, n_classes)
        self.relu = nn.ReLU()
        self.bn0 = nn.BatchNorm1d(n_input)
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)

    def forward(self, x):
        x = self.bn0(x)
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.bn1(x)
        x = self.hidden1(x)
        x = self.relu(x)
        x = self.bn2(x)
        x = self.hidden2(x)

        return x

In [9]:
class OneLayerModel(nn.Module):
    def __init__(self, n_input, n_hidden, n_classes):
        super().__init__()
        
        self.bn0 = nn.BatchNorm1d(n_input)
        self.input_layer = nn.Linear(n_input, n_hidden)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(n_hidden)
        self.hidden = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        x = self.bn0(x)
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.bn1(x)
        x = self.hidden(x)

        return x

In [10]:
def get_dataloader(root_path, bs, mean, std):
    
    transform = transforms.Compose([
        #transforms.ToPILImage(),
        #transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
    
    #train_data = get_dataset(f'{LABEL_PATH}/train', max_items)
    dataset = ImageFolder(root=root_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=bs, shuffle=True)
    
    return dataloader

In [11]:
def train(dataloader, model, optimizer, criterion, device, n_epochs=50, losses=[]):

    log = []
    model.train()

    for epoch in range(n_epochs):
        
        for i, batch in enumerate(dataloader):
            X, y = batch
            X = X.reshape(64, 128*128*3)
            
            X = X.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(X)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            
            losses.append(loss)
            
            clear_output(wait=True)
            display(f'Epoch: {epoch+1}, iteration: {i+1}, loss: {loss}')

        log.append(f'Epoch: {epoch+1}, loss: {loss}')
    
    print('\n'.join(log))
    print('\nDone training')

In [12]:
use_cuda = False

device = torch.device('cuda') if use_cuda else torch.device('cpu')

lr = 0.01
n_epochs = 2 #10
bs = 64 #256

n_classes = len(get_class_map().keys())

#model = TwoLayerModel(128*128*3, 128, 64, n_classes).to(device)
model = OneLayerModel(128*128*3, 128, n_classes).to(device)   # (self, D_in, H, D_out):

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [13]:
train_dataloader = get_dataloader(f'{IMAGE_PATH}/train/', bs, MEAN_tr, STD_tr)

In [14]:
train(train_dataloader, model, optimizer, criterion, device, n_epochs)

'Epoch: 1, iteration: 221, loss: 2.3439509868621826'

RuntimeError: shape '[64, 49152]' is invalid for input of size 344064