In [1]:
import cv2
import os
import numpy as np
from tqdm.notebook import tqdm
import torch

# Seed both PyTorch and NumPy with the same value so runs start from a known RNG state.
_SEED = 1337
torch.manual_seed(_SEED)
np.random.seed(_SEED)
# Enable cuDNN's benchmark mode for the convolution layers used below.
torch.backends.cudnn.benchmark = True

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device {}".format(device))
print(torch.version.cuda)

Using device cuda
10.2


# Images / Data

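* Resize every image to the median dimensions of the entire image set (460 x 310, width x height)
* Optionally denoise with `cv2.fastNlMeansDenoisingColored` (the call is currently commented out in `load_images`; see the denoising examples below)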

In [2]:
# Directory holding the raw .jpg images.
IMAGES_FILEPATH = 'data/food'
# Directory where load_images() caches the resized copies.
PROCESSED_IMAGES_FILEPATH = 'data/food_processed'
# Target size handed to cv2.resize as `dsize`, i.e. (width, height).
IMAGE_DIMS = (460, 310)

def listdir_jpg(path):
    """Lazily yield the names in `path` whose extension is exactly ".jpg".

    The comparison is case-sensitive, so "x.JPG" is not yielded. Names are
    produced in the (arbitrary) order returned by os.listdir.
    """
    yield from (
        name for name in os.listdir(path)
        if os.path.splitext(name)[1] == ".jpg"
    )

# Count the raw images once; load_images() compares the cache size against this number.
number_of_images = sum(1 for _ in listdir_jpg(IMAGES_FILEPATH))

print("Total number of images: {}".format(number_of_images))

Total number of images: 10000


In [None]:
def load_images():
    """Load every image at the standard size, building the resized cache if needed.

    The cache directory PROCESSED_IMAGES_FILEPATH is used when it exists and
    contains exactly `number_of_images` .jpg files. Otherwise the raw images in
    IMAGES_FILEPATH are loaded, resized to IMAGE_DIMS and written to the cache
    as "00000.jpg", "00001.jpg", ... following the sorted order of the raw
    file names.

    Returns:
        list: image arrays as produced by cv2.imread / cv2.resize, ordered by
        sorted file name.

    Raises:
        IOError: if an image cannot be read or a resized image cannot be written.
    """
    cache_is_complete = (
        os.path.exists(PROCESSED_IMAGES_FILEPATH)
        and len(list(listdir_jpg(PROCESSED_IMAGES_FILEPATH))) == number_of_images
    )
    should_preprocess = not cache_is_complete

    image_dir = IMAGES_FILEPATH if should_preprocess else PROCESSED_IMAGES_FILEPATH
    filenames = sorted(listdir_jpg(image_dir))
    tqdm_desc = "Loading {} images".format("raw" if should_preprocess else "processed")

    images = []
    for filename in tqdm(filenames, desc=tqdm_desc):
        image_path = os.path.join(image_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising;
            # fail here instead of crashing later inside cv2.resize / cvtColor.
            raise IOError("Could not read image: {}".format(image_path))
        images.append(image)

    if images:
        print(images[0].shape)

    if should_preprocess:
        os.makedirs(PROCESSED_IMAGES_FILEPATH, exist_ok=True)
        # Iterating through tqdm (instead of a manual pbar.update) closes the bar when done.
        for i, image in enumerate(tqdm(images, desc="Processing image size", position=0)):
            # Standardize the dimensions of the image
            image = cv2.resize(image, dsize=IMAGE_DIMS, interpolation=cv2.INTER_LANCZOS4)
            # Denoising is currently disabled:
            #image = cv2.fastNlMeansDenoisingColored(image, None, 8, 8, 7, 21)
            images[i] = image
            # Save for later runs
            target_path = os.path.join(PROCESSED_IMAGES_FILEPATH, "{:05d}.jpg".format(i))
            if not cv2.imwrite(target_path, image):
                raise IOError("Could not write image: {}".format(target_path))
    return images

# Module-level image list; TripletImageDataset.__getitem__ indexes into it with the triplet ids.
images = load_images()

HBox(children=(FloatProgress(value=0.0, description='Loading processed images', max=10000.0, style=ProgressSty…

### Denoising examples

In [None]:
import matplotlib.pyplot as plt

# Show a handful of samples: original on the left, denoised version on the right.
for sample_bgr in images[100:105]:
    original_rgb = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB)
    denoised_rgb = cv2.fastNlMeansDenoisingColored(original_rgb, None, 4, 4, 7, 21)
    fig = plt.figure()
    fig.set_figheight(15)
    fig.set_figwidth(15)
    for position, picture in enumerate((original_rgb, denoised_rgb), start=1):
        fig.add_subplot(1, 2, position)
        plt.imshow(picture)
    plt.show(block=True)

images[0].shape

## Compute the mean from images

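Originally subtracted in `TripletImageDataset.__getitem__`. The computation below is currently commented out; per-channel normalization is instead done by `transforms.Normalize` in the dataset transforms.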

In [None]:
#images_mean = np.mean(images, axis=(0, 1, 2), keepdims=True)
#print("Mean:", images_mean.shape, " | Shape:", images_mean)

## Dataset

In [None]:
# When True, part of the training triplets is held out as a validation set.
USE_VALIDATION = True
# Fraction of the used training triplets that goes to the validation set.
VALIDATION_FRACTION = 0.04
# Fraction of all training triplets to use (1 = use everything).
TRAIN_LABEL_TRIPLETS_FRACTION = 1

In [None]:
def load_triplet_labels(triplet_labels_fp):
    """Read image-index triplets from a whitespace-separated text file.

    Args:
        triplet_labels_fp: path of a text file in which every non-blank line
            holds three integers: anchor, positive and negative image index.

    Returns:
        list: (anchor, positive, negative) tuples of ints, in file order.

    Raises:
        ValueError: if a non-blank line does not consist of exactly three integers.
    """
    label_triplets = []

    # The context manager guarantees the file handle is closed, even on a parse error.
    with open(triplet_labels_fp, 'r') as labels_file:
        for line in labels_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at the end of the file).
                continue
            anchor, positive, negative = map(int, fields)
            label_triplets.append((anchor, positive, negative))

    return label_triplets
        
# Each entry is an (anchor, positive, negative) tuple of image indices, as unpacked by load_triplet_labels.
train_label_triplets = load_triplet_labels('data/train_triplets.txt')
test_label_triplets = load_triplet_labels('data/test_triplets.txt')

In [None]:
# Number of labelled triplets to work with (a sub-sample when the fraction is < 1).
train_label_triplets_size = len(train_label_triplets) * TRAIN_LABEL_TRIPLETS_FRACTION

if USE_VALIDATION:
    n_validation = int(VALIDATION_FRACTION * train_label_triplets_size)
    n_train = int((1 - VALIDATION_FRACTION) * train_label_triplets_size)
    if n_validation > 0:
        # Validation = tail of the list, training = head, so the two never overlap.
        validation_start = max(0, len(train_label_triplets) - n_validation)
        validation_triplets = train_label_triplets[validation_start:]
    else:
        # A slice starting at -0 would return the WHOLE list, so treat an empty
        # validation split as "no validation" instead.
        validation_triplets = None
    train_triplets = train_label_triplets[:n_train]
else:
    validation_triplets = None
    # Honour the sub-sampling fraction here as well (a no-op when it is 1).
    train_triplets = train_label_triplets[:int(train_label_triplets_size)]

In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class TripletImageDataset(Dataset):
    """Map-style dataset that yields (anchor, positive, negative) image tensors.

    The triplets hold indices into the module-level `images` list. Every image
    is converted from BGR to RGB, wrapped in a PIL image and passed through
    `transform`.
    """

    def __init__(self, triplets, transform=None):
        """
        Args:
            triplets: sequence of (anchor, positive, negative) indices into `images`.
            transform: callable applied to each PIL image. When None, the image
                is only converted to a float C x H x W tensor scaled to [0, 1],
                so that batching with a DataLoader still works.
        """
        self.triplets = triplets
        # Without this fallback __getitem__ would call None and raise a TypeError.
        self.transform = transform if transform is not None else self._to_float_tensor

    @staticmethod
    def _to_float_tensor(img):
        """Convert an H x W x C uint8 image into a float C x H x W tensor in [0, 1]."""
        pixels = np.array(img, dtype=np.uint8)
        return torch.from_numpy(pixels).permute(2, 0, 1).contiguous().float().div(255)

    @staticmethod
    def _load_rgb(image_index):
        """Return `images[image_index]` as an RGB PIL image."""
        return Image.fromarray(cv2.cvtColor(images[image_index], cv2.COLOR_BGR2RGB))

    # Required for Map-style dataset
    def __getitem__(self, index):
        i_1, i_2, i_3 = self.triplets[index]
        return (
            self.transform(self._load_rgb(i_1)),
            self.transform(self._load_rgb(i_2)),
            self.transform(self._load_rgb(i_3)),
        )

    def __len__(self):
        return len(self.triplets)

    def _img_to_tensor(self, img):
        """Move a batch tensor to the module-level `device`."""
        return img.to(device)

    def generate_batches(self, batch_size, shuffle=True, drop_last=True):
        """Yield (anchor, positive, negative) batches that already live on `device`.

        Args:
            batch_size: number of triplets per batch.
            shuffle: whether the DataLoader shuffles the triplets.
            drop_last: whether a final, smaller batch is dropped.
        """
        dataloader = DataLoader(dataset=self, batch_size=batch_size,
                                shuffle=shuffle, drop_last=drop_last)

        for img1, img2, img3 in dataloader:
            yield self._img_to_tensor(img1), self._img_to_tensor(img2), self._img_to_tensor(img3)

In [None]:
from torchvision import transforms

input_size = 224

# Training pipeline: random crop and flip as augmentation, followed by tensor
# conversion and per-channel normalisation.
train_augmentations = [
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
]
train_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
train_set_transform = transforms.Compose(train_augmentations + train_tensor_steps)

train_set = TripletImageDataset(train_triplets, transform=train_set_transform)

In [None]:
# Evaluation pipeline: deterministic resize + centre crop, then the same tensor
# conversion and normalisation constants as the training pipeline.
val_geometry_steps = [
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
]
val_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
val_set_transform = transforms.Compose(val_geometry_steps + val_tensor_steps)

# Model

In [None]:
import torch.nn as nn

class NeuralNet(nn.Module):
    """Small convolutional network that maps an image batch to embeddings.

    Input: N x 3 x H x W float tensor. Output: N x EMBEDDING_DIM tensor.

    The first fully-connected layer expects 28 * 9 * 13 = 3276 features, which
    is what the convolutional stack produces for a 310 x 460 (H x W) image.
    An adaptive pooling layer brings the feature map to that 9 x 13 grid for
    any other resolution (e.g. 224 x 224 crops), so the network no longer
    fails with a shape mismatch. For 310 x 460 inputs the pooling is an
    identity, and it has no parameters, so existing checkpoints still load.
    """

    EMBEDDING_DIM = 64
    # Channels of the last conv layer and the (H, W) grid the fc layer is sized for.
    FEATURE_CHANNELS = 28
    FEATURE_MAP_SIZE = (9, 13)

    def __init__(self):
        super(NeuralNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 7),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(256, NeuralNet.FEATURE_CHANNELS, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Makes the flattened feature size independent of the input resolution.
        self.pool = nn.AdaptiveAvgPool2d(NeuralNet.FEATURE_MAP_SIZE)

        flat_features = (NeuralNet.FEATURE_CHANNELS
                         * NeuralNet.FEATURE_MAP_SIZE[0]
                         * NeuralNet.FEATURE_MAP_SIZE[1])  # 3276
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.PReLU(),
            nn.Linear(512, NeuralNet.EMBEDDING_DIM)
        )

        self.apply(self.init_weights)

    def forward(self, x):
        """Compute the embeddings for a batch of images."""
        x = self.conv(x)
        x = self.pool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x

    def init_weights(self, m):
        """Kaiming-normal initialisation for convolution weights; other modules keep their defaults."""
        if isinstance(m, nn.Conv2d):
            torch.nn.init.kaiming_normal_(m.weight)

In [None]:
def closest_image(anchor, positive, negative):
    """Decide, per sample, whether the anchor is closer to the positive or the negative.

    Args:
        anchor, positive, negative: embedding batches of identical shape
            `N x D`, where `N` is the number of samples.

    Returns:
        torch.Tensor: int32 tensor of shape `N`; 1 where the Euclidean distance
        anchor-positive is strictly smaller than anchor-negative, 0 otherwise.
        Ties and NaN distances yield 0, so a diverged model is never counted
        as having chosen the positive image.
    """
    pairwise_dist = nn.PairwiseDistance(p=2)
    distance_pos = pairwise_dist(anchor, positive)
    distance_neg = pairwise_dist(anchor, negative)
    # A direct `<` is False for NaN, whereas negating `>=` would turn NaN into 1.
    return (distance_pos < distance_neg).to(torch.int32)

In [None]:
def accuracy(pred, gold):
    """Return the fraction of positions where `pred` equals `gold`."""
    matches = (pred == gold)
    return np.mean(matches)

## Training

In [None]:
# https://github.com/pytorch/examples/blob/1de2ff9338bacaaffa123d03ce53d7522d5dcc2e/imagenet/main.py#L287
class AverageMeter(object):
    """Keeps the most recent value of a metric together with its running weighted mean."""

    def __init__(self, name, fmt=':f'):
        # `fmt` is a format-spec suffix (e.g. ':.3f') applied to both val and avg in __str__.
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val`, weighted as `n` observations, and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Builds e.g. '{name} {val:.3f} ({avg:.3f})' and fills it from the instance attributes.
        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(**vars(self))

In [None]:
def predict(model, predict_set):
    """Predict 0/1 for every triplet of `predict_set`.

    Uses the module-level `batch_size` for batching and switches `model` to
    evaluation mode (it is not switched back afterwards).

    Args:
        model: network that maps an image batch to an embedding batch.
        predict_set: dataset exposing `generate_batches` and `__len__`.

    Returns:
        numpy.ndarray: 1-D int32 array with one entry per triplet, in dataset
        order; 1 if the second image of the triplet is closer to the anchor
        than the third one, else 0. Empty if `predict_set` is empty.
    """
    # Ceiling division: with drop_last=False the final, smaller batch is processed too.
    n_batches = (len(predict_set) + batch_size - 1) // batch_size
    batches = predict_set.generate_batches(batch_size, shuffle=False, drop_last=False)

    per_batch_labels = []
    model.eval()
    with torch.no_grad():
        for anchor_img, positive_img, negative_img in tqdm(batches, leave=True, total=n_batches):
            # Get embeddings from our model
            anchor_emb = model(anchor_img)
            positive_emb = model(positive_img)
            negative_emb = model(negative_img)

            # Compute distances and the corresponding labels
            labels = closest_image(anchor_emb, positive_emb, negative_emb)

            per_batch_labels.append(labels.cpu().numpy())

    if not per_batch_labels:
        return np.array([], dtype=np.int32)
    # Concatenating the per-batch arrays works for equal-sized and ragged batches alike.
    return np.concatenate(per_batch_labels).ravel()

In [None]:
class Training:
    """Runs the triplet-loss training loop with per-epoch checkpointing and validation."""

    def start(self, model, optimizer, dataset, criterion, scheduler, batch_size, epochs=15, model_name='model.pth'):
        """Train `model` on `dataset`.

        After every epoch the weights are saved to 'trained_models/<model_name>'
        and, when the module-level `validation_triplets` is not None, the
        validation accuracy is computed with `predict` (which batches with the
        module-level `batch_size`).

        Args:
            model: network producing embeddings.
            optimizer: optimizer over the parameters to train.
            dataset: training dataset exposing `generate_batches` and `__len__`.
            criterion: triplet loss called as criterion(anchor, positive, negative).
            scheduler: learning-rate scheduler stepped once per epoch, or None.
            batch_size: number of triplets per training batch.
            epochs: number of passes over `dataset`.
            model_name: file name of the checkpoint.
        """
        nr_batches = len(dataset) // batch_size
        progress_bar = tqdm(desc='', total=epochs * nr_batches,
            leave=False
        )

        # The validation data does not change between epochs, so build the dataset once.
        validation_set = None
        if validation_triplets is not None:
            validation_set = TripletImageDataset(validation_triplets, val_set_transform)

        model_fp = 'trained_models/'

        # Start the training
        for epoch in range(epochs):
            model.train()
            losses = AverageMeter('Loss', ':.3f')
            running_pred = []
            batches = dataset.generate_batches(batch_size)
            for batch_index, (anchor_img, positive_img, negative_img) in enumerate(batches):
                progress_bar.set_description_str("E {} | B {}".format(
                    epoch+1, batch_index
                ))

                optimizer.zero_grad()
                anchor_out = model(anchor_img)
                positive_out = model(positive_img)
                negative_out = model(negative_img)

                loss = criterion(anchor_out, positive_out, negative_out)

                losses.update(loss.item(), anchor_img.size(0))
                loss.backward()
                optimizer.step()

                # The gold label is always 1: the positive image should be the closer one.
                predicted_labels = closest_image(anchor_out, positive_out, negative_out).cpu().numpy()
                acc_t = accuracy(predicted_labels, np.ones_like(predicted_labels))
                running_pred.append(predicted_labels)

                progress_bar.set_postfix_str("{} | BatchAcc={:.3f}".format(losses, acc_t))
                progress_bar.update()

            # Save the model
            os.makedirs(model_fp, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(model_fp, model_name))

            # Stays None when there is no validation split, so the log line below never hits an unbound name.
            validation_acc = None
            if validation_set is not None:
                validation_set_predictions = predict(model, validation_set)
                validation_acc = accuracy(validation_set_predictions, np.ones_like(validation_set_predictions))

            # Compute accuracy over all batches of this epoch
            if running_pred:
                predictions = np.concatenate(running_pred).ravel()
                acc = accuracy(predictions, np.ones_like(predictions))
            else:
                acc = float('nan')

            # Change LR for next epoch
            if scheduler is not None:
                scheduler.step()

            # Update progress bar
            progress_bar.write(
                "[Epoch {}/{}]: {}, Val Accuracy {}, Train Accuracy {}".format(
                    epoch+1, epochs, losses, validation_acc, acc
                )
            )
            progress_bar.refresh()

        progress_bar.close()

In [None]:
# Build the custom network and move it to the selected device.
model = NeuralNet()
model.zero_grad()
model = model.to(device)

# Adam with a small learning rate; the scheduler is stepped once per epoch inside
# Training.start and multiplies the learning rate by 0.1 at milestone epoch 1.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1], gamma=0.1)
criterion = nn.TripletMarginLoss(margin=0.2)

# Module-level batch size: also read by predict(), not only passed to Training.start.
batch_size = 16

Training().start(model, optimizer, train_set, criterion, scheduler, batch_size)

# Predictions

In [None]:
# NOTE: train_set applies the random crop/flip transform, so these predictions are made on augmented images.
train_set_predictions = predict(model, train_set)

In [None]:
print("Train set accuracy: {}".format(accuracy(train_set_predictions, np.ones_like(train_set_predictions))))

In [None]:
# The dataset needs a transform to turn the PIL images into tensors. Use the
# deterministic evaluation pipeline so test inputs are preprocessed like the
# validation inputs (resize, centre crop, normalisation).
test_set = TripletImageDataset(test_label_triplets, val_set_transform)
test_set_predictions = predict(model, test_set)

In [None]:
# Sanity-check the size and element type of the predictions before writing them out.
print(len(test_set_predictions), test_set_predictions.shape)
print(type(test_set_predictions[0]))
# One integer per line (fmt="%d"), in the order of the test triplets.
np.savetxt('data/submission.txt', test_set_predictions, fmt="%d")

## Visualizing results

In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec

def show_predicted_images():
    """Plot the first test triplets and frame the image the model judged closer to the anchor.

    Each row shows one triplet (the three images in triplet order). The second
    image gets a green frame when the prediction is 1, the third when it is 0.

    Reads the module-level `test_set` and `test_set_predictions`.
    NOTE(review): the colours are restored by undoing the mean/std normalisation
    used by the transforms in this notebook; this assumes `test_set` was built
    with such a normalising transform -- confirm.
    """
    n_images_to_show = 5
    # Same constants as passed to transforms.Normalize in this notebook.
    channel_mean = torch.tensor([0.485, 0.456, 0.406])
    channel_std = torch.tensor([0.229, 0.224, 0.225])

    fig = plt.figure(figsize=(20, 40))
    # drop_last=False so that fewer than n_images_to_show triplets still give a batch.
    batch = next(test_set.generate_batches(n_images_to_show, shuffle=False, drop_last=False))
    n_rows = batch[0].size(0)
    gs = gridspec.GridSpec(n_images_to_show, 3)
    for i in range(n_rows):
        for j in range(len(batch)):
            ax = fig.add_subplot(gs[i, j])
            is_chosen = (j == 2 - test_set_predictions[i])
            for spine in ax.spines.values():
                if is_chosen:
                    spine.set_edgecolor('green')
                    spine.set_linewidth(7)
                else:
                    spine.set_linewidth(0)
            # C x H x W -> H x W x C, then undo the normalisation and clip to the displayable range.
            picture = batch[j][i].cpu().permute(1, 2, 0) * channel_std + channel_mean
            ax.imshow(picture.clamp(0, 1).numpy())

            ax.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, labelleft=False)
    plt.show()
    
show_predicted_images()

# Model loading

In [None]:
# map_location lets a checkpoint that was saved on the GPU be restored on a CPU-only machine.
model.load_state_dict(torch.load('trained_models/model.pth', map_location=device))
model = model.to(device)

# Training.start expects (model, optimizer, dataset, criterion, scheduler, batch_size, ...).
# The optimizer, criterion and scheduler created for `model` above are reused here.
Training().start(model, optimizer, train_set, criterion, scheduler, batch_size, epochs=10, model_name='model_retrained_denoised.pth')

# Pre-trained Models

In [43]:
import torchvision
from torchvision import models

def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of `model` when `feature_extracting` is truthy; otherwise do nothing."""
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Create a torchvision model whose final layer is replaced by a fresh one.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: number of outputs of the new final layer.
        feature_extract: when truthy, all pre-existing parameters are frozen
            (via set_parameter_requires_grad), so only the new layer(s) train.
        use_pretrained: forwarded to torchvision as `pretrained`.

    Returns:
        tuple: (model, input_size), where input_size is the image side length
        recorded for that architecture (224, or 299 for inception).

    Raises:
        ValueError: if `model_name` is not one of the supported names.
    """
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG11_bn
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # Squeezenet: the classifier head is a 1x1 convolution rather than a linear layer
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # Densenet
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        # Raise instead of calling exit(): exiting would shut down the notebook kernel.
        raise ValueError("Invalid model name: {!r}".format(model_name))

    return model_ft, input_size

# Freeze the pretrained weights and train only the newly created final layer.
feature_extract = True

# Initialize the model for this run
# 128 is passed as `num_classes`, i.e. the size of the new final layer's output.
# Note that this rebinds the module-level `input_size`.
model_pretrained, input_size = initialize_model('squeezenet', 128, feature_extract, use_pretrained=True)

# Print the model we just instantiated
print(model_pretrained)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

In [44]:
# Send the model to GPU
model_pretrained = model_pretrained.to(device)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_pretrained.parameters()
print("Params to learn:")

if feature_extract:
    params_to_update = []
    for name,param in model_pretrained.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_pretrained.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

Params to learn:
	 classifier.1.weight
	 classifier.1.bias


In [47]:
# Same loss and learning rate as for the custom network, but only the
# parameters collected in `params_to_update` are optimized.
criterion_pretrained = nn.TripletMarginLoss(margin=0.2)
optimizer_pretrained = torch.optim.Adam(params_to_update, lr=1e-5)
# Alternative schedulers that were tried:
#scheduler_pretrained = None
#scheduler_pretrained = torch.optim.lr_scheduler.MultiStepLR(optimizer_pretrained, milestones=[1], gamma=0.1)
# StepLR: multiply the learning rate by 0.1 every 2 scheduler steps (one step per epoch in Training.start).
scheduler_pretrained = torch.optim.lr_scheduler.StepLR(optimizer_pretrained, 2, gamma=0.1)


# Rebinds the module-level batch size, which predict() reads as well.
batch_size = 32

Training().start(model_pretrained, optimizer_pretrained, train_set, criterion_pretrained, scheduler_pretrained, batch_size, model_name='squeezenet_model.pth')

HBox(children=(FloatProgress(value=0.0, max=525.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[Epoch 1/15]: Loss 1.151 (1.544), Val Accuracy 0.6382978723404256, Train Accuracy 0.5696428571428571


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[Epoch 2/15]: Loss 1.615 (1.564), Val Accuracy 0.6595744680851063, Train Accuracy 0.5526785714285715


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[Epoch 3/15]: Loss 2.546 (1.672), Val Accuracy 0.6595744680851063, Train Accuracy 0.5330357142857143


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[Epoch 4/15]: Loss 1.703 (1.595), Val Accuracy 0.6595744680851063, Train Accuracy 0.5267857142857143


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[Epoch 5/15]: Loss 1.210 (1.481), Val Accuracy 0.6595744680851063, Train Accuracy 0.5616071428571429


KeyboardInterrupt: 