In [1]:
import cv2
import os
import numpy as np
from tqdm.notebook import tqdm
import torch

# Seed PyTorch's and NumPy's global random number generators with a fixed value.
torch.manual_seed(1337)
np.random.seed(1337)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Using device {}".format(device))
# Report the CUDA version string exposed by this PyTorch installation.
print(torch.version.cuda)

Using device cuda
10.2


# Images / Data

## Standardize

* Resize every image to the median dimensions of the whole image set (460 x 310, width x height)

In [2]:
# Directory the raw .jpg images are read from.
IMAGES_FILEPATH = 'data/food'
# Directory where load_images() writes the resized copies and reads them back on later runs.
PROCESSED_IMAGES_FILEPATH = 'data/food_processed'
# Target size handed to cv2.resize as `dsize`, i.e. (width, height).
IMAGE_DIMS = (460, 310)

def listdir_jpg(path):
    """Lazily yield the entries of `path` whose extension is exactly ".jpg".

    The comparison is case-sensitive and only looks at the final extension,
    so "a.JPG" and "a.jpg.txt" are not yielded. Entries come out in
    whatever order os.listdir returns them.
    """
    yield from (
        entry for entry in os.listdir(path)
        if os.path.splitext(entry)[1] == ".jpg"
    )

# Count the raw .jpg files once; load_images() compares the processed folder against this number.
number_of_images = len(list(listdir_jpg(IMAGES_FILEPATH)))

print("Total number of images: {}".format(number_of_images))

Total number of images: 10000


In [3]:
def load_images():
    """Load every image as a standard-size array, resizing and caching on first use.

    If ``PROCESSED_IMAGES_FILEPATH`` already contains ``number_of_images``
    ``.jpg`` files, those are read back unchanged. Otherwise the raw files in
    ``IMAGES_FILEPATH`` are read in sorted filename order, resized to
    ``IMAGE_DIMS`` and written to ``PROCESSED_IMAGES_FILEPATH`` as
    ``00000.jpg``, ``00001.jpg``, ... (the position in that sorted order).

    Returns:
        list: one array per image as produced by ``cv2.imread`` (and by
        ``cv2.resize`` on the preprocessing path), in sorted filename order.

    Raises:
        IOError: if an image cannot be read, or a resized image cannot be written.
    """
    already_processed = (
        os.path.exists(PROCESSED_IMAGES_FILEPATH)
        and len(list(listdir_jpg(PROCESSED_IMAGES_FILEPATH))) == number_of_images
    )
    should_preprocess = not already_processed

    image_dir = IMAGES_FILEPATH if should_preprocess else PROCESSED_IMAGES_FILEPATH
    filenames = sorted(listdir_jpg(image_dir))
    tqdm_desc = "Loading {} images".format("raw" if should_preprocess else "processed")

    images = []
    for filename in tqdm(filenames, desc=tqdm_desc):
        image_path = os.path.join(image_dir, filename)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than later in resize/normalize.
        if image is None:
            raise IOError("Could not read image: {}".format(image_path))
        images.append(image)

    if should_preprocess:
        # exist_ok avoids the separate exists() check before creating the folder.
        os.makedirs(PROCESSED_IMAGES_FILEPATH, exist_ok=True)
        # The context manager closes the progress bar even if a resize/write fails.
        with tqdm(total=len(images), desc="Processing image size", position=0) as pbar:
            for i, image in enumerate(images):
                # Preprocess
                image = cv2.resize(image, dsize=IMAGE_DIMS, interpolation=cv2.INTER_LANCZOS4)
                images[i] = image
                # Save for later; the file name is the image's index in sorted order.
                target_path = os.path.join(PROCESSED_IMAGES_FILEPATH, "{:05d}.jpg".format(i))
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(target_path, image):
                    raise IOError("Could not write image: {}".format(target_path))
                pbar.update()
    return images

# Module-level image list; TripletImageDataset.__getitem__ indexes into it by position.
images = load_images()

HBox(children=(FloatProgress(value=0.0, description='Loading processed images', max=10000.0, style=ProgressSty…




## Normalize images to [0, 1]

In [4]:
# Replace each entry of `images` with its min-max normalised float32 version.
for idx in tqdm(range(len(images)), desc="Normalizing images"):
    images[idx] = cv2.normalize(
        images[idx], None,
        alpha=0, beta=1,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_32F,
    )

HBox(children=(FloatProgress(value=0.0, description='Normalizing images', max=10000.0, style=ProgressStyle(des…




## Dataset

In [24]:
from torch.utils.data import Dataset, DataLoader

class TripletImageDataset(Dataset):
    """Map-style dataset of (anchor, positive, negative) image triplets.

    Each triplet is three integer positions into the module-level ``images``
    list, read from a text file with one whitespace-separated triplet per line.
    """

    def __init__(self, triplet_labels_fp):
        """Create triplets from triplet_labels_fp.

        Args:
            triplet_labels_fp: path to a text file whose non-blank lines each
                hold three whitespace-separated integers
                (anchor, positive, negative).

        Raises:
            ValueError: if a non-blank line does not hold exactly three integers.
        """
        triplets = []
        # `with` guarantees the file handle is closed once parsing finishes or fails.
        with open(triplet_labels_fp, 'r') as labels_file:
            for line_no, line in enumerate(labels_file, start=1):
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                if len(fields) != 3:
                    raise ValueError(
                        "{}:{}: expected 3 indices, got {}".format(
                            triplet_labels_fp, line_no, len(fields)
                        )
                    )
                anchor, positive, negative = map(int, fields)
                triplets.append((anchor, positive, negative))

        self.triplets = triplets

    # Required for Map-style dataset
    def __getitem__(self, index):
        """Return the three images referenced by the triplet at `index`."""
        i_1, i_2, i_3 = self.triplets[index]
        # `images` is the module-level list of loaded images.
        return images[i_1], images[i_2], images[i_3]

    def __len__(self):
        """Return the number of triplets."""
        return len(self.triplets)

    def _img_to_tensor(self, img):
        """Cast a batch to float, move its last axis to position 1 and send it to `device`."""
        return img.float().permute(0, 3, 1, 2).to(device)

    def generate_batches(self, batch_size, shuffle=True, drop_last=True):
        """Yield (anchor, positive, negative) batch tensors ready for the model.

        Args:
            batch_size: number of triplets per batch.
            shuffle: forwarded to the DataLoader.
            drop_last: forwarded to the DataLoader; when True a final batch
                smaller than `batch_size` is skipped.
        """
        dataloader = DataLoader(dataset=self, batch_size=batch_size,
                                shuffle=shuffle, drop_last=drop_last)

        for img1, img2, img3 in dataloader:
            yield self._img_to_tensor(img1), self._img_to_tensor(img2), self._img_to_tensor(img3)

In [6]:
# Triplets used for training, parsed from the label file.
train_set = TripletImageDataset('data/train_triplets.txt')

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




# Model

In [7]:
import torch.nn as nn

class NeuralNet(nn.Module):
    """Convolutional encoder that maps an image batch to EMBEDDING_DIM-sized vectors.

    Five Conv2d -> ReLU -> MaxPool2d(2) stages feed a two-layer fully
    connected head. The head's input size (3276) is fixed, so the flattened
    convolutional output must have exactly that many features per sample.
    """

    EMBEDDING_DIM=50

    def __init__(self):
        super().__init__()

        # One (in_channels, out_channels, kernel_size) entry per conv stage.
        stage_specs = (
            (3, 32, 7),
            (32, 64, 5),
            (64, 128, 3),
            (128, 256, 1),
            (256, 28, 1),
        )
        stages = []
        for in_channels, out_channels, kernel in stage_specs:
            stages.append(nn.Conv2d(in_channels, out_channels, kernel))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2))
        self.conv = nn.Sequential(*stages)

        self.fc = nn.Sequential(
            nn.Linear(3276, 512),
            nn.PReLU(),
            nn.Linear(512, NeuralNet.EMBEDDING_DIM),
        )

        # Re-initialise the convolution weights (see init_weights).
        self.apply(self.init_weights)

    def forward(self, x):
        """Run the conv stack, flatten per sample, and project to the embedding."""
        features = self.conv(x)
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)

    def init_weights(self, m):
        """Apply Kaiming-normal initialisation to Conv2d weights; leave other modules untouched."""
        if not isinstance(m, nn.Conv2d):
            return
        torch.nn.init.kaiming_normal_(m.weight)

## Training

In [59]:
class Training:
    """Training loop for a triplet-embedding model.

    NOTE: ``start`` reads the module-level names ``optimizer``, ``criterion``,
    ``closest_image`` and ``accuracy``; all four must be defined before it is
    called.
    """

    def start(self, model, dataset, batch_size, epochs=15, model_name='model.pth'):
        """Train `model` on `dataset` and checkpoint it after every epoch.

        Args:
            model: module to optimise; called on anchor, positive and negative batches.
            dataset: object providing ``__len__`` and ``generate_batches(batch_size)``.
            batch_size: number of triplets per optimisation step.
            epochs: number of passes over `dataset`.
            model_name: file name under ``trained_models/``; the same file is
                overwritten at the end of each epoch.
        """
        nr_batches = len(dataset) // batch_size

        # Create the checkpoint folder once, before any training time is spent.
        model_fp = 'trained_models/'
        os.makedirs(model_fp, exist_ok=True)

        progress_bar = tqdm(desc='', total=epochs * nr_batches,
            leave=False
        )
        # Start the training
        model.train()
        try:
            for epoch in range(epochs):
                running_loss = []
                running_pred = []
                batches = dataset.generate_batches(batch_size)
                for batch_index, (anchor_img, positive_img, negative_img) in enumerate(batches):
                    progress_bar.set_description_str("E {} | B {}".format(
                        epoch+1, batch_index
                    ))

                    optimizer.zero_grad()
                    anchor_out = model(anchor_img)
                    positive_out = model(positive_img)
                    negative_out = model(negative_img)

                    loss = criterion(anchor_out, positive_out, negative_out)
                    loss.backward()
                    optimizer.step()

                    # .item() extracts the scalar loss as a Python float.
                    running_loss.append(loss.item())

                    # Predictions are only used for reporting, so keep them out of autograd.
                    predicted_labels = closest_image(
                        anchor_out.detach(), positive_out.detach(), negative_out.detach()
                    )
                    running_pred.append(predicted_labels.cpu().numpy())

                    progress_bar.set_postfix_str("Loss={0:.3f}".format(np.mean(running_loss)))
                    progress_bar.update()

                # Save the model
                torch.save(model.state_dict(), os.path.join(model_fp, model_name))
                # Compute accuracy: every training triplet counts as label 1.
                if running_pred:
                    predictions = np.concatenate(running_pred)
                else:
                    # No full batch was produced (dataset smaller than batch_size).
                    predictions = np.empty(0, dtype=np.int32)
                acc = accuracy(predictions, np.ones_like(predictions))
                # Update progress bar
                progress_bar.write(
                    "[Epoch {}/{}]: Loss {}, Acc {}".format(
                        epoch+1, epochs,  np.mean(running_loss), acc
                    )
                )
                progress_bar.refresh()
        finally:
            # Release the progress bar even when training is interrupted or fails.
            progress_bar.close()

In [10]:
# Build a new network and move it to the selected device.
model = NeuralNet()
model.zero_grad()
model = model.to(device)

# Training.start reads `optimizer` and `criterion` as module-level names.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.TripletMarginLoss(margin=0.5)

# Also read as a module-level name by predict().
batch_size = 16

Training().start(model, train_set, batch_size)

HBox(children=(FloatProgress(value=0.0, max=55785.0), HTML(value='')))

[Epoch 1/15]: Loss 0.4536386728286743
[Epoch 2/15]: Loss 0.4665737748146057
[Epoch 3/15]: Loss 0.45581337809562683
[Epoch 4/15]: Loss 0.4243156313896179
[Epoch 5/15]: Loss 0.41445910930633545
[Epoch 6/15]: Loss 0.4018169641494751
[Epoch 7/15]: Loss 0.3813132643699646
[Epoch 8/15]: Loss 0.3544401228427887
[Epoch 9/15]: Loss 0.32084590196609497
[Epoch 10/15]: Loss 0.28474193811416626
[Epoch 11/15]: Loss 0.2514423131942749
[Epoch 12/15]: Loss 0.219896599650383
[Epoch 13/15]: Loss 0.19316422939300537
[Epoch 14/15]: Loss 0.17123612761497498
[Epoch 15/15]: Loss 0.1528848111629486


# Predictions

In [47]:
# Triplets to predict labels for, parsed from the test label file.
test_set = TripletImageDataset('data/test_triplets.txt')

def closest_image(anchor, positive, negative):
    """Decide, per sample, whether the anchor lies closer to the positive or the negative.

    Args:
        anchor, positive, negative: batched tensors of matching shape; the
            callers in this notebook pass the model's output embeddings.

    Returns:
        torch.Tensor: int32 tensor with one entry per sample. An entry is 0
        when the L2 distance anchor-positive is greater than or equal to the
        distance anchor-negative (ties included), and 1 otherwise.
    """
    l2_distance = nn.PairwiseDistance(p=2)
    to_positive = l2_distance(anchor, positive)
    to_negative = l2_distance(anchor, negative)
    negative_at_least_as_close = to_positive >= to_negative
    return torch.logical_not(negative_at_least_as_close).to(torch.int32)

def predict(predict_set):
    """Predict 0/1 for every triplet in the given predict_set (Dataset).

    Reads the module-level ``model`` and ``batch_size``. Batches are drawn
    with ``shuffle=False`` and ``drop_last=False``, so the result has one
    label per triplet, in dataset order.

    Returns:
        np.ndarray: 1-D array of the labels produced by ``closest_image``
        (empty when `predict_set` yields no batches).
    """
    # drop_last=False also yields a final partial batch, so round the count up.
    n_batches = (len(predict_set) + batch_size - 1) // batch_size
    batches = predict_set.generate_batches(batch_size, shuffle=False, drop_last=False)

    # Per-batch label arrays for the image the model chose as most similar.
    predict_set_choices = []
    model.eval()
    with torch.no_grad():
        for anchor_img, positive_img, negative_img in tqdm(batches, total=n_batches):
            # Get embeddings from our model
            anchor_emb = model(anchor_img)
            positive_emb = model(positive_img)
            negative_emb = model(negative_img)

            # Compute distances and the corresponding labels
            labels = closest_image(anchor_emb, positive_emb, negative_emb)

            predict_set_choices.append(labels.cpu().numpy())

    if not predict_set_choices:
        # np.concatenate rejects an empty sequence.
        return np.empty(0, dtype=np.int32)
    # Concatenating the per-batch arrays directly works whether or not the
    # last batch is shorter than the others.
    return np.concatenate(predict_set_choices)

In [61]:
# Predicted label for every training triplet, in file order.
train_set_predictions = predict(train_set)

HBox(children=(FloatProgress(value=0.0, max=3719.0), HTML(value='')))




In [62]:
def accuracy(pred, gold):
    """Return the fraction of entries in `pred` that equal the matching entry in `gold`.

    Args:
        pred: predicted labels (array or sequence).
        gold: reference labels, broadcastable against `pred`.

    Returns:
        The mean of the element-wise equality mask.
    """
    # Convert first: two plain lists would otherwise be compared as whole
    # objects, collapsing the result to a single True/False.
    return np.mean(np.asarray(pred) == np.asarray(gold))
    
# The gold vector is all ones, i.e. every training triplet is scored against label 1.
print("Train set accuracy: {}".format(accuracy(train_set_predictions, np.ones_like(train_set_predictions))))

Train set accuracy: 0.9456607577921532


In [63]:
# Keep the previous run's predictions for comparison. On a fresh kernel there
# is no earlier run, so look the name up instead of raising NameError.
last_test_set_predictions = globals().get('test_set_predictions')
test_set_predictions = predict(test_set)

HBox(children=(FloatProgress(value=0.0, max=3721.0), HTML(value='')))




In [64]:
# Sanity-check the number, shape and element type of the predictions before writing them.
print(len(test_set_predictions), test_set_predictions.shape)
print(type(test_set_predictions[0]))
# Write one integer label per line.
np.savetxt('data/submission.txt', test_set_predictions, fmt="%d")

59544 (59544,)
<class 'numpy.int32'>


## Visualizing results

In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Show only the first few predictions instead of dumping the whole array.
print(test_set_predictions[:5])
def show_predicted_images(choices=None, n_images_to_show=5):
    """Plot the first test triplets and frame the image the model picked.

    Each row shows one triplet as (anchor, positive, negative). A green frame
    marks the positive image when the prediction is 1 and the negative image
    when it is 0.

    Args:
        choices: per-triplet 0/1 predictions in test-set order; defaults to
            the module-level ``test_set_predictions``.
        n_images_to_show: number of triplets (rows) to draw.
    """
    if choices is None:
        choices = test_set_predictions

    # One unshuffled batch holding the first `n_images_to_show` triplets.
    batch = next(test_set.generate_batches(n_images_to_show, shuffle=False))

    fig = plt.figure(figsize=(20, 40))
    gs = gridspec.GridSpec(n_images_to_show, 3)
    for i in range(n_images_to_show):
        # Column 1 is the positive image, column 2 the negative one.
        chosen_column = 2 - int(choices[i])
        for j in range(len(batch)):
            ax = fig.add_subplot(gs[i, j])
            frame_width = 7 if j == chosen_column else 0
            for spine in ax.spines.values():
                spine.set_edgecolor('green')
                spine.set_linewidth(frame_width)

            # Batches are channel-first; imshow wants channel-last. The images
            # were read with cv2.imread (BGR channel order), so reverse the
            # channel axis to get the RGB order matplotlib expects.
            channel_last = batch[j][i].cpu().permute(1, 2, 0).numpy()
            ax.imshow(channel_last[..., ::-1])

            ax.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, labelleft=False)
    plt.show()

show_predicted_images()

# Model loading

In [60]:
# map_location restores the checkpoint onto whichever `device` is active, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load('trained_models/model.pth', map_location=device))
model = model.to(device)

# Training.start reads `optimizer` and `criterion` as module-level names.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.TripletMarginLoss(margin=0.5)

batch_size = 16

# Continue training from the loaded weights and save under a separate file name.
Training().start(model, train_set, batch_size, epochs=9, model_name='retrained_model.pth')

HBox(children=(FloatProgress(value=0.0, max=33471.0), HTML(value='')))

[Epoch 1/9]: Loss 0.1367347687482834, Acc 0.8920744823877387
[Epoch 2/9]: Loss 0.12115588784217834, Acc 0.9058046517881151
[Epoch 3/9]: Loss 0.10844552516937256, Acc 0.9166106480236623
[Epoch 4/9]: Loss 0.09669335931539536, Acc 0.9263746974993278
[Epoch 5/9]: Loss 0.08912786096334457, Acc 0.9318029040064534
[Epoch 6/9]: Loss 0.08189882338047028, Acc 0.9374663888141974


KeyboardInterrupt: 