# Food Similarity

### Author: Armando

In [1]:
import os
import random
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import  activations, datasets, layers, losses, metrics, models, optimizers, regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Input
from tensorflow.keras.preprocessing.image import img_to_array, load_img

Parts of the code are based on the Keras example on Siamese networks, although the model itself is not.

The source can be found here: https://keras.io/examples/vision/siamese_network/

In [2]:
# Seed Python's `random`, NumPy and TensorFlow with the same value so that
# repeated runs draw the same random numbers.
seed = 13
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

The notebook was run in Google Colab

In [3]:
# Dataset locations: the folder holding the food images and the text files
# listing the training and test triplets.
img_path = "/content/drive/MyDrive/foodsimilarity/food/food/"
train_triplets_path = "/content/drive/MyDrive/foodsimilarity/train_triplets.txt"
test_triplets_path = "/content/drive/MyDrive/foodsimilarity/test_triplets.txt"

Input shape depends on the selected pre-trained model we use.

In [4]:
# Shape of a single image fed to the feature extractor: 299 x 299 pixels, 3 channels.
input_shape = (299, 299, 3)

To get feature embeddings for the images, a pretrained network is used, in this case InceptionResNetV2. First, features are computed for all 10000 images.

In [5]:
def pretrained_feature(input_shape):
    """Build a frozen InceptionResNetV2 feature extractor.

    The backbone is created without its classification head
    (``include_top=False``) and with global average pooling
    (``pooling='avg'``), and its weights are marked as non-trainable.

    Args:
        input_shape: Shape of a single input image, handed to the Keras
            ``Input`` layer.

    Returns:
        A Keras ``Model`` that maps the input tensor to the backbone's output.
    """
    # Basic feature embeddings come from a pretrained InceptionResNetV2 that
    # is never fine-tuned.
    backbone = tf.keras.applications.InceptionResNetV2(include_top=False, pooling='avg')
    backbone.trainable = False

    image_in = Input(shape=input_shape)
    embedding = backbone(image_in)

    return Model(inputs=image_in, outputs=embedding)

Load all unzipped images from a folder and pass them to InceptionResNetV2 to get feature embeddings.

In [6]:
def load_images(path, batch_size=1, num_images=10000, target_size=None):
    """Endlessly yield batches of preprocessed images for feature extraction.

    Images are expected to be named ``00000.jpg``, ``00001.jpg``, ... inside
    ``path``. The generator walks through them in index order and wraps around
    to index 0 after ``num_images`` images, so it never terminates on its own;
    the consumer decides how many batches to draw.

    Args:
        path: Prefix prepended to each file name. It is concatenated as-is,
            so it must end with a path separator.
        batch_size: Number of images per yielded batch.
        num_images: Number of consecutively numbered images available.
        target_size: Optional ``(height, width)`` passed to ``load_img`` to
            resize every image on load. ``None`` keeps the original size.

    Yields:
        Tuples ``(batch, labels)`` where ``batch`` is an array of preprocessed
        images and ``labels`` is an all-zero placeholder of length
        ``batch_size`` (the labels carry no information).
    """
    idx = 0

    while True:
        batch = []
        # Load the next `batch_size` images and apply the preprocessing that
        # InceptionResNetV2 expects.
        while len(batch) < batch_size:
            img_name = f"{path}{idx:05}.jpg"
            img = load_img(img_name, target_size=target_size)
            img = tf.keras.applications.inception_resnet_v2.preprocess_input(img_to_array(img))
            batch.append(img)
            idx = (idx + 1) % num_images

        # No try/except around the yield: closing a generator raises
        # GeneratorExit here, which already ends the generator cleanly.
        yield np.array(batch), np.zeros(batch_size)

Wrapper function to load all images from food folder and get the embeddings.

In [7]:
def feature_extraction():
    """Compute feature embeddings for all 10000 food images.

    Builds the pretrained extractor for the module-level ``input_shape``,
    streams the images from ``img_path`` one at a time and runs 10000
    prediction steps over that stream.

    Returns:
        The array returned by ``predict`` for those 10000 steps.
    """
    extractor = pretrained_feature(input_shape)
    image_stream = load_images(img_path, 1)
    return extractor.predict(image_stream, steps=10000)

Return the training dataset together with its labels, or just the test dataset without labels. Importantly, the features of the three images in each triplet are concatenated into a single vector, so only one ordinary neural network has to be trained instead of a Siamese network.

In [8]:
def get_triplets(features, triplets_file, labels_est=False):
    """Assemble concatenated triplet feature vectors from a triplet list.

    Every line of ``triplets_file`` holds three whitespace-separated image
    indices ``A P N``. For each line the feature rows of the three images are
    concatenated along the last axis in the order ``(A, P, N)``.

    When ``labels_est`` is true, each triplet additionally produces a swapped
    copy ``(A, N, P)``. The two rows are interleaved in the output
    (original, swapped, original, swapped, ...) with labels 1 and 0.

    Args:
        features: Array-like with one feature row per image, indexed by the
            image index used in the triplet file. Must have at least two
            dimensions.
        triplets_file: Path or file-like object accepted by ``pd.read_csv``.
        labels_est: Whether to generate swapped samples and labels (training
            mode) or only the plain triplets (test mode).

    Returns:
        ``(tensors, labels)`` if ``labels_est`` is true, otherwise ``tensors``.

    Raises:
        ValueError: If ``features`` has fewer than two dimensions.
    """
    features = np.asarray(features)
    if features.ndim < 2:
        raise ValueError("features must have one row of features per image")

    # `sep=r"\s+"` replaces `delim_whitespace=True`, which is deprecated in
    # pandas 2.2+.
    trips = pd.read_csv(triplets_file, sep=r"\s+", header=None, names=["A", "P", "N"])
    anchor_idx = trips["A"].to_numpy()
    positive_idx = trips["P"].to_numpy()
    negative_idx = trips["N"].to_numpy()

    width = features.shape[-1]
    rows_per_triplet = 2 if labels_est else 1
    out_shape = (rows_per_triplet * len(trips),) + features.shape[1:-1] + (3 * width,)
    # Fill one preallocated array instead of building a Python list of rows
    # and copying it afterwards; this keeps peak memory low for large sets.
    tensors = np.empty(out_shape, dtype=features.dtype)

    # View on the rows that hold the triplets in their original (A, P, N) order.
    forward = tensors[0::rows_per_triplet]
    forward[..., :width] = features[anchor_idx]
    forward[..., width:2 * width] = features[positive_idx]
    forward[..., 2 * width:] = features[negative_idx]
    if not labels_est:
        return tensors

    # Training only: the odd rows hold the same triplet with the second and
    # third image swapped, which serves as the negative (label 0) example.
    swapped = tensors[1::2]
    swapped[..., :width] = forward[..., :width]
    swapped[..., width:2 * width] = forward[..., 2 * width:]
    swapped[..., 2 * width:] = forward[..., width:2 * width]

    labels = np.tile([1, 0], len(trips))
    return tensors, labels

Load pre-computed features or compute features if not available yet.

In [9]:
# Reuse the cached feature matrix when it exists; otherwise compute the
# features once and store them as comma-separated text for later runs.
features_path = "/content/drive/MyDrive/foodsimilarity/features.txt"
if not os.path.exists(features_path):
    features = feature_extraction()
    np.savetxt(features_path, features, delimiter=",", fmt='%1.10e')
else:
    features = np.loadtxt(features_path, delimiter=",")

Preprocess the train and test tensors as well as the labels.

In [10]:
# Training triplets come back with labels (each triplet also yields a swapped
# copy labelled 0); test triplets are returned as-is, without labels.
train_tensors, labels = get_triplets(features, train_triplets_path, labels_est=True)
test_tensors = get_triplets(features, test_triplets_path, labels_est=False)

In [11]:
# Sanity check: shapes of the assembled test and training matrices.
(test_tensors.shape, train_tensors.shape)

((59544, 4608), (119030, 4608))

Define a simple model that can be trained to learn the embeddings of an anchor image, a positive and a negative image. Here no pre-trained model is necessary.

In [12]:
# Binary classifier on the concatenated triplet features: dropout on the
# input, four ReLU layers of decreasing width, and a single sigmoid output.
model = models.Sequential()
model.add(layers.InputLayer(train_tensors.shape[1:]))
model.add(layers.Dropout(0.7, name='dropout_0'))
model.add(layers.Dense(1152, activation=activations.relu, name='dense_1'))
model.add(layers.Dense(288, activation=activations.relu, name='dense_2'))
model.add(layers.Dense(72, activation=activations.relu, name='dense_3'))
model.add(layers.Dense(18, activation=activations.relu, name='dense_4'))
model.add(layers.Dense(1, activation=activations.sigmoid, name='output'))

# Adam with default settings, binary cross-entropy loss, accuracy as metric.
model.compile(
    optimizer=optimizers.Adam(),
    loss=losses.binary_crossentropy,
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_0 (Dropout)          (None, 4608)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1152)              5309568   
_________________________________________________________________
dense_2 (Dense)              (None, 288)               332064    
_________________________________________________________________
dense_3 (Dense)              (None, 72)                20808     
_________________________________________________________________
dense_4 (Dense)              (None, 18)                1314      
_________________________________________________________________
output (Dense)               (None, 1)                 19        
Total params: 5,663,773
Trainable params: 5,663,773
Non-trainable params: 0
______________________________________________

In [13]:
# Train on all labelled triplets for 7 epochs; no batch size or validation
# data is passed, so Keras' defaults apply.
model.fit(x=train_tensors, y = labels, epochs=7)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x7fe75be04f10>

In [14]:
# Sigmoid output of the model for every test triplet; training used label 1
# for triplets in their original order and 0 for the swapped copies.
y_test = model.predict(test_tensors)

We return classifications so the predictions have to be in $\{0,1\}$

In [15]:
# Turn the predicted probabilities into hard 0/1 decisions (anything that is
# not below 0.5 becomes 1) and write them as integers to the result file.
y_test_thresh = np.logical_not(y_test < 0.5).astype(int)
np.savetxt('/content/drive/MyDrive/foodsimilarity/result.txt', y_test_thresh, fmt='%d')

For this particular dataset, a Siamese network could not outperform a simple neural network with concatenated features. By swapping the order of the second and third columns in the training set to create negative examples, the network can be trained with a larger sample. 