In [1]:
# ! pip install pandas
# ! pip install ftfy regex tqdm
# ! pip install git+https://github.com/openai/CLIP.git
# ! pip install transformers

In [2]:
import pandas as pd
import os

In [3]:
# Dataset locations, given as relative paths.
data_dir = "data/"
images_dir = "Food Images/Food Images"  # joined onto data_dir when listing the images

# Presumably the original dataset CSV (TODO confirm). It is kept under its own
# name so that it is no longer silently overwritten by the assignment below.
full_csv_file = "Food Ingredients and Recipe Dataset with Image Name Mapping.csv"

# CSV file that the notebook actually loads.
csv_file = "export.csv"

### Extract image paths from the CSV

In [5]:
# Alternative source: pd.read_csv(os.path.join(data_dir, csv_file))
# Only the first 10 rows are kept -- no need to load everything for this test.
df = pd.read_csv(csv_file).head(10)

In [6]:
# Display the column labels of the loaded frame.
df.columns

Index(['Title', 'Ingredients', 'Summary', 'Image_Name', 'Cleaned_Ingredients'], dtype='object')

In [7]:
# Rows whose image name is the literal '#NAME?' placeholder, and how many there are.
is_placeholder = df['Image_Name'] == '#NAME?'
NAMES = df[is_placeholder]
len(NAMES)

0

In [8]:
# REMOVE NAME
df_cleared = df[df['Image_Name']!='#NAME?']
len(df), len(df_cleared)

(10, 10)

In [9]:
# Image names (without file extension) referenced by the cleaned CSV rows.
csv_names = df_cleared["Image_Name"].tolist()

### Extract image paths from the image directory

In [10]:
# os.listdir returns entries in arbitrary order; sort them so that positions in
# `images` (and any neighbour lookup by index) are reproducible across runs
# and file systems.
images = sorted(os.listdir(os.path.join(data_dir, images_dir)))
len(images)

13582

### Find the images that are missing on disk

In [11]:
# Split the CSV image names into those present in the image directory and
# those that are not. A set gives constant-time membership tests instead of
# rescanning the whole `images` list for every CSV row.
available_images = set(images)
missing = []       # file names referenced by the CSV but absent from the directory
images_paths = []  # file names referenced by the CSV and found in the directory
for csv_name in csv_names:
    file_name = csv_name + ".jpg"  # names in the CSV carry no extension
    if file_name in available_images:
        images_paths.append(file_name)
    else:
        missing.append(file_name)

In [12]:
# Number of CSV image names not found in the directory, and number found.
len(missing), len(images_paths)

(0, 10)

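**When run on the full CSV (rather than the 10-row sample used above), the `missing` list has 111 elements: 30 of them correspond to `#NAME?` entries in the CSV file (image names that start with `-`), and the rest are actually missing from the image directory.**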

#### TEST CLIP

In [13]:
import torch
import clip
from PIL import Image
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Names of the CLIP models that the installed clip package reports as available.
models = clip.available_models()
models

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [15]:
# Load the chosen CLIP variant together with its image preprocessing transform.
model_name = 'RN50x4'
model, preprocess = clip.load(model_name)

# Size attributes exposed by the loaded model.
input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

# Total parameter count: sum of the element counts of every parameter tensor.
param_count = np.sum([int(np.prod(p.shape)) for p in model.parameters()])

print("Model parameters:", f"{param_count:,}")
for label, value in (
    ("Input resolution:", input_resolution),
    ("Context length:", context_length),
    ("Vocab size:", vocab_size),
):
    print(label, value)

Model parameters: 178,300,601
Input resolution: 288
Context length: 77
Vocab size: 49408


#### TEXT to IMAGE

In [45]:
idx = 1  # row label of the single sample used for this test
desc_test = df.loc[idx, "Summary"]
label_test = df.loc[idx, "Image_Name"]
# Word count of the description, and the image name it should be matched to.
len(desc_test.split()), label_test

(34, 'crispy-salt-and-pepper-potatoes-dan-kluger')

In [46]:
# find its index in list of images_dir
for i, temp in enumerate(images):
    if label_test+".jpg" == temp:
        break

In [47]:
# Take the matching image plus one neighbour on each side. The start is clamped
# at 0: with i == 0 a start of -1 would mean "last element" and the slice would
# come back empty. A stop beyond the end of the list is already handled by slicing.
test_images = images[max(i - 1, 0):i + 2]
test_images

['crispy-salt-and-pepper-chicken-with-caramelized-fennel-and-shallots-51230420.jpg',
 'crispy-salt-and-pepper-potatoes-dan-kluger.jpg',
 'crispy-salt-and-vinegar-potatoes-51234270.jpg']

In [48]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
scores = {}
images_inputs = []

# Preprocess the test images
for im in test_images:
    im_path = os.path.join(data_dir, images_dir, im)
    # The context manager closes the file handle once the image has been
    # converted, instead of leaving one open file per image.
    with Image.open(im_path) as pil_image:
        images_inputs.append(preprocess(pil_image))

# Stack the per-image tensors into one batch directly in torch (no NumPy
# round-trip and extra copy), then move the batch to the selected device.
images_inputs_tensor = torch.stack(images_inputs).to(device)

In [49]:
# Preprocess the text
text_tokens = clip.tokenize([desc_test]).cuda()

with torch.no_grad():
    image_features = model.encode_image(images_inputs_tensor).float()
    text_features = model.encode_text(text_tokens).float()

In [50]:
# Shapes of the token batch and of the encoded image / text features.
text_tokens.shape, image_features.shape, text_features.shape

(torch.Size([1, 77]), torch.Size([3, 640]), torch.Size([1, 640]))

In [97]:
# Get similarity matrix
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = (100.0 * image_features @ text_features.T).float().softmax(dim=0)
values, indices = similarity.sort(dim=0, descending=True)
# values, indices = similarity[0].topk(1)

# Print the result
print("\nTop predictions:\n")
for value, index in zip(values, indices):
    print(f"{test_images[index]:>16s}: {100 * value.item():.2f}%")


Top predictions:

crispy-salt-and-pepper-chicken-with-caramelized-fennel-and-shallots-51230420.jpg: 98.22%


## Instructions to move data and symbolic link

- scp -r local_dir ribal.teeny@machine:~/remote_dir

On the host:
- mv remote_dir /Data
- ln -s /Data/mydata project_dir/mydata