In [2]:
import torch 
import torchvision.transforms as T
from transformers import AutoFeatureExtractor, AutoModel
from PIL import Image
import numpy as np
import pandas as pd
import os
import faiss


model_ckpt = "google/vit-base-patch16-224-in21k"
model = AutoModel.from_pretrained(model_ckpt)
hidden_dim = model.config.hidden_size

#prepare the image
transformation_chain = T.Compose(
    [
        # resize and tensorize
        T.Resize((224,224)),
        T.ToTensor()
    ]
)

In [3]:
def extract_embeddings(model: torch.nn.Module, images):
    device = model.device


    # `transformation_chain` is a compostion of preprocessing
    # transformations we apply to the input images to prepare them
    # for the model. For more details, check out the accompanying Colab Notebook.
    image_batch_transformed = torch.stack(
        [transformation_chain(image) for image in images]
    )
    new_batch = image_batch_transformed.to(device)
    with torch.no_grad():
        embeddings = model(new_batch).last_hidden_state[:, 0].cpu()
    return embeddings


In [5]:
feine_filenames = os.listdir("feine_data/")
training_data = [Image.open("feine_data/" + n) for n in feine_filenames]


In [6]:
mdev = model.to("cuda")
intermediate_embeddings = extract_embeddings(mdev, training_data)
embeddings = np.array([np.array(i) for i in list(intermediate_embeddings)])

In [23]:
index = faiss.IndexFlatL2(768)
faiss.normalize_L2(embeddings)
index.add(embeddings)

In [29]:
def identify_drink(image):
    embeddings1 = extract_embeddings(mdev, [image])
    _vector = np.array(embeddings1)
    faiss.normalize_L2(_vector)
    distances, ann = index.search(_vector, k=1)
    return ann[0]

In [36]:
testdata_list = os.listdir("feine_testing_data/")
testing_data = [Image.open("feine_testing_data/" + n).convert("RGB") for n in testdata_list]
for i in range(len(testing_data)):
    drinkid = identify_drink(testing_data[i])
    print("Actual drink: " + testdata_list[i])
    print("Identified: " + feine_filenames[drinkid[0]])
    print()
    


Actual drink: Monster_retailimage.png
Identified: monster-absolutely-zero-energy-drink.jpg

Actual drink: RedBull_handheld.png
Identified: quake-energy-slurpee.jpg

Actual drink: Monster_handheld.jpg
Identified: monster-absolutely-zero-energy-drink.jpg

Actual drink: Monster_zero_ultra_handheld.jpg
Identified: black-bruin-energy-drink.jpg

Actual drink: Monster_onDesk.jpg
Identified: rockstar.jpg



In [5]:

base_url = "https://www.caffeineinformer.com/wp-content/caffeine/"

feine_df = pd.read_html('/home/bala/Documents/product_ID.ai/feine.html', extract_links = "all")[0]
feine_df = feine_df.apply(lambda col: [v[0] if v[1] is None else f'{v[1].split("/")[2]}.jpg' for v in  col])

feine_df.head()



Unnamed: 0,"(, None)","('', None).1","('', None).2","('', None).3","('', None).4"
0,28-energy-drink-black-white.jpg,8.46,125,80,9.5
1,3d-energy-drink.jpg,16.0,15,200,12.5
2,4-purpose.jpg,8.46,70,70,8.3
3,4-c-energy-rush.jpg,16.9,15,170,10.1
4,adrenaline-shoc-energy-drink.jpg,16.0,10,250,15.6
