# Homework 02: Using CLIP to embed product images and recommend similar items

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/biodatlab/xlab-recommendation/blob/main/solution_notebooks/HW_02_fashion_image_search.ipynb)

* Dataset: https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-dataset (the download cell below fetches the `paramaggarwal/fashion-product-images-small` variant)
* Interface with Kaggle API: https://www.kaggle.com/discussions/general/74235

## 1. Dataset preparation

In [None]:
# Install the Kaggle command-line client used by the cells below.
!pip install kaggle

In [None]:
# Follow the "Interface with Kaggle API" link above to create an API token,
# then upload the resulting kaggle.json here (it is copied into ~/.kaggle next).

from google.colab import files
files.upload()

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Smoke test: list datasets to confirm the Kaggle credentials work.
!kaggle datasets list

In [None]:
# Download the dataset archive (the "small" variant of the fashion product images).

!kaggle datasets download -d paramaggarwal/fashion-product-images-small

In [None]:
# extract dataset zip file

!unzip fashion-product-images-small.zip

## 2. SOLUTION: CLIP Image Search

In [None]:
# Install the libraries required by this section.

! pip install torch ftfy regex tqdm numpy
! pip install openai-clip
! pip install gradio
! pip install gdown

In [None]:
# import essential library

import os
import os.path as op
from PIL import Image
from zipfile import ZipFile

import numpy as np
from tqdm import tqdm
import torch

import clip

In [None]:
# check available runtime

device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda": 
  ! pip install faiss-gpu 
else:
  ! pip install faiss-cpu 

print("Now running with " + device)

In [None]:
# Load the CLIP ViT-B/32 model together with its image preprocessing transform.

model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
# Absolute path of the extracted image folder, and the file names inside it.
dataset_path = op.join(os.getcwd(), "myntradataset/images")
all_images_path = os.listdir(dataset_path)

In [None]:
# os.listdir returns names in arbitrary order; sort in place so the image
# order (and therefore the embedding row order built from it) is reproducible.
all_images_path.sort()
print(all_images_path)

In [None]:
# Encode every product image with CLIP and keep one 512-wide float32 row per
# image, in the same order as all_images_path (row i belongs to file i).
embeddings_storage = np.zeros((len(all_images_path), 512), dtype=np.float32)

# Inference only: disable autograd once for the whole loop.
with torch.no_grad():
    for row, image_name in enumerate(tqdm(all_images_path)):
        # dataset_path is already absolute, so no extra getcwd() is needed.
        # The context manager closes the file once preprocessing has read it.
        with Image.open(op.join(dataset_path, image_name)) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)

        # The batch holds a single image; store its embedding as float32.
        embeddings_storage[row] = (
            model.encode_image(image).numpy(force=True)[0].astype("float32")
        )

In [None]:
# Build a FAISS index over the image embeddings.

import faiss

# Flat L2 index. Take the dimension from the stored embeddings
# (512 columns for the CLIP model used here) instead of repeating the literal,
# so the index always matches the embedding matrix.
index = faiss.IndexFlatL2(embeddings_storage.shape[1])

# Add every image embedding; row i of the index corresponds to all_images_path[i].
index.add(embeddings_storage)

print(index.ntotal)  # number of image embeddings stored in the index

In [None]:
# recommend from images

import gradio as gr


def recommend_similar_image(image_path, k=4):
    """Return the catalogue images most similar to the image at *image_path*.

    The query image is embedded with the CLIP model and the FAISS index is
    searched for its nearest neighbours by squared L2 distance.

    Args:
        image_path: Path of the query image on disk, or ``None`` when no
            image was provided (Gradio sends ``None`` for an empty input).
        k: Maximum number of recommendations to return. Defaults to 4.

    Returns:
        A list of ``(PIL image, caption)`` tuples ordered from most to least
        similar. The list is empty when there is no query image or the index
        holds no vectors.
    """
    print(f"get image path {image_path}")

    # Nothing to search for: return an empty gallery instead of crashing.
    if image_path is None:
        return []

    # Never ask for more neighbours than the index holds; FAISS pads missing
    # results with id -1, which would silently select the last file.
    k = min(k, index.ntotal)
    if k < 1:
        return []

    # Close the query file as soon as preprocessing has read its pixels.
    with Image.open(image_path) as query_image:
        test_image = preprocess(query_image).unsqueeze(0).to(device)

    with torch.no_grad():
        # Batch of one query; FAISS expects a 2-D float32 array.
        test_embeddings = (
            model.encode_image(test_image).numpy(force=True).astype("float32")
        )

    square_distance, image_index = index.search(test_embeddings, k)
    print(image_index)
    print(square_distance)

    print("Opening Images...")
    recommended_images = [
        (
            Image.open(op.join(dataset_path, all_images_path[neighbour])),
            f"Recommended Rank {rank}",
        )
        for rank, neighbour in enumerate(image_index[0], start=1)
        if neighbour >= 0  # defensive: skip FAISS "no result" placeholders
    ]
    return recommended_images


example_path = []

# Keep the Interface itself in `demo` so it can still be used afterwards
# (for example demo.close()). launch() returns connection details, not the
# Interface, so its result must not be what `demo` is bound to.
demo = gr.Interface(
    fn=recommend_similar_image,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Gallery(),
)
demo.launch(share=True, debug=True)