<a href="https://colab.research.google.com/github/gu-ma/hgk-ml-workshop/blob/main/notebooks/Image_Search_02_Search_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Search images in the dataset

With this notebook you can search for photos using natural language.

## Setup

In [None]:
# Install OpenAI's CLIP package directly from its GitHub repository
! pip install git+https://github.com/openai/CLIP.git

In [None]:
import clip
import torch

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP ViT-B/32 model and the preprocessing callable returned with it
model, preprocess = clip.load("ViT-B/32", device=device)

## Connect to Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the Drive folders used below are reachable as local paths
drive.mount('/content/drive')

## Load the dataset

You will need both the dataset (the folder with the images) and the precomputed feature vectors that were saved when the dataset was processed.

In [None]:
import re
import os
import math
import shutil
import numpy as np
import pandas as pd
from pathlib import Path

# @markdown Path to source directory on google drive. Right click your directory and choose "copy path" then paste it below.

# @markdown ⚠️ __This is the folder with the scenes and images__ ⚠️
gdrive_input_dir = "/content/drive/MyDrive/AI/hgk_workshop/playlist01_output"  # @param { type:'string' }

# @markdown ⚠️ __This is the folder with the features saved when we processed the dataset__ ⚠️
gdrive_clip_input_dir = "/content/drive/MyDrive/AI/hgk_workshop/playlist01_output_clip"  # @param { type:'string' }

# Derive the local working directory names from the Drive folder name.
# normpath drops a trailing slash, which would otherwise make os.path.split
# return an empty folder name and os.makedirs("") fail.
(gdrive_path, gdrive_folder) = os.path.split(os.path.normpath(gdrive_input_dir))

input_dir = gdrive_folder
output_dir = f'{gdrive_folder}_clip'

gdrive_output_dir = os.path.join(gdrive_path, output_dir)

# Create directories
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# Copy all jpg locally.
# Done in Python instead of `! cp` so that Drive paths containing spaces work
# and large folders do not hit the shell's argument-length limit.
for gdrive_photo in sorted(Path(gdrive_input_dir).glob("*.jpg")):
    shutil.copy(gdrive_photo, input_dir)

# Copy features
shutil.copytree(gdrive_clip_input_dir, output_dir, dirs_exist_ok=True)

# Set the path to the photos
photos_path = Path(input_dir)

# List all JPGs in the folder, sorted by path
photos_files = sorted(photos_path.glob("*.jpg"))

# Print some statistics
print(f"Photos found: {len(photos_files)}")
print(*photos_files[:10], sep='\n')

# Path where the precomputed feature vectors are stored locally
features_path = Path(output_dir)

# Load the features and the corresponding IDs
photo_features = np.load(features_path / "features.npy")
photo_ids_df = pd.read_csv(features_path / "photo_ids.csv")
photo_ids = list(photo_ids_df['photo_id'])

# The search cell indexes photos_files by feature row, so the three counts
# should agree; warn early instead of showing wrong or missing photos later.
if not (len(photos_files) == len(photo_ids) == photo_features.shape[0]):
    print(
        f"⚠️ Count mismatch: {len(photos_files)} photos, "
        f"{len(photo_ids)} photo ids, {photo_features.shape[0]} feature vectors"
    )


## Search

Specify your search query and encode it to a feature vector using CLIP.

In [None]:
from IPython.display import Image
from IPython.core.display import HTML

search_query = "A man with a blue shirt" #@param {type:"string"}
# Number of best matches to display
num_results = 3 #@param {type:"integer"}

with torch.no_grad():
    # Encode and normalize the description using CLIP
    text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
    text_encoded /= text_encoded.norm(dim=-1, keepdim=True)

# Compare the text features to the image features and find the best match.

# Retrieve the description vector as a NumPy array
text_features = text_encoded.cpu().numpy()

# Compute the similarity between the description and each photo.
# The text vector is normalized above; this equals the cosine similarity
# provided the stored photo features are normalized too — TODO confirm.
similarities = (text_features @ photo_features.T).squeeze(0).tolist()

# Sort the photos by their similarity score, best first, as (score, row index) pairs
best_photos = sorted(zip(similarities, range(photo_features.shape[0])), key=lambda x: x[0], reverse=True)

# Show results

# Iterate over the top results; slicing keeps this safe when fewer photos exist
for score, idx in best_photos[:max(num_results, 0)]:
    # Retrieve the photo ID
    photo_id = photo_ids[idx]

    # A feature row without a local photo cannot be displayed; report and move on
    if idx >= len(photos_files):
        print(score, idx, photo_id, "(no local photo for this feature row)")
        continue

    # Display the photo
    display(Image(photos_files[idx]))
    print(score, idx, photo_id, photos_files[idx])