### Install necessary libraries

In [65]:
# !pip install opencv-python
# !pip install opencv-python-headless
# !pip install tqdm
# !pip install ipyplot
# !pip install qdrant-client

In [66]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.models import Filter, FieldCondition, VectorParams, MatchValue
import cv2
import numpy as np
import os
import ipyplot

### Use `QdrantClient` to connect to your Qdrant server.  

I did not use `:memory:`, but I setup a docker container for that.

Here is the command to create docker

`docker run -p 6333:6333 -p 6334:6334  -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant`

In [67]:
qdrant_client = QdrantClient(host="localhost", port=6333, prefer_grpc=True)

### Create a new collection called 'animals' if it does not exists

About the `size` of the vector: When the next block of code use `224*224` as image size, I suspect that the `size` parameter should also be `224*224`.

In [68]:
first_collection = qdrant_client.recreate_collection(
    collection_name="animals",
    vectors_config=models.VectorParams(
        size=224*224, distance=models.Distance.COSINE
    )
)

### Convert image to vector

Actually there seems to be quite a few 3rd parties API, including OpenAI, available for image embedding.  I did not use it.

The reason is that, sometimes the images are sensitive enough (e.g. including PII) to **NOT** to dispose/send to 3rd parties.  In order to cater for this scenario, I would rather using CV2 to create embedded images.

In [69]:
def preprocess_image(image_path):
    # Read the image
    image = cv2.imread(image_path)

    # Resize the image to a fixed size (optional)
    resized_image = cv2.resize(image, (224, 224))

    # Convert the image to grayscale (optional)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # Normalize the pixel values
    normalized_image = cv2.normalize(
        gray_image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Flatten the image into a 1D array
    flattened_image = normalized_image.flatten()

    return flattened_image

### Loop through and embed images

I use the [animal data in Kaggle](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals).

The full animal directory path would look like this:

`./archive/animals/dog/4aacd195b5.jpg`

In [70]:
animal_directory = "archive/animals"

id = 0

for animal_type in os.listdir(animal_directory):
    # if animal_type is cat, dog or fox, the process
    if animal_type not in ["cat", "dog", "fox"]:
        continue

    animal_type_directory = os.path.join(animal_directory, animal_type)

    for image_file in os.listdir(animal_type_directory):
        image_path = os.path.join(animal_type_directory, image_file)

        # Preprocess the image and get the embedding
        embedding = preprocess_image(image_path)

        id += 1

        # Add the embedding to Qdrant
        qdrant_client.upsert(
            collection_name="animals",
            points=[
                models.PointStruct(
                    id = id,
                    vector = embedding,
                    payload = {
                        "animal_type": animal_type,
                        "image_file": image_file
                    },
                )
            ]
        )


### Now create the search request to Qdrant

Actually [the documentation from Qdrant on Python](https://python-client.qdrant.tech/qdrant_client.http.models.models) is very difficult to read.  So if you don't understand their doc, that's OK.  

You can refer to [another Qdrant example](https://colab.research.google.com/github/qdrant/examples/blob/master/qdrant_101_getting_started/getting_started.ipynb#scrollTo=5-Z4LbHE4lws) to find out how different functions work

In [71]:
# Create search request
# https://python-client.qdrant.tech/qdrant_client.http.models.models

# search_image = os.path.join(animal_directory, 'dog', '2a8a6a6050.jpg')
search_image = os.path.join(animal_directory, 'dog', '4aacd195b5.jpg')

result = qdrant_client.search(
    collection_name="animals",
    query_vector=preprocess_image(search_image),
    query_filter=Filter(
        must=[
            FieldCondition(
                key="animal_type",
                match=MatchValue(
                    value="dog"
                )
            )
        ]
    ),
    limit=10,
    with_payload=True,
    score_threshold=0,
)

### Then, show the original search image

In [72]:
ipyplot.plot_images([search_image], img_width=400)

### Now show the result of the search, look good to me


In [73]:
all_imgs = []
all_img_files = []
for file in result:
    # print(file)
    image_path = os.path.join(animal_directory, file.payload['animal_type'], file.payload['image_file'])
    all_imgs.append(image_path)
    all_img_files.append(file.payload['image_file'])

ipyplot.plot_images(all_imgs, all_img_files, img_width=300, )

### How about using `DOT` as algorithm for distance finding?

Let's duplicate the code and wrap it as a helper function to test

In [74]:
def create_and_add_vectors(collection_name, distance):
    first_collection = qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=224*224, distance=distance
        )
    )

    animal_directory = "archive/animals"

    id = 0

    for animal_type in os.listdir(animal_directory):
        # if animal_type is cat, dog or fox, the process
        if animal_type not in ["cat", "dog", "fox"]:
            continue

        animal_type_directory = os.path.join(animal_directory, animal_type)

        for image_file in os.listdir(animal_type_directory):
            image_path = os.path.join(animal_type_directory, image_file)

            # Preprocess the image and get the embedding
            embedding = preprocess_image(image_path)

            id += 1

            # Add the embedding to Qdrant
            qdrant_client.upsert(
                collection_name=collection_name,
                points=[
                    models.PointStruct(
                        id = id,
                        vector = embedding,
                        payload = {
                            "animal_type": animal_type,
                            "image_file": image_file
                        },
                    )
                ]
            )

#### Do the magic

In [75]:
create_and_add_vectors('animals_dot', models.Distance.DOT)

#### Create the search request, using the new collection

Looks...reasonable?

In [78]:
# Create search request
# https://python-client.qdrant.tech/qdrant_client.http.models.models

# search_image = os.path.join(animal_directory, 'dog', '2a8a6a6050.jpg')
search_image = os.path.join(animal_directory, 'dog', '4aacd195b5.jpg')

result = qdrant_client.search(
    collection_name="animals_dot",  # <==== Change the collection
    query_vector=preprocess_image(search_image),
    query_filter=Filter(
        must=[
            FieldCondition(
                key="animal_type",
                match=MatchValue(
                    value="dog"
                )
            )
        ]
    ),
    limit=10,
    with_payload=True,
    score_threshold=0,
)

# Display the search results.  This block of code is duplicated for demo purpose
all_imgs = []
all_img_files = []
for file in result:
    # print(file)
    image_path = os.path.join(
        animal_directory, file.payload['animal_type'], file.payload['image_file'])
    all_imgs.append(image_path)
    all_img_files.append(file.payload['image_file'])

# Show Original image
ipyplot.plot_images([search_image], img_width=400)

# Show search result
ipyplot.plot_images(all_imgs, all_img_files, img_width=300, )

### Let's do it again with `EUCLID`

In [82]:
create_and_add_vectors('animals_euclid', models.Distance.EUCLID)

And let's duplicate everything, and 

Yes, you saw that right, there is no result!

In [85]:
# Create search request
# https://python-client.qdrant.tech/qdrant_client.http.models.models

search_image = os.path.join(animal_directory, 'dog', '4aacd195b5.jpg')

result = qdrant_client.search(
    collection_name="animals_euclid",  # <==== Change the collection
    query_vector=preprocess_image(search_image),
    query_filter=Filter(
        must=[
            FieldCondition(
                key="animal_type",
                match=MatchValue(
                    value="dog"
                )
            )
        ]
    ),
    limit=10,
    with_payload=True,
    score_threshold=0,
)

# Display the search results.  This block of code is duplicated for demo purpose
all_imgs = []
all_img_files = []
for file in result:
    # print(file)
    image_path = os.path.join(
        animal_directory, file.payload['animal_type'], file.payload['image_file'])
    all_imgs.append(image_path)
    all_img_files.append(file.payload['image_file'])

# Show Original image
ipyplot.plot_images([search_image], img_width=400)

# Show search result
# ipyplot.plot_images(all_imgs, img_width=300)
print(all_imgs)

[]


### Let's do it again with `MANHATTAN`

In [86]:
create_and_add_vectors('animals_manhattan', models.Distance.MANHATTAN)

And let's duplicate everything, and 

Yes, you saw that right, there is no result!

In [88]:
# Create search request
# https://python-client.qdrant.tech/qdrant_client.http.models.models

search_image = os.path.join(animal_directory, 'dog', '4aacd195b5.jpg')

result = qdrant_client.search(
    collection_name="animals_manhattan",  # <==== Change the collection
    query_vector=preprocess_image(search_image),
    query_filter=Filter(
        must=[
            FieldCondition(
                key="animal_type",
                match=MatchValue(
                    value="dog"
                )
            )
        ]
    ),
    limit=10,
    with_payload=True,
    score_threshold=0,
)

# Display the search results.  This block of code is duplicated for demo purpose
all_imgs = []
all_img_files = []
for file in result:
    # print(file)
    image_path = os.path.join(
        animal_directory, file.payload['animal_type'], file.payload['image_file'])
    all_imgs.append(image_path)
    all_img_files.append(file.payload['image_file'])

# Show Original image
ipyplot.plot_images([search_image], img_width=400)

# Show search result
# ipyplot.plot_images(all_imgs, img_width=300)
print(all_imgs)

[]
