## Checking vector search on text descriptions

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Connection settings for the Weaviate Cloud cluster; either is None when the
# corresponding variable is not defined.
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_KEY = os.getenv("WEAVIATE_KEY")

print(WEAVIATE_URL)
# Never echo the API key itself: cell outputs are stored inside the notebook
# file and would leak the secret wherever the notebook is shared or committed.
print("WEAVIATE_KEY is set" if WEAVIATE_KEY else "WEAVIATE_KEY is missing")

In [None]:
import weaviate
from weaviate.classes.init import Auth

# Connect to the Weaviate Cloud cluster using the URL and API key read from
# the environment.
api_key_auth = Auth.api_key(WEAVIATE_KEY)
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL, auth_credentials=api_key_auth
)

# Last expression of the cell, so the notebook displays the readiness flag.
client.is_ready()

## Get local data

In [None]:
import pandas as pd

In [None]:
# Load the results file; lines=True parses it as JSON Lines (one JSON object
# per line) rather than a single JSON document.
df = pd.read_json("../results/farfetch.jsonl", lines=True)
# Preview the first rows to check that the file parsed as expected.
df.head()


In [None]:
# Keep only the rows produced by the qwen2.5vl:7b model, restricted to the
# image name and its text description, with a fresh 0..n-1 index.
is_qwen_row = df["model"] == "qwen2.5vl:7b"
df_selected = df.loc[is_qwen_row, ["image_name", "description"]].reset_index(drop=True)
df_selected.head()

In [None]:
import weaviate.classes.config as wc

# Start from a clean slate: drop any existing "Farfetch" collection before
# recreating it.
if client.collections.exists("Farfetch"):
    client.collections.delete("Farfetch")

# Both stored properties are plain text fields.
text_properties = [
    wc.Property(name=field_name, data_type=wc.DataType.TEXT)
    for field_name in ("image_name", "description")
]

# One named vector, built from the "description" property only.
description_vector = wc.Configure.Vectors.text2vec_weaviate(
    name="main_vector",
    model="Snowflake/snowflake-arctic-embed-l-v2.0",
    source_properties=["description"],
)

client.collections.create(
    name="Farfetch",
    properties=text_properties,
    vector_config=[description_vector],
)

In [None]:
farfetch = client.collections.get("Farfetch")

# Materialise the rows as plain {column: value} dicts: they can be passed to
# add_object directly, and tqdm can read the list length so the progress bar
# shows a percentage instead of a bare counter.
records = df_selected[["image_name", "description"]].to_dict("records")

with farfetch.batch.dynamic() as batch:
    for record in tqdm(records, total=len(records)):
        batch.add_object(properties=record)

# Batch imports do not raise on per-object errors; failures are collected on
# the batch wrapper and have to be checked explicitly once the batch closes.
failed_objects = farfetch.batch.failed_objects
if failed_objects:
    print(f"Failed to import {len(failed_objects)} objects; first failure: {failed_objects[0]}")

print(f"Farfetch count: {len(farfetch)}")

In [None]:
from pprint import pprint

In [None]:
# Smoke-test the semantic search: request up to five objects whose vector is
# closest to the phrase "red dress".
farfetch = client.collections.get("Farfetch")
response = farfetch.query.near_text(query="red dress", limit=5)

# Uncomment to inspect the raw result objects:
# for item in response.objects:
#     pprint(item)

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from pathlib import Path

def query_and_display(collection, query_text, limit=5, relative_path="../data/farfetch/images/"):
    """Run a near-text search and show the matching images in a grid.

    Args:
        collection: Name of the Weaviate collection to query; it is looked up
            on the module-level ``client``.
        query_text: Text passed to ``near_text`` as the search query.
        limit: Maximum number of results to request.
        relative_path: Directory that contains the image files named by each
            result's ``image_name`` property.

    Returns:
        None. The figure is rendered with ``plt.show()``; when the search
        returns no objects a message is printed instead.
    """
    hits = client.collections.get(collection).query.near_text(
        query=query_text,
        limit=limit,
    ).objects

    image_dir = Path(relative_path)
    image_paths = [image_dir / hit.properties["image_name"] for hit in hits]

    if not image_paths:
        # A grid with zero rows cannot be created, so bail out early.
        print(f"No results for query: {query_text!r}")
        return

    cols = 5
    # Ceiling division: enough rows to hold every returned image.
    rows = (len(image_paths) + cols - 1) // cols
    # squeeze=False always yields a 2D array of axes, whatever the grid shape.
    _fig, axes = plt.subplots(rows, cols, figsize=(2 * cols, 2 * rows), squeeze=False)

    for i, ax in enumerate(axes.flatten()):
        if i < len(image_paths):
            path = image_paths[i]
            ax.imshow(mpimg.imread(str(path)))
            ax.set_title(path.name, fontsize=8)  # file name as caption
        # Hide ticks and frame on every cell, including unused trailing ones.
        ax.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
# Show the top five matches from the "Farfetch" collection.
# NOTE(review): the query text is an empty string — presumably a search phrase
# was meant to go here (the earlier cell uses "red dress"); confirm.
query_and_display("Farfetch", "", limit=5)