In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

import pandas as pd

In [None]:
# Client for the Qdrant server reachable over HTTP on localhost, port 6333.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [None]:
# Create the collection only when it is missing, so this cell survives
# "Restart & Run All": create_collection fails if the collection already exists.
# The vector size (1536) must equal the length of the vectors upserted later;
# similarity is measured with cosine distance.
if not qdrant_client.collection_exists(collection_name="Amazon-items-collection-00"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-00",
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

In [None]:
# Load the items file; lines=True parses it as JSON Lines (one JSON object per line).
df_items = pd.read_json(
    path_or_buf='../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl',
    lines=True,
)

### Concatenate title and features

In [None]:
def preprocess_data(row):
    """Build the text to embed for one item: its title followed by its features.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing a 'title'
            value and a 'features' iterable of strings.

    Returns:
        str: The title, a space, then all features separated by single spaces.
    """
    # Join with a space rather than an empty string: without a separator the
    # last word of one feature is fused to the first word of the next
    # ("Fast chargeBraided"), which corrupts the text that gets embedded.
    return f"{row['title']} {' '.join(row['features'])}"

In [None]:
# Run preprocess_data once per row and store the resulting text in a new column.
df_items["preprocessed_data"] = df_items.apply(
    func=preprocess_data,
    axis="columns",
)

In [None]:
# df_items.head(2)

### Sample 50 items 

In [None]:
# Draw 50 rows at random; the fixed random_state makes the draw repeatable.
df_sample = df_items.sample(
    n=50,
    random_state=42,
)

### Embedding function

In [None]:
import openai
from dotenv import load_dotenv
import os

# Load variables via python-dotenv, then hand the value of OPENAI_API_KEY
# (None when the variable is not set) to the openai module.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for a single text from the OpenAI embeddings endpoint.

    Args:
        text: The text to embed; it is sent as a one-element input list.
        model: Name of the embedding model to use.

    Returns:
        The `embedding` attribute of the first item in the response's `data`.
    """
    result = openai.embeddings.create(model=model, input=[text])
    # Exactly one input was sent, so the embedding of interest is the first item.
    first_item = result.data[0]
    return first_item.embedding

### Embed data

In [None]:
# Embed each sampled text (one get_embedding call per item) and wrap it as a
# Qdrant point: the id is the item's position in the sample, and the payload
# keeps the original text alongside the vector.
data_to_emded = df_sample["preprocessed_data"].to_list()
pointstructs = []
for i, data in enumerate(data_to_emded):
    embedding = get_embedding(data)
    pointstructs.append(PointStruct(id=i, vector=embedding, payload={"text": data}))

In [None]:
# pointstructs

### Write embedded data to Qdrant

In [None]:
# Send all prepared points to the collection in one upsert call (wait=True).
qdrant_client.upsert(
    collection_name="Amazon-items-collection-00",
    points=pointstructs,
    wait=True,
)

### Function for data retrieval

In [None]:
def retrieve_data(query, limit=10, collection_name="Amazon-items-collection-00"):
    """Embed `query` and return the closest stored points from Qdrant.

    Args:
        query: Text to search for; it is embedded with `get_embedding`.
        limit: Maximum number of points to request. Defaults to 10, the
            value that was previously hard-coded.
        collection_name: Collection to search. Defaults to the collection
            this notebook writes to.

    Returns:
        The response object from `qdrant_client.query_points`; the matches
        are read from its `.points` attribute.
    """
    query_embedding = get_embedding(query)
    return qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=limit,
    )

In [None]:
# Connectivity probe: ask the server for its collections and report whether
# the call succeeded. Failures are printed rather than raised so the notebook
# keeps running.
try:
    qdrant_client.get_collections()
except Exception as e:
    print(f"Connection failed: {e}")
else:
    print("Qdrant is up!")

In [None]:
# Example query: the cell output is the `points` attribute of the response.
retrieve_data(
    "What earphones can I get?"
).points