In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import os
from dotenv import load_dotenv

load_dotenv('../../.env')

import pandas as pd
from openai import OpenAI

# OpenAI-SDK client routed through the OpenRouter endpoint; the API key is read
# from the OPENAI_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    base_url="https://openrouter.ai/api/v1",
)


Read the sample Amazon inventory data

In [None]:
# The file is JSON Lines: lines=True parses one record per line.
df_items = pd.read_json(
    '../../data/meta_Electronics_2022_2023_with_category_ratings_100_sample_1000.jsonl',
    lines=True,
)

In [None]:
# Preview the first rows of the loaded frame.
df_items.head()

In [None]:
# .items() yields (index, value) pairs; show the first pair of the 'features' column.
list(df_items['features'].items())[0]

In [None]:
# .items() yields (index, value) pairs; show the first pair of the 'images' column.
list(df_items['images'].items())[0]

Preprocess the title and features


In [None]:
def preprocess_description(row):
    """Build the text that is later embedded for one item.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'title' string
            and a 'features' iterable of strings.

    Returns:
        The title, two spaces, then all features joined by single spaces.
    """
    return f"{row['title']}  {' '.join(row['features'])}"

In [None]:
def extra_first_large_image(row):
    """Return the 'large' URL of the first image of an item.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'images' entry
            is a sequence of dicts.

    Returns:
        The value stored under 'large' in the first image dict, or '' when
        the item has no images or the first image has no 'large' key.
    """
    images = row['images']
    # Items without any image would otherwise raise on images[0].
    if images is None or len(images) == 0:
        return ''
    return images[0].get('large', '')

In [None]:
# Row-wise derivation of the text to embed and of the first large image URL.
df_items['description'] = df_items.apply(func=preprocess_description, axis='columns')
df_items['image'] = df_items.apply(func=extra_first_large_image, axis='columns')

In [None]:
# Preview the frame again, now including the 'description' and 'image' columns.
df_items.head()

In [None]:
# Show the full 'description' text stored at index 0.
(df_items['description'])[0]

Sample 50 items from the list


In [None]:
# A fixed random_state makes the 50-row sample reproducible across runs.
df_sample = df_items.sample(n=50, random_state=4)

In [None]:
# Number of rows in the sample.
len(df_sample)

In [None]:
# One dict per sampled row, restricted to the text to embed and the fields
# that are stored alongside it.
data_to_embed = df_sample.loc[
    :, ['description', 'image', 'average_rating', 'rating_number', 'parent_asin']
].to_dict(orient='records')

In [None]:
# Display all records (one dict per sampled row).
data_to_embed

In [None]:
# One-off request with a placeholder text, kept so the raw response can be inspected.
embedding = client.embeddings.create(
    input="Your text string goes here",
    model="baai/bge-base-en-v1.5",
    encoding_format="float",
)

In [None]:
# Length of the returned vector; the Qdrant collection created further down
# is configured with size=768.
len(embedding.data[0].embedding)

In [None]:
def get_embedding(input:str):
    """Return the embedding vector for a single text.

    Sends `input` to the "baai/bge-base-en-v1.5" model through the
    notebook-level `client` and returns the `embedding` field of the first
    entry in the response's `data`.
    """
    response = client.embeddings.create(
        encoding_format="float",
        input=input,
        model="baai/bge-base-en-v1.5",
    )
    first_entry = response.data[0]
    return first_entry.embedding


In [None]:
# Smoke-test get_embedding with a filler sentence; the cell output is the returned vector.
get_embedding("Lorem ipsum dolor sit, amet consectetur adipisicing elit. Accusamus ratione odit vitae eveniet, cum inventore, nihil vel consectetur sequi praesentium iste, quibusdam iusto. Saepe quas quasi repellat quam, vero rerum!")

Qdrant collection

In [None]:
# Client for the Qdrant instance reachable at localhost:6333.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [None]:
# Create the collection only when it is missing, so this cell can be re-run
# (Restart & Run All) without failing on an already existing collection.
# size=768 has to equal the length of the vectors produced by get_embedding.
if not qdrant_client.collection_exists(collection_name='Amazon-items-collection'):
    qdrant_client.create_collection(
        collection_name='Amazon-items-collection',
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

In [None]:
# Build one point per record. Each point needs its own id: with a constant id
# every upserted point would overwrite the previous one and the collection
# would end up holding a single item. The enumerate index is used as the id.
pointStructs = [
    PointStruct(
        id=i,
        vector=get_embedding(item['description']),
        payload=item,
    )
    for i, item in enumerate(data_to_embed)
]

# Show only the count; printing every point would dump all vectors into the output.
len(pointStructs)

In [None]:
# Send the prepared points to the collection; the result of the call is kept
# in operation_info for inspection.
operation_info = qdrant_client.upsert(
    points=pointStructs,
    collection_name="Amazon-items-collection",
    wait=True,
)

In [None]:
def retrieve_data(input:str, limit:int=5):
    """Return the stored points closest to a free-text query.

    Args:
        input: Query text. It is embedded with get_embedding and used as the
            search vector (previously a hard-coded string was embedded and
            this argument was ignored).
        limit: Maximum number of points to return. Defaults to 5, the value
            that was hard-coded before.

    Returns:
        The `points` of the query_points response, payloads included.
    """
    query_embedding = get_embedding(input)

    return qdrant_client.query_points(
        collection_name="Amazon-items-collection",
        query=query_embedding,
        with_payload=True,
        limit=limit,
    ).points

In [None]:
# Query the collection with a sample product phrase.
retrieve_data('Rubber Isolating Feet.')