In [None]:
# import pandas as pd

# # Read the file from the data folder
# file_path = 'data/merged_10K_updated.csv'  # Replace with the path to your own input CSV if different
# data = pd.read_csv(file_path)

# # Select the first 20 records
# data_20 = data.head(20)

# # Save the new file with 20 records
# output_path = 'data/product_20.csv'  # Replace with your desired output file name
# data_20.to_csv(output_path, index=False)

# print(f"New file with 20 records saved to {output_path}")

## Implement Vector Embedding & Vector Search

In [None]:
import pandas as pd 

# Read the full product catalogue into a DataFrame.
# Uncomment the slice below to experiment on a handful of rows first.
csv_path = 'data/merged_10K_updated.csv'
df = pd.read_csv(csv_path)
# df = df[:15]

# Preview the first rows (rendered as the cell output).
df.head()

In [None]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct
import uuid

# Load the sentence-embedding model used for both indexing and querying.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Keep only the columns that end up in the payload, and turn missing cells
# (NaN) into None: NaN is not valid JSON for the payload and would otherwise
# be embedded as the literal text "nan".
catalog = df[["product_name", "description", "category", "price"]]
catalog = catalog.astype(object).where(catalog.notna(), None)

# One dict per CSV row. Each product gets a fresh random UUID as its point id,
# so ids differ between runs of this cell.
products = [
    {
        "id": str(uuid.uuid4()),
        "name": record["product_name"],
        "description": record["description"],
        "category": record["category"],
        "price": record["price"],
    }
    for record in catalog.to_dict("records")
]


def _embedding_text(product):
    """Return the text embedded for one product: "name - description - price$".

    Fields that are missing (None) are left out instead of being rendered as
    the strings "None"/"nan"; complete rows produce exactly the same text as
    the plain f-string did.
    """
    parts = [product["name"], product["description"]]
    if product["price"] is not None:
        parts.append(f"{product['price']}$")
    return " - ".join(str(part) for part in parts if part is not None)


# Combine name + description + price into the text that gets embedded.
texts = [_embedding_text(p) for p in products]
vectors = model.encode(texts).tolist()

In [None]:
# Sanity check: number of texts prepared for embedding.
text_count = len(texts)
print(text_count)

### Upload to Qdrant

In [None]:
# Connect to the Qdrant instance expected on localhost:6333.
from qdrant_client import QdrantClient
client = QdrantClient(host="localhost", port=6333)

# Size the collection from the embeddings actually produced, so a different
# embedding model does not silently mismatch; fall back to 384 (the value
# previously hard-coded here) when there is nothing to upload.
vector_size = len(vectors[0]) if vectors else 384

# Create (or recreate) the collection -- any existing "products" data is dropped.
# NOTE(review): newer qdrant-client releases reportedly flag recreate_collection
# as deprecated -- confirm against the installed client version.
client.recreate_collection(
    collection_name="products",
    vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
)

# Build one point per product, carrying the full payload next to its vector.
points = [
    PointStruct(
        id=p["id"],
        vector=v,
        payload={
            "name": p["name"],
            "description": p["description"],
            "category": p["category"],
            "price": p["price"]
        }
    )
    for p, v in zip(products, vectors)
]

# Upload in fixed-size batches: one request carrying every point of a large
# catalogue can exceed the server's request-size limit or time out.
UPSERT_BATCH_SIZE = 256
for start in range(0, len(points), UPSERT_BATCH_SIZE):
    client.upsert(
        collection_name="products",
        points=points[start:start + UPSERT_BATCH_SIZE],
    )

In [None]:
# Ask Qdrant how many points the "products" collection currently holds.
collection_count = client.count(collection_name="products")
print("Total points in collection:", collection_count)

### Search with User Query

In [None]:
def search_products(query: str, top_k: int = 3, collection_name: str = "products"):
    """Return the products most similar to a free-text query.

    The query is embedded with the notebook-level ``model`` and the resulting
    vector is searched in Qdrant through the notebook-level ``client``.

    Args:
        query: Free-text user request, e.g. "Suggest some blanket within 25$".
        top_k: Maximum number of hits to return; must be at least 1.
        collection_name: Qdrant collection to search (defaults to "products").

    Returns:
        Whatever ``client.search`` returns for the query vector, or an empty
        list when the query is empty or only whitespace.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    # A blank query carries no meaning to embed, so skip the model and the
    # network round-trip instead of returning arbitrary matches.
    if not query or not query.strip():
        return []

    query_vector = model.encode(query).tolist()
    return client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=top_k,
    )

# Example: run one sample query and show every hit with its similarity score.
user_input = "Suggest some blanket within 25$"
matches = search_products(user_input)

for hit in matches:
    print(hit.payload, "Score:", hit.score)


### Add new data

In [None]:
# # get some new product
# df2 = pd.read_csv('data/product_20.csv')
# df2 = df2[7:12]
# df2.head()

In [None]:
# Get product list from df 
# updated_product = []
# for index, row in df2.iterrows():
#     product = {
#         "id": row['id'],
#         "name": row['product_name'],
#         "description": row['description'],
#         "category": row['category'],
#         "price": row['price'],
#     }
#     updated_product.append(product)

# print(updated_product)

In [None]:
# # Combine name + description for embedding
# texts = [f"{p['name']} - {p['description']}" for p in products]
# vectors = model.encode(texts).tolist()

In [None]:
# Upload points with full payload
# points = [
#     PointStruct(
#         id=p["id"],
#         vector=v,
#         payload={
#             "name": p["name"],
#             "description": p["description"],
#             "category": p["category"],
#             "price": p["price"]
#         }
#     )
#     for p, v in zip(updated_product, vectors)
# ]

# client.upsert(collection_name="products", points=points)

In [None]:
# collections = client.get_collections()
# print(collections)
# print(client.count(collection_name="products"))


### ❌ Deleting Points or Entire Collection


In [None]:
# from qdrant_client.http.models import PointIdsList

# client.delete(
#     collection_name="products",
#     points_selector=PointIdsList(points=["97c27567-d47a-47c5-9718-b25e16d042d7"])  # ID(s) to delete
# )

In [None]:
# WARNING: destructive clean-up. This drops the whole "products" collection,
# so a full top-to-bottom run of the notebook ends with the collection removed.
# Skip this cell to keep the indexed data available for further searches.
collection_to_drop = "products"
client.delete_collection(collection_name=collection_to_drop)