In [3]:
import pandas as pd

# Read the file from the data folder
file_path = 'data/products.csv'  # Replace 'your_file.csv' with the actual file name
data = pd.read_csv(file_path)

# Select the first 20 records
data_20 = data.head(20)
data_20.head()

# Save the new file with 20 records
output_path = 'data/new_product_20.csv'  # Replace with your desired output file name
data_20.to_csv(output_path, index=False)

print(f"New file with 20 records saved to {output_path}")

New file with 20 records saved to data/new_product_20.csv


## Implement Vector Embedding & Vector Search

In [4]:
import pandas as pd 

df = pd.read_csv('data/new_product_20.csv')
# df = df[:15]
df.head()

Unnamed: 0,title,description,category,price,brand,specTableContent,text_for_embedding
0,"""Transcend 64GB microSDXC UHS-I 300x, Class 1...",,Computers_and_Accessories,,,,"Title: ""Transcend 64GB microSDXC UHS-I 300x, ..."
1,"""TARJETA SD 32GB SDHC CLASE 10 300X"" 300X | T...","""Tipología: Secure Digital analogico; Capacida...",Camera_and_Photo,,,,"Title: ""TARJETA SD 32GB SDHC CLASE 10 300X"" 3..."
2,"""Sigma 50mm F1.4 DG HSM Art (Nikon) ""@en Aden...","""\n The staple Sigma 50...",Camera_and_Photo,,,Manufacturer: Sigma Sku #: A50DGHN Our Price:...,"Title: ""Sigma 50mm F1.4 DG HSM Art (Nikon) ""@..."
3,"""Sigma 4.5mm f/2.8 EX DC Circular Fisheye HSM...","""\n This autofocus circ...",Camera_and_Photo,,,Manufacturer: Sigma Sku #: EXDC4.5HAS Our Pri...,"Title: ""Sigma 4.5mm f/2.8 EX DC Circular Fish..."
4,"""Nikkor AF-S 50mm f1.8G""@en "" Nikkor f1.8G | ...","""\n \n** Have a questi...",Camera_and_Photo,,,,"Title: ""Nikkor AF-S 50mm f1.8G""@en "" Nikkor f..."


In [11]:
df['text_for_embedding'][2]

'Title:  "Sigma 50mm F1.4 DG HSM Art (Nikon) "@en Aden Camera Digital Cameras Toronto Canada Store " Sigma | SLR, Category: Camera_and_Photo, Price: None, Brand: None, Specifications:  Manufacturer: Sigma Sku #: A50DGHN Our Price: To See Product Details, add to your cart Sigma 50mm F1.4 DG HSM Art (Nikon) Our Price: $1,049.00 Close MSRP: $1,199.95 Quantity: \xa0 Availability: In Stock FREE SHIPPING WITHIN CANADA!'

In [17]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct
import uuid

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

products = []
for index, row in df.iterrows():
    product = {
        "id": str(uuid.uuid4()),  # Generate a random UUID
        "name": row['product_name'],
        "description": row['description'],
        "category": row['category'],
        "price": row['price'],
    }
    products.append(product)


# Combine name + description for embedding
texts = [f"{p['name']} - {p['description']} - {p['price']}$" for p in products]
vectors = model.encode(texts).tolist()



In [18]:
print(len(texts))

2154


### Upload to Qdrant

In [19]:
# Connect to Qdrant
from qdrant_client import QdrantClient
client = QdrantClient(host="localhost", port=6333)

# Create (or recreate) collection
client.recreate_collection(
    collection_name="products",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

# Upload points with full payload
points = [
    PointStruct(
        id=p["id"],
        vector=v,
        payload={
            "name": p["name"],
            "description": p["description"],
            "category": p["category"],
            "price": p["price"]
        }
    )
    for p, v in zip(products, vectors)
]

client.upsert(collection_name="products", points=points)

  client.recreate_collection(


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [20]:
print("Total points in collection:", client.count(collection_name="products"))

Total points in collection: count=2154


### Search with User Query

In [29]:
def search_products(query: str, top_k: int = 3):
    query_vector = model.encode(query).tolist()
    results = client.search(
        collection_name="products",
        query_vector=query_vector,
        limit=top_k
    )
    return results

# Example
user_input = "yoga pants with yellow stripe"
matches = search_products(user_input)

for match in matches:
    print(match.payload, "Score:", match.score)


{'name': "Women's Yoga Pants", 'description': 'Comfortable and stretchy yoga pants for workouts or lounging.', 'category': 'Clothing - Activewear', 'price': 34.5} Score: 0.6157073
{'name': 'Non-Slip Yoga Socks', 'description': 'Yoga socks designed to provide better grip and stability.', 'category': 'Fitness', 'price': 12.99} Score: 0.5297296
{'name': 'Yoga Mat', 'description': 'Non-slip yoga mat for optimal grip and comfort.', 'category': 'Fitness', 'price': 25.0} Score: 0.49380147


  results = client.search(


### Add new data

In [22]:
# # get some new product
# df2 = pd.read_csv('data/product_20.csv')
# df2 = df2[7:12]
# df2.head()

In [23]:
# Get product list from df 
# updated_product = []
# for index, row in df2.iterrows():
#     product = {
#         "id": row['id'],
#         "name": row['product_name'],
#         "description": row['description'],
#         "category": row['category'],
#         "price": row['price'],
#     }
#     updated_product.append(product)

# print(updated_product)

In [24]:
# # Combine name + description for embedding
# texts = [f"{p['name']} - {p['description']}" for p in products]
# vectors = model.encode(texts).tolist()

In [25]:
# Upload points with full payload
# points = [
#     PointStruct(
#         id=p["id"],
#         vector=v,
#         payload={
#             "name": p["name"],
#             "description": p["description"],
#             "category": p["category"],
#             "price": p["price"]
#         }
#     )
#     for p, v in zip(updated_product, vectors)
# ]

# client.upsert(collection_name="products", points=points)

In [26]:
# collections = client.get_collections()
# print(collections)
# print(client.count(collection_name="products"))


### ❌ Deleting Points or Entire Collection


In [27]:
# from qdrant_client.http.models import PointIdsList

# client.delete(
#     collection_name="products",
#     points_selector=PointIdsList(points=["97c27567-d47a-47c5-9718-b25e16d042d7"])  # ID(s) to delete
# )

In [2]:
from qdrant_client import QdrantClient
client = QdrantClient(host="localhost", port=6333)

client.delete_collection(collection_name="products")

True