In [58]:
# import pandas as pd

# # Read the file from the data folder
# file_path = 'data/merged_10K_updated.csv'  # Replace with the path to your full product CSV
# data = pd.read_csv(file_path)

# # Select the first 20 records
# data_20 = data.head(20)

# # Save the new file with 20 records
# output_path = 'data/product_20.csv'  # Replace with your desired output file name
# data_20.to_csv(output_path, index=False)

# print(f"New file with 20 records saved to {output_path}")

## Implement Vector Embedding & Vector Search

In [59]:
import pandas as pd 

# Read the sample catalogue and keep only its first 15 rows; these are the
# rows that get embedded and uploaded below.
df = pd.read_csv('data/product_20.csv').iloc[:15]
df.head()

Unnamed: 0,id,product_name,description,category,price
0,2eb6177b-de78-4098-ae66-a605e88eae88,Protein Bar Variety Pack,A pack of assorted nut and protein bars for a ...,Food - Snacks,12.99
1,97c27567-d47a-47c5-9718-b25e16d042d7,Organic Baby Spinach,"Fresh baby spinach leaves, great for salads an...",Food - Produce,2.99
2,426d6163-fb56-4ba3-b94c-af6a663d4582,Sliced Cheese,"Assorted sliced cheese, perfect for sandwiches.",Food - Dairy,4.49
3,543b78b0-ce30-40c5-b6a3-f1a3096cae62,Multifunctional Baby Bottle Warmer,Warmer for heating baby bottles and food jars.,Baby,39.99
4,0caa0f7a-abc0-4235-a957-5238d7682104,Self-Massaging Pillow,Comfortable pillow with built-in massage featu...,Health,49.99


In [60]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct

# Sentence-embedding model used to turn product text into vectors.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Flatten every DataFrame row into a plain dict with the keys used downstream.
products = [
    {
        "id": record['id'],
        "name": record['product_name'],
        "description": record['description'],
        "category": record['category'],
        "price": record['price'],
    }
    for _, record in df.iterrows()
]

# Text embedded per product: "<name> - <description> - <price>$".
texts = []
for item in products:
    texts.append(f"{item['name']} - {item['description']} - {item['price']}$")

# Encode all texts in one call and convert the result to plain Python lists.
vectors = model.encode(texts).tolist()




In [61]:
# Show the exact strings that were passed to the embedding model.
print(texts)

['Protein Bar Variety Pack - A pack of assorted nut and protein bars for a quick energy boost. - 12.99$', 'Organic Baby Spinach - Fresh baby spinach leaves, great for salads and smoothies. - 2.99$', 'Sliced Cheese - Assorted sliced cheese, perfect for sandwiches. - 4.49$', 'Multifunctional Baby Bottle Warmer - Warmer for heating baby bottles and food jars. - 39.99$', 'Self-Massaging Pillow - Comfortable pillow with built-in massage features. - 49.99$', 'Window Bird Feeder with Suction Cups - Clear feeder that attaches to windows for bird watching. - 28.99$', 'Quinoa & Black Bean Salad - A nutritious salad with quinoa and black beans - 7.49$', 'Graphic Print Leggings - Trendy leggings with a unique graphic print, versatile for workouts and casual wear. - 29.99$', 'Mango Chia Pudding - Healthy chia pudding made with mango and coconut milk. - 4.49$', 'Faux Fur Throw Blanket - Cozy faux fur blanket to add warmth and style to your home. - 20.99$', 'Decorative Throw Blanket - Soft throw blan

### Upload to Qdrant

In [62]:
# Connect to the Qdrant instance listening on localhost:6333.
client = QdrantClient(host="localhost", port=6333)

# Start from an empty "products" collection on every run.
# `recreate_collection` is deprecated in qdrant-client; the supported
# replacement is an explicit exists-check, delete, then create.
if client.collection_exists(collection_name="products"):
    client.delete_collection(collection_name="products")

client.create_collection(
    collection_name="products",
    # Take the dimensionality from the embeddings themselves so the collection
    # always matches whatever model produced `vectors`.
    vectors_config=VectorParams(size=len(vectors[0]), distance=Distance.COSINE),
)

# One point per product: the product id is the point id, the embedding is the
# vector, and the remaining product fields travel along as payload.
points = [
    PointStruct(
        id=p["id"],
        vector=v,
        payload={
            "name": p["name"],
            "description": p["description"],
            "category": p["category"],
            "price": p["price"]
        }
    )
    for p, v in zip(products, vectors)
]

client.upsert(collection_name="products", points=points)

  client.recreate_collection(


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [63]:
# Ask Qdrant how many points the "products" collection currently holds.
point_count = client.count(collection_name="products")
print("Total points in collection:", point_count)

Total points in collection: count=15


### Search with User Query

In [64]:
def search_products(query: str, top_k: int = 3):
    """Return the ``top_k`` points of the "products" collection closest to ``query``.

    The query text is embedded with the notebook-level ``model`` and the
    resulting vector is sent to the notebook-level Qdrant ``client``.

    Args:
        query: Free-text search request.
        top_k: Maximum number of hits to return.

    Returns:
        The list of scored points from the Qdrant response; each hit exposes
        ``.payload`` and ``.score``.
    """
    query_vector = model.encode(query).tolist()
    # `client.search` is deprecated in qdrant-client. Its replacement,
    # `query_points`, wraps the hits in a response object, so unwrap `.points`
    # to keep returning a plain list to existing callers.
    response = client.query_points(
        collection_name="products",
        query=query_vector,
        limit=top_k,
    )
    return response.points

# Example query. The price hint is only part of the embedded text:
# search_products() ranks by vector similarity and applies no price filter.
user_input = "Suggest some blanket within 25$"
matches = search_products(user_input)

for hit in matches:
    print(hit.payload, "Score:", hit.score)


{'name': 'Decorative Throw Blanket', 'description': 'Soft throw blanket for cozy home decor.', 'category': 'Home', 'price': 39.99} Score: 0.65913844
{'name': 'Faux Fur Throw Blanket', 'description': 'Cozy faux fur blanket to add warmth and style to your home.', 'category': 'Home', 'price': 20.99} Score: 0.62334424
{'name': 'Multifunctional Baby Bottle Warmer', 'description': 'Warmer for heating baby bottles and food jars.', 'category': 'Baby', 'price': 39.99} Score: 0.42477262


  results = client.search(


### Add new data

In [65]:
# Take rows 7-11 of the same CSV as the "update" batch. These rows are also
# part of the first 15 rows loaded into `df`, so they share ids with products
# built earlier.
df2 = pd.read_csv('data/product_20.csv').iloc[7:12]
df2.head()

Unnamed: 0,id,product_name,description,category,price
7,526927ba-e4a9-498c-8aac-3feab8f0f5ff,Graphic Print Leggings,"Trendy leggings with a unique graphic print, v...",Clothing - Activewear,29.99
8,69a2dc6f-5c8a-4b12-8a22-cdd640596693,Mango Chia Pudding,Healthy chia pudding made with mango and cocon...,Food - Snacks,4.49
9,866254ca-1a0f-4afe-aa82-6e43f8e5d535,Faux Fur Throw Blanket,Cozy faux fur blanket to add warmth and style ...,Home,20.99
10,69ac4ae7-cbab-4680-bd3e-58a698498fae,Decorative Throw Blanket,Soft throw blanket for cozy home decor.,Home,39.99
11,e5cdd5f9-d132-4cfa-b6a1-0ea513169f38,Compact Electric Kettle,Quick-boiling kettle for home and office use.,Kitchen,29.99


In [66]:
# Build the list of product dicts for the update batch from df2.
updated_product = [
    {
        "id": record['id'],
        "name": record['product_name'],
        "description": record['description'],
        "category": record['category'],
        "price": record['price'],
    }
    for _, record in df2.iterrows()
]

print(updated_product)

[{'id': '526927ba-e4a9-498c-8aac-3feab8f0f5ff', 'name': 'Graphic Print Leggings', 'description': 'Trendy leggings with a unique graphic print, versatile for workouts and casual wear.', 'category': 'Clothing - Activewear', 'price': 29.99}, {'id': '69a2dc6f-5c8a-4b12-8a22-cdd640596693', 'name': 'Mango Chia Pudding', 'description': 'Healthy chia pudding made with mango and coconut milk.', 'category': 'Food - Snacks', 'price': 4.49}, {'id': '866254ca-1a0f-4afe-aa82-6e43f8e5d535', 'name': 'Faux Fur Throw Blanket', 'description': 'Cozy faux fur blanket to add warmth and style to your home.', 'category': 'Home', 'price': 20.99}, {'id': '69ac4ae7-cbab-4680-bd3e-58a698498fae', 'name': 'Decorative Throw Blanket', 'description': 'Soft throw blanket for cozy home decor.', 'category': 'Home', 'price': 39.99}, {'id': 'e5cdd5f9-d132-4cfa-b6a1-0ea513169f38', 'name': 'Compact Electric Kettle', 'description': 'Quick-boiling kettle for home and office use.', 'category': 'Kitchen', 'price': 29.99}]


In [67]:
# Embed the update batch. The texts must be built from `updated_product`
# (not the original `products` list) so that each vector lines up with the
# product it is later zipped with. The text format, including the price, is
# kept identical to the one used for the initial upload so that all points in
# the collection are embedded the same way.
texts = [f"{p['name']} - {p['description']} - {p['price']}$" for p in updated_product]
vectors = model.encode(texts).tolist()

In [68]:
# Pair each product of the update batch with its vector and build one point
# per pair: the product id becomes the point id, the other fields the payload.
points = []
for item, embedding in zip(updated_product, vectors):
    payload = {
        "name": item["name"],
        "description": item["description"],
        "category": item["category"],
        "price": item["price"],
    }
    points.append(PointStruct(id=item["id"], vector=embedding, payload=payload))

client.upsert(collection_name="products", points=points)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [69]:
# List the collections known to the server, then the point count of
# "products", one per line.
collections = client.get_collections()
print(collections, client.count(collection_name="products"), sep="\n")


collections=[CollectionDescription(name='products')]
count=15


### ❌ Deleting Points or Entire Collection


In [70]:
from qdrant_client.http.models import PointIdsList

# Point id(s) to remove from the "products" collection.
ids_to_delete = ["97c27567-d47a-47c5-9718-b25e16d042d7"]

client.delete(
    collection_name="products",
    points_selector=PointIdsList(points=ids_to_delete),
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [71]:
# Drop an entire collection by name.
# NOTE(review): no cell in this notebook creates a "star_charts" collection and
# the recorded output of this cell is False, so this call presumably deleted
# nothing. If the intent is to remove the demo collection, the name should
# probably be "products" — confirm before changing, since that is destructive.
client.delete_collection(collection_name="star_charts")

False