## Install Modules

In [1]:
# !pip install chromadb sentence-transformers

## Import Modules

In [2]:
import pandas as pd
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

## Load the Dataset

In [4]:
# Load only the text columns used for search. The explicit `.copy()` makes
# `df` an independent frame, so adding columns to it later does not trigger
# pandas' SettingWithCopyWarning.
TEXT_COLUMNS = ['product_name', 'category', 'about_product']
df = pd.read_csv('amazon.csv', usecols=TEXT_COLUMNS)[TEXT_COLUMNS].copy()
df.head(3)

Unnamed: 0,product_name,category,about_product
0,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,High Compatibility : Compatible With iPhone 12...
1,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,"Compatible with all Type C enabled devices, be..."
2,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,【 Fast Charger& Data Sync】-With built-in safet...


In [5]:
# Build the text that gets embedded: the descriptive columns joined by
# newlines (with a trailing newline after the last field).
# The source columns are listed explicitly: iterating over `df.columns` would
# also visit `combined_text` itself and append the accumulated text to itself,
# duplicating every document.
text_columns = ['product_name', 'category', 'about_product']
df['combined_text'] = (
    df[text_columns]
    .fillna('')  # a missing field must not turn the whole row into NaN
    .astype(str)
    .agg('\n'.join, axis=1)
    + '\n'
)
df.head(1)

Unnamed: 0,product_name,category,about_product,combined_text
0,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,High Compatibility : Compatible With iPhone 12...,Wayona Nylon Braided USB to Lightning Fast Cha...


In [7]:
# number of rows in the dataset
df.shape[0]

1465

## Create Embeddings

In [6]:
# Instantiate the sentence-embedding model.
model = SentenceTransformer('BAAI/bge-small-en')

# Encode the combined text of every row; the texts are passed as a plain list.
documents = df['combined_text'].tolist()
embeddings = model.encode(documents)

## Initialize ChromaDB

In [8]:
client = chromadb.Client(Settings())
# Fetch the collection if it already exists, otherwise create it.
# `create_collection` raises when the name is already taken (e.g. when this
# cell is re-run in the same kernel). Note that an existing collection is
# returned with whatever it currently contains.
collection_name = 'product_search'
collection = client.get_or_create_collection(name=collection_name)

In [9]:
# Prepare the data for ChromaDB.
# Ids are the row positions rendered as strings. Product names are taken
# positionally (via a plain list) so they stay aligned with `embeddings` even
# if the DataFrame index is not 0..n-1; a label lookup such as
# `df['product_name'][i]` would raise or misalign in that case.
product_names = df['product_name'].tolist()
ids = [str(position) for position in range(len(embeddings))]
metadatas = [{'product_name': name} for name in product_names]

# insert the data into the collection
collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)

In [10]:
# create function for retrieval
def vector_search(query, model, collection, top_n=10):
    """Run a similarity search for a single text query.

    Args:
        query: The query text.
        model: Object exposing ``encode(list_of_texts)``; it is called with a
            one-element list containing ``query``.
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``.
        top_n: Number of results requested from the collection (default 10).

    Returns:
        Whatever ``collection.query`` returns, unchanged.
    """
    return collection.query(
        query_embeddings=model.encode([query]),
        n_results=top_n,
    )

In [13]:
# sample usage
query = 'ambrane 10000mah powerbank'
search_results = vector_search(query, model, collection, 5)

# display results
# The f-string uses double quotes outside so the single-quoted dictionary key
# inside also parses on Python versions before 3.12. The loop variable is not
# called `id`, to avoid shadowing the builtin for the rest of the notebook.
for doc_id, product in zip(search_results['ids'][0], search_results['metadatas'][0]):
    print(f"ID: {doc_id}\nProduct Name: {product['product_name']}\n---------------------------------------------------")

ID: 413
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Black)
---------------------------------------------------
ID: 444
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Green)
---------------------------------------------------
ID: 657
Product Name: Ambrane 20000mAh Power Bank with 20W Fast Charging, Triple Output, Power Delivery, Type C Input, Made in India, Multi-Layer Protection, Li-Polymer + Type C Cable (Stylo-20k, Black)
---------------------------------------------------
ID: 417
Product Name: Ambrane 20000mAh Power Bank with 20W Fast Charging, Triple Output, Power Delivery, Type C Input, Made in India, Multi-Layer Protection, Li-Polymer + Type C Cable (Stylo-20k, Black

In [14]:
# remove the item stored under id '417' from the collection
ids_to_delete = ['417']
collection.delete(ids=ids_to_delete)

In [15]:
# repeat the same search after the delete
query = 'ambrane 10000mah powerbank'
search_results = vector_search(query, model, collection, 5)

# display results
# The f-string uses double quotes outside so the single-quoted dictionary key
# inside also parses on Python versions before 3.12. The loop variable is not
# called `id`, to avoid shadowing the builtin for the rest of the notebook.
for doc_id, product in zip(search_results['ids'][0], search_results['metadatas'][0]):
    print(f"ID: {doc_id}\nProduct Name: {product['product_name']}\n---------------------------------------------------")

ID: 413
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Black)
---------------------------------------------------
ID: 444
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Green)
---------------------------------------------------
ID: 657
Product Name: Ambrane 20000mAh Power Bank with 20W Fast Charging, Triple Output, Power Delivery, Type C Input, Made in India, Multi-Layer Protection, Li-Polymer + Type C Cable (Stylo-20k, Black)
---------------------------------------------------
ID: 577
Product Name: Ambrane 27000mAh Power Bank, 20W Fast Charging, Triple Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Smartphones & Other Devices

In [16]:
# Replace the metadata stored for item '413'. Only `ids` and `metadatas` are
# passed in this call.
updated_product_name = 'Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Red)'
collection.update(ids=['413'], metadatas=[{'product_name': updated_product_name}])

In [17]:
# repeat the same search after the metadata update
query = 'ambrane 10000mah powerbank'
search_results = vector_search(query, model, collection, 5)

# display results
# The f-string uses double quotes outside so the single-quoted dictionary key
# inside also parses on Python versions before 3.12. The loop variable is not
# called `id`, to avoid shadowing the builtin for the rest of the notebook.
for doc_id, product in zip(search_results['ids'][0], search_results['metadatas'][0]):
    print(f"ID: {doc_id}\nProduct Name: {product['product_name']}\n---------------------------------------------------")

ID: 413
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Red)
---------------------------------------------------
ID: 444
Product Name: Ambrane 10000mAh Slim Power Bank, 20W Fast Charging, Dual Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Anrdoid & Other Devices (Stylo 10K, Green)
---------------------------------------------------
ID: 657
Product Name: Ambrane 20000mAh Power Bank with 20W Fast Charging, Triple Output, Power Delivery, Type C Input, Made in India, Multi-Layer Protection, Li-Polymer + Type C Cable (Stylo-20k, Black)
---------------------------------------------------
ID: 577
Product Name: Ambrane 27000mAh Power Bank, 20W Fast Charging, Triple Output, Type C PD (Input & Output), Quick Charge, Li-Polymer, Multi-Layer Protection for iPhone, Smartphones & Other Devices (