In [1]:
from pinecone import Pinecone,ServerlessSpec
import os 
import pandas as pd 
import dotenv
from time import time 
from typing import List
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
dotenv.load_dotenv()

  from tqdm.autonotebook import tqdm

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  return _bootstrap._gcd_import(name[level:], package, level)


True

In [2]:
model_name = os.getenv("MODEL_NAME")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

In [3]:
pc = Pinecone(api_key=pinecone_api_key)

In [23]:


def get_embeddings(texts:List[str]):
    embedding_model_name = "BAAI/bge-small-en-v1.5"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': True}
    hf_embeddings = HuggingFaceBgeEmbeddings(
        model_name=os.getenv("EMBEDDINGS_MODEL_NAME",embedding_model_name),
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )
    embeddings_output = hf_embeddings.embed_documents(texts=texts)
    
    return embeddings_output

In [11]:
embeddings_demo = get_embeddings(texts=['hello world'])



In [12]:
len(embeddings_demo[0])

384

In [13]:
df=pd.read_json('products/products.jsonl',lines=True)

In [15]:
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [16]:
df['text'] =  df['name']+" : "+df['description'] + \
                " -- Ingredients: " + df['ingredients'].astype(str) + \
                " -- Price: " + df['price'].astype(str) + \
                " -- rating: " + df['rating'].astype(str) 

In [17]:
df['text'].head()

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
2    Latte : Smooth and creamy, our latte combines ...
3    Chocolate Chip Biscotti : Crunchy and delightf...
4    Espresso shot : A bold shot of rich espresso, ...
Name: text, dtype: object

In [18]:

texts = df['text'].tolist()

In [19]:

with open("./products/Merry's_way_about_us.txt") as f:
    Merry_way_about_section = f.read()
    
Merry_way_about_section = "Coffee shop Merry's Way about section: " + Merry_way_about_section
texts.append(Merry_way_about_section)


In [20]:
with open('products/menu_items_text.txt') as f:
    menue_items_text = f.read()
    
menue_items_text = "Menu Items: " + menue_items_text
texts.append(menue_items_text)

In [25]:
len(texts)

20

In [22]:
emebddings = get_embeddings(texts=texts)



In [24]:
len(emebddings)

20

In [26]:
len(emebddings[0])

384

In [27]:
index_name = os.getenv("PINECONE_INDEX_NAME")

In [28]:
pc.create_index(
    name=index_name,
    dimension=384, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [40]:
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

index = pc.Index(index_name)
vectors=[]

for text,e in zip(texts,emebddings):
    
    entry_id = text.split(":")[0].strip()
    vectors.append({
        "id":entry_id,
        "values":e,
        "metadata":{"text":text}
    })

print(vectors)
# index.upsert(
#     vectors=vectors,
#     namespace="ns1"
# )

[{'id': 'Cappuccino', 'values': [-0.01406584121286869, -0.06437015533447266, 0.003818049095571041, 0.004768860060721636, 0.030061054974794388, -0.04575859010219574, 0.03738417848944664, 0.01077637542039156, -0.02763562649488449, -0.05079451948404312, -0.05240840092301369, -0.033733125776052475, -0.0002980723511427641, -0.04381433501839638, 0.03753119334578514, -0.017064454033970833, -0.0194668211042881, -0.08067309856414795, -0.07583886384963989, -0.007986270822584629, 0.04853237420320511, -0.061235059052705765, -0.10755736380815506, -0.03705848008394241, 0.03555623069405556, -0.048546578735113144, 0.08778645843267441, -0.019896002486348152, 0.0018020514398813248, -0.13085296750068665, -0.012980393134057522, 0.009819691069424152, -0.027959927916526794, -0.025871824473142624, -0.03502177447080612, 0.0009856621036306024, 0.06742729246616364, -0.07588779926300049, 0.046987127512693405, 0.0006804739241488278, -0.002484557218849659, 0.029773006215691566, -0.06420514732599258, -0.00200272002

In [31]:
test_embed = get_embeddings(texts=["Is cappuccino lactose-free?"])



In [33]:
len(test_embed[0])

384

In [34]:
results = index.query(
    namespace="ns1",
    vector=test_embed[0],
    top_k=3,
    include_values=False,
    include_metadata=True
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.734912813,
              'values': []},
             {'id': 'Sugar Free Vanilla syrup',
              'metadata': {'text': 'Sugar Free Vanilla syrup : Enjoy the sweet '
                                   'flavor of vanilla without the 