In [26]:
import os
from time import sleep, time

import cohere
import dotenv
import pandas as pd
from groq import Groq
from pinecone import Pinecone, ServerlessSpec
# Load API keys from the local .env file into the process environment.
dotenv.load_dotenv(".env")

True

In [27]:
# Service clients; every key is read from the environment.
co = cohere.ClientV2(api_key=os.environ.get("COHERE_API_KEY"))
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Vector index settings.
# NOTE(review): Cohere documents embed-english-v3.0 as producing 1024-dimensional
# vectors; confirm that 384 is really the intended index dimension.
dimension, metric = 384, "cosine"

In [28]:
# Pinecone client; used below to create/describe the index and to obtain an index handle.
pc = Pinecone(api_key=pinecone_api_key)

In [29]:
# Product catalogue, one JSON object per line.
# Forward slashes work on every OS; the previous backslash path relied on the
# invalid escape sequence "\p" and only resolved on Windows.
df = pd.read_json('api/products/products.jsonl', lines=True)

In [30]:
# Flatten each product row into a single string to embed:
# "<name> : <description> -- Ingredients: ... -- Price: ... -- rating: ..."
df['text'] = (
    df['name'] + " : " + df['description']
    + " -- Ingredients: " + df['ingredients'].astype(str)
    + " -- Price: " + df['price'].astype(str)
    + " -- rating: " + df['rating'].astype(str)
)

In [31]:
# Corpus of strings to embed; starts with one entry per product row.
texts = df["text"].to_list()

In [32]:
# Add the shop's "about us" text to the corpus, prefixed with a label
# (the part before the first ":" is later used as the vector id).
with open("api/products/Merry's_way_about_us.txt") as f:
    Merry_way_about_section = "Coffee shop Merry's Way about section: " + f.read()

texts.append(Merry_way_about_section)

In [33]:
# Add the plain-text menu listing to the corpus, prefixed with a label
# (the part before the first ":" is later used as the vector id).
with open("api/products/menu_items_text.txt") as f:
    menue_items_text = "Menu Items: " + f.read()

texts.append(menue_items_text)

In [34]:
def get_embedding(text_input, input_type='search_document', batch_size=96):
    """Embed texts with Cohere's ``embed-english-v3.0`` model.

    Args:
        text_input: A list of strings to embed. A single string is accepted
            and treated as a one-element list.
        input_type: Cohere ``input_type`` for the request. Keep the default
            ``'search_document'`` for texts that are stored in the vector
            index; pass ``'search_query'`` when embedding a user query.
        batch_size: Maximum number of texts sent per API call. Cohere's embed
            endpoint documents a limit of 96 texts per request.

    Returns:
        A list of float vectors, one per input text, in input order. Returns
        an empty list (without calling the API) when there is nothing to embed.
    """
    if isinstance(text_input, str):
        # A bare string would otherwise be sliced character by character.
        text_input = [text_input]
    else:
        text_input = list(text_input)

    embeddings = []
    for start in range(0, len(text_input), batch_size):
        response = co.embed(
            texts=text_input[start:start + batch_size],
            model='embed-english-v3.0',
            input_type=input_type,
            embedding_types=["float"],
        )
        embeddings.extend(response.embeddings.float_)

    return embeddings

In [35]:
# Embed the whole corpus (product rows plus the appended text files).
embeddings = get_embedding(text_input=texts)

In [36]:
# Sanity check: number of embedding vectors returned.
len(embeddings)

20

In [44]:
index_name = "coffeshop-index"

# Size the index from the vectors that were actually produced instead of a
# hard-coded number, so the index always matches the embedding model's output.
embedding_dimension = len(embeddings[0])

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric=metric,  # "cosine", from the configuration cell
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
elif pc.describe_index(index_name).dimension != embedding_dimension:
    # Re-running the notebook must not fail merely because the index exists,
    # but an index of the wrong size would reject every upsert later on.
    raise ValueError(
        f"Index {index_name!r} already exists with dimension "
        f"{pc.describe_index(index_name).dimension}, but the embeddings have "
        f"dimension {embedding_dimension}. Delete the index or use another name."
    )

In [46]:
# Block until the serverless index reports that it is ready for data operations.
# `sleep` is imported directly: the name `time` is bound to the function
# time.time by `from time import time`, so `time.sleep` does not exist.
while not pc.describe_index(index_name).status['ready']:
    sleep(1)

index = pc.Index(index_name)

# zip() silently drops trailing items, so make sure the corpus was not changed
# after the embeddings were computed.
assert len(texts) == len(embeddings), (
    f"{len(texts)} texts but {len(embeddings)} embeddings; re-run the embedding cell"
)

# One record per text. The id is whatever precedes the first ":" in the text
# (the product name, or the label prefix for the appended text files); records
# sharing an id overwrite each other on upsert.
vectors = [
    {
        "id": text.split(":")[0].strip(),
        "values": embedding,
        "metadata": {'text': text},
    }
    for text, embedding in zip(texts, embeddings)
]

index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}

In [49]:
# get_embedding returns a list with one vector per input text; the index query
# needs the single flat vector, not the enclosing list.
# NOTE(review): get_embedding embeds with input_type='search_document'; Cohere
# recommends 'search_query' for query text — consider making that configurable.
output = get_embedding(["Is Cappuccino lactose-free?"])
embeding = output[0]

In [50]:
# Fetch the three nearest stored texts for the query vector; only ids, scores
# and metadata are requested, not the stored vector values.
results = index.query(
    vector=embeding,
    namespace="ns1",
    top_k=3,
    include_metadata=True,
    include_values=False,
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.57559067,
              'values': []},
             {'id': 'Latte',
              'metadata': {'text': 'Latte : Smooth and creamy, our latte '
                                   'combines rich espresso with velvety '
                  