In [1]:
from pinecone import Pinecone, ServerlessSpec 
from openai import OpenAI
import pandas as pd 
import os 
import dotenv
import time
dotenv.load_dotenv()

True

In [2]:
token = os.getenv("RUNPOD_TOKEN") 
openai_base_url = os.getenv("RUNPOD_EMBEDDING_URL")
model_name = os.getenv("MODEL_NAME")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX_NAME")

In [3]:
pc = Pinecone(api_key = pinecone_api_key)
client = OpenAI(
    api_key = token,
    base_url = openai_base_url
)

# Dataset 

In [4]:
df = pd.read_json("products/products.jsonl", lines=True)

In [5]:
df.head(3)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappucin,Coffee,"It is made with freshly brewed espresso, steam...","[Espresso, Steamed Milk, Milk Foam]",2.5,4.7,capucin.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp
2,Direct,Coffee,"Smooth and creamy, our latte combines rich esp...","[Espresso, Steamed Milk, Milk Foam]",3.0,4.8,Direct.jpg


In [6]:
df['text'] = df['name'] + ": "+ df["description"] + \
" -- Ingredients: " + df["ingredients"].astype(str) + \
" -- Price: " + df["price"].astype(str) + \
" -- Rating: " + df["rating"].astype(str)

In [7]:
texts = df['text'].tolist()

In [8]:
with open('products/Amine_about_us.txt') as f:
    amine_about_section = f.read()
amine_about_section = "Amine's coffee shop about section: " + amine_about_section 
texts.append(amine_about_section)

In [9]:
with open('products/menu_items_text.txt') as f:
    men_items_section = f.read()
men_items_section = "Menu Items: " + men_items_section 
texts.append(men_items_section)

# Generate Embeddings

In [10]:
output = client.embeddings.create(input=texts, model=model_name)

In [11]:
embeddings = output.data

# Push data to Pinecone 

In [12]:
# Run this cell only one time, and if you restart this notebook after running it, do not repeat the execution
pc.create_index(
    name = index_name,
    dimension = 384,
    metric = "cosine",
    spec = ServerlessSpec(
        cloud = "aws",
        region = "us-east-1"
    )
)

{
    "name": "amine-coffee-shop",
    "metric": "cosine",
    "host": "amine-coffee-shop-hfic0nd.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [13]:
while not pc.describe_index(index_name).status.ready: 
    time.sleep(1)

index = pc.Index(index_name)
vectors = []
for text, embed in zip(texts, embeddings):
    entry_id = text.split(":")[0]
    vectors.append({
        "id": entry_id,
        "values": embed.embedding,
        "metadata": {"text": text}
    })
index.upsert(vectors=vectors, namespace="ns1")

  from .autonotebook import tqdm as notebook_tqdm


{'upserted_count': 20}

# Get the Closest Documents

In [17]:
output_ = client.embeddings.create(input=["Is cappucin free?"], model = model_name)
embedding = output.data[0].embedding

In [22]:
results = index.query(
    namespace = "ns1",
    vector = embedding, 
    top_k = 2,
    include_values = False,
    include_metadata = True
)

In [23]:
results

{'matches': [{'id': 'Cappucin',
              'metadata': {'text': 'Cappucin: It is made with freshly brewed '
                                   'espresso, steamed milk, and a frothy milk '
                                   'cap. This delightful drink offers a '
                                   'perfect balance of bold coffee flavor and '
                                   'smooth milk, making it an ideal companion '
                                   'for relaxing mornings or lively '
                                   'conversations. -- Ingredients: '
                                   "['Espresso', 'Steamed Milk', 'Milk Foam'] "
                                   '-- Price: 2.5 -- Rating: 4.7'},
              'score': 0.997835159,
              'values': []},
             {'id': 'Direct',
              'metadata': {'text': 'Direct: Smooth and creamy, our latte '
                                   'combines rich espresso with velvety '
                                   'steamed 