In [2]:
%load_ext autoreload
%autoreload 2
%load_ext dotenv
%dotenv

In [3]:
import os
import json
import pandas as pd
import time
from pinecone import Pinecone, ServerlessSpec
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from IPython.display import Markdown

  from tqdm.autonotebook import tqdm


## 01 Set Pinecone Key and Client

In [4]:
api_key = os.environ.get("PINECONE_API_KEY")

In [5]:
# configure client
pc = Pinecone(api_key=api_key)

## 03 Create Pinecone index

In [12]:
index_name = 'recipes-index'
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# We create a new index with a dimension size of 1536 (for text-embedding-ada-002)
pc.create_index(
    index_name,
    dimension=1536,  # dimensionality of text-embedding-ada-002
    metric='cosine',
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# Wait for the index to be initialized
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## 05 Create Embeddings

In [13]:
texts = (
    'Title: ' + data['title'] + ', ' +
    'Description: ' + data['description'] + ', ' +
    'Ingredients: ' + data['ingredients'] + ', ' +
    'Prep time: ' + data['prep_time'] + ', ' +
    'Cook time: ' + data['cook_time'] + ', ' +
    'Calories: ' + data['kcal'].astype(str) + ' kcal, ' +
    'Fat: ' + data['fat'].astype(str) + ' g, ' +
    'Saturates: ' + data['saturates'].astype(str) + ' g, ' +
    'Carbs: ' + data['carbs'].astype(str) + ' g, ' +
    'Sugars: ' + data['sugars'].astype(str) + ' g, ' +
    'Fibre: ' + data['fibre'].astype(str) + ' g, ' +
    'Protein: ' + data['protein'].astype(str) + ' g, ' +
    'Salt: ' + data['salt'].astype(str) + ' g'
)

texts_list = texts.tolist()


## 06 Open AI Key

In [6]:
openai_api_key = os.getenv('OPENAI_API_KEY')

In [7]:
embed_model = "text-embedding-3-small"
embeddings_model = OpenAIEmbeddings(model=embed_model)

## 07 Create Open Ai embeddings for data

In [15]:

embeddings = []
for text in texts_list:
  response = embeddings_model.embed_query(text)
  embeddings.append(response)


In [17]:
embeddings[:5]
print(len(embeddings))

1106


## 08 Upsert the Data to Pinecone

In [18]:
# Step 5: Upsert the Data to Pinecone
for i, embedding_vector in enumerate(embeddings):
    metadata = {
        "id": str(i),
        "title": data.loc[i, 'title'],
        "description": data.loc[i, 'description'],
        "prep_time": data.loc[i, 'prep_time'],
        "cook_time": data.loc[i, 'cook_time'],
        "difficulty": data.loc[i, 'difficulty'],
        "serves": data.loc[i, 'serves'],
        "diet_type": data.loc[i, 'diet_type'],
        "ingredients": data.loc[i, 'ingredients'],
        "calories": data.loc[i, 'kcal'],
        "fat": data.loc[i, 'fat'],
        "saturates": data.loc[i, 'saturates'],
        "carbs": data.loc[i, 'carbs'],
        "sugars": data.loc[i, 'sugars'],
        "fibre": data.loc[i, 'fibre'],
        "protein": data.loc[i, 'protein'],
        "salt": data.loc[i, 'salt'],
        "instructions": data.loc[i, 'instructions'],
    }
    index.upsert(vectors=[(str(i), embedding_vector, metadata)])

print(f"Uploaded {len(embeddings)} recipes to Pinecone.")

Uploaded 1106 recipes to Pinecone.


## 09 Query Pinecone with a new recipe query

In [8]:
# query = "please a recipe that can be prepare in less than 20 min"
query = "please a dessert and a salad recommendation"
# create the query vector
query_vector = embeddings_model.embed_query(query)

# Retrieve relevant contexts from Pinecone
query_res = index.query(vector=query_vector, top_k=3, include_metadata=True)
print(query_res)
print(len(query_res["matches"]))

# Extract the matched metadata (recipes) from Pinecone results
contexts = [item['metadata'] for item in query_res['matches']]

NameError: name 'index' is not defined

In [46]:
context_texts = [f"Title: {c['title']}\nPreparation Time: {c['prep_time']}\nCooking Time: {c['cook_time']}\nDifficulty: {c['difficulty']}\nServes: {c['serves']}\nDiet Type: {c['diet_type']}\nDescription: {c['description']}\nIngredients: {c['ingredients']}\nInstructions: {c['instructions']} " for c in contexts]
augmented_query = "\n\n---\n\n".join(context_texts) + "\n\n-----\n\n" + query
print(augmented_query)

Title: Rose & pomegranate jellies with cardamom panna cotta
Preparation Time: 30 mins
Cooking Time: 15 mins
Difficulty: More effort
Serves: Serves 6
Diet Type: Gluten-free
Description: This dessert combo is wonderfully refreshing, with the garnet-coloured, tart jelly cutting through the creaminess of the cardamom panna cotta base
Ingredients: 7gelatine leaves, 1 tsplemon juice, 100gcaster sugar, 250mlunsweetened pomegranate juice, 1 tsprosewater, 1pomegranate, seeds only, 300mldouble cream, 200mlwhole milk, 6cardamom pods, seeds lightly bashed, 1vanilla pod, split, 2½gelatine leaves, 75gcaster sugar, For the panna cotta, 300mldouble cream, 200mlwhole milk, 6cardamom pods, seeds lightly bashed, 1vanilla pod, split, 2½gelatine leaves, 75gcaster sugar
Instructions: First, make the jelly. Soak the gelatine in cold water for 10 mins. Bring the lemon juice and 300ml water to the boil over a medium heat. Stir in the sugar until dissolved, then remove from the heat. Squeeze the excess water fr

In [44]:
# system message to 'prime' the model
primer = f"""You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

In [26]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = OpenAI(model="gpt-4o-mini", temperature=0)

message = """
Answer this question using the provided context only.
{question}
Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", augmented_query)])

rag_chain = {"context": primer, "question": RunnablePassthrough()} | prompt | llm

response = rag_chain.invoke("give me a recipe with chicken and spaghetti")

print(response.content)

# res = llm.invoke(
#     messages=[
#         {"role": "system", "content": primer},
#         {"role": "user", "content": augmented_query}
#     ]
# )


TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>

In [None]:
res

In [None]:
response_content = res.choices[0].message.content
display(Markdown(response_content))

In [50]:
# Step 9: Compare Response with Non-Augmented Query
res_non_augmented = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": query}
    ]
)



In [None]:
response_content = res.choices[0].message.content
display(Markdown(response_content))