<a href="https://colab.research.google.com/github/SyedBilalDeveloper/AgenticAI-Journey/blob/main/01%20-%20Langchan/01_RAG_with_Pinecone_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install Required Libraries
%pip install -qU langchain-pinecone langchain-google-genai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.7/412.7 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.3/427.3 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.5/87.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Step 2: Import Required Modules and Setup Pinecone
from google.colab import userdata
from pinecone import Pinecone, ServerlessSpec

In [4]:
# Fetch the Pinecone API Key securely from user data
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

In [5]:
# Initialize Pinecone client with the API key
pc = Pinecone(api_key=PINECONE_API_KEY)

In [10]:
# Create a Pinecone Index
index_name = "rag-project"
pc.create_index(
    name=index_name,
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)


In [11]:
# Connect to the created index
index = pc.Index(index_name)


In [12]:
# Step 3: Setup Google Generative AI Embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

In [14]:
# Fetch the Google API Key securely from user data
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')


In [15]:
# Initialize the embedding model
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [17]:
# Step 4: Convert a Text Query into an Embedding Vector
vector = embeddings.embed_query("we are building rag text")
print("Embedding Vector (First 5 values):", vector[:5])
# Display the first 5 values of the vector


Embedding Vector (First 5 values): [0.04691950976848602, -0.019704652950167656, -0.05014841631054878, -0.03578326478600502, 0.02460779808461666]


In [18]:
# Step 5: Initialize Pinecone Vector Store
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [20]:
# Step 6: Create Sample Documents
from langchain_core.documents import Document
documents = [
    Document(page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", metadata={"source": "news"}),
    Document(page_content="Building an exciting new project with LangChain - come check it out!", metadata={"source": "tweet"}),
    Document(page_content="Robbers broke into the city bank and stole $1 million in cash.", metadata={"source": "news"}),
    Document(page_content="Wow! That was an amazing movie. I can't wait to see it again.", metadata={"source": "tweet"}),
    Document(page_content="Is the new iPhone worth the price? Read this review to find out.", metadata={"source": "website"}),
    Document(page_content="The top 10 soccer players in the world right now.", metadata={"source": "website"}),
]


In [21]:
# Store documents in Pinecone
from uuid import uuid4
uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, ids=uuids)


['6179f478-0d74-4689-8983-c40611240c1b',
 'b60f98c6-75db-47fc-922f-e50dbc3f2799',
 'b222c487-726b-469e-a6b7-c89c4971305a',
 '8b527bb8-bbd7-471a-b6f7-3851e86e4999',
 '3940f2f4-24a5-4ba1-b6d9-513eff497aca',
 '63677ac2-f92f-4d74-b9c7-7c3cffb18bd3']

In [22]:
# Step 7: Retrieve Relevant Documents Using Similarity Search
results = vector_store.similarity_search(
    "What is the weather forecast for tomorrow?",
    k=2,
    filter={"source": "news"},
)

In [23]:
# Print the retrieved documents
print("\nRetrieved Documents Based on Query:")
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")


Retrieved Documents Based on Query:


In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [27]:
# Step 8: Generate an Answer to a Query
def answer_to_user(query: str):
    vector_results = vector_store.similarity_search(query, k=2)  # Get similar docs
    # Assuming 'model' is pre-trained for response generation (not shown here)
    final_answer = llm.invoke(f"ANSWER THIS QUERY:{query}, HERE ARE SOME REFERENCES ANSWER{vector_results}")

    # Placeholder for the generated answer
    return final_answer


In [29]:
# Example Input: Query to generate an answer
response = answer_to_user("What is the weather like tomorrow?")
print("\nGenerated Answer:", response.content)


Generated Answer: The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.


In [30]:
# Example Input: Query to generate an answer
response = answer_to_user("What is the weather?")
print("\nGenerated Answer:", response.content)


Generated Answer: Based on the provided text, the weather forecast for tomorrow is cloudy and overcast with a high of 62 degrees.
