<a href="https://colab.research.google.com/github/WasifAli22/Agentic-AI/blob/main/Rag_with_Langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install or upgrade the LangChain integration packages for Pinecone and Google Generative AI.
# -q keeps pip quiet; -U upgrades to the newest release, so no versions are pinned here.
# NOTE(review): consider pinning versions so the notebook stays reproducible.

%pip install -qU langchain-pinecone langchain-google-genai

In [None]:
# Import Colab's secrets helper and the Pinecone client classes, then build the Pinecone client.
# ServerlessSpec is imported here but only used later, when the index is created.
from google.colab import userdata
from pinecone import Pinecone, ServerlessSpec

# Read the API key from the Colab secret named 'PINECONE_API_KEY' so it is never hard-coded here
pinecone_api_key = userdata.get('PINECONE_API_KEY')

# Client handle `pc`; later cells use it to create and open the index
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
import time
# Create the Pinecone index for the RAG system (768-dim vectors, cosine similarity) if it does
# not exist yet, then open a handle to it. Guarding the creation keeps this cell safe to re-run:
# an unconditional create_index call fails once the index already exists.

index_name = "rags-system"  # Change to a unique name

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,  # must match the length of the embedding vectors stored in the index
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # A freshly created index is not usable immediately; poll until Pinecone reports it ready.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

# Publish the Google API key (read from the Colab secret 'GOOGLE_API_KEY') through the
# GOOGLE_API_KEY environment variable. `userdata` is imported in the Pinecone setup cell,
# so that cell has to run before this one.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# Embedding client for the text-embedding-004 model. No key is passed explicitly here;
# presumably it is picked up from the environment variable set above — verify.
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")


In [None]:
# Embed a sample query and preview the first 5 values of the resulting vector.
# NOTE(review): len(vector) should equal the index dimension (768) — confirm before indexing.
vector = embeddings.embed_query("We are building a rag system!")
vector[:5]

[-0.008313032798469067,
 -0.017420252785086632,
 -0.024946659803390503,
 -0.004834046121686697,
 -0.003315989626571536]

In [None]:
# Wrap the Pinecone index in a LangChain vector store.
# `index` is the handle opened in the index-creation cell and `embeddings` is the
# Google embedding client; both cells must have run before this one.

from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [None]:
from uuid import uuid4

from langchain_core.documents import Document

# Sample corpus for the demo as (source, text) pairs: tweets, news items and websites.
# The source label is stored as metadata so that searches can filter on it.
_SAMPLE_POSTS = [
    ("tweet", "I had chocalate chip pancakes and scrambled eggs for breakfast this morning."),
    ("news", "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees."),
    ("tweet", "Building an exciting new project with LangChain - come check it out!"),
    ("news", "Robbers broke into the city bank and stole $1 million in cash."),
    ("tweet", "Wow! That was an amazing movie. I can't wait to see it again."),
    ("website", "Is the new iPhone worth the price? Read this review to find out."),
    ("website", "The top 10 soccer players in the world right now."),
    ("tweet", "LangGraph is the best framework for building stateful, agentic applications!"),
    ("news", "The stock market is down 500 points today due to fears of a recession."),
    ("tweet", "I have a bad feeling I am going to get deleted :("),
]

# One Document per pair, in the order listed above.
documents = [
    Document(page_content=text, metadata={"source": origin})
    for origin, text in _SAMPLE_POSTS
]

# Keep the individual document_N names bound to the same objects for any cell that uses them.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents


In [1]:
# Give every document its own random UUID string, then add the documents to the vector store under those IDs

uuids = [str(uuid4()) for _ in documents]
vector_store.add_documents(ids=uuids, documents=documents)

In [None]:
# Similarity search restricted to documents whose metadata has source == "tweet".
# k=2 asks for the two best matches; each hit is printed with its text and metadata.

results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [None]:
# Similarity search that also returns a score, for a weather-related query.
# Only documents with source == "news" are considered and only the top hit (k=1) is requested.
# NOTE(review): the `:3f` format below means "minimum width 3, default 6 decimals", which is
# why the score prints with six decimals; if three decimals were intended it should be `.3f`.

results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=1, filter={"source": "news"}
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.639318] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]


In [2]:
# Create the Gemini chat model client used to generate the final answers.
# No API key is passed here; presumably the GOOGLE_API_KEY environment variable set in the
# embeddings cell is used — verify.

from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-001",
    temperature=0,  # zero temperature, to keep answers as repeatable as possible
    max_tokens=None,  # no explicit output limit set here (presumably the library default applies)
    timeout=None,  # no explicit request timeout set here (presumably the library default applies)
    max_retries=2,  # retry count passed to the client
    # other params...
)

In [None]:
# Retrieval-augmented answering: look up reference documents, then ask the Gemini LLM

def answer_to_user(query: str, k: int = 2):
    """Answer ``query`` with the LLM, using vector-store matches as references.

    Args:
        query: The user's question; used for the similarity search and placed in the prompt.
        k: Number of reference documents to retrieve from the vector store (defaults to 2).

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt (the final cell of the
        notebook reads its ``.content``).
    """
    vector_results = vector_store.similarity_search(query, k=k)
    # Show how many references were retrieved for this query.
    print(len(vector_results))
    final_answer = llm.invoke(
        f"Answer this user query : {query}, Here are some references to the answer : {vector_results}"
    )
    return final_answer

In [None]:
# Run the RAG helper on a sample query and keep the LLM's response in `answer`
# (the next cell displays its `.content`).
answer = answer_to_user("LangChain provides abstractions to make working with LLMs easy")

In [None]:
# Display the `content` attribute of the response stored in `answer`
answer.content