<a href="https://colab.research.google.com/github/UmardrazArshad/Quarter_02_PROJECTS/blob/main/02_LangChain_Rag_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install -qU langchain-pinecone langchain-google-genai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m0.8/1.2 MB[0m [31m22.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.8/244.8 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.4/85.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

##  **Initialize Pinecone**

In [39]:
#  Initialize Pinecone
from google.colab import userdata

from pinecone import Pinecone, ServerlessSpec

pinecone_api_key = userdata.get('PINECONE_API_KEY')


pc = Pinecone(api_key=pinecone_api_key)

In [41]:
#create index
index_name = "lanchain-rag-project-new"  # change if desired

pc.create_index(
    name=index_name,
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index(index_name)

In [42]:
#Embedding
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [43]:
# Vectors
vector = embeddings.embed_query(" Tell me the road map ot learn Agentic Ai.")

In [44]:
vector[:6]

[0.018941514194011688,
 -0.0410316176712513,
 0.002591670723631978,
 -0.023518061265349388,
 0.03384358808398247,
 0.0043175071477890015]

In [45]:
# Vector Store
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

## **Set Up Document Loader**

In [46]:
!pip install docx2txt -q
from langchain.document_loaders import TextLoader
from langchain.document_loaders import Docx2txtLoader

# Instead of using TextLoader with an invalid encoding,
# use Docx2txtLoader which is designed for .docx files.
loader = Docx2txtLoader("/content/Agentic_AI_Expert_Roadmap.docx")

documents = loader.load()

In [47]:
# Import the necessary class
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

## **Embeddings**

In [48]:
from tqdm import tqdm

# Create embeddings and upload to Pinecone
for doc in tqdm(docs):
    vector = embeddings.embed_query(doc.page_content)
    # Change this line to provide a metadata dictionary
    index.upsert([(doc.metadata["source"], vector, {"text": doc.page_content})]) #  Add metadata as a dictionary with key 'text'

100%|██████████| 12/12 [00:04<00:00,  2.94it/s]


##**Set Up Retriever**

In [49]:
from langchain_pinecone import PineconeVectorStore

# Use 'index' instead of 'pinecone_index'
# Initialize PineconeVectorStore with the index and embeddings
retriever = PineconeVectorStore(index=index, embedding=embeddings)

In [50]:
results = vector_store.similarity_search_with_score(
    "Raodmap to achieve agentic ai"
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.556472] 5. **Freelancing Skills**: Effective client communication, proposal writing, and project delivery. [{}]


## **Set Up Google Gemini Flash Model**

In [51]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [69]:
# Combine Retriever and LLM
from re import search
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA

vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings
    )

retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Other options: "map_reduce", "refine"
    retriever=retriever,
    return_source_documents=True
)

In [63]:
query = "Give me the road map to learn&acheive Agentic AI"
def answer_to_user (query : str):

  # Vector Search
  vector_results = vector_store.similarity_search(query, k=2)
  print (len(vector_results))

  #pass to Model vector Results + User Query
  final_answer = llm.invoke(f"ANSWER THIS UAER QUERY : {query}, Here are some referance to answer {results}")

  return final_answer

## **Query the RAG System**

In [72]:
query = "What kind of steps should i follow to learn agentic ai?"

# Calling answer_to_user with the 'query' string instead of the undefined 'final_answer'
answer = answer_to_user(query)

1


In [73]:
display (f"QUESTION : {query}")
display (f"RESPONSE (Generated by Gemini): {answer}")

'QUESTION : What kind of steps should i follow to learn agentic ai?'

'RESPONSE (Generated by Gemini): content="The provided document fragment only mentions freelancing skills relevant to *working with* Agentic AI, not learning about it.  Therefore, I cannot answer your query using only that reference.  To learn about Agentic AI, you\'ll need to follow a broader learning path.  This would likely involve:\\n\\n1. **Understanding Foundational Concepts:**  Start with a strong grasp of core AI concepts like machine learning, deep learning, reinforcement learning, and natural language processing (NLP).  Online courses (Coursera, edX, Udacity), textbooks, and university-level courses are excellent resources.\\n\\n2. **Focusing on Reinforcement Learning (RL):** Agentic AI heavily relies on RL, which allows agents to learn through trial and error in an environment.  Deep dive into RL algorithms like Q-learning, SARSA, and deep reinforcement learning (DRL) techniques.\\n\\n3. **Exploring Agent Architectures:**  Learn about different agent architectures, such as t