<a href="https://colab.research.google.com/github/hhnafis/PIAIC_LangChain_RAG_Project/blob/main/PIAIC_LangChain_RAG_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install -Uq langchain-pinecone langchain-google-genai

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.3/427.3 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.5/87.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

# Set up environment variables

In [None]:
from pinecone import Pinecone, ServerlessSpec
from google.colab import  userdata
# Fetch the Pinecone key through Colab's `userdata` helper instead of hardcoding it.
pinecone_api_key = userdata.get("PINECONE_API_KEY")
# Pinecone client handle; the index is created and opened through `pc`.
pc = Pinecone(api_key=pinecone_api_key)

# Initializing Pinecone (creating the index)

In [None]:
import time

index_name = "langchain-rag-project"

# Creating an index that already exists raises an error, which made this cell
# fail on every re-run. Look up the existing index names first and only create
# the index when it is missing.
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        # Has to match the length of the vectors produced by the embedding
        # model used with this index (models/embedding-001 in this notebook).
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Wait until Pinecone reports the new index as ready before using it.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Handle used by the vector store for upserts and queries.
index = pc.Index(index_name)

# Use LangChain for the RAG workflow
### Use Google Gemini embeddings to vectorize a document

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
# Publish the Gemini key as an environment variable; the embeddings client
# is constructed without an explicit key argument.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# Embedding model used to vectorize both the document chunks and the queries.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Set up the document loader

In [None]:
# Installing the necessary packages
!pip install -Uq langchain-community
!pip install -Uq pypdf
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# # Setup the document loader
# file_path = '/content/AI.pdf'
# loader = PyPDFLoader(file_path) #Change file path if needed
# documents = loader.load()

# # Setup the splitter
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
# docs = text_splitter.split_documents(documents)


#---------------------------Adding more than one documents--------------------#
file_paths = ["/content/AI.pdf", "/content/AI_applications.pdf", "/content/AI_and_its_scope_in_academia.pdf"]
all_docs = []

for file_path in file_paths:
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    docs = text_splitter.split_documents(documents)

    for doc in docs:
        doc.metadata["source"] = file_path  # Store the file path in metadata

    all_docs.extend(docs)

docs = all_docs





# Embed and store documents in Pinecone

In [None]:
from langchain_pinecone import PineconeVectorStore
from uuid import uuid4
from langchain_core.documents import Document

# Vector store that embeds text with `embeddings` and persists it in `index`.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Rebuild each chunk as a Document that carries only its text and its
# source file; any other loader metadata is intentionally left out.
documents_to_index = [
    Document(page_content=doc.page_content, metadata={"source": doc.metadata["source"]})
    for doc in docs
]

# One random unique id per chunk.
# NOTE(review): because the ids are random, re-running this cell stores the
# same chunks again under new ids.
document_ids = [str(uuid4()) for _ in documents_to_index]

# Hand the whole list over in a single call instead of one call per chunk,
# which cost one embedding request and one upsert for every chunk.
if documents_to_index:
    vector_store.add_documents(documents=documents_to_index, ids=document_ids)

# Set up the retriever

In [None]:
import textwrap

# Check retrieval on its own: fetch the three closest chunks for a sample
# question and print each one, wrapped to 150 columns, with its metadata.
# (A `filter={"source": ...}` argument could be added to restrict the search
# to a single file.)
results = vector_store.similarity_search(
    "What is  AI? and what is the role of AI in academia?",
    k=3,
)
for hit in results:
    print(
        "-" * 30,
        textwrap.fill(hit.page_content, width=150),
        "\n",
        hit.metadata,
        sep="\n",
    )

------------------------------
working to develop systems that can help students to choose  majors based on areas where they succeed and struggle. While  students don’t have to take
the advice, it could mark a  brave  new world of college major selection for future students.    3.1.6. It is altering how we find and interact with
information  We rarely even notice the AI systems that affect the  information we see and find on a daily basis. Google adapts  results to u sers
based on location, Amazon makes  recommendations based on previous purchases, Siri adapts to  your needs and commands, and nearly all web ads are
geared  toward your interests and shopping preferences.   These kinds of intelligent systems play a big role in how we  interact with information in
our personal and professional  lives, and could just change how we find and use information  in schools and academia as well. Over the past few
decades,  AI-based systems have already radically changed how we


{'source': '/cont

# Set up the Google Gemini Flash model

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that turns the retrieved context plus the question into an answer.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.2,   # low sampling temperature
    max_retries=2,
    max_tokens=None,   # no explicit output-length cap is passed
    timeout=None,      # no explicit request timeout is passed
    verbose=True,
)

# Now combine the retriever and the LLM into a complete RAG system

In [None]:
def answer_to_user(query: str):
    """Answer ``query`` with the LLM, using the closest stored chunk as reference.

    Args:
        query: The user's question.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    # Retrieve the single best-matching chunk from the vector store.
    references = vector_store.similarity_search(query, k=1)

    # The prompt carries the question followed by the retrieved results,
    # rendered with their default string representation.
    prompt = f"ANSWER THIS USER QUERY: {query}, Here are some references {references}"
    return llm.invoke(prompt)

# Testing the RAG system

In [None]:
# Ask one question through the retrieval + LLM pipeline.
answer = answer_to_user("What is Artificial Intelligence?")
# Bare last expression so the notebook displays the reply text.
answer.content

'Based on the provided text, Artificial Intelligence (AI) is a field that uses advanced techniques to solve real-life problems.  It encompasses subfields like Machine Learning (ML) and Deep Learning (DL), with ML being a subset of AI and DL being a subset of ML.  One example of an AI application mentioned is Natural Language Processing (NLP), which focuses on computer-human communication (like Google Translate).'

Note: Comparing this answer with the raw chunks returned by the plain similarity search above shows what the RAG step adds: the LLM condenses the retrieved text into a direct, readable answer.

# Deployment of the RAG system as an API

In [None]:
#Complete RAG system deployment using FastAPI
!pip install -Uq fastapi uvicorn
#importing necessary libraries
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from langchain_core.documents import Document

# Initialize FastAPI app
app = FastAPI(title="RAG API", description="Retrieval Augmented Generation API")

def answer_to_user(query: str):
    """Run one retrieve-then-generate round and return a plain dict.

    This definition replaces the earlier ``answer_to_user`` in the notebook
    namespace: it returns a dict instead of the raw model message.

    Args:
        query: The user's question.

    Returns:
        ``{"question": query, "answer": <content of the model reply>}``.
    """
    hits = vector_store.similarity_search(query, k=1)

    # Use the text of the best hit as context; fall back to an empty
    # context when the search returned nothing.
    if hits:
        context_content = hits[0].page_content
    else:
        context_content = ""

    prompt = f"""
    Question:
    {query}

    Context:
    {context_content}


    Response (Generated by Gemini):

    """
    reply = llm.invoke(prompt)
    return {"question": query, "answer": reply.content}


# Request body model for the POST /query endpoint.
class Query(BaseModel):
    query: str  # the user's question
# API endpoint
@app.post("/query")
async def query_rag(query_data: Query):
    """Answer a question through the RAG pipeline.

    Args:
        query_data: Request body carrying the user's question in ``query``.

    Returns:
        A dict with the keys ``"Question"`` and
        ``"Response (Generated by Gemini)"``.

    Raises:
        HTTPException: With status 500 when retrieval or generation fails.
    """
    try:
        # NOTE(review): answer_to_user is a plain synchronous function, so this
        # call blocks inside the async handler while it runs.
        answer = answer_to_user(query_data.query)
        response = {
            "Question": query_data.query,
            # answer_to_user returns a dict, not a message object; reading
            # `.content` from it raised AttributeError on every request and
            # turned each call into a 500.
            "Response (Generated by Gemini)": answer["answer"],
        }
        return response
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}") from e

In [None]:
# Call the dict-returning answer_to_user directly with a sample question.
test_query = "What were the major milestones in AI development during the 20th century?"
test_answer = answer_to_user(test_query)

# Print the question and answer in the desired format
print(
    "Question:",
    test_answer["question"],
    "",
    "Response (Generated by Gemini):",
    '"{}"'.format(test_answer["answer"]),
    sep="\n",
)

Question:
What were the major milestones in AI development during the 20th century?

Response (Generated by Gemini):
"The provided context mentions a resurgence of AI in the 2010s but lacks detail on 20th-century milestones.  To answer the question, we need to look beyond the given text.  Here are some major milestones in AI development during the 20th century:

* **1950s:**
    * **Alan Turing's "Computing Machinery and Intelligence" (1950):**  This paper proposed the Turing Test, a benchmark for machine intelligence.
    * **Dartmouth Workshop (1956):**  Widely considered the birth of AI as a field.  Researchers coined the term "artificial intelligence" and laid out the ambitious goals of the field.  Early programs like the Logic Theorist and the General Problem Solver were developed.

* **1960s:**
    * **Development of early expert systems:** Programs designed to mimic the decision-making of human experts in specific domains (e.g., medical diagnosis).
    * **ELIZA (1966):** A natu

## Ended!!!!!