<a href="https://colab.research.google.com/github/algorise/agentic-ai/blob/main/rag_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install langchain pinecone-client google-generativeai openai tqdm

In [None]:
pip install dotenv

In [None]:
import os
from google.colab import userdata
from dotenv import load_dotenv

# Pick up variables from a local .env file, if one is present.
load_dotenv()

# Copy the API keys from Colab's secret store into the process environment so
# later cells can read them with os.getenv(). `userdata.get` is the accessor
# for Colab secrets; the module has no `getenv` function.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
os.environ["PINECONE_API_KEY"] = userdata.get("PINECONE_API_KEY")

In [None]:
from pinecone import Pinecone, ServerlessSpec

# Initialize the Pinecone client with the key exported by the secrets cell.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the index that stores the document embeddings.
index_name = "online-rag-project"

# Create the index only when it does not exist yet: create_index() fails on an
# existing name, which would break this cell every time it is re-run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the length of the vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Connect to the (new or pre-existing) index.
index = pc.Index(index_name)

In [None]:
from langchain.embeddings import GoogleGenerativeAIEmbeddings

# Embedding client used to vectorise document chunks (and, presumably, queries).
# NOTE(review): confirm that the installed LangChain release really exports
# `GoogleGenerativeAIEmbeddings` from `langchain.embeddings` and accepts
# `api_key` without a `model` argument -- this class is usually distributed in
# the separate `langchain-google-genai` package. Also confirm the model emits
# 768-dimensional vectors, since the Pinecone index is created with
# dimension=768.
embeddings = GoogleGenerativeAIEmbeddings(
    api_key=os.environ.get("GOOGLE_API_KEY"),
)

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the source text file into LangChain documents.
loader = TextLoader("documents.txt")  # Replace with your file
documents = loader.load()

# Cut the documents into chunks of at most 500 characters, with a 50-character
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)

In [None]:
from tqdm import tqdm

# Create embeddings and upload them to Pinecone.
#
# Each vector gets its own ID ("<source>-<chunk number>"): all chunks of one
# file share the same `source`, so using it alone as the ID would make every
# upsert overwrite the previous one. The chunk text is stored under the "text"
# metadata key because Pinecone metadata must be a dict, not a bare string.
UPSERT_BATCH_SIZE = 100  # number of chunks embedded and upserted per round trip

with tqdm(total=len(docs)) as progress:
    for start in range(0, len(docs), UPSERT_BATCH_SIZE):
        batch = docs[start:start + UPSERT_BATCH_SIZE]
        # Embed the whole batch with a single call instead of one per chunk.
        vectors = embeddings.embed_documents(
            [chunk.page_content for chunk in batch]
        )
        index.upsert(
            vectors=[
                {
                    "id": f"{chunk.metadata['source']}-{start + offset}",
                    "values": vector,
                    "metadata": {
                        "text": chunk.page_content,
                        "source": chunk.metadata["source"],
                    },
                }
                for offset, (chunk, vector) in enumerate(zip(batch, vectors))
            ]
        )
        progress.update(len(batch))

In [None]:
from langchain.chat_models import GoogleGeminiFlash

# Chat model that generates the final answers.
# NOTE(review): confirm that `langchain.chat_models` exports a class named
# `GoogleGeminiFlash` in the installed LangChain release; Gemini chat models
# are usually provided by the separate `langchain-google-genai` package.
gemini_model = GoogleGeminiFlash(
    api_key=os.environ.get("GOOGLE_API_KEY"),
    temperature=0.7,
)

In [None]:
from langchain.chains import RetrievalQA

# Build the question-answering chain from the chat model and a retriever.
# NOTE(review): `retriever` is not defined in any cell visible in this part of
# the notebook -- confirm it is created before this cell runs.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",  # Other options: "map_reduce", "refine"
    llm=gemini_model,
)

In [None]:
# Ask a sample question through the QA chain and show the answer.
query = "What is the history of artificial intelligence?"

response = qa_chain.run(query)

print(response)