<a href="https://colab.research.google.com/github/fatimakgeneng/LangChain-RAG-Project/blob/main/LangChain_RAG_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
%pip install -q -U python-dotenv langchain pinecone-client google-generativeai openai tqdm

In [51]:
import os
from dotenv import load_dotenv
from google.colab import userdata

# Retrieve and set the environment variables
os.environ['PINECONE_API_KEY'] = userdata.get('PINECONE_API_KEY')
os.environ['PINECONE_ENVIRONMENT'] = userdata.get('PINECONE_ENVIRONMENT')
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')

In [52]:
from pinecone import Pinecone
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'], environment=os.environ['PINECONE_ENVIRONMENT'])

In [53]:
# Connect to an existing index
index_name = "gemini-rag-index"  # Replace with your actual index name
index = pc.Index(index_name)
print(f"Successfully connected to index: {index_name}")

Successfully connected to index: gemini-rag-index


In [54]:
%pip install -q -U langchain-google-genai
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"])

In [55]:
%pip install -q -U langchain-community
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load documents
loader = TextLoader("/content/document.txt")  # Replace with your file
documents = loader.load()

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

In [56]:
from tqdm import tqdm

# Create embeddings and upload to Pinecone
for doc in tqdm(docs):
    vector = embeddings.embed_query(doc.page_content)
    # Pass metadata as a dictionary
    index.upsert([(doc.metadata["source"], vector, {"text": doc.page_content})])

100%|██████████| 7/7 [00:06<00:00,  1.15it/s]


In [57]:
from langchain.vectorstores import Pinecone

retriever = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings, text_key="text")

In [58]:
%pip install -q -U langchain-google-genai
from langchain_google_genai import ChatGoogleGenerativeAI

gemini_model = ChatGoogleGenerativeAI(api_key=os.environ.get("GOOGLE_API_KEY"),model="gemini-1.5-flash", temperature=0.7)

In [63]:
from langchain.chains import RetrievalQA
from langchain.vectorstores.base import VectorStoreRetriever

# Create a VectorStoreRetriever from your Pinecone index
retriever = VectorStoreRetriever(vectorstore=Pinecone.from_existing_index(index_name=index_name, embedding=embeddings, text_key="text"))


qa_chain = RetrievalQA.from_chain_type(
    llm=gemini_model,
    chain_type="stuff",  # Other options: "map_reduce", "refine"
    retriever=retriever)

In [73]:
query = "what happens when you integrate the AI in genetics"
response = qa_chain.run(query)
print(f"Fatima's Question: {query}")
print()
print(f"LLM's Response: {response}")

Fatima's Question: what happens when you integrate the AI in genetics

LLM's Response: Integrating AI into genetics has the potential to improve human health and solve critical challenges.  However, responsible innovation, collaboration, and regulation are crucial for its success.

