In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [11]:
import google.generativeai as genai
from google.colab import userdata

In [12]:
# Pull the Gemini API key out of Colab's per-user secret store, then hand it
# to the google.generativeai SDK so direct SDK calls are authenticated too.
api_key = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=api_key)


In [13]:
# Load the essay from disk as LangChain documents.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale (which differs between Colab, Windows, etc.).
file_path = "/content/paul_graham_essay.txt"
loader = TextLoader(file_path, encoding="utf-8")
documents = loader.load()

# Split the text into overlapping chunks: up to 1000 characters each, with
# 200 characters shared between neighbours so context is not cut mid-thought.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

In [14]:
# Embedding client used to vectorise both the document chunks and the queries.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=api_key,
)

In [15]:
# Embed every chunk into a FAISS index and wrap it as a similarity retriever
# that returns the 10 closest chunks per query.
# Note: despite its name, `vector_store` is bound to the retriever object,
# not to the FAISS index itself.
vector_store = FAISS.from_documents(texts, embeddings).as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

In [16]:
# Chat model that writes the final answer; temperature 0 and a 500-token cap
# are requested to keep replies short and repeatable.
# NOTE(review): confirm `request_options` is actually honoured by this
# constructor and that the timeout value is in the intended unit.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=api_key,
    temperature=0,
    max_tokens=500,
    request_options={"timeout": 5000},
)


In [17]:
# System instructions for the question-answering assistant.
# `{context}` is a template placeholder that is filled in when the prompt is
# formatted; everything before it is sent to the model verbatim, so typos and
# missing spaces between the concatenated pieces matter.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. "
    "Use three sentences maximum and keep the answer as concise as possible. "
    "Always say 'thanks for asking!' at the end of the answer."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system message carries the instructions
# (with its `{context}` slot), the human message carries the question via
# the `{input}` slot.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [18]:
# Step 1: a chain that passes the supplied documents and the question to the
# LLM through `prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Step 2: put the retriever in front of it, so a question is first used to
# fetch documents and then answered from them.
rag_chain = create_retrieval_chain(
    retriever=vector_store,
    combine_docs_chain=question_answer_chain,
)

In [26]:
# Run one question through the RAG chain and print only the generated answer,
# followed by a blank line.
# Alternative question to try: "What is the main topic of the document?"
question = "Who was Sam Altman?"
response = rag_chain.invoke({"input": question})
print(response["answer"])
print()

Sam Altman was recruited to be the president of Y Combinator, succeeding the founders.  He initially declined the offer, wanting to start a nuclear reactor company. However, he eventually accepted and took over in the winter of 2014. Thanks for asking!

