In [10]:
import bs4 # for Scrapping
import os
from dotenv import load_dotenv
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI #Google genai chat models
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# Load variables from a .env file (if present) so the key lookup below can see them.
load_dotenv()

# Pass the API key from the environment to the google.generativeai SDK;
# the lookup yields None when GOOGLE_API_KEY is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Gemini chat model wrapper used as the LLM in this notebook.
llm = ChatGoogleGenerativeAI(model="gemini-pro")  # type: ignore

In [8]:
# Load the blog post, restricting BeautifulSoup parsing to elements carrying the
# post-content / post-title / post-header CSS classes.
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()


In [15]:
# Sample passage used to experiment with the text splitter.
# NOTE(review): the explicit "\n" escapes after "February 2021" and before
# "Before college" add newlines on top of the literal line breaks, while the
# recorded `text` output shows only one blank line there — confirm whether the
# escapes are intended or the cell was edited without being re-run.
text = """What I Worked On

February 2021\n

\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.

The first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.
"""

In [37]:
# Character count of a sample fragment — the same text that appears as the
# second chunk in the splitter output.
chars = "Before college the two main things I worked on, outside of school, were writing and programming. I"
len(chars)


98

In [46]:
# Deliberately small chunks so the splitting and overlap are easy to inspect by eye.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,       # target upper bound on chunk length
    chunk_overlap=20,     # characters shared between neighbouring chunks
    length_function=len,  # measure chunk length in characters
)

In [47]:
# Break the sample passage into a list of overlapping string chunks.
texts = text_splitter.split_text(text)

In [48]:
# Show the original passage for comparison with the chunks.
text

'What I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district\'s 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain\'s lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.\n'

In [49]:
# Show every chunk produced by the splitter.
texts

['What I Worked On\n\nFebruary 2021',
 'Before college the two main things I worked on, outside of school, were writing and programming. I',
 "and programming. I didn't write essays. I wrote what beginning writers were supposed to write then,",
 'to write then, and probably still are: short stories. My stories were awful. They had hardly any',
 'They had hardly any plot, just characters with strong feelings, which I imagined made them deep.',
 'The first programs I tried writing were on the IBM 1401 that our school district used for what was',
 'used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The',
 "I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high",
 'of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a',
 "it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU,",
 'machines — CPU, disk drives, printer, card re

In [50]:
# First chunk: the heading lines of the passage.
texts[0]

'What I Worked On\n\nFebruary 2021'

In [51]:
# Second chunk: ends with "and programming. I", which the next chunk repeats.
texts[1] 

'Before college the two main things I worked on, outside of school, were writing and programming. I'

In [52]:
# Third chunk: starts with the tail of the previous chunk (the configured overlap).
texts[2]

"and programming. I didn't write essays. I wrote what beginning writers were supposed to write then,"

In [35]:
import bs4 # for Scrapping
import os
from dotenv import load_dotenv
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI #Google genai chat models
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough




# Load variables from a .env file (if present) so the key lookup below can see them.
load_dotenv()

# Pass the API key from the environment to the google.generativeai SDK;
# the lookup yields None when GOOGLE_API_KEY is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Gemini chat model wrapper that generates the final answer in the RAG chain.
llm = ChatGoogleGenerativeAI(model="gemini-pro")  # type: ignore

# Load the blog post, restricting BeautifulSoup parsing to elements carrying the
# post-content / post-title / post-header CSS classes.
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Chunk the loaded page: chunk_size 1000 with 200 characters of overlap.
# This rebinds `text_splitter`, replacing the 100/20 demo splitter from the earlier cell.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# Embed the chunks with the Google embedding model and index them in Chroma.
# NOTE(review): no collection name or persist directory is given — re-running this
# cell in the same kernel may add the same chunks to the store again; confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(embedding=embeddings, documents=splits)

# Retriever view of the vector store; it feeds the "context" slot of the RAG chain.
retriever = vectorstore.as_retriever()

# Pull an already existing RAG prompt ("rlm/rag-prompt") from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values, in order, joined with a blank line
        between consecutive documents.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# RAG pipeline: build the prompt inputs, fill the prompt, call the LLM, return plain text.
rag_chain = (
    {
        # Retrieved documents, flattened into one string by format_docs.
        "context": retriever | format_docs,
        # The chain input is forwarded unchanged as the question.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [38]:
# Ask the RAG chain a question; the cell displays the returned answer string.
# NOTE(review): the recorded answer says the retrieved context did not cover the
# question — worth inspecting what the retriever returns for this query.
rag_chain.invoke("What is Decomposition?")

'The provided context does not contain any information about Decomposition, so I cannot answer this question.'

In [34]:
# Run the same query and print the answer text.
# NOTE(review): the recorded output below is a prompt-template repr, not an answer,
# and this cell's execution count ([34]) precedes the cell that defines the chain
# ([35]) — the output looks stale; re-run after a kernel restart to confirm.
response = rag_chain.invoke("What is Decomposition?")
print(response)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]
