In [1]:
import os
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from dotenv import load_dotenv

USER_AGENT environment variable not set, consider setting it to identify your requests.


# Database part

In [15]:
loader = WebBaseLoader(
    web_paths=("https://cheetah.org/learn/about-cheetahs/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#    ),
)
blog_docs = loader.load()

In [16]:
blog_docs

[Document(metadata={'source': 'https://cheetah.org/learn/about-cheetahs/', 'title': 'About Cheetahs • Cheetah Facts • Cheetah Conservation Fund •', 'description': "Learn about cheetahs from Cheetah Conservation Fund. CCF has the world's leading experts on cheetahs including our founder Dr. Laurie Marker.", 'language': 'en'}, page_content="\n\n\n\n\n\n\n\n\nAbout Cheetahs • Cheetah Facts • Cheetah Conservation Fund •\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip Header\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\nSearch\n\n\n\n                Search\n            \n\n\n\n \n\n\nAbout UsWhat We DoConservationResearchEducationInternational Cheetah DayWho We AreCCF’s MissionDr. Laurie MarkerOur Field Conservation CentresStaffGovernanceCCF GlobalGet InvolvedWays to GiveCCF EventsVolunteerVisit CCFCorporate GivingLearnResource Li

In [17]:
# Spliting 
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=50,
    chunk_overlap=10)

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Index
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=embeddings)

create a retriever

In [18]:
retriever = vectorstore.as_retriever()

# HyDE

In [19]:
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq

generate_docs_for_retrieval = (
    prompt_hyde | ChatGroq(temperature=0,api_key=os.getenv('groq_api_key')) | StrOutputParser() 
)

# Run
question = "Why cheetahs are flexible?"
generate_docs_for_retrieval.invoke({"question":question})

"Cheetahs (Acinonyx jubatus) are known for their remarkable agility and hunting prowess, which are largely attributed to their exceptional flexibility. This flexibility is a result of several adaptations that have evolved in cheetahs over time to meet the demands of their specialized hunting niche.\n\nFirstly, cheetahs have a highly flexible spine, which allows them to make quick and precise movements when hunting. The spine of a cheetah is made up of 24 vertebrae, which is more than that of other big cats. This increased number of vertebrae provides cheetahs with a greater range of motion in their backs, enabling them to make sharp turns and adjust their body position rapidly while in pursuit of prey.\n\nSecondly, cheetahs have a unique shoulder and hip structure that contributes to their flexibility. The shoulder blades of a cheetah are not attached to the sternum, allowing for greater mobility and range of motion in the forelimbs. Similarly, the cheetah's pelvis is also flexible, en

In [20]:

# Retrieve
retrieval_chain = generate_docs_for_retrieval | retriever 
retireved_docs = retrieval_chain.invoke({"question":question})
retireved_docs

[Document(metadata={'description': "Learn about cheetahs from Cheetah Conservation Fund. CCF has the world's leading experts on cheetahs including our founder Dr. Laurie Marker.", 'language': 'en', 'source': 'https://cheetah.org/learn/about-cheetahs/', 'title': 'About Cheetahs • Cheetah Facts • Cheetah Conservation Fund •'}, page_content='The flexibility of the cheetah’s spine is unique. The cheetah’s long muscular tail works like a rudder, stabilizing, and acting as a counterbalance to its body weight. Swinging the tail back'),
 Document(metadata={'description': "Learn about cheetahs from Cheetah Conservation Fund. CCF has the world's leading experts on cheetahs including our founder Dr. Laurie Marker.", 'language': 'en', 'source': 'https://cheetah.org/learn/about-cheetahs/', 'title': 'About Cheetahs • Cheetah Facts • Cheetah Conservation Fund •'}, page_content='With its long legs and very slender body, the cheetah is quite different from all other cats and is the only member of its g

In [21]:

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | ChatGroq(temperature=0,api_key=os.getenv('groq_api_key'))
    | StrOutputParser()
)

final_rag_chain.invoke({"context":retireved_docs,"question":question})

"Cheetahs are flexible due to the flexibility of their spine, which is unique among big cats. This flexibility allows them to achieve high speeds and make sudden sharp turns during chases, thanks to the ability to swing their long muscular tail back and forth, acting as a rudder and counterbalance to their body weight. Additionally, the cheetah's shoulder blade does not attach to the collar bone, providing even more mobility and contributing to their exceptional agility."