In [1]:
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_ollama.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
loader = WebBaseLoader("https://en.wikipedia.org/wiki/Jon_Jones")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(docs)
print(f"Número de chunks: {len(chunks)}")

Número de chunks: 179


In [3]:
# Modelos
embeddings = OllamaEmbeddings(model="nomic-embed-text")
llm = ChatOllama(model="llama3.2", temperature=0.001)

In [4]:
# Persiste vetores no banco de dados local
Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./data",
)

<langchain_community.vectorstores.chroma.Chroma at 0x74fed5352ce0>

In [5]:
vectorstore = Chroma(persist_directory="./data", embedding_function=embeddings)
retriever = vectorstore.as_retriever()

  vectorstore = Chroma(persist_directory="./data", embedding_function=embeddings)


In [6]:
# Testando o retriever
retriever.invoke("Where was Jon Jones born?")

[Document(metadata={'language': 'en', 'source': 'https://en.wikipedia.org/wiki/Jon_Jones', 'title': 'Jon Jones - Wikipedia'}, page_content='Retrieved from "https://en.wikipedia.org/w/index.php?title=Jon_Jones&oldid=1259995859"'),
 Document(metadata={'language': 'en', 'source': 'https://en.wikipedia.org/wiki/Jon_Jones', 'title': 'Jon Jones - Wikipedia'}, page_content='Early life\nJones was born on July 19, 1987, in Rochester, New York.[32] His father Arthur is a pastor at Mount Sinai Church of God in Christ in Binghamton, New York.[33] Arthur discouraged Jon\'s fighting career, "I wanted him to preach. I tried to discourage him from being a fighter. I told him you don\'t want to do that. You can do other things. Be a pastor."[33] Jon\'s mother, Camille, died in 2017 at the age of 55, after a long battle with diabetes.[34]\nJon was one of four children. His older brother, Arthur, is a former American football defensive lineman who played for the Baltimore Ravens, Indianapolis Colts and W

In [7]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a helpful AI assistant that answers the user's question based only on the context below:
            <context>
                {context}
            </context>

            Answer the user's questions considering the above context only.
            """,
        ),
        ("human", "{question}"),
    ]
)

prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are a helpful AI assistant that answers the user's question based only on the context below:\n            <context>\n                {context}\n            </context>\n\n            Answer the user's questions considering the above context only.\n            "), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})])

In [8]:
chain = (
    {"question": RunnablePassthrough(), "context": retriever} | prompt | llm | StrOutputParser()
)

In [10]:
stream = chain.stream("Did Jon Jones win the fight against Stepe Miotic?")

for chunk in stream:
    print(chunk, end="", flush=True)
print()

According to the provided context, yes, Jon Jones won his fight against Stipe Miocic. He won the bout by technical knockout via a spinning back kick followed by punches in the third round at UFC 309 on November 16, 2024.


In [12]:
def prompt_printer(prompt):
    print(prompt)

    return prompt

# Debuggin chain
chain = (
    {"question": RunnablePassthrough(), "context": retriever}
    | prompt
    | prompt_printer
    | llm
    | StrOutputParser()
)

answer = chain.invoke("Did Jon Jones win the fight against Stepe Miotic?")
print(answer)

messages=[SystemMessage(content='You are a helpful AI assistant that answers the user\'s question based only on the context below:\n            <context>\n                [Document(metadata={\'language\': \'en\', \'source\': \'https://en.wikipedia.org/wiki/Jon_Jones\', \'title\': \'Jon Jones - Wikipedia\'}, page_content=\'^ "Jon Jones defeats Dominick Reyes". mmadecisions.com. February 8, 2020. Archived from the original on May 2, 2021. Retrieved February 9, 2020.\\n\\n^ "Jon Jones Sets UFC Record With Win Over Dominick Reyes". Forbes.com. February 9, 2020. Archived from the original on May 18, 2020. Retrieved February 9, 2020.\\n\\n^ Bohn, Mike (January 14, 2023). "Francis Ngannou released; Jon Jones returns, meets Ciryl Gane for newly vacated heavyweight title at UFC 285". mmajunkie.usatoday.com. Archived from the original on June 2, 2023. Retrieved January 14, 2023.\\n\\n^ Lelinwalla, Mark (March 5, 2023). "UFC 285: Jon Jones delivers first-round submission of Ciryl Gane to become h