In [11]:
import os
import configparser
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama

In [12]:
# Set the USER_AGENT environment variable for this process.
os.environ.update({"USER_AGENT": "llm-retriever-and-tavily/0.1"})

In [13]:
# Read the configuration file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; keep that list so a missing config.ini produces
# a helpful error instead of a bare KeyError.
config = configparser.ConfigParser()
loaded_config_files = config.read('config.ini')

# Set the TAVILY_API_KEY environment variable.
# The value from config.ini takes precedence (as before); a key that is already
# exported in the environment is used only when the file or entry is missing.
tavily_api_key = config['DEFAULT'].get('TAVILY_API_KEY') or os.environ.get("TAVILY_API_KEY")
if not tavily_api_key:
    raise RuntimeError(
        "TAVILY_API_KEY not found: add it to the [DEFAULT] section of config.ini "
        f"(config files read: {loaded_config_files or 'none'}) "
        "or export it as an environment variable."
    )
os.environ["TAVILY_API_KEY"] = tavily_api_key

# Print only a masked preview (first/last 5 characters) as a sanity check.
# NOTE(review): even a partial key ends up in the saved notebook output;
# clear the output before sharing.
print(os.environ["TAVILY_API_KEY"][:5] + "..." + os.environ["TAVILY_API_KEY"][-5:])

tvly-...yzpuu


In [14]:
# Embedding model (Ollama "nomic-embed-text") handed to the vector store below.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [15]:
# Vector store: Chroma collection persisted on disk under data/chroma_db.
# The collection metadata requests the "cosine" HNSW space.
db = Chroma(
    embedding_function=embeddings,
    persist_directory="data/chroma_db",
    collection_metadata={"hnsw:space": "cosine"},
)

In [16]:
# Web-search tool used as the fallback when Chroma has no good match.
# TavilySearchResults limits the number of hits through `max_results`
# (default 5); it declares no `k` field, so `k=3` did not cap the results.
tavily_retriever = TavilySearchResults(max_results=3)

In [17]:
# Custom retriever function with similarity score filtering
def hybrid_fallback_retriever(query, threshold=0.7, threshold_tavily=0.3, k=5):
    """Retrieve context from Chroma, falling back to Tavily web search.

    Args:
        query: Text to search for.
        threshold: A Chroma hit is kept only when its score is strictly below
            this value (scores are treated as distances: lower is closer).
        threshold_tavily: A Tavily hit is kept only when its score is at least
            this value. Hits that carry no score are kept, because they
            cannot be filtered.
        k: Number of candidates requested from Chroma.

    Returns:
        The Chroma documents that pass ``threshold`` if there are any;
        otherwise a list of ``Document`` objects built from the Tavily hits
        that pass ``threshold_tavily`` (possibly empty).

    Raises:
        RuntimeError: If Tavily returns something other than a list of hits
            (the tool reports failures as an error string).
    """
    results = db.similarity_search_with_score(query, k=k)
    for doc, score in results:
        print(f"[DEBUG] [hybrid_fallback_retriever] Chroma Doc: {doc}, Score: {score}")
    chroma_docs = [doc for doc, score in results if score < threshold]

    # Local knowledge wins: only go to the web when nothing is close enough.
    if chroma_docs:
        return chroma_docs

    print("[DEBUG] [hybrid_fallback_retriever] There is no match in Chroma DB, processing with Tavily...")
    tavily_results = tavily_retriever.invoke(query)

    # On failure the tool hands back an error string instead of a list;
    # iterating it would index single characters and raise a confusing
    # TypeError, so fail with the actual message instead.
    if not isinstance(tavily_results, list):
        raise RuntimeError(f"Tavily search failed for query {query!r}: {tavily_results}")

    tavily_docs = []
    for hit in tavily_results:
        # Not every version of the tool includes a relevance score.
        hit_score = hit.get("score")
        print(f"[DEBUG] [hybrid_fallback_retriever] Tavily Doc: {hit}, Score: {hit_score}")
        if hit_score is not None and hit_score < threshold_tavily:
            continue
        # Keep the source URL (when present) so answers can be traced back.
        metadata = {"source": hit["url"]} if hit.get("url") else {}
        tavily_docs.append(Document(page_content=hit["content"], metadata=metadata))
    return tavily_docs

# Expose the hybrid retriever as a runnable so it can be composed with `|`.
retriever = RunnableLambda(
    lambda question: hybrid_fallback_retriever(question, k=5)
)

In [18]:
# Chat model served through Ollama.
# Other model tags that can be swapped in: "phi3:3.8b", "llama3.2:1b".
llm = ChatOllama(
    model="krith/meta-llama-3.1-8b-instruct:IQ2_M",
    temperature=0.3,
    name="back_agent",
)

# Prompt text for the RAG chain. `{question}` and `{context}` are template
# placeholders; the chain supplies both values when it is invoked.
prompt = """
You're a storyteller with a sharp eye for detail.
You're given a question and a context.
Only use what's in the context—no guesses, no outside knowledge.

Question: {question}

Context: {context}

Answer:
"""

# Build the chat prompt from an ordered list of (role, template) pairs.
# Passing a set of two strings creates two human messages in hash-dependent
# order, one of them the literal text "system_message". A single human
# message keeps the role the model was actually receiving the prompt under.
rag_prompt = ChatPromptTemplate.from_messages([("human", prompt)])

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Map the incoming question onto the two prompt inputs: the retrieved and
# formatted context, and the question itself passed through unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# question -> prompt inputs -> chat prompt -> LLM -> plain string
rag_chain = prompt_inputs | rag_prompt | llm | StrOutputParser()

In [19]:
# Test 1: General knowledge
question = "Who did invent the winrar?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='Herder, the blind German mechanic, who constructed it to the order of the late Professor Moriarty. For years I have been aware of its existence, though I have never' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords': '', 'moddate': '2014-03-15T14:16:40+01:00', 'page': 429, 'page_label': '424', 'producer': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/MacPorts 2013_5) kpathsea version 6.1.1', 'source': 'data/document\\The Complete Sherlock Holmes.pdf', 'subject': '', 'title': '', 'total_pages': 987, 'trapped': '/False'}, Score: 0.8082188367843628
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='second.  Kayaba — Akihiko!!  I knew that name. There was no way I didn't.  This person, both a game designer and genius in the field of' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2

In [20]:
# Test 2: General knowledge
question = "What is the color of the sky?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='But no help was forthcoming, however long they waited. On some  days the sky outside was not a crystal blue but covered with grey' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2013', 'moddate': '2016-09-09T14:43:10+02:00', 'page': 53, 'page_label': '54', 'producer': 'Microsoft® Word 2013', 'source': 'data/document\\Sword Art Online - Volume 01 - Aincrad.pdf', 'total_pages': 293}, Score: 0.66416335105896
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='shoulder.  The clouds drifted by. Then the stars began to appear one by one,  twinkling in the evening sky.  We gazed on as the world changed its colors bit by bit.' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2013', 'moddate': '2016-09-09T14:43:10+02:00', 'page': 266, 'page_label': '267', 'producer': 'Microsoft® Word 2013', 'source': 'data/document\\S

In [21]:
# Test 2: Scientific knowledge
question = "How much is the speed of light?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='the black loom of the craggy hills around us, and the yellow speck of light burning steadily in front. There is nothing so deceptive as the distance of a light' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords': '', 'moddate': '2014-03-15T14:16:40+01:00', 'page': 630, 'page_label': '625', 'producer': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/MacPorts 2013_5) kpathsea version 6.1.1', 'source': 'data/document\\The Complete Sherlock Holmes.pdf', 'subject': '', 'title': '', 'total_pages': 987, 'trapped': '/False'}, Score: 0.8389956951141357
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='properly if it reached ninety because he’d be completely out of breath. He didn’t have much time. If I can get closer to it faster… Speed. Speed was his problem. BOOM! BOOM! BOOM!' metadata={'creationdate': '', 'creator': 'PyPDF', 'page': 166, '

In [22]:
# Test 3: Scientific knowledge (analytical)
# Answering requires the model to:
#   - understand "density"
#   - convert units (from cm3 to liters)
question = "An XYZ oil has a density of 1.9 g/cm3. What is the mass of 2 liters of the oil?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='the one case a ﬁre, in the other a lamp. The ﬁre was needed, but the lamp was lit—as a comparison of the oil consumed will show—long after it was broad day-' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords': '', 'moddate': '2014-03-15T14:16:40+01:00', 'page': 836, 'page_label': '831', 'producer': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/MacPorts 2013_5) kpathsea version 6.1.1', 'source': 'data/document\\The Complete Sherlock Holmes.pdf', 'subject': '', 'title': '', 'total_pages': 987, 'trapped': '/False'}, Score: 0.8522309064865112
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='resulting drop of blood in a chemical pipette. “Now, I add this small quantity of blood to a litre of water. You perceive that the resulting mixture has the' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywo

In [30]:
# Test 4: Story knowledge (chroma)
question = "What is the main problem in Sword Art Online?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='bug; it is all part of «Sword Art Online»'s system.』  “Part of... the system?” Klein muttered brokenly.' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2013', 'moddate': '2016-09-09T14:43:10+02:00', 'page': 35, 'page_label': '36', 'producer': 'Microsoft® Word 2013', 'source': 'data/document\\Sword Art Online - Volume 01 - Aincrad.pdf', 'total_pages': 293}, Score: 0.5371670722961426
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='monotonous voice.  『But I ask all of you to understand that «Sword Art Online» is no  longer a simple game. It is a second reality.... From now on, any' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2013', 'moddate': '2016-09-09T14:43:10+02:00', 'page': 39, 'page_label': '40', 'producer': 'Microsoft® Word 2013', 'source': 'data/document\\Sword Art Online - Volume 01 - Aincrad.p

In [24]:
# Test 5: General knowledge (high resources on internet)
question = "Why did Elon Musk build SpaceX?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='made entirely of data.  So...  Half a year ago, this machine (which started selling in May, 2022)  successfully created a «Virtual Reality». The electronics company' metadata={'author': 'Kadir', 'creationdate': '2016-09-09T14:43:10+02:00', 'creator': 'Microsoft® Word 2013', 'moddate': '2016-09-09T14:43:10+02:00', 'page': 18, 'page_label': '19', 'producer': 'Microsoft® Word 2013', 'source': 'data/document\\Sword Art Online - Volume 01 - Aincrad.pdf', 'total_pages': 293}, Score: 0.9396781921386719
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='serve as well.”. “I wanted to hire his steam launch.”. “Why, bless you, sir, it is in the steam launch that he has gone. That’s what puzzles me; for I know' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords': '', 'moddate': '2014-03-15T14:16:40+01:00', 'page': 96, 'page_label': '91', 'producer': '', 'ptex.fullbanner': 'This is pd

In [33]:
# Test 6: General knowledge (low resources on internet)
question = "Where did Ryan Garnet Andrianto get his bachelor degree?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='employment, made him head of a department, and showed his good-will towards him in every way. Signor Castalotte was a bachelor, and I believe that he' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords': '', 'moddate': '2014-03-15T14:16:40+01:00', 'page': 787, 'page_label': '782', 'producer': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/MacPorts 2013_5) kpathsea version 6.1.1', 'source': 'data/document\\The Complete Sherlock Holmes.pdf', 'subject': '', 'title': '', 'total_pages': 987, 'trapped': '/False'}, Score: 0.8723361492156982
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='was, of course, for the propagation and spread of the red-heads as well as for their maintenance. It is exceedingly unfortunate that you should be a bachelor.’' metadata={'author': '', 'creationdate': '2014-03-15T14:16:40+01:00', 'creator': '', 'keywords'

In [34]:
# Test 7: Story knowledge (chroma)
question = "Who is Harry Potter female friend that is good at magic?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='girl, I suppose!”. “No, it’s because you’re supposed to be the best at magic!” shot back Ron. Hermione jumped up and bits of roast pike slid oﬀ her tin plate onto the ﬂoor.' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 300, 'page_label': '293', 'producer': 'pdfeTeX-1.21a', 'ptex.fullbanner': 'This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) kpathsea version 3.5.4', 'source': 'data/document\\Harry Potter And The Deathly Hallows.pdf', 'subject': '', 'title': 'Harry Potter and the Deathly Hallows', 'total_pages': 768}, Score: 0.6729204654693604
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='“A History of Magic, ” said Hermione, looking interested. “So your parents knew her? She was an incredible magic historian.”. “And she’s still alive,” said Harry, “and she lives in Godric’s' metadata={'author': 'J. K. Rowling', 'cre

In [27]:
# Test 8: Story knowledge (chroma)
question = "Who is a friend of Harry that is clumsy?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='“‘Wit beyond measure is man’s greatest treasure.’ ”. “Which makes you pretty skint, witless,” said a cackling voice. Harry whirled around, slipped oﬀ the plinth, and landed on the' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 595, 'page_label': '588', 'producer': 'pdfeTeX-1.21a', 'ptex.fullbanner': 'This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) kpathsea version 3.5.4', 'source': 'data/document\\Harry Potter And The Deathly Hallows.pdf', 'subject': '', 'title': 'Harry Potter and the Deathly Hallows', 'total_pages': 768}, Score: 0.6284981369972229
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='who staggered backward and fell into a nearby chair, clutching at the neck of her old tartan dressing gown. “I don’t think it makes any diﬀerence what we call him,” Harry' metadata={'author': 'J. K. Rowling', 'creationdate': 

In [28]:
# Test 8: Story knowledge (chroma)
question = "What is the real problem of Harry Potter?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='might be blamed for Harry Potter’s continued existence. Voldemort, however, seemed to be speaking more to himself than to any 6' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 13, 'page_label': '6', 'producer': 'pdfeTeX-1.21a', 'ptex.fullbanner': 'This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) kpathsea version 3.5.4', 'source': 'data/document\\Harry Potter And The Deathly Hallows.pdf', 'subject': '', 'title': 'Harry Potter and the Deathly Hallows', 'total_pages': 768}, Score: 0.6736656427383423
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='many mistakes where Harry Potter is concerned. Some of them have been my own. That Potter lives is due more to my errors than to his triumphs.”' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 13,

In [29]:
# Test 9: Story knowledge (chroma)
question = "How did Harry Potter ended up at Hogwarts?"
response = rag_chain.invoke(question)
# Blank lines separate the answer from the retriever's debug output.
print("\n\n", response, sep="\n")

[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='the school!”. “Okay, then,” said Harry, defeated. “Forget Hogwarts.” Without any other leads, they traveled into London and, hidden' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 297, 'page_label': '290', 'producer': 'pdfeTeX-1.21a', 'ptex.fullbanner': 'This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) kpathsea version 3.5.4', 'source': 'data/document\\Harry Potter And The Deathly Hallows.pdf', 'subject': '', 'title': 'Harry Potter and the Deathly Hallows', 'total_pages': 768}, Score: 0.542823314666748
[DEBUG] [hybrid_fallback_retriever] Chroma Doc: page_content='You-Know-Who, it was Hogwarts!”. “Oh, come on,” scoﬀed Ron. “His school?”. “Yeah, his school! It was his ﬁrst real home, the place that' metadata={'author': 'J. K. Rowling', 'creationdate': '2007-07-23T04:17:48-07:00', 'creator': 'Dark Miasma', 'keywords': '', 'page': 296, 