In [1]:
from langchain_ollama import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import os

from dotenv import load_dotenv
load_dotenv()

True

### For Reranking

In [2]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker

## Select Model

In [3]:
MODEL = "llama3.1:8b-instruct-fp16"
EMBED_MODEL = "Qwen/Qwen3-Embedding-0.6B"
RERANK_MODEL = "BAAI/bge-reranker-base"

In [4]:
# Initialize an instance of the Ollama model
llm = OllamaLLM(model=MODEL, base_url=f"http://{os.getenv("M416_4090")}:11434")

# Invoke the model to generate responses
response = llm.invoke("Who are you?")
print(response)

I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."


## Hugging face login

In [5]:
from huggingface_hub import login

hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)
!huggingface-cli whoami

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[1muser: [0m AnsonLau
[1morgs: [0m discord-community,siphonobench


## Read Docs

In [6]:
# We simple split by line here, for advance RAG, will will have different chunking strategy.
with open("../../Docs/Text/cat-facts.txt", "r") as f:
  refs = f.read().splitlines()
refs[:3]

['On average, cats spend 2/3 of every day sleeping. That means a nine-year-old cat has been awake for only three years of its life.',
 'Unlike dogs, cats do not have a sweet tooth. Scientists believe this is due to a mutation in a key taste receptor.',
 'When a cat chases its prey, it keeps its head level. Dogs and humans bob their heads up and down.']

In [7]:
# Create docs using Document datatype from langchain
# Each chunk will have a id and content
from langchain_core.documents import Document

docs = [Document(
    page_content=doc, 
    metadata={"id": i}
) for i, doc in enumerate(refs)]
docs[:3]

[Document(metadata={'id': 0}, page_content='On average, cats spend 2/3 of every day sleeping. That means a nine-year-old cat has been awake for only three years of its life.'),
 Document(metadata={'id': 1}, page_content='Unlike dogs, cats do not have a sweet tooth. Scientists believe this is due to a mutation in a key taste receptor.'),
 Document(metadata={'id': 2}, page_content='When a cat chases its prey, it keeps its head level. Dogs and humans bob their heads up and down.')]

## Create embeddings

In [8]:
# Create an embedding model
model_kwargs = {'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': False}
embeddings_model = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [9]:
reranking_model = HuggingFaceCrossEncoder(
    model_name=RERANK_MODEL,
    model_kwargs=model_kwargs
)

compressor = CrossEncoderReranker(model=reranking_model, top_n=3)

## Create Vector Store

In [10]:
# Chroma DB
vector_store = Chroma.from_documents(docs, embedding = embeddings_model)
base_retriever = vector_store.as_retriever(search_kwargs={"k": 10})
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, 
    base_retriever=base_retriever
)

## Prompt

In [11]:
system_prompt = (
    "You are a cat expert."
    "Answer the following question using only the information provided in the given context."
    "You don't need to explain te answer."
    ""
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
print(prompt)

input_variables=['context', 'input'] input_types={} partial_variables={} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are a cat expert.Answer the following question using only the information provided in the given context.You don't need to explain te answer.Context: {context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})]


In [12]:
question_answer_chain = create_stuff_documents_chain(llm = llm, prompt = prompt)
# print(question_answer_chain)

In [13]:
chain = create_retrieval_chain(retriever=compression_retriever, combine_docs_chain=question_answer_chain)
# print(chain)

## Main

In [14]:
import pandas as pd

data = pd.read_csv("../../Docs/test.csv", sep=',')
data

Unnamed: 0,queries,answers
0,How much of a day do cats spend sleeping on av...,2/3
1,What is the technical term for a cat's hairball?,Bezoar
2,What do scientists believe caused cats to lose...,a mutation in a key taste receptor
3,What is the top speed a cat can travel over sh...,"31 mph, 49 km"
4,What is the name of the organ in a cat's mouth...,Jacobson’s organ
5,Which wildcat is considered the ancestor of al...,the African Wild Cat
6,What is the group term for cats?,clowder
7,How many different sounds can cats make?,100
8,What is the name of the first cat in space?,"Felicette, Astrocat"
9,How many toes does a cat have on its back paws?,four


In [15]:
queries = data['queries']
answers = data['answers']

In [16]:
counts = 0
wrong_queries = []

for i, query in enumerate(queries):
    response = chain.invoke({"input": query})
    print(f"Query: {query}\nResponse: {response['answer']}\n")
    
    # The following lines perform evaluations.
    # if the answer shows up in your response, the response is considered correct.
    is_correct = False
    if isinstance(answers[i], list):
        for answer in answers[i]:
            if answer.lower() in response['answer'].lower():
                counts += 1
                is_correct = True
                break
    else:
        if answers[i].lower() in response['answer'].lower():
            counts += 1
            is_correct = True
    
    if not is_correct:
        wrong_queries.append(query)

# Improve to let the LLM correctly answer the ten questions.
print(f"Correct numbers: {counts}")
if wrong_queries:
    print("The following queries were answered incorrectly:")
    for q in wrong_queries:
        print(q)

  return forward_call(*args, **kwargs)


Query: How much of a day do cats spend sleeping on average?
Response: 2/3 of the day, which is 16 to 18 hours.

Query: What is the technical term for a cat's hairball?
Response: Bezoar.



  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Query: What do scientists believe caused cats to lose their sweet tooth?
Response: A mutation in a key taste receptor.

Query: What is the top speed a cat can travel over short distances?
Response: 31 mph (49 km)



  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Query: What is the name of the organ in a cat's mouth that helps it smell?
Response: The Jacobson’s organ.



  return forward_call(*args, **kwargs)


Query: Which wildcat is considered the ancestor of all domestic cats?
Response: The African Wild Cat.



  return forward_call(*args, **kwargs)


Query: What is the group term for cats?
Response: A "clowder."

Query: How many different sounds can cats make?
Response: About 100 different sounds.



  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


Query: What is the name of the first cat in space?
Response: Felicette (a.k.a. “Astrocat”)

Query: How many toes does a cat have on its back paws?
Response: 4

Correct numbers: 7
The following queries were answered incorrectly:
What is the top speed a cat can travel over short distances?
What is the name of the first cat in space?
How many toes does a cat have on its back paws?


  return forward_call(*args, **kwargs)
