In [2]:
# import packages

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict


In [3]:
# Knowledge Base
#
# Full text of the "Animals on buses" policy, kept verbatim for reference.
# No other visible cell reads `kb_text`; the hand-written `kb_chunks` list
# defined right after it is what gets embedded and searched.

kb_text = """
Animals on buses
We appreciate animals and pets in all shapes and sizes, so we have policies in place to ensure the safety of service and non-service animals when it comes to riding our buses. For more detailed information on our policies, please read our Rules of Conduct.

Service animals
In accordance with the Americans with Disabilities Act, service or guide animals are allowed on all Community Transit buses.
A service animal means any dog that is individually trained to do work or perform tasks for the benefit of an individual with a disability, including a physical, sensory, psychiatric, intellectual, or other mental disability.
Emotional Support Animals are not considered service animals by the Americans with Disabilities Act (ADA) or under Washington state law.
Service animals are limited to dogs.
Service animals are not required to be muzzled. If a service animal is not harnessed, leashed, crated, or tethered, it must remain under the control of the individual with disabilities through voice, signal, or other effective controls.

Non-service animals
Some pets and non-service animals are allowed on local and Swift buses with the following rules:
Dogs and cats are not required to be in a crate. However, pets that are not in a crate are required to wear a leash and a commercially produced muzzle that covers the pet’s mouth and is secured behind the head. All other small animals must be in a crate during transportation.
In all cases, owners must have complete control of their animal at all times. Animals cannot occupy a seat, block the aisle, wander or disturb other passengers. Animals cannot run at large without a leash or obstruct the flow of passenger or bus traffic. Animals may occupy a passenger’s lap or under the seat while in a transit vehicle or facility.
All animals are prohibited from leaving waste on transit property.
On Sound Transit buses, only service animals and small animals in carrying containers are allowed.
"""

# Manually authored retrieval units, one string per topic of the policy.
# Adjacent string literals inside one list element are joined by Python's
# implicit concatenation, so each element below is a single chunk.
kb_chunks: List[str] = [
    # Chunk 0 – intro (a one-sentence paraphrase of the policy's opening paragraph)
    "We have policies for service and non-service animals on Community Transit buses to ensure safety.",
    
    # Chunk 1 – definition of service animals, including the emotional-support
    # exclusion and the "not required to be muzzled" rule
    "In accordance with the Americans with Disabilities Act, service or guide animals are allowed on all Community Transit buses. "
    "A service animal means any dog that is individually trained to do work or perform tasks for the benefit of an individual with a disability, "
    "including a physical, sensory, psychiatric, intellectual, or other mental disability. "
    "Emotional Support Animals are not considered service animals by the ADA or Washington state law. "
    "Service animals are limited to dogs. "
    "Service animals are not required to be muzzled. If a service animal is not harnessed, leashed, crated, or tethered, "
    "it must remain under the control of the individual with disabilities through voice, signal, or other effective controls.",
    
    # Chunk 2 – non-service animals rules (crate / leash / muzzle requirements);
    # the source's "with the following rules:" lead-in is omitted here
    "Some pets and non-service animals are allowed on local and Swift buses. Dogs and cats are not required to be in a crate. "
    "However, pets that are not in a crate are required to wear a leash and a commercially produced muzzle that covers the pet’s mouth "
    "and is secured behind the head. All other small animals must be in a crate during transportation.",
    
    # Chunk 3 – behavior & waste (owner control, where an animal may sit, waste prohibition)
    "In all cases, owners must have complete control of their animal at all times. Animals cannot occupy a seat, block the aisle, wander or disturb other passengers. "
    "Animals cannot run at large without a leash or obstruct the flow of passenger or bus traffic. "
    "Animals may occupy a passenger’s lap or under the seat while in a transit vehicle or facility. "
    "All animals are prohibited from leaving waste on transit property.",
    
    # Chunk 4 – Sound Transit (a separate, stricter rule)
    "On Sound Transit buses, only service animals and small animals in carrying containers are allowed."
]

# Display the chunk count together with the chunks as a sanity check.
len(kb_chunks), kb_chunks


(5,
 ['We have policies for service and non-service animals on Community Transit buses to ensure safety.',
  'In accordance with the Americans with Disabilities Act, service or guide animals are allowed on all Community Transit buses. A service animal means any dog that is individually trained to do work or perform tasks for the benefit of an individual with a disability, including a physical, sensory, psychiatric, intellectual, or other mental disability. Emotional Support Animals are not considered service animals by the ADA or Washington state law. Service animals are limited to dogs. Service animals are not required to be muzzled. If a service animal is not harnessed, leashed, crated, or tethered, it must remain under the control of the individual with disabilities through voice, signal, or other effective controls.',
  'Some pets and non-service animals are allowed on local and Swift buses. Dogs and cats are not required to be in a crate. However, pets that are not in a crate are 

In [4]:
# Load Embedding Model & Embed KB

# Model identifier passed to SentenceTransformer; the recorded output shows
# the model files being downloaded on first use.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(embedding_model_name)

# Embed every chunk once up front. The same convert_to_numpy / normalize_embeddings
# options are used later for queries so both sides are encoded identically.
kb_embeddings = embedder.encode(kb_chunks, convert_to_numpy=True, normalize_embeddings=True)
# Shape check; the recorded output is (5, 384), i.e. one row per chunk.
kb_embeddings.shape


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(5, 384)

In [6]:
# Simple In-Memory “Index” + Retrieval Function

# In-memory index: one record per chunk, pairing the chunk's position and
# text with the matching row of `kb_embeddings`.
kb_index: List[Dict] = [
    dict(id=pos, text=passage, embedding=kb_embeddings[pos])
    for pos, passage in enumerate(kb_chunks)
]

def embed_query(query: str) -> np.ndarray:
    """Embed a single query string with the shared `embedder`.

    The query is wrapped in a one-element list for `encode`, and the only
    row of the result is returned. The `convert_to_numpy=True` and
    `normalize_embeddings=True` options match those used when the
    knowledge-base chunks were embedded.
    """
    batch = embedder.encode(
        [query],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return batch[0]

def retrieve(query: str, top_k: int = 2):
    """Return the knowledge-base chunks most similar to `query`.

    Args:
        query: Natural-language question to search for.
        top_k: Maximum number of chunks to return. Values <= 0 yield an
            empty list (a negative value is NOT treated as a slice offset).

    Returns:
        A list of dicts with keys "id", "text" and "score" (the cosine
        similarity as a Python float), ordered from highest to lowest score.
        Contains fewer than `top_k` items when the index is smaller, and is
        empty when the index itself is empty.
    """
    # Guard before embedding: a negative top_k would otherwise be read as a
    # slice-from-end and return the wrong number of results, and an empty
    # index would make np.stack raise.
    if top_k <= 0 or not kb_index:
        return []

    q_emb = embed_query(query)
    doc_embs = np.stack([item["embedding"] for item in kb_index])
    sims = cosine_similarity([q_emb], doc_embs)[0]  # shape: (num_chunks,)

    # Stable sort on the negated scores gives descending order while keeping
    # tied chunks in index order.
    order = np.argsort(-sims, kind="stable")[:top_k]

    results = []
    for idx in order:
        item = kb_index[int(idx)]
        results.append(
            {
                "id": item["id"],
                "text": item["text"],
                "score": float(sims[idx]),
            }
        )
    return results

# Quick test: smoke-check retrieval with a single question.
# In the recorded output the short intro chunk (id 0) scores above the chunk
# that actually contains the muzzle rule (id 1); both are within top_k=2.
test_query = "Do service animals have to be muzzled on Community Transit buses?"
retrieve(test_query, top_k=2)


[{'id': 0,
  'text': 'We have policies for service and non-service animals on Community Transit buses to ensure safety.',
  'score': 0.7903959155082703},
 {'id': 1,
  'text': 'In accordance with the Americans with Disabilities Act, service or guide animals are allowed on all Community Transit buses. A service animal means any dog that is individually trained to do work or perform tasks for the benefit of an individual with a disability, including a physical, sensory, psychiatric, intellectual, or other mental disability. Emotional Support Animals are not considered service animals by the ADA or Washington state law. Service animals are limited to dogs. Service animals are not required to be muzzled. If a service animal is not harnessed, leashed, crated, or tethered, it must remain under the control of the individual with disabilities through voice, signal, or other effective controls.',
  'score': 0.745667576789856}]

In [7]:
# Load LLM for Generation (FLAN-T5)

# Model identifier used for both the tokenizer and the seq2seq model.
gen_model_name = "google/flan-t5-small"

tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(gen_model_name)

# Despite its name, this one pipeline serves both the RAG path and the
# no-context baseline (`raw_llm_answer`). The recorded output reports that it
# runs on CPU.
rag_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer
)


Device set to use cpu


In [9]:
# RAG Prompt Construction + Answer Function

def build_rag_prompt(question: str, contexts: List[str]) -> str:
    """Assemble the grounded prompt sent to the generator.

    Args:
        question: The user's question, inserted verbatim.
        contexts: Retrieved chunk texts. Each is rendered as a "- " bullet,
            with bullets separated by a blank line.

    Returns:
        The prompt string: instructions, a "Context:" section with the
        bullets, the question, and a trailing "Answer:" cue with no
        surrounding whitespace.
    """
    context_block = "\n\n".join(f"- {c}" for c in contexts)
    # Built from explicit pieces so the exact text is visible. The space
    # after "below." is part of the existing prompt and is kept so the text
    # the model sees does not change.
    return (
        "You are a helpful assistant answering questions about Community Transit policies for animals on buses.\n"
        "\n"
        "Use ONLY the information in the context below. \n"
        "If the answer is not clearly in the context, say that the information is not available in the policy.\n"
        "\n"
        "Context:\n"
        f"{context_block}\n"
        "\n"
        f"Question: {question}\n"
        "Answer:"
    )


def rag_answer(question: str, top_k: int = 2, max_new_tokens: int = 128):
    """Answer `question` using retrieved policy chunks as context.

    Args:
        question: The user's question.
        top_k: Number of chunks to request from `retrieve`.
        max_new_tokens: Generation length limit passed to the pipeline.

    Returns:
        A dict with keys "question", "retrieved" (the list returned by
        `retrieve`), "prompt" (the exact prompt sent to the model) and
        "answer" (the generated text of the first pipeline result).
    """
    retrieved = retrieve(question, top_k=top_k)
    contexts = [r["text"] for r in retrieved]
    prompt = build_rag_prompt(question, contexts)

    # Sampling is disabled so the same prompt is not subject to random draws.
    llm_output = rag_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=False
    )[0]["generated_text"]

    return {
        "question": question,
        "retrieved": retrieved,
        "prompt": prompt,
        "answer": llm_output,
    }


In [10]:
# “Raw LLM” Baseline (no RAG)

def raw_llm_answer(question: str, max_new_tokens: int = 128):
    """Ask the generator the question directly, with no retrieved context.

    Serves as the baseline against the RAG path: the prompt contains only
    the question and an "Answer:" cue. Returns the generated text of the
    first pipeline result.
    """
    bare_prompt = "Question: {}\nAnswer:".format(question)
    generations = rag_pipeline(
        bare_prompt,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )
    first_generation = generations[0]
    return first_generation["generated_text"]


In [11]:
# Define Your Three Test Cases

# Three probes keyed by the behaviour they exercise:
#   factual   - the answer is stated outright in the policy text
#   foil      - airplane fees, which the policy does not cover
#   synthesis - needs both the crate/leash/muzzle rule and the seating rule,
#               which sit in different chunks
test_cases = dict(
    factual="Are emotional support animals considered service animals on Community Transit buses?",
    foil="What is the fee for bringing a large dog on an airplane?",
    synthesis="Can I bring my cat without a crate on a Community Transit bus, and where is it allowed to sit during the trip?",
)

test_cases


{'factual': 'Are emotional support animals considered service animals on Community Transit buses?',
 'foil': 'What is the fee for bringing a large dog on an airplane?',
 'synthesis': 'Can I bring my cat without a crate on a Community Transit bus, and where is it allowed to sit during the trip?'}

In [12]:
# Run RAG for All Test Cases

# Full rag_answer() result per test-case label, kept for later inspection.
rag_results = {}

for label, question in test_cases.items():
    print("="*80)
    print(f"RAG – Test case: {label.upper()}")
    result = rag_answer(question, top_k=2)
    rag_results[label] = result

    print("\nQuestion:")
    print(result["question"])

    print("\nRetrieved chunks (id, score):")
    for r in result["retrieved"]:
        text = r["text"]
        # Add the ellipsis only when the chunk is actually cut; short chunks
        # used to be printed with a misleading trailing "...".
        preview = text if len(text) <= 200 else text[:200] + "..."
        print(f"  - id={r['id']}, score={r['score']:.3f}")
        print(f"    text: {preview}")

    print("\nAnswer:")
    print(result["answer"])
    print()


RAG – Test case: FACTUAL

Question:
Are emotional support animals considered service animals on Community Transit buses?

Retrieved chunks (id, score):
  - id=1, score=0.804
    text: In accordance with the Americans with Disabilities Act, service or guide animals are allowed on all Community Transit buses. A service animal means any dog that is individually trained to do work or p...
  - id=0, score=0.724
    text: We have policies for service and non-service animals on Community Transit buses to ensure safety....

Answer:
Emotional Support Animals are not considered service animals by the ADA or Washington state law. Service animals are limited to dogs. Service animals are not required to be muzzled. If a service animal is not harnessed, leashed, crated, or tethered, it must remain under the control of the individual with disabilities through voice, signal, or other effective controls.

RAG – Test case: FOIL

Question:
What is the fee for bringing a large dog on an airplane?

Retriev

In [13]:
# Run Raw LLM Baseline

# Baseline answers (no retrieval) keyed by test-case label.
raw_results = {}

for label, question in test_cases.items():
    print("=" * 80)
    print("RAW LLM – Test case: {}".format(label.upper()))
    ans = raw_results[label] = raw_llm_answer(question)

    # One print call with a newline separator emits the same text as printing
    # each piece on its own; the trailing "" yields the closing blank line.
    print("\nQuestion:", question, "\nAnswer:", ans, "", sep="\n")


RAW LLM – Test case: FACTUAL

Question:
Are emotional support animals considered service animals on Community Transit buses?

Answer:
no

RAW LLM – Test case: FOIL

Question:
What is the fee for bringing a large dog on an airplane?

Answer:
$2.50

RAW LLM – Test case: SYNTHESIS

Question:
Can I bring my cat without a crate on a Community Transit bus, and where is it allowed to sit during the trip?

Answer:
no



## Result Analysis

- **Test 1 (Factual)**  
  - RAG: Correctly states that emotional support animals are *not* considered service animals and that service animals are limited to dogs.  
  - Raw LLM: Answered only "no". This agrees with the KB, but it is an ungrounded one-word guess that cites no policy text.

- **Test 2 (Foil / General)**  
  - RAG: States that the information is not available in the policy, which is desired behavior since the KB is only about bus animal policy.  
  - Raw LLM: Invented a fee of "$2.50", which is a hallucination: nothing in the question or the KB supplies that figure.

- **Test 3 (Synthesis)**  
  - RAG: Combines information that cats do not need a crate but must be leashed and muzzled, and also that animals may sit on the passenger’s lap or under the seat and cannot block aisles or seats.  
  - Raw LLM: Answered only "no", which contradicts the KB (uncrated cats are allowed if leashed and muzzled) and does not address where the cat may sit.

Overall, the RAG system grounded the answers in the explicit “Animals on buses” policy and reduced hallucinations compared to using the LLM alone.
