# Kenya Travel Chatbot


## Import all packages


In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI


ModuleNotFoundError: No module named 'langchain_huggingface'

In [2]:
# Show the kernel's current working directory before changing it
%pwd

'p:\\Generative AI projects\\Kenya-Travel-and-Hospitality-Assistant\\research'

In [3]:
# Change directory to access data folder.
# Guarded so the cell is safe to re-run: an unconditional os.chdir("../")
# climbs one more level on every execution and breaks the relative "data/"
# path used by the loader. Only move up while no "data" folder is visible.
if not os.path.isdir("data"):
    os.chdir("../")

In [4]:
# Check current directory (the relative "data/" path used below is resolved against it)
%pwd

'p:\\Generative AI projects\\Kenya-Travel-and-Hospitality-Assistant'

## Load data


In [5]:
# Load documents: pick up every file in data/ that matches *.pdf and parse
# each one with PyPDFLoader.
loader = DirectoryLoader("data/", glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

## Split data into chunks


In [6]:
# Cut the loaded documents into chunks of size 500 with an overlap of 20
# between neighbouring chunks, ready to be embedded.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
text_chunks = splitter.split_documents(documents)

In [7]:
# Number of chunks produced by the splitter
len(text_chunks)

833

In [8]:
# Embedding model used for both indexing and querying:
# the multilingual E5 base checkpoint, loaded through the HuggingFace wrapper.
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-base",
)
embeddings  # last expression: display the loaded model configuration


  embeddings = HuggingFaceEmbeddings(model_name = "intfloat/multilingual-e5-base")
  from .autonotebook import tqdm as notebook_tqdm


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='intfloat/multilingual-e5-base', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [9]:
# Embed a sample string to check that the model works. The "query: " prefix is
# the form this notebook uses for search-side text with the E5 model.
query_embeddings = embeddings.embed_query("query: HEllo world")
# Length of the returned embedding vector
len(query_embeddings)

768

In [10]:
# 1. Path to save/load FAISS index
faiss_path = "faiss_index"

# 2. If index exists, load it
if os.path.exists(faiss_path):
    print("🔄 Loading existing FAISS index...")
    # SECURITY NOTE: load_local unpickles the stored docstore, which is why the
    # explicit allow_dangerous_deserialization flag is required. Only load an
    # index folder that this notebook created itself.
    # NOTE: an index saved by an earlier run keeps whatever text it was built
    # with; delete the folder to rebuild it after changing how docs are made.
    vectorstore = FAISS.load_local(faiss_path, embeddings, allow_dangerous_deserialization=True)

else:
    print("✨ Creating new FAISS index...")
    # Prefix each chunk's *text* with "passage: " (E5 convention for indexed
    # text) and carry the chunk's metadata over. Interpolating the chunk object
    # itself would embed its repr ("page_content='...' metadata={...}") and
    # leave the new Document with empty metadata.
    docs = [
        Document(
            page_content=f"passage: {chunk.page_content}",
            metadata=chunk.metadata,
        )
        for chunk in text_chunks
    ]

    # Build FAISS index
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Save index locally
    vectorstore.save_local(faiss_path)
    print("💾 Saved FAISS index locally!")

🔄 Loading existing FAISS index...


In [11]:
# Display the vector store object to confirm it was loaded or built
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x1adf0769360>

In [23]:
# Read a local .env file (if one exists) into the environment first, so that
# OPENAI_API_KEY is available even when it was not exported in the shell.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Chat model used both for query reformulation and for answering.
model = "gpt-3.5-turbo-0125"
llm = ChatOpenAI(
    model_name=model,
    temperature=0.3,
    # Upper bound on reply length; longer answers are cut off at this limit
    # (the sample outputs in this notebook end mid-sentence).
    max_tokens=200
)
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001ADE9B91C30>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001ADE9B91330>, root_client=<openai.OpenAI object at 0x000001ADE9B91270>, root_async_client=<openai.AsyncOpenAI object at 0x000001ADE9B93460>, model_name='gpt-3.5-turbo-0125', temperature=0.3, model_kwargs={}, openai_api_key=SecretStr('**********'), max_tokens=200)

In [13]:
# Sanity-check the LLM on its own, without any retrieved context
llm.invoke("What is known about kenya?")

AIMessage(content='Kenya is a country located in East Africa, known for its diverse wildlife, stunning landscapes, and vibrant culture. Some key facts about Kenya include:\n\n1. Capital: Nairobi\n2. Population: Approximately 53 million people\n3. Official languages: Swahili and English\n4. Currency: Kenyan Shilling\n5. Government: Republic\n6. Famous for its national parks and game reserves, including the Maasai Mara, Amboseli National Park, and Tsavo National Park.\n7. Home to Mount Kenya, the second-highest mountain in Africa.\n8. Known for its rich cultural heritage, including traditional music, dance, and art.\n9. Major industries include agriculture, tourism, and manufacturing.\n10. Kenya gained independence from British colonial rule in 1963.\n11. The country has a diverse population with over 40 different ethnic groups, each with its own unique traditions and customs.\n12. Kenya is a popular destination for safari tours, offering the chance', additional_kwargs={'refusal': None},

## Set retriever object


In [14]:
# Expose the FAISS store as a retriever; search_kwargs k=3 asks for three results per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [15]:
# Smoke-test the retriever with a Kiswahili question. The "query: " prefix is
# added here because the search text goes straight to the E5 embeddings.
query = "Ni hifadhi maarufu gani nchini Kenya?"
results = retriever.invoke("query: " + query)
for hit in results:
    print(hit.page_content)

passage: page_content='of Kenya, this hidden gem - set away from the hassle and 
hustle of the city - is a gorgeous grotto of a place.
1.Watamu Beach
Perched on a small headland between the Blue Lagoon 
and Watamu Bay, it stocks pristine beaches, lush tropical 
rain forest and two protected areas: Watamu Marine 
National Park & Arabuko Sokoke National Park.' metadata={'producer': 'Adobe PDF Library 15.0', 'creator': 'Adobe InDesign 16.0 (Macintosh)', 'creationdate': '2022-05-12T14:41:42+05:30', 'moddate': '2022-05-12T14:42:36+05:30', 'trapped': '/False', 'source': 'data\\48-hours-in-Mombasa-brochure.pdf', 'total_pages': 62, 'page': 28, 'page_label': '29'}
passage: page_content='“In Kenya, black and white rhinos have become 
emblematic of the struggle for African wildlife 
conservation. Although their populations face threats 
from poaching, measures such as secure sanctuaries 
and intensive patrols have significantly enhanced 
their protection,” explains Jonathan.
Elsa the Lioness
The 

## Make Chain


In [16]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instructions for the answering step; {context} is filled with the
# retrieved documents by the stuff-documents chain.
system_prompt = """
You are SafariBot, a travel and hospitality assistant specialized in East Africa.
Answer only questions related to travel, tourism, accommodations, transport, culture, and hospitality in Kenya.

Use the retrieved context to answer; if the answer is not found, say you don’t know.
Politely refuse unrelated questions and guide the user back to travel topics.
Support both English and Kiswahili, replying in the user’s language.
Be concise, clear, friendly, and include practical details when possible.

Context:
{context}
"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        # Earlier turns of the conversation, so follow-up questions can be
        # answered in context. Optional: the prompt still formats when no
        # chat_history is supplied.
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        # The user's question as typed. The "query: " prefix is an E5
        # retrieval convention and is not needed in the message sent to the LLM.
        ("human", "{input}")
    ]
)


In [17]:
from langchain.chains import create_retrieval_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import MessagesPlaceholder

# Retriever that first rewrites a follow-up question into a standalone search
# query. The prompt must actually contain the chat history, otherwise the
# rewriting LLM has nothing to reformulate against.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    prompt=ChatPromptTemplate.from_messages([
        ("system",
         "Reformulate the user’s question based on the chat history so that it is "
         "a standalone search query. Do not answer it; return only the rewritten query. "
         # E5 embeddings expect search text to carry this prefix.
         "ALWAYS begin the result with exactly: 'query: '"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}")
    ])
)

# Chain that stuffs the retrieved documents into qa_prompt and asks the LLM.
qa_chain = create_stuff_documents_chain(
    llm, qa_prompt
)

# Full pipeline: retrieve with the history-aware retriever, then answer.
rag_chain = create_retrieval_chain(
    history_aware_retriever, qa_chain
)

# Conversation buffer; return_messages=True makes "chat_history" a list of
# message objects, which is what MessagesPlaceholder expects.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

  memory = ConversationBufferMemory(


In [18]:
def ask_bot(query: str):
    """Send one user question through the RAG chain and return the answer text.

    The conversation so far is read from ``memory`` and passed to
    ``rag_chain`` together with the question; afterwards the question and the
    answer are written back to ``memory`` so the next call sees them.
    """
    history = memory.load_memory_variables({})["chat_history"]
    result = rag_chain.invoke({"input": query, "chat_history": history})
    answer = result["answer"]
    memory.save_context({"input": query}, {"output": answer})
    return answer

In [19]:
# First turn: an in-scope Kenya travel question
print(ask_bot("Tell me about Maasai Mara"))

The Maasai Mara is a renowned wildlife reserve in Kenya, known for its diverse ecosystem and abundant wildlife. It covers over 400,000 acres and is managed by 24 conservancies involving more than 14,500 Maasai landowners. The Maasai Mara Conservancies have become a crucial part of the Greater Mara Ecosystem, showcasing a successful model that benefits both wildlife conservation and the local Maasai community. It all began in 1991 with the establishment of the first conservancy, Ol Choro Oirouwa. Today, visitors can experience the magic of the Maasai Mara through safaris, witnessing wildlife thriving in their natural habitat alongside the Maasai people. Karibu Maasai Mara!


In [20]:
# Follow-up turn: phrased so that it relies on the previous question kept in memory
print(ask_bot("What about in Tanzania?"))

In Tanzania, tourism is a significant industry, offering diverse attractions like Mount Kilimanjaro, Serengeti National Park, and Zanzibar's beaches. The country is known for its wildlife safaris, cultural experiences, and beautiful landscapes. If you need specific information or travel tips about Tanzania, feel free to ask!


In [21]:
# Off-topic question: the system prompt asks the bot to politely refuse these
print(ask_bot("Who was the first president of the USA?"))

I focus on travel-related questions about Kenya. If you have any inquiries about travel, tourism, accommodations, transport, culture, or hospitality in Kenya, feel free to ask!


In [24]:
# Another in-scope question; the bare call shows the returned value as the cell output.
# NOTE(review): "dome" in the question looks like a typo for "some".
ask_bot("What are dome good places to visit while in Kenya?")

'While in Kenya, you can explore various attractions beyond traditional safaris. Some recommended places to visit include:\n\n1. **Heritage Trail Expedition**: This expedition takes you through counties like Laikipia, Elgeyo Marakwet, Uasin Gishu, Baringo, and Nandi, showcasing hidden gems and unexplored landscapes.\n\n2. **Subukia Viewpoint**: Enjoy stunning views at the Subukia Viewpoint, a popular stop on the Heritage Trail Expedition.\n\n3. **Ol Pejeta Conservancy**: Engage in voluntourism projects like tree planting and wildlife monitoring in this sanctuary known for conservation efforts.\n\n4. **Karura Forest and Ngong Hills**: Explore these urban green spaces for serene escapes within city limits.\n\n5. **Koobi Fora Site**: Visit this site to see where the famous Turkana Boy was found, offering insight into human evolution.\n\n6. **Nairobi National Museum, Karen Blixen Museum, and Fort Jesus'

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.memory import ConversationBufferMemory

from langchain_core.prompts import MessagesPlaceholder

# 1) History-aware reformulation prompt -> returns a *standalone* query
#    IMPORTANT: Always begin with "query: " for E5 models.
#    The history is inserted as real chat messages via MessagesPlaceholder;
#    interpolating {chat_history} into a string would paste the Python repr of
#    the message objects into the prompt.
condense_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a question rewriter for a travel RAG system. "
     "Given the chat history and a follow-up, rewrite it as a standalone search query. "
     "Preserve the user's language. "
     "ALWAYS begin the result with exactly: 'query: '"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "Follow-up question: {input}\n\nStandalone query:")
])

# 2) Build history-aware retriever
history_aware_retriever = create_history_aware_retriever(
    llm,                      # your ChatOpenAI
    retriever,                # vectorstore.as_retriever(...)
    prompt=condense_prompt
)

from langchain_core.prompts import MessagesPlaceholder

# 3) QA prompt that actually uses retrieved context
qa_system = """
You are SafariBot, a travel and hospitality assistant specialized in East Africa.
Answer only questions related to travel, tourism, accommodations, transport, culture, and hospitality in Kenya, Tanzania, Uganda, Rwanda, and nearby regions.
Use ONLY the retrieved context to answer. If the answer is not in the context, say you don’t know.
Politely refuse unrelated questions and guide the user back to travel topics.
Support both English and Kiswahili, replying in the user’s language.
Be concise, clear, friendly, and include practical details when possible.

<context>
{context}
</context>
"""

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system),
    # Earlier turns, so a follow-up such as "What about in Tanzania?" is
    # answered knowing what was asked before. Optional: the prompt still
    # formats when no chat_history is supplied.
    MessagesPlaceholder(variable_name="chat_history", optional=True),
    ("human", "{input}")   # user message (we don’t add 'query:' here; only the retriever needs it)
])

# Chain that stuffs the retrieved documents into {context} and asks the LLM.
qa_chain = create_stuff_documents_chain(llm, qa_prompt)

# 4) Final RAG chain: history-aware retrieval feeding the answering chain
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    qa_chain,
)

# 5) Conversation memory so you don't manually append history.
#    The "chat_history" key is the one ask_bot reads back on every turn.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

def ask_bot(q: str):
    """Run one conversational turn and return ``(answer, full_response)``.

    The stored conversation is read from ``memory`` and sent to ``rag_chain``
    along with the question ``q``. The question and the answer are then saved
    back to ``memory``. The second element of the returned tuple is the whole
    chain response, which also holds the retrieved docs under ``"context"``.
    """
    payload = {
        "input": q,
        "chat_history": memory.load_memory_variables({})["chat_history"],
    }
    result = rag_chain.invoke(payload)
    answer = result["answer"]
    memory.save_context({"input": q}, {"output": answer})
    return answer, result


In [26]:
# Two-turn conversation: the second question is a follow-up to the first.
for question in ("Tell me about Maasai Mara", "What about in Tanzania?"):
    ans, resp = ask_bot(question)
    print(ans)

# Inspect what was retrieved for the last turn (to confirm it's using context):
# show the source (or the whole metadata dict when there is no source) and the
# first 200 characters of the first two retrieved documents.
for doc in resp["context"][:2]:
    origin = doc.metadata.get("source") or doc.metadata
    print("—", origin, "\n", doc.page_content[:200], "\n")


Maasai Mara is a renowned wildlife reserve in Kenya, known for its diverse ecosystem and abundant wildlife. The Maasai Mara Conservancies cover over 400,000 acres and are managed by 24 conservancies with more than 14,500 Maasai landowners. These conservancies have become a vital part of the Greater Mara Ecosystem, benefiting both wildlife and the local Maasai people through conservation and economic empowerment initiatives. It all began in 1991 with the first conservancy, Ol Choro Oirouwa, and has since evolved into a successful model where wildlife thrives alongside community development. Karibu Maasai Mara!
I specialize in travel and hospitality information for Kenya, Tanzania, Uganda, Rwanda, and nearby regions. How can I assist you with your travel inquiries in Tanzania specifically?
— {} 
 passage: page_content='corridor connecting the Boni forest area and Galana-
Tsavo ecosystems and is a maternity elephant 
ground.
But your safari can not be complete without a visit of 
the neig