# OLLAMA FINANCE BILL 2025 RAG MULTILINGUAL

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever


# Jupyter imports
from IPython.display import display, Markdown # Neat Version Output

In [2]:
import warnings

# Optional: suppress all warnings for the rest of the session so they do not
# clutter the cell outputs. Remove this call if you want to see them.
warnings.filterwarnings(action="ignore")

## Load PDF

In [3]:
from pathlib import Path

# Path of the PDF to index, relative to the notebook's working directory.
local_path = "The Finance Bill 2025 (1).pdf"

# A non-empty string is always truthy, so checking the string alone never
# detects a missing file. Verify the file itself and fail loudly here, rather
# than leaving `data` undefined for the cells that follow.
if not local_path or not Path(local_path).is_file():
    raise FileNotFoundError(
        f"Upload a PDF File! No PDF found at: {local_path!r}"
    )

loader = PyPDFLoader(local_path)
data = loader.load()
print(f"PDF WAS SUCCESSFULLY LOADED: {local_path}")

PDF WAS SUCCESSFULLY LOADED: The Finance Bill 2025 (1).pdf


In [4]:
# Show the text extracted for the first loaded document (index 0).
# The final expression of the cell is what the notebook displays.
first_page = data[0]
first_page.page_content

"SPECIAL ISSUE \nNATION I. '01.INCI I.FOR \nk P OR TI NC \nLIPRAPY  \n \n   \nKenya Gazette Supplement No. 63 (National Assembly Bills No. 19) \nREPUBLIC OF KENYA \nKENYA GAZETTE SUPPLEMENT \nNATIONAL ASSEMBLY BILLS, 2025 \nNAIROBI, 6th May, 2025 \nCONTENT \nBill for Introduction into the National Assembly— \nPAGE \nThe Finance Bill, 2025  \t 335 \nNATIONAL COUNCIL FOR \nLAW REPORTING \n0 9 MAY 2025 \nLIBRARY ARCHIVE \nPRINTED AND PUBLISHED BY THE GOVERNMENT PRINTER, NAIROBI"

## SPLIT TEXT INTO CHUNKS

In [5]:
# Split the loaded documents into overlapping chunks: a target size of 1500
# with 200 shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1500,
)
chunks = text_splitter.split_documents(data)

In [6]:

# Report how many chunks were produced: first the bare count, then the same
# figure inside a sentence.
chunk_count = len(chunks)
print(chunk_count)
print(f"The text was split into {chunk_count} chunks")

257
The text was split into 257 chunks


## Add to a vector database

In [7]:
# Embed every chunk with the "nomic-embed-text" Ollama embedding model and
# store the result in a Chroma collection named "local-rag".
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=embedding_model,
    documents=chunks,
)
print("The Vector Database Was Created Succcessfully!")

The Vector Database Was Created Succcessfully!


## SETUP LLM AND RETRIEVAL

In [8]:
# Name of the Ollama chat model. The resulting `llm` is passed both to the
# multi-query retriever (query rewriting) and to the answer chain.
local_model = "llama3.1"
llm = ChatOllama(model=local_model)

In [9]:
# Query prompt template for the multi-query retriever.
# The retriever splits the model's reply on newlines and runs each non-empty
# line as its own search query, so the prompt must demand bare queries with no
# commentary. The indexed PDF is in English, so questions in other languages
# (e.g. Swahili) are rewritten in English to match the document's vocabulary.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI assistant helping to search a vector database built from
the provided Finance Bill document, which is written in English.

Rewrite the user's question as 3 alternative search queries that maximize the
chance of retrieving exact or very close passages from the document.

Rules:
- Write every query in English, translating the question first if it is in
  another language (for example Swahili).
- Keep the original meaning; do not add facts and do not answer the question.
- Output ONLY the queries, one per line, with no numbering, bullets, or
  explanations.

User question: {question}"""
)

# Answer prompt: restricts the model to the retrieved context and gives it a
# fixed fallback sentence for questions the context cannot answer.
# NOTE(review): no cell visible in this notebook passes ANSWER_PROMPT to a
# chain - confirm whether it is still needed.
ANSWER_PROMPT = PromptTemplate(
    template="""You are an AI assistant for question-answering tasks.
Use only the following context extracted from the Finance Bill PDF:

{context}

Question: {question}

Instructions:
- If the answer is in the context, provide a clear and concise response.
- If the information is NOT in the context, say:
  "The provided Finance Bill document does not contain information to answer that question."

Answer:""",
    input_variables=["context", "question"],
)

In [10]:
# Build the retriever: the LLM rewrites each incoming question using
# QUERY_PROMPT, and the rewritten queries are run against the vector store.
base_retriever = vector_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)
# Prompt for the answering step of the RAG chain.
# It confines the model to the supplied context and asks it to reply in the
# language of the question (Swahili or English).
template = """Answer the question based ONLY on the following context:
{context}

Question: {question}

Your response must be in the same language as the question. If the question is in Swahili, respond in Swahili. If it is in English, respond in English.
"""
prompt = ChatPromptTemplate.from_template(template=template)

## Create Chain

In [11]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: retrieve documents for the question, flatten them to plain
# text, fill the prompt, call the LLM, and return the reply as a string.
# Without the formatting step the prompt would receive the string form of a
# list of Document objects (metadata and escaped newlines included) instead
# of clean passage text.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

## Chat With The PDF

In [12]:
def chat_with_pdf(question):
    """
    Ask the RAG chain a question and render its answer as Markdown.

    Args:
        question: The question text handed to ``chain.invoke``.

    Returns:
        Whatever ``display`` returns for the rendered answer.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [13]:
# Question 1 (English): ask for the document's main idea.
chat_with_pdf(question="What is the main idea of this document?")

The main idea of this document appears to be a compilation of proposed amendments and changes to various tax laws and regulations in Kenya, specifically the Income Tax Act and the Excise Duty Act. The document outlines several sections that are being amended or added, including provisions related to income tax exemptions, interest income, and excise duty on digital services.

Some specific topics covered include:

* Exemptions from income tax for certain individuals and activities
* Changes to the calculation of interest income and allowable deductions
* Amendments to the Excise Duty Act related to digital services and online transactions
* Provisions for compensating tax for companies undertaking human vaccine manufacturing

Overall, the document appears to be a legislative draft or proposal that outlines changes to Kenya's tax laws and regulations.

In [14]:
# Question 2 (Swahili): roughly, "What is the 2025 Finance Bill about?"
chat_with_pdf(question="Mswada wa fedha wa 2025 unahusu nini?")

Mswada wa fedha wa 2025 unaangazia mabadiliko mbalimbali katika sera za kifedha za nchi, ikiwa ni pamoja na malipo ya riba, malipo ya ajira, na uwekezaji. Pia inasema kuwa mashirika yasiyotokana na Kenya yanayokuwa na maeneo ya kitaifa ndani ya nchi hii yakibaki kutolipia fedha kama hizo.

Mswada huo pia unaongeza malipo ya shuleni, ambayo itakuwa 8% juu ya thamani ya kuuza wa nyumba. Pia inaangazia kuongezeka kwa malipo ya maji na umeme, na kuongeza uwekezaji katika sekta ya mifumo ikolojia.

Pia mwada huo unaangazia ukusanyaji wa ushuru wa fedha kutoka kwa mashirika ambayo yalipokea mapato kutokana na digital content monetization.

In [15]:
# Question 3 (Swahili): roughly, "What are the main elements of this document?"
chat_with_pdf(question="Vipengele vikuu katika hii hati ni ipi?")

Vipengele vikuu kwenye hii hati ni muundo wa kisheria na maendeleo ya kifedha, hasa katika uwezo wa kupiga vibali vya mapato kutoka kwa wafanyabiashara wa nje.

In [16]:
# Question 4 (English): ask which taxes the document covers.
chat_with_pdf(question="What Taxes does the document address?")

The document addresses various taxes, including:

1. Income Tax
2. Excise Duty
3. Presumptive Income Tax
4. Advance Tax

Specifically, it outlines the rates and conditions for payment of these taxes, such as:

* Income tax rates for different types of income (e.g. rent, premium or similar consideration)
* Presumptive income tax rate for agricultural produce (2% of gross amount)
* Advance tax rates for vehicles (e.g. vans, pick-ups, trucks) and aircraft spare parts
* Exemptions for certain goods and services, such as:
	+ Goods imported by the Kenya Airports Authority or exempted under the East African Community Customs Union Protocol
	+ Specially designed locally assembled motor vehicles for transportation of tourists
	+ Goods purchased before clearance through Customs by tour operators

The document also addresses electronic tax invoices, including requirements for content and records to be maintained.

In [17]:
# Question 5 (English): a topic unrelated to the Finance Bill, to see how the
# chain responds when the context holds no answer.
chat_with_pdf(question="What does the document say about World War 2?")

There is no information about World War 2 in the provided documents. The documents appear to relate to financial and tax legislation, specifically "The Finance Bill 2025".