# OLLAMA FINANCE BILL 2025 RAG MULTILINGUAL

## Import Libraries

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever


# Jupyter imports
from IPython.display import display, Markdown # Neat Version Output

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import warnings
warnings.filterwarnings('ignore')
# If needed

## Load PDF

In [3]:
# Location of the PDF to index.
local_path = "The Finance Bill 2025 (1).pdf"

# A non-empty string is always truthy, so testing `local_path` itself can never
# detect a missing file. Check the file on disk and stop here with a clear
# message, so later cells do not run without `data`.
if not Path(local_path).is_file():
    raise FileNotFoundError(f"Upload a PDF File! Could not find: {local_path}")

loader = PyPDFLoader(local_path)
# `data` holds the loaded documents; later cells read `data[0].page_content`.
data = loader.load()
print(f"PDF WAS SUCCESSFULLY LOADED: {local_path}")

PDF WAS SUCCESSFULLY LOADED: The Finance Bill 2025 (1).pdf


In [4]:
# Sanity check: show the raw text extracted from the first loaded document.
first_page = data[0]
first_page.page_content

"SPECIAL ISSUE \nNATION I. '01.INCI I.FOR \nk P OR TI NC \nLIPRAPY  \n \n   \nKenya Gazette Supplement No. 63 (National Assembly Bills No. 19) \nREPUBLIC OF KENYA \nKENYA GAZETTE SUPPLEMENT \nNATIONAL ASSEMBLY BILLS, 2025 \nNAIROBI, 6th May, 2025 \nCONTENT \nBill for Introduction into the National Assembly— \nPAGE \nThe Finance Bill, 2025  \t 335 \nNATIONAL COUNCIL FOR \nLAW REPORTING \n0 9 MAY 2025 \nLIBRARY ARCHIVE \nPRINTED AND PUBLISHED BY THE GOVERNMENT PRINTER, NAIROBI"

## SPLIT TEXT INTO CHUNKS

In [5]:
# Split the loaded documents into overlapping chunks; the overlap lets text
# near a chunk boundary appear in both neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents=data)

In [6]:

# Report how many chunks the splitter produced (bare count, then a sentence).
n_chunks = len(chunks)
print(n_chunks)
print(f"The text was split into {n_chunks} chunks")

367
The text was split into 367 chunks


## Add to a vector database

In [7]:
# Embed every chunk with the Ollama embedding model and index the vectors in a
# Chroma collection named "local-rag".
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    collection_name="local-rag",
)
print("The Vector Database Was Created Succcessfully!")

The Vector Database Was Created Succcessfully!


## SETUP LLM AND RETRIEVAL

In [8]:
# Name of the Ollama chat model used both for query rewriting and for answering.
local_model = "llama3.1"
llm = ChatOllama(
    model=local_model,
)

In [9]:
# Query prompt template: asks the LLM for two rephrasings of the user's
# question, one per line, to widen what the similarity search can match.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

In [10]:
# Retriever: wrap the vector store's retriever in a MultiQueryRetriever that
# uses `llm` together with QUERY_PROMPT.
base_retriever = vector_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Answering prompt: restricts the model to the retrieved context and asks it
# to reply in the language of the question (Swahili or English).
template = """Answer the question based ONLY on the following context:
{context}

Question: {question}

Your response must be in the same language as the question. If the question is in Swahili, respond in Swahili. If it is in English, respond in English.
"""
prompt = ChatPromptTemplate.from_template(template)

## Create Chain

In [11]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the Python repr of
    the whole document list (metadata, escaped newlines) instead of the text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG chain: the incoming question is sent to the retriever (whose results are
# flattened to plain text) and passed through unchanged as `question`; the
# filled prompt goes to the LLM and the reply is parsed to a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

## Chat With The PDF

In [12]:
def chat_with_pdf(question):
    """
    Ask the RAG chain a question and render its answer as Markdown.

    `question` is passed unchanged to `chain.invoke`. The function returns
    whatever `display` returns.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [13]:
# Question 1 (English): ask for the document's overall theme.
chat_with_pdf(question="What is the main idea of this document?")

The main idea of this document appears to be a set of regulations and guidelines related to taxation and financial reporting for multinational enterprise groups operating in Kenya. Specifically, it outlines the requirements for filing country-by-country reports, including information on constituent entities' activities, revenue, profit or loss, taxes paid, tangible assets, and employee numbers, among other details. The document also mentions exemptions from excise duty and taxable goods, as well as amendments to various tax-related sections of existing laws.

In [14]:
# Question 2 (Swahili; roughly "What is the 2025 Finance Bill about?").
chat_with_pdf(question="Mswada wa fedha wa 2025 unahusu nini?")

Mswada wa fedha wa 2025 unaungwa mkono na sheria mbalimbali kama vile Sheria ya Kifedha (Cap. 470), Sheria ya Kigusuko cha Uwekezaji (Cap. 476), Sheria ya Kodi ya Mashirika (Cap. 472), Sheria ya Mfumo wa Ushuru wa Taifa (Cap. 469B) na Sheria ya Malipo ya Mishahara na Malipizi ya Kitaalamu (Cap. 469C). Mswada pia unaongeza shirika ya mali, ambapo kuhamishwa kwa mali za kampuni hadi wa washikadau wake kama sehemu ya ukarabati wa ndani wa kampuni huchukuliwa kuwa huru kutoka kodi ya stamp.

In [15]:
# Question 3 (Swahili; roughly "What are the main elements of this document?").
chat_with_pdf(question="Vipengele vikuu katika hii hati ni ipi?")

Hati hiyo ya ujasusi iliyotolewa ina kipengele muhimu ambacho kinawasilisha mabadiliko yanayohusika katika sheria za fedha nchini Kenya. Hili linajumuisha mauzo ya digital, upatikanaji wa sifa, na malipo ya taasisi binafsi katika shirika kuu.

In [16]:
# Question 4 (English): list the taxes the document covers.
chat_with_pdf(question="What Taxes does the document address?")

The document addresses various taxes including Income Tax, Value Added Tax (VAT), Excise Duty, and Stamp Duty.