# FINANCE BILL 2025 RAG MODEL USING OLLAMA AND LANGCHAIN

## GETTING THE PDF FILE FOR THE RAG PIPELINE

In [1]:
import pandas as pd
import numpy as np
from langchain_community.document_loaders import PyPDFLoader # Best use case download and use it in case you do not have access to the online file
from langchain_community.document_loaders import OnlinePDFLoader # Gives you an option if you do not have the files locally

In [2]:
# Local PDF File Upload 
local_path = (r"C:\Users\user\Agents_Chatbots_Models\Rag-Finance-Bill2025-Eng-Swa\The Finance Bill 2025 (1).pdf")

if local_path:
    loader = PyPDFLoader(local_path)
    data = loader.load()
else:
    print("Upload a PDF File!")

In [3]:
# Preview The First Page
data[0].page_content

"SPECIAL ISSUE \nNATION I. '01.INCI I.FOR \nk P OR TI NC \nLIPRAPY  \n \n   \nKenya Gazette Supplement No. 63 (National Assembly Bills No. 19) \nREPUBLIC OF KENYA \nKENYA GAZETTE SUPPLEMENT \nNATIONAL ASSEMBLY BILLS, 2025 \nNAIROBI, 6th May, 2025 \nCONTENT \nBill for Introduction into the National Assembly— \nPAGE \nThe Finance Bill, 2025  \t 335 \nNATIONAL COUNCIL FOR \nLAW REPORTING \n0 9 MAY 2025 \nLIBRARY ARCHIVE \nPRINTED AND PUBLISHED BY THE GOVERNMENT PRINTER, NAIROBI"

## VECTOR EMBEDDINGS

In [4]:
# Install the nomic embed text model
!ollama pull nomic-embed-text

[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ´ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â § [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‡ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â � [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 970aa74c0a90: 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 274 MB                         [K
pulling c71d239df917: 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  11 KB                

In [5]:
# Show all models within the local system
!ollama list

NAME                       ID              SIZE      MODIFIED               
nomic-embed-text:latest    0a109f422b47    274 MB    Less than a second ago    
gpt-oss:20b                aa4295ac10c3    13 GB     2 hours ago               
llama2:latest              78e26419b446    3.8 GB    3 hours ago               
llama3.1:latest            46e0c10c039e    4.9 GB    16 hours ago              


In [6]:
# Import Libraries
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [7]:
# Split and Chunk
text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
chunks = text_splitter.split_documents(data)

In [8]:
# Add to a vector database
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model="nomic-embed-text",show_progress=True),
    collection_name="local-rag"
)

  embedding=OllamaEmbeddings(model="nomic-embed-text",show_progress=True),
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████| 134/134 [06:17<00:00,  2.82s/it]


## RETRIEVAL: CHATTING WITH PDF USING OLLAMA RAG

In [9]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [10]:
# LLM from Ollama
local_model = "llama3.1"
llm = ChatOllama(model=local_model)

  llm = ChatOllama(model=local_model)


In [11]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [12]:
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(),
    llm,
    prompt=QUERY_PROMPT
)

# RAG prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [13]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:
chain.invoke(input(""))

 what is the document about?


OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.15s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.15s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it]


'The document appears to be a draft of the Finance Bill 2025, which outlines proposed amendments to various tax laws in Kenya. The bill covers a range of topics, including:\n\n1. Tax exemptions for certain goods and services, such as:\n\t* Medicaments manufactured by pharmaceutical companies in Kenya\n\t* Transportation of sugarcane from farms to milling factories\n\t* Locally assembled and manufactured mobile phones\n2. Penalties for late submission of tax returns and failure to submit required documents\n3. Requirements for payment of withholding tax on certain types of income\n4. Exemptions from withholding tax for certain types of income, such as:\n\t* Interest earned by exempt persons or financial institutions\n\t* Dividends paid to shareholders in companies that are exempt from tax\n5. Amendments to the definition of "locally manufactured passenger motor vehicle" and the requirements for local content\n\nOverall, the document appears to be a detailed outline of proposed changes t

In [15]:
chain.invoke(input(""))

 What are this documents chapters


OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.53s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.26s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.30s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.25s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.29s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.29s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.24s/it]


"Based on the provided text, it appears that these documents are related to taxation laws in Kenya, specifically the Finance Bill 2025.\n\nAs for identifying chapters within these documents, I'll attempt to break down their content:\n\n1. **Chapter 1**: The first few sections (e.g., Section 89) seem to relate to exemptions from taxes, such as imported goods and services.\n2. **Chapter 2**: The next section (Section 112) discusses taxable goods for geothermal, oil, or mining prospecting/exploration.\n3. **Chapter 3**: This chapter appears to focus on objections and appeals processes, including sections like Section 59A, which introduces a data management and reporting system.\n\nPlease note that this is an approximation based solely on the provided text snippets. Without access to the full documents, it's challenging to accurately identify chapters or their exact topics."