# Prerequisites

# Python        - install via windows store
# Ollama        - https://ollama.com/

# Setup the Virtual Environment

In [1]:
# Create a virtual environment in the ./venv folder using the stdlib venv module
! python3 -m venv venv

# Activate venv - windows

In [7]:
# NOTE(review): `!` runs the command in a temporary subshell, so this activation
# presumably does not carry over to the notebook kernel or to later cells.
# Selecting the venv's Python as the notebook kernel is the reliable route — TODO confirm.
! .\venv\Scripts\activate

# Activate venv - linux / mac

In [None]:
# NOTE(review): `!` runs the command in a temporary subshell, so this activation
# presumably does not carry over to the notebook kernel or to later cells.
# Selecting the venv's Python as the notebook kernel is the reliable route — TODO confirm.
! source venv/bin/activate

# Install python libraries

In [None]:
# Use the %pip magic rather than `! pip`: it installs into the environment of the
# running kernel, so the packages are importable from the cells below.
%pip install -r requirements.txt

# Code - Setup the LLM

# REFERENCE DOC - https://python.langchain.com/docs/introduction/

In [None]:
# Define local llm
from langchain_ollama import ChatOllama

# Use the name of a model that is available in the local Ollama install.
# List all available models:    ollama list
# Run specific model:           ollama run llama3.2:latest
# Check running model:          ollama ps
# Browse the model catalogue:   https://ollama.com/search
llm = ChatOllama(model="llama3.2:latest")

# Test LLM

In [12]:
from langchain_core.messages import AIMessage

# Conversation as (role, text) pairs: a system instruction followed by the
# human sentence to translate.
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
# Send the conversation to the local model defined above.
ai_msg = llm.invoke(messages)
# Bare last expression so the notebook displays the returned message object.
ai_msg

AIMessage(content='J\'aime programmer.\n\nNote: I used the formal "je" instead of the informal "tu", which is typically used with friends or people you know well in French. If you want to use the more casual form, you can say "J\'adore le programmement".', additional_kwargs={}, response_metadata={'model': 'llama3.2:latest', 'created_at': '2025-04-01T03:10:39.3574431Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1218333800, 'load_duration': 26410400, 'prompt_eval_count': 45, 'prompt_eval_duration': 86000000, 'eval_count': 58, 'eval_duration': 1104000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-076747b8-5bfa-4825-ac0c-48558a985fab-0', usage_metadata={'input_tokens': 45, 'output_tokens': 58, 'total_tokens': 103})

# Code - Prepare document

In [None]:
# Define embeddings to convert documents to vectors
from langchain_ollama import OllamaEmbeddings

# A specialized embedding model can be used here instead of the chat model:
# https://ollama.com/search?c=embedding
embeddings = OllamaEmbeddings( model="llama3.2:latest")

# Define vector store to index documents
from langchain_core.vectorstores import InMemoryVectorStore

# No documents are passed here; the next cell rebinds `vector_store` to a store
# built from the loaded PDF.
vector_store = InMemoryVectorStore(embeddings)

In [None]:
# Define document loader to load PDFs
from langchain_community.document_loaders import PyPDFLoader

# Load PDF document: collect every Document yielded by the loader into a list.
# Re-running this cell rebuilds `docs` from scratch, so it stays idempotent.
loader = PyPDFLoader(file_path="./path/to/data.pdf")
docs = list(loader.lazy_load())

# Index documents: embed them and build a fresh in-memory vector store
vector_store = InMemoryVectorStore.from_documents(docs, embeddings)

# Check loaded document

In [28]:
import pprint
# Number of Document objects produced by the loader
print(len(docs))
# Metadata of the first loaded Document
pprint.pp(docs[0].metadata)

2
{'producer': 'Microsoft: Print To PDF',
 'creator': 'PyPDF',
 'creationdate': '2021-06-10T13:29:17+08:00',
 'author': 'jinkybiscocho',
 'moddate': '2021-06-10T13:29:17+08:00',
 'title': 'Microsoft Word - BOTS chap08e - Response Codes',
 'source': './data/bancnet_response_codes_june2021.pdf',
 'total_pages': 2,
 'page': 0,
 'page_label': '1'}


# Define Prompt - How should the AI answer

In [19]:
# Define prompt for question-answering
from langchain import hub

# Pull the "rlm/rag-prompt" template from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")

# Text of the pulled prompt, noted here for reference:
# You are an assistant for question-answering tasks.
# Use the following pieces of retrieved context to answer the question.
# If you don't know the answer, just say that you don't know.
# Use three sentences maximum and keep the answer concise.
# Question: {question}
# Context: {context}
# Answer:



# Retriever - Getting the Context

In [24]:
from typing_extensions import List, TypedDict
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph

# Define state for application
# being updated by Retriever and Generator steps
class State(TypedDict):
    """State passed between the retrieve and generate steps."""

    question: str  # the user's question; read by both retrieve() and generate()
    context: List[Document]  # documents returned by retrieve()
    answer: str  # text returned by generate()

# Define application steps
# Retrieve top documents close to the question as context
def retrieve(state: State):
    """Fetch the documents most similar to the question.

    Runs a similarity search on the notebook-level ``vector_store`` with
    ``state["question"]`` and returns a partial state update that stores the
    hits under the ``"context"`` key.
    """
    question = state["question"]
    matches = vector_store.similarity_search(question)
    return {"context": matches}

# Test retriever

In [34]:
# Call the retrieve step directly (no graph involved yet) with a hand-built state
state = State(question="What does response code of 91 mean?", context=[], answer="")

response = retrieve(state)
pprint.pp(response.get("context"))


[Document(id='dbdeef0d-037e-4170-bcbc-38fad82e9e20', metadata={'producer': 'Microsoft: Print To PDF', 'creator': 'PyPDF', 'creationdate': '2021-06-10T13:29:17+08:00', 'author': 'jinkybiscocho', 'moddate': '2021-06-10T13:29:17+08:00', 'title': 'Microsoft Word - BOTS chap08e - Response Codes', 'source': './data/bancnet_response_codes_june2021.pdf', 'total_pages': 2, 'page': 1, 'page_label': '2'}, page_content='TITLE OF MANUAL \n \nBancNet Online System \nTechnical Specifications \nTITLE OF SECTION \n \nAppendices \n \nDOCUMENT \nCODE \nBOTS \nAUTHORIZATION \n \nAGL \nINITIAL ISSUE \nDATE \nMAY 1995 \nREVISION DATE \n \nAPRIL 2021 \nREVISION NO. \n \n6 \nPAGE NO. \nE-2 \n \nBancNet IST \nCODE \nReject Code \nIn ISO \nDescription Card \nRetained \nScreen \nMessage \n706 76 Atalla device time-out No 17 \n79,81,87,114 77 Synchronization error; Atalla transmission key \nunmatched \nNo 17 \n93 78 Undefined bank code; Invalid bank No 37 \n17 79 Invalid business date No 17 \n613 80 Bank not acce

# Generator - Generate Answer based on Context + Question

In [35]:
# Generate answer using retrieved documents in context
# State contains the question and retrieved documents
def generate(state: State):
    """Answer the question from the retrieved documents.

    Joins the ``page_content`` of every document in ``state["context"]`` with
    blank lines, fills the notebook-level ``prompt`` with that text and the
    question, sends the result to ``llm``, and returns a partial state update
    holding the reply text under the ``"answer"`` key.
    """
    page_texts = [doc.page_content for doc in state["context"]]
    prompt_input = {"question": state["question"], "context": "\n\n".join(page_texts)}
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}



# Test Generator

In [None]:
# Build the state for the new question first, then fill in its context by
# running the retrieve step on that same state. This way the retrieved
# documents match the question being answered, and the cell gives the same
# result on every run instead of depending on a leftover `state` variable.
state = State(
    question="What does response code of 61 mean?",
    context=[],
    answer="",
)
state["context"] = retrieve(state).get("context")

# Call the generate step directly (no graph involved yet) and show the answer text
response = generate(state)
pprint.pp(response.get("answer"))

('The response code of 61 means "Exceeds amount limit" and indicates that the '
 'transaction exceeds the allowed amount limit. This is a rejection code. The '
 'corresponding IST (Interbank Settlement Transfer) code for this response is '
 'also listed as 61, and the actual response code from the transferee bank is '
 'not specified.')


# Setup Graph - Retrieve > Generate

In [42]:
# Wire the two steps into a linear graph: START -> retrieve -> generate
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")

# Compile the builder into the runnable graph used below
graph = graph_builder.compile()

# Test Graph

In [53]:
# Run the compiled graph end to end; only the question is supplied as input
response = graph.invoke({"question": "What does response code of 91 mean?"})
# The returned state carries the generated text under "answer"
print(response["answer"])

Response code 91 indicates that the "Fail to send message to Issuer; Issuer is closed; Issuer is down; No line to TB; Failed to send request to TB; BancNet links all closed; Switch deinitialization; BancNet is closed; No".


# Compare Model size

In [None]:
# https://enclaveai.app/blog/2024/05/13/understanding-llm-model-sizes/
# llm = ChatOllama(model="llama3.2:1b") # use model with 1b params
# llm = ChatOllama(model="llama3.2:latest") # use model with 3b params

# Demo on implementing a Chat Interface

# REFERENCE DOC - https://docs.chainlit.io/get-started/overview

In [57]:
# Use the %pip magic rather than `! pip`: it installs into the environment of the
# running kernel instead of whichever pip happens to be first on PATH.
%pip install chainlit

Defaulting to user installation because normal site-packages is not writeable


In [58]:
# Start the Chainlit chat UI from app.py.
# NOTE(review): the recorded output ends with a bind error on 127.0.0.1:8000
# (WinError 10048), so the port was presumably still held by an earlier run —
# stop that process before re-running this cell.
! chainlit run app.py

2025-04-01 11:58:14 - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-04-01 11:58:15 - Your app is available at http://localhost:8000


ERROR:    [Errno 10048] error while attempting to bind on address ('127.0.0.1', 8000): [winerror 10048] only one usage of each socket address (protocol/network address/port) is normally permitted
