# Prerequisites

### Python        - install via windows store
### Ollama        - https://ollama.com/

# Setup the Virtual Environment

In [1]:
# Create a virtual environment in ./venv with the standard-library venv module.
! python3 -m venv venv

# Activate venv - windows

In [2]:
# Windows: run the activation script of the virtual environment.
# NOTE(review): `!` runs the command in a separate subshell, so the activation
# presumably does not carry over to the notebook kernel or to later cells - confirm.
! .\venv\Scripts\activate

# Activate venv - linux / mac

In [None]:
# Linux / macOS: source the activation script of the virtual environment.
# NOTE(review): `!` runs the command in a separate subshell, so the activation
# presumably does not carry over to the notebook kernel or to later cells - confirm.
! source venv/bin/activate

# Install python libraries

In [None]:
! pip install -r requirements.txt

# Code - Setup the LLM

### REFERENCE DOC - https://python.langchain.com/docs/introduction/

In [2]:
# Define the local LLM (a chat model served by Ollama)
from langchain_ollama import ChatOllama

# Use the name of a model that is available in the local Ollama installation.
# List all available models:    ollama list
# Run specific model:           ollama run llama3.2:latest
# Check running model:          ollama ps
# Browse models:                https://ollama.com/search
llm = ChatOllama(model="llama3.2:latest")

### Test LLM

In [None]:
from langchain_core.messages import AIMessage

# Smoke test for the model: one system instruction plus one user sentence.
# Each message is written as a (role, content) tuple.
messages = [
    ("system", "You are a helpful assistant that translates English to French. Translate the user sentence."),
    ("human", "I love programming."),
]

# Send the whole conversation to the model in a single call.
ai_msg: AIMessage = llm.invoke(messages)
ai_msg  # last expression of the cell, so the notebook displays the reply

# Code - Prepare document

In [4]:
# Imports for turning documents into vectors and for indexing them
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings

# Embedding model used to convert documents to vectors.
# A specialized embedding model can be used instead:
# https://ollama.com/search?c=embedding
embeddings = OllamaEmbeddings(model="llama3.2:latest")

# Vector store that indexes documents using the embeddings defined above
vector_store = InMemoryVectorStore(embeddings)

In [27]:
# Document loader that reads the PDFs found in a directory
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Load the PDF documents from data/ by draining the lazy loader into a list
loader = PyPDFDirectoryLoader("data/")
docs_lazy = loader.lazy_load()
docs = list(docs_lazy)

# Index documents: build a new in-memory vector store from the loaded documents
vector_store = InMemoryVectorStore.from_documents(docs, embeddings)

# Check loaded document

In [None]:
import pprint

# Report how many documents were loaded, then show the metadata of the first one.
print(len(docs))
if docs:
    pprint.pp(docs[0].metadata)
else:
    # Without this guard docs[0] raises IndexError when nothing was loaded.
    print("No documents were loaded - check that data/ contains PDF files.")

# Define Prompt - How should the AI answer

In [None]:
# Define prompt for question-answering
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from the hub
prompt = hub.pull("rlm/rag-prompt")

# For reference, the prompt text as recorded by the notebook author
# ({question} and {context} are the inputs passed to prompt.invoke):
#
# You are an assistant for question-answering tasks.
# Use the following pieces of retrieved context to answer the question.
# If you don't know the answer, just say that you don't know.
# Use three sentences maximum and keep the answer concise.
# Question: {question}
# Context: {context}
# Answer:

# Retriever - Getting the Context

In [9]:
from typing_extensions import List, TypedDict
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph

# Define state for application
# It is updated by the Retriever and Generator steps
class State(TypedDict):
    """State dictionary shared by the retrieve and generate steps."""

    question: str  # question text; read by retrieve() and generate()
    context: List[Document]  # documents returned by retrieve(), read by generate()
    answer: str  # answer text returned by generate()

# Define application steps
# Retrieve step: look up documents close to the question to use as context
def retrieve(state: State):
    """Search the vector store with the question and return the hits as context.

    Args:
        state: Application state; only ``state["question"]`` is read.

    Returns:
        A dict ``{"context": ...}`` holding the result of
        ``vector_store.similarity_search`` for the question.
    """
    return {"context": vector_store.similarity_search(state["question"])}

# Test retriever

In [None]:
# invoke graph by passing question (required by retrieve step)
state = State(
    question="What does response code of 91 mean?",
    context=[],
    answer=""
)

response = retrieve(state);
pprint.pp(response.get("context"))


# Generator - Generate Answer based on Context + Question

In [11]:
# Generate step: answer the question using the retrieved documents
# State contains the question and retrieved documents
def generate(state: State):
    """Build the prompt from question + context and ask the LLM for an answer.

    Args:
        state: Application state; ``state["question"]`` and
            ``state["context"]`` are read.

    Returns:
        A dict ``{"answer": ...}`` holding the ``content`` of the LLM response.
    """
    # Concatenate the page contents, separated by a blank line.
    context_text = "\n\n".join([document.page_content for document in state["context"]])
    prompt_input = {"question": state["question"], "context": context_text}
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}



# Test Generator

In [None]:
# Call the generate step directly with a hand-built state.
# The context is retrieved for this same question, so the generator is not
# handed documents that were looked up for a different question.
state = State(
    question="What does response code of 61 mean?",
    context=[],
    answer="",
)
state["context"] = retrieve(state)["context"]

response = generate(state)
pprint.pp(response.get("answer"))

# Setup Graph - Retrieve > Generate

In [13]:
# Setup graph (retrieve -> generate)
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # steps are chained in list order
graph_builder.add_edge(START, "retrieve")  # execution enters at the retrieve step
graph = graph_builder.compile()

# Test Graph

In [None]:
# Invoke the compiled graph with only the question in the input
# and print the "answer" entry of the returned state
response = graph.invoke({"question": "What does response code of 91 mean?"})
print(response["answer"])

# Compare Model size

In [17]:
# https://enclaveai.app/blog/2024/05/13/understanding-llm-model-sizes/
# Rebinding the global `llm` changes the model that generate() uses on its next call.
# llm = ChatOllama(model="llama3.2:1b") # use model with 1b params
# NOTE(review): llama3.2:latest appears to be the 3b variant rather than 7b -
# confirm with `ollama show llama3.2:latest`
llm = ChatOllama(model="llama3.2:latest") # use the default (larger) llama3.2 model

# Demo on implementing a Chat Interface

### REFERENCE DOC - https://docs.chainlit.io/get-started/overview

In [None]:
! pip install chainlit

In [None]:
# Start the Chainlit app defined in app.py.
# NOTE(review): this presumably keeps the cell running until the server is
# stopped (interrupt the kernel to stop it) - confirm.
! chainlit run app.py