In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the source PDF into `documents`.
# Alternative input file, kept for reference (currently disabled):
#loader = PyPDFLoader("752750_core_why_visual_analytics_whitepaper_0.pdf")
loader = PyPDFLoader("annual-report-2024.pdf")
documents = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
# chunk_size / chunk_overlap are the splitter's size and overlap settings;
# the overlap keeps text that straddles a chunk boundary present in both
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
)
chunks = splitter.split_documents(documents)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

import hashlib

# Free local embedding model (384-dim, fast, good quality)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


def _chunk_id(position, chunk):
    """Return a deterministic id for `chunk` at index `position`.

    The id is a SHA-256 digest of the chunk's position, its "source" and
    "page" metadata (when present) and its text. The position is included so
    that two identical chunks in the same batch never collide.
    """
    source = chunk.metadata.get("source", "")
    page = chunk.metadata.get("page", "")
    payload = f"{position}|{source}|{page}|{chunk.page_content}"
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


# Without explicit ids every run of this cell stores a fresh copy of all
# chunks under new random ids in the persisted collection, so the retriever
# starts returning duplicated passages. Stable ids turn a re-run into an
# upsert of the same records instead.
chunk_ids = [_chunk_id(position, chunk) for position, chunk in enumerate(chunks)]

# NOTE: ./chroma_db persists across runs. Chunks indexed earlier from a
# different PDF remain in the collection; delete the directory (or use a
# separate collection_name) when switching documents.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db"
)


In [3]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Hosted alternative, kept for reference (currently disabled; would also
# need ChatOpenAI to be imported):
#model = ChatOpenAI(model="gpt-4o", temperature=0)

# Chat model served through Ollama; temperature=0 is passed as the sampling
# temperature.
model = ChatOllama(
    model="llama3.2:1b",      # or llama3.2:3b
    temperature=0
)

# Retriever over the vector store; search_kwargs asks for k=4 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k":4})

# Two-message prompt: a system message carrying the retrieved {context} and
# the grounding instruction, followed by the user's {question}.
prompt = ChatPromptTemplate.from_messages([
    ("system", """Answer based only on the following context. 
    If the context doesn't contain the answer, say "I don't have that information"
    
    Context: {context}"""),
    ("human", "{question}"),
])

#def format_docs(docs):
#    return "\n\n".join(doc.page_content for doc in docs)

def format_docs(docs):
    """Join retrieved items into one context string.

    Accepts a mix of objects exposing ``page_content`` (e.g. LangChain
    Documents), dicts, and arbitrary other values.

    Args:
        docs: Iterable of retrieved items, or None.

    Returns:
        The items' text joined by blank lines. Returns "" when ``docs`` is
        None or empty.
    """
    # A missing result set is treated like an empty one instead of raising.
    if docs is None:
        return ""

    texts = []
    for doc in docs:
        if hasattr(doc, "page_content"):      # LangChain Document
            content = doc.page_content
        elif isinstance(doc, dict):           # Dict from retriever
            # Dicts without the key fall back to their repr.
            content = doc.get("page_content", str(doc))
        else:                                 # Fallback
            content = doc
        # str.join() rejects non-strings, so coerce here; a None payload
        # contributes an empty segment rather than the literal text "None".
        texts.append("" if content is None else str(content))
    return "\n\n".join(texts)

# RAG pipeline: the input question is sent to the retriever (whose results
# are flattened to text by format_docs) and also passed through unchanged as
# "question"; both fill the prompt, which goes to the model, and the model's
# reply is parsed to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt | model | StrOutputParser()
)

# NOTE(review): this question looks like it targets the visual-analytics
# whitepaper rather than the annual report loaded above -- confirm it is
# still wanted.
answer = rag_chain.invoke("What is the power of visual ai?")
print(answer)

In [4]:
# Single-turn query against the stateless chain; rebinds `answer`.
answer = rag_chain.invoke("What were the company's total revenues?")
print(answer)

The company's total revenue for the year is not explicitly stated in the provided context. However, it can be inferred from the information given.

At 1 January 2023:
- Total comprehensive income for the year: $2,914,264
- Profit for the year: -$617,917 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was not sufficient to cover its losses and cash outflows.

At 31 December 2023:
- Total comprehensive income for the year: $3,363,981
- Profit for the year: -$697,767 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was sufficient to cover its losses and cash outflows.

At 31 December 2024:
- Total comprehensive income for the year: $3,781,548
- Profit for the year: -$697,767 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was sufficient to cover its losses and cash outflows.

Based on this information, it appears that the company's total reve

In [None]:
# Stream the answer piece by piece instead of waiting for the full reply.
# end="" keeps the pieces on one line; flush=True shows each one immediately.
# Note: the loop variable `chunk` is unrelated to the `chunks` list of
# document chunks.
for chunk in rag_chain.stream("What were the main risk factors?"):
    print(chunk, end="", flush=True)

In [9]:
from langchain_core.runnables import RunnableLambda
from langchain_core.documents import Document

def format_docs(docs):
    """Flatten a collection of retrieved items into a single text blob.

    Dicts contribute their "page_content" value, or their repr when that
    value is missing or empty. Objects with a ``page_content`` attribute
    contribute that attribute. Anything else contributes ``str(item)``.
    Pieces are separated by a blank line; a falsy ``docs`` gives "".
    """
    def _text_of(entry):
        # Dicts are checked first, then Document-like objects.
        if isinstance(entry, dict):
            return entry.get('page_content', '') or str(entry)
        if hasattr(entry, 'page_content'):
            return entry.page_content
        return str(entry)

    if docs:
        return "\n\n".join([_text_of(entry) for entry in docs])
    return ""

# Force retriever to return Documents
def fix_retriever_output(docs):
    """Wrap list entries that lack ``page_content`` in a Document.

    Non-list inputs are returned untouched. For a list, a new list is built:
    entries that already expose ``page_content`` are kept as they are, and
    every other entry becomes ``Document(page_content=str(entry))``.
    """
    if not isinstance(docs, list):
        return docs

    normalized = []
    for entry in docs:
        if hasattr(entry, 'page_content'):
            normalized.append(entry)
        else:
            normalized.append(Document(page_content=str(entry)))
    return normalized

# Imported here so this cell does not depend on a later cell for the name.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# The shared `prompt` only has {context} and {question} slots, so a
# "chat_history" value fed into it is silently discarded and the model never
# sees earlier turns. This prompt adds an explicit slot for them between the
# system message and the new question.
memory_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "Answer based only on the following context and the conversation so far.\n"
        "If the context doesn't contain the answer, say "
        "\"I don't have that information\"\n\n"
        "Context: {context}",
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# Conversational RAG pipeline. The input question is used three ways:
#   - "context": retrieved, normalised to Documents, flattened to text
#   - "question": passed through unchanged
#   - "chat_history": a snapshot of the global `chat_history` list, looked up
#     at invoke time (so the list must be defined before the chain is invoked)
# NOTE: retrieval still uses only the raw question text, so a follow-up that
# depends on earlier turns may retrieve less relevant chunks.
rag_chain_with_memory = (
    {
        "context": retriever | fix_retriever_output | format_docs,
        "question": RunnablePassthrough(),
        "chat_history": lambda _question: list(chat_history)
    }
    | memory_prompt
    | model
    | StrOutputParser()
)


In [10]:
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage

# Running transcript of the conversation, read by rag_chain_with_memory at
# invoke time. Re-running this cell starts a fresh conversation.
chat_history = []


def ask(question):
    """Send `question` through the conversational chain and log the turn.

    The question and the chain's reply are appended to `chat_history`, in
    that order, only after the chain has returned.

    Returns:
        The chain's reply.
    """
    reply = rag_chain_with_memory.invoke(question)
    user_turn = HumanMessage(content=question)
    chat_history.append(user_turn)
    chat_history.append(AIMessage(content=reply))
    return reply

In [11]:
# First conversational turn; ask() also records it in chat_history.
print(ask("What were the company's total revenues?"))

The company's total revenue for the year is not explicitly stated in the provided context. However, it can be inferred from the information given.

At 1 January 2023:
- Total comprehensive income for the year: $2,914,264
- Profit for the year: -$617,917 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was not sufficient to cover its losses and cash outflows.

At 31 December 2023:
- Total comprehensive income for the year: $3,363,981
- Profit for the year: -$697,767 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was sufficient to cover its losses and cash outflows.

At 31 December 2024:
- Total comprehensive income for the year: $3,781,548
- Profit for the year: -$697,767 (loss)
- Cash flows from operating activities: $929,765

This suggests that the company's revenue was sufficient to cover its losses and cash outflows.

Based on this information, it appears that the company's total reve

In [18]:
# Follow-up turn: "that" only makes sense in light of the previous question,
# so this exercises whether earlier turns actually reach the model.
print(ask("How does that compare to last year?"))

There is no information about the previous year's financial statements in the provided context. The only information available is for this year, specifically the notes to the financial statements for the annual report 2024 of Prudential Singapore.
