In [17]:
! pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python gpt4all firecrawl-py PyMuPDF sentence_transformers python-dotenv langchain_intro


ERROR: Could not find a version that satisfies the requirement langchain_intro (from versions: none)
ERROR: No matching distribution found for langchain_intro

In [2]:
## index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.docstore.document import Document

# Load PDF documents
pdf_file_paths = ["./data/Medical Policy FY 23-24.pdf", "./data/Provident fund policy.pdf"]

# Collect the documents of every PDF that loads successfully; a file that
# cannot be loaded is reported and skipped so the remaining files still load.
docs_list = []
for pdf_path in pdf_file_paths:
    try:
        # Build the loader inside the try block too: constructing it can
        # itself fail (for example when the path is not a valid file), and
        # that failure should be reported and skipped like a parsing failure.
        loader = PyMuPDFLoader(pdf_path)
        loaded_docs = loader.load()
    except Exception as e:
        print(f"Error loading {pdf_path}: {e}")
    else:
        docs_list.extend(loaded_docs)

# Break the loaded documents into chunks of at most 250 tokens (as counted by
# the tiktoken encoder), with no overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,
    chunk_overlap=0,
)
doc_splitter = text_splitter.split_documents(docs_list)

# Rebuild every chunk with only its scalar (str/int/float/bool) metadata values.
# NOTE(review): presumably done because the vector store rejects non-scalar
# metadata values — confirm.
filtered_doc = [
    Document(
        page_content=chunk.page_content,
        metadata={
            key: value
            for key, value in chunk.metadata.items()
            if isinstance(value, (str, int, float, bool))
        },
    )
    for chunk in doc_splitter
    if isinstance(chunk, Document) and hasattr(chunk, 'metadata')
]

# Add to vectorDB
# `allow_download` is a boolean flag. The previous string value 'True' only
# worked because any non-empty string is truthy (so 'False' would have enabled
# downloads as well); pass a real bool instead.
embedding = GPT4AllEmbeddings(
    model_name="all-MiniLM-L6-v2.gguf2.f16.gguf",
    gpt4all_kwargs={'allow_download': True},
)

# Embed the cleaned chunks and store them in the "rag-chroma" collection.
vectorstore = Chroma.from_documents(
    documents=filtered_doc,
    collection_name="rag-chroma",
    embedding=embedding,
)

# Retriever over the collection; used as the "context" source of the RAG chain.
retriever = vectorstore.as_retriever()

In [3]:
# Name of the local model handed to ChatOllama.
# Other candidates to swap in: 'gemma', 'mistral'.
local_llm = "llama3"

In [26]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser


# Chat model backed by the selected local model; temperature is set to 0.
llm = ChatOllama(
    model=local_llm,
    temperature=0,
)

# Final chain step: parses the chat model's output into a plain string.
output_parser = StrOutputParser()


In [29]:
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
from langchain.schema.runnable import RunnablePassthrough

# System-side instructions for the retrieval-augmented (context) chain.
# NOTE(review): the leading "system" and trailing "assistant" words look like
# leftover chat-template role markers — confirm they are intentional.
context_template_str = """system You are an assistant for question-answering tasks. Use the following context to answer the question. Avoid phrases like "Based on the provided context". Explain the answer in the end. and make a heading with paragraph.
Question: {question}
Context: {context}
Answer: assistant"""

# The system message carries instructions, question and retrieved context;
# the human message repeats the bare question.
context_system_prompt = SystemMessagePromptTemplate.from_template(context_template_str)
context_human_prompt = HumanMessagePromptTemplate.from_template("{question}")

context_messages = [context_system_prompt, context_human_prompt]
context_prompt_template = ChatPromptTemplate.from_messages(context_messages)

# The invoked question is fed to the retriever (filling "context") and passed
# through unchanged (filling "question"); then prompt -> model -> string.
context_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | context_prompt_template
    | llm
    | output_parser
)

# Smoke-test the chain with a question about the indexed documents.
context_question = "Does original receipts must be submitted?"
context_answer = context_chain.invoke(context_question)
print(context_answer)

**Original Receipt Submission Requirement**

According to the Provident Fund Policy V2.0, original receipts must be submitted within 30 days of loan disbursement for construction on a plot. This is stated in point 2 under "Documents to be submitted" on page 6.

However, it's worth noting that this requirement only applies to construction on a plot and not to other types of withdrawals or expenses. For medicines, tests, procedures, etc., original receipts are required as per the Medical Policy FY 23-24.

**Key Takeaway**

Original receipts must be submitted within 30 days of loan disbursement for construction on a plot, but this requirement may vary depending on the type of expense or withdrawal.


In [37]:
# System-side instructions for the chain that answers WITHOUT retrieved context.
# The "don't know" sentence was garbled ("the say dont know") and is repaired
# so the model receives a well-formed instruction.
# NOTE(review): the leading "system" and trailing "assistant" words look like
# leftover chat-template role markers — confirm they are intentional.
no_context_system_template_str = """system You are an assistant for question-answering tasks. Answer the question concisely and directly. If you don't know the answer, then say you don't know.
Question: {question}
Answer: assistant"""

# The system message carries the instructions and the question; the human
# message repeats the bare question.
no_context_system_prompt = SystemMessagePromptTemplate.from_template(
    no_context_system_template_str
)
no_context_human_prompt = HumanMessagePromptTemplate.from_template("{question}")

no_context_messages = [no_context_system_prompt, no_context_human_prompt]
no_context_prompt_template = ChatPromptTemplate.from_messages(no_context_messages)

# The invoked question is passed through unchanged as "question";
# then prompt -> model -> string.
no_context_chain = (
    {"question": RunnablePassthrough()}
    | no_context_prompt_template
    | llm
    | output_parser
)

# Smoke-test the chain with a general-knowledge question.
no_context_question = "what is 911 event"
no_context_answer = no_context_chain.invoke(no_context_question)
print(no_context_answer)

The 9/11 event, also known as the September 11 attacks, was a series of coordinated terrorist attacks that occurred on September 11, 2001. On that day, four commercial airplanes were hijacked by terrorists affiliated with al-Qaeda. Two planes crashed into the Twin Towers of the World Trade Center in New York City, causing massive damage and loss of life. Another plane crashed into the Pentagon in Arlington, Virginia, and the fourth plane, believed to be heading for the White House or the U.S. Capitol Building, crashed in a field in Pennsylvania after passengers attempted to overpower the hijackers. In total, nearly 3,000 people were killed in the attacks.


In [16]:
import random
import time

def get_current_wait_time(hospital: str) -> int | str:
    """Dummy function to generate fake wait times"""

    if hospital not in ["A", "B", "C", "D"]:
        return f"Hospital {hospital} does not exist"

    # Simulate API call delay
    time.sleep(1)

    return random.randint(0, 10000)

In [None]:
from langchain.agents import (
    create_openai_functions_agent,
    Tool,
    AgentExecutor,
)
from langchain import hub

# Tools offered to the agent. The agent decides which tool to call from each
# tool's description, so the description must match what the tool really does.
tools = [
    Tool(
        # This tool runs `context_chain`, which answers from the indexed
        # Medical Policy and Provident Fund policy PDFs. The description
        # previously advertised patient reviews, which that chain cannot answer.
        # NOTE(review): the name "Reviews" is kept unchanged in case other
        # cells refer to it — consider renaming it (e.g. "Policies").
        name="Reviews",
        func=context_chain.invoke,
        description="""Useful when you need to answer questions
        about the Medical Policy or the Provident Fund policy
        documents, such as eligibility, limits, required documents
        or deadlines. Not useful for answering questions about
        current hospital wait times.
        Pass the entire question as input to the tool. For instance,
        if the question is "Must original receipts be submitted?",
        the input should be "Must original receipts be submitted?"
        """,
    ),
    Tool(
        # Backed by the dummy `get_current_wait_time` helper.
        name="Waits",
        func=get_current_wait_time,
        description="""Use when asked about current wait times
        at a specific hospital. This tool can only get the current
        wait time at a hospital and does not have any information about
        aggregate or historical wait times. This tool returns wait times in
        minutes. Do not pass the word "hospital" as input,
        only the hospital name itself. For instance, if the question is
        "What is the wait time at hospital A?", the input should be "A".
        """,
    ),
]
