In [None]:
"""
!pip install llama_index
!pip install langchain_openai
!pip install langchain_community
!pip install langgraph
!pip install retriever
!pip install llama-index-utils-workflow
"""

# Chunking

In [None]:
import os
import warnings
from glob import glob
import openai
import nest_asyncio
from dotenv import load_dotenv, find_dotenv
from llama_index.llms.openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.core.node_parser import LangchainNodeParser, SentenceWindowNodeParser,SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.extractors import BaseExtractor,KeywordExtractor,TitleExtractor
from llama_index.core import Settings,SimpleDirectoryReader,StorageContext,VectorStoreIndex,load_index_from_storage, Document
from llama_index.core.schema import MetadataMode
warnings.filterwarnings('ignore')

# Settings---------- #
# Global configuration for the notebook: credentials, the default LLM and
# embedding model registered on llama-index `Settings`, the node parser, and
# the storage/data paths used by the functions below.

# Load environment variables from a .env file located by find_dotenv().
_ = load_dotenv(find_dotenv()) 
# Raises KeyError if OPENAI_API_KEY is not present in the environment.
openai.api_key  = os.environ['OPENAI_API_KEY']

# Default LLM registered on the global llama-index Settings object.
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    api_key=openai.api_key,
    temperature=0.1
)
# Default embedding model registered on the global Settings object.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=openai.api_key,
    embed_batch_size=100
)
# NOTE(review): Settings.node_parser is assigned a few lines below; in
# llama-index, text_splitter appears to be an alias of node_parser, so this
# SentenceSplitter is presumably overridden -- confirm which one is intended.
Settings.text_splitter = SentenceSplitter(chunk_size=512,chunk_overlap=50)

# Sentence-window parser; it is configured to use the "window" and
# "original_text" metadata keys. The retriever's MetadataReplacementPostProcessor
# targets the same "window" key.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=5,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
Settings.node_parser = node_parser

# Paths for storage
# Each path can be overridden through an environment variable of the same name;
# the defaults are resolved against the current working directory.
# NOTE(review): DB_DIR and INDEX_DIR are not referenced anywhere else in the
# visible notebook -- confirm they are still needed.
DB_DIR = os.getenv("DB_DIR", os.path.join(os.getcwd(), "docs", "chroma"))
INDEX_DIR = os.getenv("INDEX_DIR", os.path.join(os.getcwd(), "index"))
METADATA_ENRICHMENT_INDEX_DIR = os.getenv("METADATA_ENRICHMENT_INDEX_DIR", os.path.join(os.getcwd(), "enriched_index"))

# Folder containing the PDF files
BASE_DIR = os.getenv("BASE_DIR", os.getcwd())
DATA_FOLDER = os.path.join(BASE_DIR, "docs")

# Settings end----- #

class CustomExtractor(BaseExtractor):
    """Metadata extractor that merges a node's title and keywords into one field.

    For every node it produces ``{"custom": "<document_title>\\n<excerpt_keywords>"}``.
    Both ``document_title`` and ``excerpt_keywords`` must already be present in
    ``node.metadata``; a missing key raises ``KeyError``.

    ``BaseExtractor`` in ``llama_index.core`` declares the asynchronous
    ``aextract`` as the hook subclasses must implement, so it is provided here
    alongside the original synchronous ``extract``.
    """

    def extract(self, nodes):
        """Return one ``{"custom": ...}`` metadata dict per node, in input order."""
        return [
            {
                "custom": (
                    node.metadata["document_title"]
                    + "\n"
                    + node.metadata["excerpt_keywords"]
                )
            }
            for node in nodes
        ]

    async def aextract(self, nodes):
        """Async entry point; the work is pure in-memory so it defers to ``extract``."""
        return self.extract(nodes)

def metadata_enrichment_index(files=DATA_FOLDER, documents=None):
    """Create and persist a vector index whose nodes carry title/keyword metadata.

    Args:
        files: Where to read source files from when ``documents`` is not given.
            May be a directory (its ``.pdf`` files are used), a single file
            path, or an iterable of file paths.
        documents: Optional already-loaded llama-index documents. When provided
            (and non-empty) they are ingested directly and ``files`` is ignored.

    Returns:
        The ``VectorStoreIndex`` built from the enriched nodes. The index is
        also persisted to ``METADATA_ENRICHMENT_INDEX_DIR``.
    """
    # Presumably needed so the async extractors can run inside the notebook's
    # already-running event loop -- confirm if this is executed outside Jupyter.
    nest_asyncio.apply()

    transformations = [
        Settings.node_parser,
        TitleExtractor(nodes=5, llm=Settings.llm),
        KeywordExtractor(keywords=10, llm=Settings.llm),
    ]
    pipeline = IngestionPipeline(transformations=transformations)

    docs_nodes = []
    if documents:
        # Ingest the caller-supplied documents.
        docs_nodes.extend(pipeline.run(documents=documents))
    else:
        # Normalise `files` to a list of file paths. A bare string must not be
        # iterated directly, otherwise each character is treated as a path.
        if isinstance(files, (str, os.PathLike)):
            if os.path.isdir(files):
                file_paths = sorted(
                    os.path.join(files, name)
                    for name in os.listdir(files)
                    if name.endswith('.pdf')
                )
            else:
                file_paths = [files]
        else:
            file_paths = list(files)

        # Load and ingest one file at a time so nodes from different files are
        # produced by separate pipeline runs.
        for path in file_paths:
            docs = SimpleDirectoryReader(input_files=[path]).load_data()
            docs_nodes.extend(pipeline.run(documents=docs))

    index = VectorStoreIndex(nodes=docs_nodes, embed_model=Settings.embed_model)
    index.storage_context.persist(persist_dir=METADATA_ENRICHMENT_INDEX_DIR)
    return index

def llama_index_chunk_pdf(files=DATA_FOLDER, index_dir=METADATA_ENRICHMENT_INDEX_DIR):
    """Build a vector index from every PDF in ``files`` and persist it.

    The text of all loaded PDF pages is concatenated into a single
    ``Document`` before indexing. Any files previously stored directly inside
    ``index_dir`` are deleted first.

    Args:
        files: Directory that contains the ``.pdf`` files.
        index_dir: Directory the index is persisted to (created if missing).

    Returns:
        The newly built ``VectorStoreIndex``.

    Raises:
        ValueError: If ``files`` contains no ``.pdf`` file.
    """
    # Sorted so the order of the merged text does not depend on os.listdir().
    file_paths = sorted(
        os.path.join(files, name)
        for name in os.listdir(files)
        if name.endswith('.pdf')
    )
    if not file_paths:
        raise ValueError(f"No .pdf files found in {files!r}")

    documents = SimpleDirectoryReader(input_files=file_paths).load_data()
    document = Document(text="\n\n".join(doc.text for doc in documents))

    # Start from a clean index directory. Only plain files (and links) are
    # removed; os.remove() would raise on a sub-directory.
    os.makedirs(index_dir, exist_ok=True)
    for entry in os.listdir(index_dir):
        entry_path = os.path.join(index_dir, entry)
        if os.path.isfile(entry_path) or os.path.islink(entry_path):
            os.remove(entry_path)

    index = VectorStoreIndex.from_documents([document])
    index.storage_context.persist(persist_dir=index_dir)
    return index

def main():
    """Run the chunking step: index the PDFs found in DATA_FOLDER and persist the result."""
    llama_index_chunk_pdf(DATA_FOLDER)


# Runs when the cell/script is executed directly (not on import).
if __name__ == "__main__":
    main()

# Retriever

In [None]:
#from transformers import AutoModel, AutoTokenizer
from langchain_core.tools import tool
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor#, SentenceTransformerRerank
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.core.question_gen import LLMQuestionGenerator
from llama_index.core.question_gen.prompts import DEFAULT_SUB_QUESTION_PROMPT_TMPL

#BAAI method
# Define the model and the local directory to save the model
#model_name_or_path = "BAAI/bge-small-en-v1.5"
#local_model_path = "local_models/bge-small-en-v1.5"

# Load the model and tokenizer
#model = AutoModel.from_pretrained(model_name_or_path)
#tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Save the model and tokenizer to the local path
#model.save_pretrained(local_model_path)
#tokenizer.save_pretrained(local_model_path)

#print(f"Model and tokenizer saved to {local_model_path}")

'''settings'''
# Retrieval-time llama-index settings. These assignments replace the
# Settings.llm / Settings.node_parser values set in the chunking cell.
# NOTE(review): SentenceWindowNodeParser is not imported in this cell; it is
# only available if the chunking cell's imports have already been executed.
# NOTE(review): window_size is 10 here but 5 in the chunking cell -- confirm
# which value is intended.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=10,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
# No api_key is passed here (unlike the chunking cell); presumably the client
# falls back to the OPENAI_API_KEY environment variable -- confirm.
Settings.llm = OpenAI(model="gpt-4o-mini", temperature= 0.1, timeout=60)
#Settings.embed_model = "local:BAAI/bge-small-en-v1.5"
Settings.node_parser = node_parser


def get_sentence_window_query_engine(sentence_index, similarity_top_k=6):
    """Create a query engine over ``sentence_index`` with a window post-processor.

    Args:
        sentence_index: Index object exposing ``as_query_engine``.
        similarity_top_k: Number of nodes the engine retrieves per query.

    Returns:
        The query engine, configured with a ``MetadataReplacementPostProcessor``
        that targets the ``"window"`` metadata key.
    """
    window_postprocessor = MetadataReplacementPostProcessor(target_metadata_key="window")
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[window_postprocessor],
    )

def final_engine(engine, verbose=False):
    """Wrap ``engine`` in a ``SubQuestionQueryEngine`` that exposes it as the "docs" tool.

    The sub-question generator uses the default sub-question prompt template,
    prefixed with an instruction to start every generated question with
    'By first identifying and quoting the most relevant sources, '.

    Args:
        engine: Query engine that answers the generated sub-questions.
        verbose: Forwarded to ``SubQuestionQueryEngine.from_defaults``.

    Returns:
        The configured ``SubQuestionQueryEngine``.
    """
    source_quoting_prefix = """
            Follow the example, but instead of giving a question, always prefix the question
            with: 'By first identifying and quoting the most relevant sources, '.
            """
    sub_question_generator = LLMQuestionGenerator.from_defaults(
        llm=Settings.llm,
        prompt_template_str=source_quoting_prefix + DEFAULT_SUB_QUESTION_PROMPT_TMPL,
    )

    # Single tool: every sub-question is answered by the wrapped engine.
    docs_tool = QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name="docs",
            description="ESG information and portfolio constructions on companies.",
        ),
    )

    return SubQuestionQueryEngine.from_defaults(
        query_engine_tools=[docs_tool],
        question_gen=sub_question_generator,
        use_async=True,
        verbose=verbose,
    )

def llama_index_retriever_tool(index_path: str = METADATA_ENRICHMENT_INDEX_DIR, index_type='sentence', similarity_top_k=6):
    """
    Load a persisted llama-index index and expose it as a LangChain tool.

    Args:
        index_path: Directory the index was persisted to.
        index_type: When it contains the substring 'sentence', the
            sentence-window query engine is used; otherwise the index's plain
            query engine is used.
        similarity_top_k: Number of nodes retrieved per query.

    Returns:
        A LangChain tool named ``engine`` that takes a query string and returns
        the answer text produced by the sub-question query engine.

    Usage:
        llama_index_retriever_tool(path)
    """
    # Load the index
    storage_context = StorageContext.from_defaults(persist_dir=index_path)
    index = load_index_from_storage(storage_context)

    # Pick the base engine, then wrap it with sub-question decomposition.
    if 'sentence' in index_type:
        base_engine = get_sentence_window_query_engine(index, similarity_top_k=similarity_top_k)
    else:
        base_engine = index.as_query_engine(similarity_top_k=similarity_top_k)
    query_engine = final_engine(base_engine, verbose=True)

    # The docstring below is the tool description and is kept verbatim.
    # `query` is annotated so the generated tool schema declares a string argument.
    @tool
    def engine(query: str = ''):
        """
        A tool that utilizes RAG. It allows searching and retrieving information from documents using llama-index.\n
        params:
            query: query to RAG; user query or rephrase the query to find or infer what information needs to be searched.\n
        return: Retrieved information from the vector store of information\n
        usage: engine.invoke(query)
        """
        res = query_engine.query(query)
        # display
        print(f'---RAG---:\n {res}')
        # Return plain text rather than the llama-index response object so the
        # tool output is directly usable as message content.
        return str(res)

    return engine

# Graph

In [None]:
from typing import Annotated, Literal, Sequence, TypedDict
from langchain import hub
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langgraph.graph.message import add_messages

# Chat model name passed to every ChatOpenAI instance created in this cell.
MODEL_NAME = "gpt-4o-mini"
# Threshold compared against state["attempt_num"] in grade_documents: below it
# an irrelevant retrieval leads to "rewrite", otherwise to "generate_no_ans".
MAX_ATTEMPT = 5

class AgentState(TypedDict):
    """State dictionary passed between the graph nodes."""
    # The add_messages function defines how an update should be processed
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Attempt counter: updated by the agent and rewrite nodes, reset to 0 by
    # the generate / generate_no_ans nodes.
    attempt_num: int


### Edges
# def grade_documents(state) -> Literal["generate", "generate_no_ans"]:
def grade_documents(state) -> Literal["generate", "rewrite", "generate_no_ans"]:
    """
    Determines whether the retrieved documents are relevant to the question.

    The first message in the state is treated as the user question and the
    last message as the retrieved content. An LLM grades the pair with a
    'yes'/'no' score.

    Args:
        state (messages): The current state

    Returns:
        str: The name of the next node -- "generate" when the content is
        relevant, "rewrite" when it is not and fewer than MAX_ATTEMPT attempts
        have been made, otherwise "generate_no_ans".
    """

    print("---CHECK RELEVANCE---")

    # Data model
    class grade(BaseModel):
        """Binary score for relevance check."""

        binary_score: str = Field(description="Relevance score 'yes' or 'no'")

    # LLM
    model = ChatOpenAI(temperature=0.1, model=MODEL_NAME, streaming=True)

    # LLM with tool and validation
    llm_with_tool = model.with_structured_output(grade)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
        Here is the retrieved document: \n\n {context} \n\n
        Here is the user question: {question} \n
        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    # Chain
    chain = prompt | llm_with_tool

    messages = state["messages"]
    print("messages:", messages)

    question = messages[0].content
    print('Question:', question)
    docs = messages[-1].content

    scored_result = chain.invoke({"question": question, "context": docs})

    # Normalise the score: the model may answer "Yes", " yes " or "'yes'".
    raw_score = getattr(scored_result, "binary_score", "") or ""
    score = raw_score.strip().strip("'\"").lower()

    if score == "yes":
        print("---DECISION: DOCS RELEVANT---")
        print("docs:")
        print(docs)
        return "generate"

    # Not relevant: retry with a rewritten question until the attempt budget
    # is used up. A missing counter is treated as zero attempts.
    attempts_so_far = state.get("attempt_num") or 0
    if attempts_so_far < MAX_ATTEMPT:
        print("---DECISION: DOCS NOT RELEVANT---")
        next_node = "rewrite"
    else:
        print("---DECISION: DOCS NOT RELEVANT, MAX_ATTEMPT achieved---")
        next_node = "generate_no_ans"
    print(raw_score)
    print("docs:")
    print(docs)
    return next_node


### Nodes

def agent_with_tools(tools):
    """Build the agent node function with ``tools`` bound to its chat model.

    Args:
        tools: Tools the chat model may call.

    Returns:
        A function ``agent(state)`` suitable for use as a graph node.
    """
    def agent(state):
        """
        Invokes the agent model to generate a response based on the current state. Given
        the question, it will decide to retrieve using the retriever tool, or simply end.

        Args:
            state (messages): The current state

        Returns:
            dict: The state update: the agent response (to be appended to
            messages) and the incremented attempt counter
        """
        print("---CALL AGENT---")
        messages = state["messages"]
        # A missing or zero counter starts at 0; every agent call counts as one
        # attempt. The incoming state is left untouched -- the new value is
        # only reported through the returned update.
        attempt_num = (state.get("attempt_num") or 0) + 1
        model = ChatOpenAI(temperature=0.1, streaming=True, model=MODEL_NAME)
        model = model.bind_tools(tools)
        response = model.invoke(messages)
        # We return a list, because this will get added to the existing list
        return {"messages": [response], "attempt_num": attempt_num}
    return agent



def rewrite(state):
    """
    Transform the query to produce a better question.

    The first message in the state is taken as the original question and an
    LLM is asked to reformulate it.

    Args:
        state (messages): The current state

    Returns:
        dict: The state update: the re-phrased question (to be appended to
        messages) and the attempt counter, passed through unchanged
    """

    print("---TRANSFORM QUERY---")
    messages = state["messages"]
    question = messages[0].content

    msg = [
        HumanMessage(
            content=f""" \n 
    Look at the input and try to reason about the underlying semantic intent / meaning. \n 
    Here is the initial question:
    \n ------- \n
    {question} 
    \n ------- \n
    Formulate an improved question: """,
        )
    ]

    # Question re-writer model
    model = ChatOpenAI(temperature=0.1, model=MODEL_NAME, streaming=True)
    response = model.invoke(msg)
    # The agent node already increments attempt_num on every call; incrementing
    # it here as well counted each retry twice, so the MAX_ATTEMPT budget was
    # used up after roughly half as many retrievals.
    return {"messages": [response], "attempt_num": state.get("attempt_num") or 0}

def generate_no_ans(state):
    """
    Produce the fallback reply used when no relevant information was found.

    Args:
        state (messages): The current state (not inspected)

    Returns:
         dict: State update holding the fixed "nothing found" message and an
         attempt counter reset to 0
    """
    print("---GENERATE_NO_ANS---")
    fallback_reply = "No Relevant Info found in the documents"
    return dict(messages=[fallback_reply], attempt_num=0)

_RAG_PROMPT = None  # filled in by _get_rag_prompt() on first use


def _get_rag_prompt():
    """Return the "rlm/rag-prompt" hub prompt, pulling it only on first use."""
    global _RAG_PROMPT
    if _RAG_PROMPT is None:
        _RAG_PROMPT = hub.pull("rlm/rag-prompt")
    return _RAG_PROMPT


def generate(state):
    """
    Generate answer

    The first message in the state is used as the question and the content of
    the last message as the context for the RAG prompt.

    Args:
        state (messages): The current state

    Returns:
         dict: The state update: the generated answer text (to be appended to
         messages) and an attempt counter reset to 0
    """
    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    docs = messages[-1].content

    # Prompt (cached so the hub is not queried again for every answer)
    prompt = _get_rag_prompt()

    # LLM
    llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0, streaming=True)

    # Chain
    rag_chain = prompt | llm | StrOutputParser()

    # Run
    response = rag_chain.invoke({"context": docs, "question": question})
    # NOTE(review): `response` is a plain string; presumably add_messages
    # coerces it into a message object -- confirm the resulting message type.
    return {"messages": [response], "attempt_num": 0}



# Interface

In [None]:
import sys
import gradio as gr
from typing import TypedDict
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.messages import HumanMessage
from langchain_openai import OpenAIEmbeddings


def build_workflow(vecdb, framework=None):
    """Assemble and compile the retrieval agent graph.

    Graph layout:
        START -> agent
        agent -> retrieve (tools_condition outcome "tools") or END
        retrieve -> generate | rewrite | generate_no_ans (chosen by grade_documents)
        rewrite -> agent
        generate -> END, generate_no_ans -> END

    Args:
        vecdb: Path of the persisted index, passed to llama_index_retriever_tool.
        framework: Currently unused; accepted for backward compatibility.

    Returns:
        The compiled graph.
    """
    retriever_tool = llama_index_retriever_tool(vecdb)
    tools = [retriever_tool]

    # Define a new graph. The attempt counter lives in AgentState and is
    # initialised by the agent node, so nothing is set on the builder itself.
    workflow = StateGraph(AgentState)

    # Define the nodes we will cycle between
    workflow.add_node("agent", agent_with_tools(tools))  # agent
    retrieve = ToolNode(tools)
    workflow.add_node("retrieve", retrieve)  # retrieval
    workflow.add_node("rewrite", rewrite)  # Re-writing the question
    workflow.add_node("generate_no_ans", generate_no_ans)  # Generating a response after we know no document is relevant
    workflow.add_node(
        "generate", generate
    )  # Generating a response after we know the documents are relevant
    # Call agent node to decide to retrieve or not
    workflow.add_edge(START, "agent")

    # Decide whether to retrieve
    workflow.add_conditional_edges(
        "agent",
        # Assess agent decision
        tools_condition,
        {
            # Translate the condition outputs to nodes in our graph
            "tools": "retrieve",
            END: END,
        },
    )

    # Edges taken after the `retrieve` node is called; grade_documents returns
    # the name of the next node.
    workflow.add_conditional_edges(
        "retrieve",
        # Assess retrieved content
        grade_documents,
    )
    workflow.add_edge("generate", END)
    workflow.add_edge("generate_no_ans", END)
    workflow.add_edge("rewrite", "agent")

    # Compile
    return workflow.compile()


# Gradio Integration
def get_answer_func(graph):
    """Create the chat callback that answers questions by running ``graph``."""
    def get_answer_chat(question, history):
        """Run the graph on ``question`` and return the content of its last message.

        ``history`` is accepted as part of the callback signature but not used.
        """
        print("User question:", question)
        initial_state = {"messages": [HumanMessage(content=question)]}
        run_config = {"configurable": {"thread_id": 42, "max_attempt": 5}}
        final_state = graph.invoke(initial_state, config=run_config)
        return final_state["messages"][-1].content
    return get_answer_chat

# Load the index from the same location the chunking step persists to, so an
# overridden METADATA_ENRICHMENT_INDEX_DIR is honoured here as well.
index_path = METADATA_ENRICHMENT_INDEX_DIR
# The framework argument is optional; avoid an IndexError when the code is run
# without command-line arguments.
framework = sys.argv[1] if len(sys.argv) > 1 else None
graph = build_workflow(index_path, framework)

# Start the Gradio chat UI backed by the compiled graph.
gr.ChatInterface(
    get_answer_func(graph),
    chatbot=gr.Chatbot(height=300),
    title="Agent",
    description="Ask me any question",
    theme="soft"
).launch()

To be improved:

1. Runtime
2. Attempt Time
3. Data loading process