In [1]:
# Necessary for Colab, not necessary for course environment
%pip install -qq langchain langchain-nvidia-ai-endpoints gradio
%pip install -qq arxiv pymupdf
%pip install -U langchain-community

import os
os.environ["NVIDIA_API_KEY"] = "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"



In [2]:
from functools import partial
from rich.console import Console
from rich.style import Style
from rich.theme import Theme

# rich console whose print method backs the pprint helper below
console = Console()
# Default style for pprint output: bold text in hex color #76B900
base_style = Style(color="#76B900", bold=True)
# pprint(...) is console.print(...) with style=base_style pre-filled
pprint = partial(console.print, style=base_style)

In [3]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
# Ask the ChatNVIDIA client which models it can reach; as the last expression
# of the cell, the returned list is rendered as the cell output
ChatNVIDIA.get_available_models()

[Model(id='mistralai/mistral-7b-instruct-v0.3', model_type='chat', client='ChatNVIDIA', endpoint=None, aliases=['ai-mistral-7b-instruct-v03'], supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='mediatek/breeze-7b-instruct', model_type='chat', client='ChatNVIDIA', endpoint=None, aliases=['ai-breeze-7b-instruct'], supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='nvidia/llama-3.1-nemotron-70b-instruct', model_type='chat', client='ChatNVIDIA', endpoint=None, aliases=None, supports_tools=False, supports_structured_output=True, base_model=None),
 Model(id='meta/llama3-70b-instruct', model_type='chat', client='ChatNVIDIA', endpoint=None, aliases=['ai-llama3-70b'], supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='meta/llama-3.1-8b-instruct', model_type='chat', client='ChatNVIDIA', endpoint=None, aliases=None, supports_tools=True, supports_structured_output=True, base_model=None),
 Model(id

In [4]:
%%time
from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import ArxivLoader

## Loading in the file

## Unstructured File Loader: Good for arbitrary "probably good enough" loader
# documents = UnstructuredFileLoader("llama2_paper.pdf").load()

## More specialized loader, won't work for everything, but simple API and usually better results
documents = ArxivLoader(query="2404.16130").load()  ## GraphRAG
# documents = ArxivLoader(query="2404.03622").load()  ## Visualization-of-Thought
# documents = ArxivLoader(query="2404.19756").load()  ## KAN: Kolmogorov-Arnold Networks
# documents = ArxivLoader(query="2404.07143").load()  ## Infini-Attention
# documents = ArxivLoader(query="2210.03629").load()  ## ReAct

CPU times: user 407 ms, sys: 57.6 ms, total: 465 ms
Wall time: 1.03 s


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking configuration: target chunk size of 1200 with an overlap of 100,
# and a list of separators running from paragraph breaks down to the empty string.
splitter_options = dict(
    chunk_size=1200,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", ";", ",", " ", ""],
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into chunks and report how many were produced
docs_split = text_splitter.split_documents(documents)
print(len(docs_split))

50


In [6]:
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.passthrough import RunnableAssign
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import PydanticOutputParser

from langchain_nvidia_ai_endpoints import ChatNVIDIA

from pydantic import BaseModel, Field
from typing import List
from IPython.display import clear_output

# Pydantic schema for the knowledge base that is refined chunk by chunk.
# Documented with comments rather than a class docstring on purpose: pydantic
# includes a class docstring in the generated JSON schema, which would change
# the format instructions sent to the model.
class DocumentSummaryBase(BaseModel):
    # Cumulative summary text; starts empty
    running_summary: str = Field(
        default="",
        description="Running description of the document. Do not override; only update!",
    )
    # Key takeaways collected so far; starts empty
    main_ideas: List[str] = Field(
        default=[],
        description="Most important information from the document (max 3)",
    )
    # Questions still unanswered by the chunks seen so far; starts empty
    loose_ends: List[str] = Field(
        default=[],
        description="Open questions that would be good to incorporate into summary, but that are yet unknown (max 3)",
    )

# Template for a single refinement step. Placeholders:
#   {info_base}           - the current knowledge base
#   {format_instructions} - output-format description for the model
#   {input}               - the new chunk of text to fold in
_summary_template = (
    "You are generating a running summary of the document. Make it readable by a technical user. "
    "After this, the old knowledge base will be replaced by the new one. Make sure a reader can still understand everything. "
    "Keep it short, but as dense and useful as possible! The information should flow from chunk to (loose ends or main ideas) to running_summary. "
    "The updated knowledge base keep all of the information from running_summary here: {info_base}. "
    "\n\n{format_instructions}. Follow the format precisely, including quotations and commas "
    "\n\nWithout losing any of the info, update the knowledge base with the following: {input}"
)

summary_prompt = ChatPromptTemplate.from_template(_summary_template)

# Definition of the extraction function using a Pydantic model
def RExtract(pydantic_class, llm, prompt):
    '''
    Runnable Extraction module.

    Builds and returns the chain
        instruct_merge | prompt | llm | preparse | parser
    where:
      * instruct_merge adds a 'format_instructions' entry (taken from a
        PydanticOutputParser built for `pydantic_class`) to the input,
      * prompt and llm are the caller-supplied stages,
      * preparse cleans up the raw llm text (see below),
      * parser is that same PydanticOutputParser.

    Parameters:
        pydantic_class: class handed to PydanticOutputParser as `pydantic_object`.
        llm: stage placed after the prompt; preparse calls string methods on
             its output, so it must produce a str.
        prompt: stage placed before the llm.
    '''
    parser = PydanticOutputParser(pydantic_object=pydantic_class)
    instruct_merge = RunnableAssign({'format_instructions' : lambda x: parser.get_format_instructions()})
    def preparse(string):
        # Add a brace only when the text contains none of that kind at all
        if '{' not in string: string = '{' + string
        if '}' not in string: string = string + '}'
        # Undo backslash-escaped '_', ']' and '[' and flatten newlines to spaces.
        # The backslashes are written as "\\" so the literals are valid escape
        # sequences ("\]" / "\[" trigger a SyntaxWarning on recent Pythons).
        string = (
            string
            .replace("\\_", "_")
            .replace("\n", " ")
            .replace("\\]", "]")
            .replace("\\[", "[")
        )
        return string
    return instruct_merge | prompt | llm | preparse | parser

# Function to summarize document chunks
def RSummarizer(knowledge, llm, prompt, verbose=False):
    '''
    Build a runnable that folds a sequence of documents into a knowledge base.

    Parameters:
        knowledge: initial knowledge base; its class is passed to RExtract as
                   the schema and the object itself seeds the 'info_base' state.
        llm, prompt: forwarded unchanged to RExtract.
        verbose: when true, print progress and the current knowledge base after
                 every document, clearing the previous output each time.

    Returns:
        RunnableLambda wrapping summarize_docs(docs). Each element of `docs`
        must expose `.page_content`. The result is the final 'info_base';
        for an empty `docs` that is `knowledge` itself.

    Side effect:
        The module-level global `latest_summary` always mirrors the most recent
        'info_base' (regardless of `verbose`), so partial progress can be
        inspected even if a later chunk raises.
    '''
    def summarize_docs(docs):
        parse_chain = RunnableAssign({'info_base': RExtract(knowledge.__class__, llm, prompt)})
        state = {'info_base': knowledge}

        global latest_summary
        # Define the global up front so it exists even when `docs` is empty
        latest_summary = state['info_base']
        for i, doc in enumerate(docs):
            state['input'] = doc.page_content
            state = parse_chain.invoke(state)

            assert 'info_base' in state
            # Track progress unconditionally; previously this only happened in
            # verbose mode, leaving the global undefined or stale otherwise
            latest_summary = state['info_base']
            if verbose:
                print(f"Considered {i+1} documents")
                pprint(state['info_base'])
                clear_output(wait=True)

        return state['info_base']
    return RunnableLambda(summarize_docs)

# Set up the language model: a ChatNVIDIA client for the Mixtral 8x22B instruct
# model with max_tokens=4096 bound, piped into StrOutputParser
instruct_model = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1").bind(max_tokens=4096)
instruct_llm = instruct_model | StrOutputParser()

# Build the summarizer, seeded with an empty DocumentSummaryBase.
# verbose=True makes it print progress after every chunk.
summarizer = RSummarizer(DocumentSummaryBase(), instruct_llm, summary_prompt, verbose=True)

# RSummarizer returns a RunnableLambda, so it is run through .invoke(...);
# only the first 15 chunks of docs_split are summarized here
summary = summarizer.invoke(docs_split[:15])  # Use invoke here instead of calling summarizer as a function

# latest_summary is the module-level global that summarize_docs assigns while
# it runs (this call uses verbose=True)
print(latest_summary)  # Ensure latest_summary is updated appropriately in the summarize_docs function



running_summary="This document proposes a Graph RAG approach for scaling query-focused abstractive summarization over private text corpora, utilizing a two-stage LLM-based process involving entity detection, extraction, and summarization from source documents, followed by community detection for partitioning the graph index. The approach generates partial responses from each community summary, which are then summarized into a final response to the user. The proposed method outperforms naive RAG baselines for global sensemaking questions but faces challenges with scaling complex QFS tasks over entire corpora that may exceed LLM context window limits. The approach uses an LLM-derived knowledge graph for global summarization, leveraging graph modularity for partitioning. Evaluation is conducted using an LLM to generate sense-making questions from representative real-world datasets. The implementation is forthcoming in open-source Python. The study also explores varying the hierarchical le