In [1]:
import autogen
from autogen import AssistantAgent, LLMConfig
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen.agents.experimental import DocAgent
import os
from langchain_community.document_loaders import PyPDFLoader
from dotenv import load_dotenv
from autogen.agents.experimental import DocAgent

In [2]:
# Load variables from .env BEFORE reading the key below. Without this, on a
# fresh kernel os.getenv('OPENAI_API_KEY') returns None unless the key was
# already exported in the shell. Calling load_dotenv() again in a later cell
# is harmless.
load_dotenv()

# LLM settings handed to DocAgent.
llm_config = {
    'cache_seed': 42,
    'temperature': 1.,
    'top_p': 0.05,
    'config_list': [
        {
            'model': 'gpt-4o-mini',
            'api_key': os.getenv('OPENAI_API_KEY'),
            'api_type': 'openai',
        },
    ],
    'timeout': 1200,
}

In [3]:
# Make variables from a local .env file available through the environment.
load_dotenv()

# API key is read from the environment (None if it is not set).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Input document, vector-store directory, and the question asked throughout.
DOC_PATH = "data/UnderstandingDeepLearning_DiffusionModels.pdf"
CHROMA_PATH = "database"
QUERY = "What is the difference between diffusion models and GANs?"

In [5]:
# Echo the configured PDF path as a quick sanity check before it is ingested.
DOC_PATH


'data/UnderstandingDeepLearning_DiffusionModels.pdf'

In [6]:
# A single request covers both steps: ingest the PDF at DOC_PATH, then answer QUERY.
doc_agent = DocAgent(
    collection_name='summarise',
    llm_config=llm_config,
)
run_response = doc_agent.run(
    max_turns=1,
    message=f"ingest {DOC_PATH} and answer {QUERY}",
)

# Work through the run; the agents' conversation shows up as this cell's output.
run_response.process()

INFO:autogen.agents.experimental.document_agent.chroma_query_engine:Using existing collection summarise from the database.


[33muser[0m (to DocAgent):

ingest data/UnderstandingDeepLearning_DiffusionModels.pdf and answer What is the difference between diffusion models and GANs?

--------------------------------------------------------------------------------
[33m_User[0m (to chat_manager):

ingest data/UnderstandingDeepLearning_DiffusionModels.pdf and answer What is the difference between diffusion models and GANs?

--------------------------------------------------------------------------------
[32m
Next speaker: DocumentTriageAgent
[0m
[33mDocumentTriageAgent[0m (to chat_manager):

{"ingestions":[{"path_or_url":"data/UnderstandingDeepLearning_DiffusionModels.pdf"}],"queries":[{"query_type":"RAG_QUERY","query":"What is the difference between diffusion models and GANs?"}]}

--------------------------------------------------------------------------------
[32m
Next speaker: TaskManagerAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mTaskManagerAgent[0m (to chat_manager):

[32m***** Suggested 

INFO:autogen.agents.experimental.document_agent.document_utils:Detected file. Returning file path...


[32m
Next speaker: _Group_Tool_Executor
[0m
[35m
>>>>>>>> EXECUTING FUNCTION data_ingest_task...
Call ID: call_Wkos3RxYmB0K4jEY3GJDTYm4
Input arguments: {}[0m


INFO:docling.document_converter:Going to convert document batch...
INFO:docling.document_converter:Initializing pipeline for StandardPdfPipeline with options hash abf069e0fb0f219e247d3d2b243ad857
INFO:docling.models.factories.base_factory:Loading plugin 'docling_defaults'
INFO:docling.models.factories:Registered ocr engines: ['easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
INFO:easyocr.easyocr:Download complete
INFO:easyocr.easyocr:Download complete.
INFO:docling.utils.accelerator_utils:Accelerator device: 'cuda:0'
INFO:docling.utils.accelerator_utils:Accelerator device: 'cuda:0'
INFO:docling.models.factories.base_factory:Loading plugin 'docling_defaults'
INFO:docling.models.factories:Registered picture descriptions: ['vlm', 'api']
INFO:docling.pipeline.base_pipeline:Processing document UnderstandingDeepLearning_DiffusionModels.pdf
INFO:docling.document_converter:Finished converting document UnderstandingDeepLearning_DiffusionModels.pdf in 90.87 sec.
INFO:autogen.agents.expe

[33m_Group_Tool_Executor[0m (to chat_manager):

[32m***** Response from calling tool (call_Wkos3RxYmB0K4jEY3GJDTYm4) *****[0m
Data Ingestion Task Completed for data/UnderstandingDeepLearning_DiffusionModels.pdf
[32m**********************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: TaskManagerAgent
[0m
[33mTaskManagerAgent[0m (to chat_manager):

[Handing off to QueryAgent]

--------------------------------------------------------------------------------
[32m
Next speaker: QueryAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mQueryAgent[0m (to chat_manager):

[32m***** Suggested tool call (call_yqaJY8XZiI6h0kMtExrmb4eh): execute_rag_query *****[0m
Arguments: 
{}
[32m**********************************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: _Group_To

In [11]:
# Read the PDF at DOC_PATH with LangChain's PyPDFLoader. `pages` holds whatever
# load() returns (presumably one document per PDF page — TODO confirm).
loader = PyPDFLoader(DOC_PATH)
pages = loader.load()

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded pages into overlapping pieces for retrieval:
# chunk_size=500 with chunk_overlap=50 shared between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.split_documents(pages)

In [13]:
# The `langchain.embeddings.openai` / `langchain.vectorstores` paths are
# deprecated re-exports; the classes live in langchain_community, which this
# notebook already depends on for PyPDFLoader.
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# get OpenAI Embedding model
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Deterministic ids, one per chunk position. CHROMA_PATH is persisted on disk,
# so without explicit ids every re-run of this cell would insert a fresh copy
# of every chunk and similarity search would start returning duplicates.
# With stable ids a re-run overwrites the same records instead.
chunk_ids = [f"{DOC_PATH}::chunk-{index}" for index in range(len(chunks))]

# embed the chunks as vectors and load them into the database
db_chroma = Chroma.from_documents(
    chunks,
    embeddings,
    ids=chunk_ids,
    persist_directory=CHROMA_PATH,
)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [15]:
# Reuse the question from the configuration cell rather than repeating the
# literal, so the DocAgent run and this manual RAG pipeline always ask the
# same thing.
query = QUERY

# Fetch the 5 most similar chunks; each hit is a (document, score) pair.
docs_chroma = db_chroma.similarity_search_with_score(query, k=5)

# Concatenate the chunk texts, separated by blank lines, as the LLM context.
context_text = "\n\n".join(doc.page_content for doc, _score in docs_chroma)

In [16]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

In [17]:
# Prompt for the answer-generation step. It has two placeholders, {context}
# and {question}. The value starts and ends with a newline.
PROMPT_TEMPLATE = (
    "\n"
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "Answer the question based on the above context: {question}.\n"
    "Provide a detailed answer.\n"
    "Don’t justify your answers.\n"
    "Don’t give information not mentioned in the CONTEXT INFORMATION.\n"
    'Do not say "according to the context" or "mentioned in the context" or similar.\n'
)

In [18]:
# Fill the template with the retrieved context and the question.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query)

# call LLM model to generate the answer based on the given context and query.
# `predict` is deprecated on LangChain chat models; `invoke` is the supported
# call. It returns a message object, so the answer text is taken from
# `.content` to keep `response_text` a plain string as before.
model = ChatOpenAI()
response_text = model.invoke(prompt).content

  model = ChatOpenAI()
  response_text = model.predict(prompt)


In [19]:
# Show the generated answer as the cell's output.
response_text

'Diffusion models synthesize higher quality images than other generative models, such as GANs, and are simple to train. They are a special case of a hierarchical VAE where the encoder quality of the results is quantitatively superior to GAN models in terms of Fréchet Inception Distance. Additionally, diffusion models have been shown to work with a large family of degradations that do not have to be stochastic, including masking, morphing, blurring, and pixelating. On the other hand, GANs do not define a probability distribution over the data like diffusion models do, and GANs require architectural constraints on the network for training.'