In [1]:
from pathlib import Path
import os

In [2]:
# The project root is taken to be the parent of the resolved current working
# directory, so the result depends on where the kernel was started.
PROJECT_ROOT = Path.cwd().resolve().parents[0]

# Folder that holds the PDF papers, kept as a plain string path.
paper_folder = os.fspath(PROJECT_ROOT / 'docs')

In [3]:
# Names of the PDF files in the papers folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make files[0] (used below to pick the PDF) reproducible across runs.
files = sorted(name for name in os.listdir(paper_folder) if name.endswith('.pdf'))
files

['2412.17149v1.pdf']

In [4]:
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered

In [5]:
# Pick the first PDF found in the papers folder and run the converter on it.
if not files:
    # Fail with an actionable message instead of a bare IndexError on files[0].
    raise FileNotFoundError(f"No PDF files found in {paper_folder!r}")
pdf_file = os.path.join(paper_folder, files[0])

# create_model_dict() provides the artifacts the converter is constructed with.
converter = PdfConverter(artifact_dict=create_model_dict())
rendered = converter(pdf_file)  # result of converting pdf_file

Recognizing layout: 100%|███████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.45s/it]
Running OCR Error Detection: 100%|██████████████████████████████████████████████████████| 1/1 [00:00<00:00, 25.62it/s]
Detecting bboxes: 0it [00:00, ?it/s]
Recognizing Text: 100%|█████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  4.58it/s]
Detecting bboxes: 0it [00:00, ?it/s]
Recognizing tables: 100%|███████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.88it/s]


In [6]:
# Exploratory: list every attribute and method name available on the converter.
dir(converter)

['__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'artifact_dict',
 'build_document',
 'config',
 'default_llm_service',
 'default_processors',
 'filepath_to_str',
 'initialize_processors',
 'layout_builder_class',
 'llm_service',
 'override_map',
 'page_count',
 'processor_list',
 'renderer',
 'resolve_dependencies',
 'use_llm']

In [7]:
# Build the document object for the same PDF; its pages and blocks are
# inspected in the cells below.
# NOTE(review): the progress output shows the layout / OCR / table stages
# running again, as they did for converter(pdf_file) — presumably this
# repeats that work; confirm whether one of the two calls can be dropped.
doc = converter.build_document(pdf_file)  # Use .render()

Recognizing layout: 100%|███████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.52s/it]
Running OCR Error Detection: 100%|██████████████████████████████████████████████████████| 1/1 [00:00<00:00, 25.40it/s]
Detecting bboxes: 0it [00:00, ?it/s]
Recognizing Text: 100%|█████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  6.39it/s]
Detecting bboxes: 0it [00:00, ?it/s]
Recognizing tables: 100%|███████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.85it/s]


In [8]:
# Materialise every block contained in the document as a flat list.
# (A previously disabled import of TitleBlock / TextBlock / TableBlock from
# marker.schema.block is not needed for this.)
blocks = list(doc.contained_blocks())

In [9]:
# Total number of blocks returned by doc.contained_blocks().
len(blocks)

3667

In [10]:
# Exploratory: dump the instance attributes of one sample block to see which
# fields (polygon, block_type, page_id, structure, ...) a block carries.
blocks[8].__dict__

{'polygon': PolygonBox(polygon=[[87.09500122070312, 94.7900390625], [508.1847229003906, 94.7900390625], [508.1847229003906, 109.13623046875], [87.09500122070312, 109.13623046875]], bbox=[87.09500122070312, 94.7900390625, 508.1847229003906, 109.13623046875]),
 'block_description': 'A line of text.',
 'block_type': <BlockTypes.Line: '1'>,
 'block_id': 22,
 'page_id': 0,
 'text_extraction_method': 'pdftext',
 'structure': [/page/0/Span/23, /page/0/Span/24],
 'ignore_for_output': False,
 'replace_output_newlines': False,
 'source': 'layout',
 'top_k': None,
 'metadata': None,
 'lowres_image': None,
 'highres_image': None,
 'removed': False,
 'formats': None}

In [11]:
# Distinct block-type labels present in the document, as strings.
block_types = {str(vars(blk).get('block_type')) for blk in blocks}

In [12]:
# Show the set of distinct block-type labels collected above.
block_types

{'Caption',
 'Equation',
 'Figure',
 'FigureGroup',
 'Line',
 'ListGroup',
 'ListItem',
 'PageFooter',
 'PageHeader',
 'Reference',
 'SectionHeader',
 'Span',
 'Table',
 'TableCell',
 'Text',
 'TextInlineMath'}

In [13]:
from dataclasses import dataclass, field

@dataclass
class MarkdownChunk:
    """A piece of text extracted from one document block.

    Attributes:
        content: The chunk's text.
        type: Block-type label of the source block (e.g. 'Text').
        page: Page identifier the block was found on.
        metadata: Extra key/value information about the chunk. Optional;
            each instance gets its own empty dict when it is omitted.
    """

    content: str
    type: str
    page: int
    # default_factory avoids sharing one mutable dict between instances.
    metadata: dict = field(default_factory=dict)


In [14]:
from collections import Counter

In [31]:
# Walk every block of every page, recording each block's type label and
# keeping the blocks whose type is Text, SectionHeader or Caption.
chunks = []
block_types = []
relevant_blocks = []

for page in doc.pages:
    for block in page.contained_blocks(doc):
        kind = str(block.block_type)
        block_types.append(kind)

        if kind in {"Text", "SectionHeader", "Caption"}:
            relevant_blocks.append(block)

# Same value the running counter would have reached: one per kept block.
block_counter = len(relevant_blocks)


In [32]:
# Turn each relevant block into a MarkdownChunk, skipping blocks without text.
#
# The block attributes dumped earlier in this notebook include `page_id` but
# no `text` or `page_num`, so the text is read with raw_text(doc) and the page
# with `page_id`, matching the other chunking cells in this notebook.
chunks = []

for block in relevant_blocks:
    text = block.raw_text(doc)
    content = text.strip() if text else ""
    if not content:
        # Nothing usable in this block.
        continue

    block_kind = str(block.block_type)
    chunks.append(MarkdownChunk(
        content=content,
        type=block_kind,
        page=block.page_id,
        metadata={
            "block_type": block_kind,
            "page_num": block.page_id
        }
    ))


In [45]:
# Exploratory: show the raw text of `block`, including its line breaks.
# NOTE(review): `block` is whatever an earlier loop left bound, so this cell
# depends on execution order.
block.raw_text(doc)

'Agentic AI systems use specialized agents to\nhandle tasks within complex workflows, en-\nabling automation and efficiency. However,\noptimizing these systems often requires labor-\nintensive, manual adjustments to refine roles,\ntasks, and interactions. This paper introduces a\nframework for autonomously optimizing Agen-\ntic AI solutions across industries, such as NLP-\ndriven enterprise applications. The system em-\nploys agents for Refinement, Execution, Evalu-\nation, Modification, and Documentation, lever-\naging iterative feedback loops powered by an\nLLM (Llama 3.2-3B). The framework achieves\noptimal performance without human input by\nautonomously generating and testing hypothe-\nses to improve system configurations. This\napproach enhances scalability and adaptability,\noffering a robust solution for real-world appli-\ncations in dynamic environments. Case studies\nacross diverse domains illustrate the transfor-\nmative impact of this framework, showcasing\nsignificant impr

In [49]:
# Print the identifier of every page in the document, one per line.
for page in doc.pages:
    print(page.page_id)

0
1
2
3
4
5
6
7
8
9
10


In [51]:
# Build one MarkdownChunk per non-empty Text / SectionHeader / Caption block,
# taking the page identifier from the page being iterated.
chunks = []

for page in doc.pages:
    for block in page.contained_blocks(doc):
        kind = str(block.block_type)
        if kind not in {"Text", "SectionHeader", "Caption"}:
            continue

        raw = block.raw_text(doc)
        # Skip blocks whose text is missing or only whitespace.
        if not raw or not raw.strip():
            continue

        chunk = MarkdownChunk(
            content=raw.strip(),
            type=kind,
            page=page.page_id,
            metadata={"block_type": kind, "page_num": page.page_id},
        )
        chunks.append(chunk)


In [52]:
# Number of chunks currently held in `chunks`.
len(chunks)

106

In [58]:
# Spot-check a single chunk from the list.
chunks[3]

MarkdownChunk(content='Abstract', type='SectionHeader', page=0, metadata={'block_type': 'SectionHeader', 'page_num': 0})

In [57]:
# Print the character length of each chunk's content to gauge chunk sizes.
for chunk in chunks:
    n_chars = len(chunk.content)
    print(n_chars)

130
33
61
8
1190
14
430
233
291
530
142
12
500
1
394
693
835
649
36
638
9
10
18
38
10
14
1057
956
444
780
23
889
779
24
1157
22
1683
14
501
490
25
1150
42
30
1079
39
27
494
443
44
18
852
18
244
35
601
35
17
849
34
25
641
185
42
22
355
244
451
12
1350
12
599
685
13
838
10
35
41
36
147
186
22
187
168
31
144
166
54
147
208
20
84
43
36
40
30
31
28
30
54
52
35
38
29
79
85


In [74]:
# Build plain-dict chunks for Text blocks, tagging each with the most recent
# section header seen while walking the document, plus its page id and
# bounding box (None when the block has no polygon).
chunks = []
current_section = None

for page in doc.pages:
    for block in page.contained_blocks(doc):
        kind = str(block.block_type)

        if kind == "SectionHeader":
            # Remember the heading; it labels the Text blocks that follow.
            current_section = block.raw_text(doc).strip()
        elif kind == "Text":
            chunks.append({
                "content": block.raw_text(doc),
                "section": current_section,
                "page": page.page_id,
                "bbox": block.polygon.bbox if block.polygon else None,
            })


In [75]:
# Number of Text chunks collected by the section-aware loop.
len(chunks)

54

In [86]:
# Spot-check one chunk dict (content, section, page, bbox).
chunks[3]

{'content': 'Agentic AI systems, composed of specialized\nagents working collaboratively to achieve com-\nplex objectives, have transformed industries such\nas market research, business process optimization,\nand product recommendation. These systems excel\nin automating decision-making and streamlining\nworkflows. However, their optimization remains\nchallenging due to the complexity of agent interac-\ntions and reliance on manual configurations.\n',
 'section': '1 Introduction',
 'page': 0,
 'bbox': [69.26171875,
  588.3491973876953,
  290.9458923339844,
  707.8177642822266]}

In [76]:
# Print the section label attached to each chunk, followed by a divider line.
for entry in chunks:
    print(entry['section'], '-----', sep='\n')

A Multi-AI Agent System for Autonomous Optimization of Agentic AI
Solutions via Iterative Refinement and LLM-Driven Feedback Loops
-----
A Multi-AI Agent System for Autonomous Optimization of Agentic AI
Solutions via Iterative Refinement and LLM-Driven Feedback Loops
-----
Abstract
-----
1 Introduction
-----
1 Introduction
-----
1 Introduction
-----
1 Introduction
-----
1 Introduction
-----
2 Background
-----
2 Background
-----
2 Background
-----
2 Background
-----
2 Background
-----
2 Background
-----
2 Background
-----
2 Background
-----
9: Initialization:
-----
3 Architecture
-----
3 Architecture
-----
3 Architecture
-----
3 Architecture
-----
3.1 Synthesis Framework
-----
3.1 Synthesis Framework
-----
3.2 Evaluation Framework
-----
3.3 Refinement Process
-----
4 Case Studies
-----
4 Case Studies
-----
4.1 Market Research Agent
-----
4.2 Medical AI Architect Agent
-----
4.3 Career Transition Agent
-----
4.3 Career Transition Agent
-----
4.4 Outreach Agent
-----
4.5 LinkedIn Agent
--

In [78]:
# Report each page's width and height, derived from its bounding box.
for page in doc.pages:
    x0, y0, x1, y1 = page.polygon.bbox  # [x0, y0, x1, y1]
    print(f"Page {page.page_id}: width={x1 - x0}, height={y1 - y0}")


Page 0: width=596.0, height=842.0
Page 1: width=596.0, height=842.0
Page 2: width=596.0, height=842.0
Page 3: width=596.0, height=842.0
Page 4: width=596.0, height=842.0
Page 5: width=596.0, height=842.0
Page 6: width=596.0, height=842.0
Page 7: width=596.0, height=842.0
Page 8: width=596.0, height=842.0
Page 9: width=596.0, height=842.0
Page 10: width=596.0, height=842.0


In [87]:
# Scratch check: import fastapi.responses under the alias `m` and print the
# module object to see which installed file it resolves to.
# NOTE(review): appears unrelated to the PDF chunking cells — confirm whether
# this cell can be removed.
import fastapi.responses as m
print(m)

<module 'fastapi.responses' from '/root/.cache/pypoetry/virtualenvs/chat-bot-ui-v5-xwCijAw3-py3.10/lib/python3.10/site-packages/fastapi/responses.py'>


In [88]:
# Exploratory: list the names exposed by the fastapi.responses module.
dir(m)

['Any',
 'FileResponse',
 'HTMLResponse',
 'JSONResponse',
 'ORJSONResponse',
 'PlainTextResponse',
 'RedirectResponse',
 'Response',
 'StreamingResponse',
 'UJSONResponse',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'orjson',
 'ujson']