Merged
67 changes: 65 additions & 2 deletions docs/reference/eval.mdx
@@ -272,6 +272,46 @@ Step 4: Evaluate and report
gaia eval -d ./output/experiments -o ./output/evaluations
```

```bash
gaia report -d ./output/evaluations -o ./reports/pdf_summarization_report.md
```

Step 5: View results interactively

```bash
gaia visualize --experiments-dir ./output/experiments --evaluations-dir ./output/evaluations
```

### Workflow 3: PDF document summarization

**Complete example: Evaluate PDF document summarization capabilities**

Step 1: Generate synthetic PDF documents

```bash
gaia generate --pdf -o ./output/test_data/pdfs --pdf-types technical_spec white_paper --count-per-type 2
```

Step 2: Create evaluation standards

```bash
gaia groundtruth -d ./output/test_data/pdfs -p "*.pdf" --use-case pdf -o ./output/groundtruth
```

Step 3: Run experiments

Note: Edit `./src/gaia/eval/configs/pdf_summarization.json` to customize models, or create your own config.

```bash
gaia batch-experiment -c ./src/gaia/eval/configs/pdf_summarization.json -i ./output/groundtruth/consolidated_pdf_groundtruth.json -o ./output/experiments
```

Step 4: Evaluate and report

```bash
gaia eval -d ./output/experiments -o ./output/evaluations
```

```bash
gaia report -d ./output/evaluations -o ./reports/pdf_summarization_report.md
```
@@ -282,7 +322,7 @@ Step 5: View results interactively
gaia visualize --experiments-dir ./output/experiments --evaluations-dir ./output/evaluations
```

### Workflow 3: Document Q&A
### Workflow 4: Document Q&A

**Complete example: Test question-answering capabilities on documents**

@@ -320,7 +360,7 @@ Note: Include `test-data-dir` and `groundtruth-dir` to view source PDFs and Q&A
gaia visualize --experiments-dir ./output/experiments --evaluations-dir ./output/evaluations --test-data-dir ./data/pdf --groundtruth-dir ./output/groundtruth
```

### Workflow 4: Third-Party Model Testing
### Workflow 5: Third-Party Model Testing

**Complete example: Test external models (OpenAI, etc.) that can't be integrated directly**

@@ -555,6 +595,24 @@ gaia generate --email -o ./output/test_data/emails --email-types project_update
- `vendor_communication` - External vendor interactions
- `performance_feedback` - Performance review communications

#### PDF Documents

Generate PDF documents with full options:

```bash
gaia generate --pdf -o ./output/test_data/pdfs --pdf-types technical_spec white_paper --count-per-type 1 --target-tokens 2000
```

**Available PDF types:**
- `technical_spec` - Technical specification documents
- `business_proposal` - Business proposals and investment pitches
- `research_report` - Research findings and analysis reports
- `project_plan` - Project scope, timeline, and deliverables
- `policy_document` - Corporate policy and procedures manuals
- `white_paper` - Industry white papers on emerging technology
- `user_manual` - Product user manuals and documentation
- `financial_report` - Quarterly financial reports and analysis
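
If you script around the generator, the allowed type names can be checked before shelling out. A minimal sketch, assuming the supported set matches the list above (the helper name is illustrative, not part of the gaia CLI):

```python
# Illustrative helper; the set mirrors the documented --pdf-types values
ALLOWED_PDF_TYPES = {
    "technical_spec", "business_proposal", "research_report", "project_plan",
    "policy_document", "white_paper", "user_manual", "financial_report",
}

def validate_pdf_types(requested):
    """Return the requested types unchanged, or raise if any is unsupported."""
    invalid = sorted(t for t in requested if t not in ALLOWED_PDF_TYPES)
    if invalid:
        raise ValueError(f"Unsupported PDF types: {invalid}")
    return list(requested)
```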

### Ground Truth Creation

Transform synthetic data into evaluation standards.
@@ -573,6 +631,10 @@ gaia groundtruth -d ./output/test_data -p "*.pdf" --use-case qa -o ./output/grou
gaia groundtruth -d ./output/test_data -p "*.txt" --use-case email -o ./output/groundtruth
```

```bash
gaia groundtruth -d ./output/test_data -p "*.pdf" --use-case pdf -o ./output/groundtruth
```

Default behavior: skip files that already have ground truth generated

```bash
@@ -589,6 +651,7 @@ gaia groundtruth -d ./output/test_data -o ./output/groundtruth --force
- `qa` - Question-answer pair generation (requires groundtruth input for batch-experiment)
- `summarization` - Summary generation tasks (supports both embedded and external groundtruth)
- `email` - Email processing tasks (supports both embedded and external groundtruth)
- `pdf` - PDF document summarization (supports embedded groundtruth workflow)

#### Skip Existing Ground Truth Files

1 change: 1 addition & 0 deletions setup.py
@@ -136,6 +136,7 @@
"scikit-learn>=1.5.0",
"numpy>=2.0,<2.3.0",
"pypdf",
"reportlab",
],
"talk": [
"pyaudio",
68 changes: 43 additions & 25 deletions src/gaia/agents/summarize/agent.py
@@ -21,7 +21,8 @@
DETECTION_PROMPT_TEMPLATE,
DOCUMENT_SUMMARY_TEMPLATE,
ITERATIVE_SUMMARY_TEMPLATE,
SUMMARY_STYLES,
SUMMARY_STYLES_PDF,
SUMMARY_STYLES_TRANSCRIPTS,
SYSTEM_PROMPTS,
)

@@ -120,21 +121,28 @@ def __init__(
model: Optional[str] = None,
max_tokens: int = 1024,
max_ctx_size: int = 8192,
temperature: Optional[float] = None,
styles: Optional[List[str]] = None,
combined_prompt: bool = False,
use_claude: bool = False,
use_chatgpt: bool = False,
system_prompt_override: Optional[str] = None,
generation_params: Optional[Dict[str, Any]] = None,
):
self.model = model or self.DEFAULT_MODEL
self.max_tokens = max_tokens
self.temperature = temperature
self.styles = styles or ["executive", "participants", "action_items"]
self.combined_prompt = combined_prompt
self.use_claude = use_claude
self.use_chatgpt = use_chatgpt
self.system_prompt_override = system_prompt_override
self.generation_params = generation_params or {}
self.log = get_logger(__name__)
chat_config = ChatConfig(
model=self.model,
max_tokens=self.max_tokens,
temperature=self.temperature,
use_claude=self.use_claude,
use_chatgpt=self.use_chatgpt,
show_stats=True,
@@ -154,7 +162,8 @@ def __init__(
self.overlap_tokens = int(self.chunk_tokens * self.overlap_tokens_ratio)

# Load prompts from prompts.py
self.summary_styles = SUMMARY_STYLES
self.summary_styles_transcripts = SUMMARY_STYLES_TRANSCRIPTS
self.summary_styles_pdf = SUMMARY_STYLES_PDF
self.system_prompts = SYSTEM_PROMPTS
self.iterative_summary_template = ITERATIVE_SUMMARY_TEMPLATE
self.document_summary_template = DOCUMENT_SUMMARY_TEMPLATE
@@ -180,6 +189,8 @@ def _get_system_prompt(self, content_type: Optional[str] = None) -> str:
Returns:
System prompt string for the specified content type.
"""
if self.system_prompt_override:
return self.system_prompt_override
if content_type is None:
content_type = "transcript"
return self.system_prompts.get(
@@ -200,9 +211,14 @@ def _prepare_chat(self, input_type: str) -> None:
raise KeyError(f"Missing system prompt for '{input_type}' in prompts")
self.chat_sdk.config.system_prompt = system_prompt

def _validate_styles(self, styles: Any) -> None:
def _get_summary_styles(self, content_type: Optional[str]) -> Dict[str, str]:
if content_type == "pdf":
return self.summary_styles_pdf
return self.summary_styles_transcripts

def _validate_styles(self, styles: Any, content_type: Optional[str] = None) -> None:
"""Validate provided style or list of styles against prompt definitions."""
allowed = set((self.summary_styles or {}).keys())
allowed = set(self._get_summary_styles(content_type).keys())
provided = styles if isinstance(styles, list) else [styles]
invalid = [s for s in provided if s not in allowed]
if invalid:
@@ -286,7 +302,7 @@ def _stream_summary_content(self, content: str, input_type: str, style: str):
self._prepare_chat(input_type)
if not self._should_use_iterative(content):
prompt = self.generate_summary_prompt(content, input_type, style)
for chunk in self.chat_sdk.send_stream(prompt):
for chunk in self.chat_sdk.send_stream(prompt, **self.generation_params):
if chunk.is_complete:
yield {
"text": "",
@@ -327,7 +343,7 @@ def _iterative_summary_events(self, content: str, input_type: str, style: str):
overlap_tokens = int(chunk_tokens * self.overlap_tokens_ratio)
chunks = self.chunker.chunk_text(content, chunk_tokens, overlap_tokens)
for i, chunk in enumerate(chunks):
style_instruction = (self.summary_styles or {}).get(style)
style_instruction = self._get_summary_styles(input_type).get(style)
if not style_instruction:
raise KeyError(f"Missing style '{style}' in prompts")
if i == 0:
@@ -420,16 +436,15 @@ def detect_content_type(self, content: str, input_type: str = "auto") -> str:
detected_type = "transcript"
else:
# Fall back to LLM only if score is ambiguous
if self.detection_prompt_template:
detection_prompt = self.detection_prompt_template.format(
text_excerpt=content
)
detection_prompt = self.detection_prompt_template.format(
text_excerpt=content
)

# Add strict output constraints
for attempt in range(self.max_retries):
try:
response = self.llm_client.generate(
detection_prompt, model=self.model
detection_prompt, model=self.model, max_tokens=32
)
text = (response or "").strip().lower()
m = re.findall(r"[a-z]+", text)
@@ -462,23 +477,19 @@ def detect_content_type(self, content: str, input_type: str = "auto") -> str:
def generate_summary_prompt(
self, content: str, content_type: str, style: str
) -> str:
style_instruction = (self.summary_styles or {}).get(style)
style_instruction = self._get_summary_styles(content_type).get(style)
if not style_instruction:
raise KeyError(f"Missing style '{style}' in prompts")
if style == "participants" and content_type == "email":
prompt = f"""Extract the sender and all recipients from this email.\n\nFormat your response as JSON:\n{{\n \"sender\": \"sender email/name\",\n \"recipients\": [\"recipient1\", \"recipient2\"],\n \"cc\": [\"cc1\", \"cc2\"] (if any),\n \"bcc\": [\"bcc1\"] (if any)\n}}\n\nEmail content:\n{content}"""
elif style == "action_items":
prompt = f"""Extract all action items from this {content_type}.\n\n{style_instruction}\n\nFormat each action item with:\n- The specific action required\n- Who is responsible (if mentioned)\n- Any deadline or timeline (if mentioned)\n\nIf no action items are found, respond with \"No specific action items identified.\"\n\nContent:\n{content}"""
else:
prompt = f"""Analyze this {content_type} and {style_instruction}\n\nContent:\n{content}"""
prompt = f"""Analyze this {content_type} and {style_instruction}\n\nContent:\n{content}"""
return prompt

def generate_combined_prompt(
self, content: str, content_type: str, styles: List[str]
) -> str:
sections = []
style_map = self._get_summary_styles(content_type)
for style in styles:
style_instruction = (self.summary_styles or {}).get(style)
style_instruction = style_map.get(style)
if not style_instruction:
raise KeyError(f"Missing style '{style}' in prompts")
sections.append(f"- {style.upper()}: {style_instruction}")
@@ -489,8 +500,12 @@ def summarize_with_style(
self, content: str, content_type: str, style: str
) -> Dict[str, Any]:
start_time = time.time()
try:
self.chat_sdk.clear_history()
except Exception as e:
self.log.warning(f"Failed to clear chat history: {e}")
system_prompt = self._get_system_prompt(content_type)
style_instruction = (self.summary_styles or {}).get(style)
style_instruction = self._get_summary_styles(content_type).get(style)
if not style_instruction:
raise KeyError(f"Missing style '{style}' in prompts")
# Merge style guidance into the system prompt for consistent behavior
@@ -500,7 +515,9 @@
last_error = None
for attempt in range(self.max_retries):
try:
response = self.chat_sdk.send(prompt)
response = self.chat_sdk.send(
prompt, no_history=True, **self.generation_params
)
break
except Exception as e:
last_error = e
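
The retry loop above can be sketched as a standalone helper. This is a minimal sketch, not the PR's exact behavior: the function name and the optional linear backoff are illustrative additions.

```python
import time

def send_with_retries(send_fn, prompt, max_retries=3, backoff_s=0.0):
    """Call send_fn(prompt), retrying on any exception up to max_retries times."""
    last_error = None
    for attempt in range(max_retries):
        try:
            return send_fn(prompt)
        except Exception as e:
            last_error = e
            if backoff_s:
                time.sleep(backoff_s * (attempt + 1))  # simple linear backoff
    raise RuntimeError(f"all {max_retries} attempts failed") from last_error
```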
@@ -583,7 +600,7 @@ def summarize_combined(
system_prompt = self._get_system_prompt(content_type)
self.chat_sdk.config.system_prompt = system_prompt
prompt = self.generate_combined_prompt(content, content_type, styles)
response = self.chat_sdk.send(prompt)
response = self.chat_sdk.send(prompt, **self.generation_params)
perf_stats = self.llm_client.get_performance_stats()
processing_time_ms = int((time.time() - start_time) * 1000)
response_text = response.text
@@ -659,7 +676,7 @@ def summarize(
content_type = self.detect_content_type(content, input_type)
applicable_styles = styles or self.styles.copy()
# Early validation: fail fast with clear guidance if a style is unsupported
self._validate_styles(applicable_styles)
self._validate_styles(applicable_styles, content_type)
if content_type == "email" and "participants" in applicable_styles:
pass
if (
@@ -722,8 +739,9 @@ def summarize_stream(
self, content: str, input_type: str = "auto", style: str = "brief"
) -> Generator[Dict[str, Any], None, None]:
"""Stream a single-style summary, using iterative folding for large inputs."""
self._validate_styles(style)
yield from self._stream_summary_content(content, input_type, style)
content_type = input_type if input_type != "auto" else "transcript"
self._validate_styles(style, content_type)
yield from self._stream_summary_content(content, content_type, style)

def _ensure_path(self, file_path) -> Path:
"""Convert file_path to Path object if it's not already."""
30 changes: 13 additions & 17 deletions src/gaia/agents/summarize/prompts.py
@@ -5,8 +5,19 @@
Prompts and styles for the SummarizerAgent.
"""

# Summary styles
SUMMARY_STYLES = {
# Summary styles for transcripts and emails
SUMMARY_STYLES_TRANSCRIPTS = {
"brief": """Provide a brief executive summary (2-3 sentences) of the key outcomes and decisions from this transcript.""",
"detailed": """Provide a detailed summary of the transcript, covering all major topics, discussions, and outcomes in paragraph form.""",
"executive": """Provide a brief executive summary (2-3 sentences) of the key outcomes and decisions from this transcript.""",
"participants": "List the participants mentioned in this transcript. Include their roles or titles when available. Provide as a simple list.",
"action_items": "List the specific action items that were assigned during this meeting. Include who is responsible for each item when mentioned. Provide as a simple list.",
"key_decisions": "List the key decisions that were made during this meeting. Focus on concrete decisions and outcomes. Provide as a simple list.",
"topics_discussed": "List the main topics and subjects that were discussed in this meeting. Provide as a simple list.",
}

# Summary styles for PDFs and business documents (focused on quantitative data extraction)
SUMMARY_STYLES_PDF = {
"brief": """Write a VERY concise summary.
STRICT RULES:
- Limit to at most 3 sentences.
@@ -47,21 +58,6 @@
- Exclude qualitative marketing descriptions unless no metrics exist
- Maintain a formal, board-ready, outcome-focused tone.
- Do not use bullet points.""",
"participants": """Extract ONLY meeting participants.
STRICT RULES:
- Produce a bullet list as the only output.
- Format every line exactly as: "- Name — Role".
- If the role is unclear, infer only when safe; otherwise use "Role not specified".
- Do not add any narrative or summary.
- Do not mention actions or outcomes.""",
"action_items": """Extract ONLY action items.
STRICT RULES:
- Output only a bullet list.
- Use this exact format for each bullet:
"- Action: <description>; Owner: <person or team>; Deadline: <date or 'Not specified'>"
- Do not include any additional text.
- Include items that are explicitly stated or clearly implied.
- Avoid any inference beyond what is safely justified.""",
}
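
With two style dictionaries, the agent needs a lookup keyed by content type. A minimal sketch of that selection, mirroring the `_get_summary_styles` change in `agent.py` (the dictionary contents are abbreviated stand-ins, not the real prompt text):

```python
# Abbreviated stand-ins for the real style dictionaries
SUMMARY_STYLES_TRANSCRIPTS = {"brief": "transcript brief", "action_items": "list actions"}
SUMMARY_STYLES_PDF = {"brief": "pdf brief", "detailed": "pdf detailed"}

def get_summary_styles(content_type=None):
    """Pick the style map for the given content type; transcripts are the default."""
    if content_type == "pdf":
        return SUMMARY_STYLES_PDF
    return SUMMARY_STYLES_TRANSCRIPTS
```

Keeping the transcript map as the fallback preserves the pre-PR behavior for emails and any unrecognized content type.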

# System prompts for different input types