In [1]:
!pip install openai pypdf markdown

Collecting openai
  Downloading openai-2.8.1-py3-none-any.whl.metadata (29 kB)
Collecting pypdf
  Downloading pypdf-6.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markdown
  Downloading markdown-3.10-py3-none-any.whl.metadata (5.1 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Downloading jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Downloading pydantic-2.12.4-py3-none-any.whl.metadata (89 kB)
Collecting tqdm>4 (from openai)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.41.5 (from pydantic<3,>=1.9.0->openai)
  Downloading pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.3 kB)
Collecting typing-inspection>=0.4.2 (from 

In [3]:
import os
from openai import OpenAI

os.environ["OPENAI_API_KEY"] = "My_OPEN_API_KEY"

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [4]:
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello from Jupyter!"}]
)
print(resp.choices[0].message.content)

Hello! How can I assist you today in Jupyter? If you have any questions or need help with your code, feel free to ask!


In [8]:
from pathlib import Path
from pypdf import PdfReader
from typing import List, Dict
import json, textwrap, markdown

def read_pdf_file(path: Path, max_chars: int = 20000) -> str:
    reader = PdfReader(str(path))
    texts = []
    total = 0
    for page in reader.pages:
        page_text = page.extract_text() or ""
        if not page_text:
            continue
        if total + len(page_text) > max_chars:
            page_text = page_text[: max_chars - total]
        texts.append(page_text)
        total += len(page_text)
        if total >= max_chars:
            break
    return "\n".join(texts)

FAQ_SYSTEM_PROMPT = """You are an AI assistant that generates helpful, clear FAQs (question–answer pairs) from documents.

Given a document, you will:
- Identify 3–7 of the most important, likely user questions.
- Provide concise, accurate answers based ONLY on the given text.
- Avoid speculation.
Return valid JSON:
[
  {"question": "...", "answer": "..."},
  ...
]
"""

FAQ_USER_PROMPT_TEMPLATE = """DOCUMENT:

{chunk}

Based ONLY on this document content, generate 3–7 FAQ-style question and answer pairs.
Return ONLY valid JSON in the specified format.
"""

def call_llm_generate_faqs_one_chunk(text: str) -> List[Dict[str, str]]:
    user_prompt = FAQ_USER_PROMPT_TEMPLATE.format(chunk=text)

    resp = client.chat.completions.create(
        model="gpt-4o-mini", 
        messages=[
            {"role": "system", "content": FAQ_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.3,
    )

    raw = resp.choices[0].message.content.strip()

    try:
        start = raw.index("[")
        end = raw.rindex("]") + 1
        raw_json = raw[start:end]
    except ValueError:
        raw_json = raw

    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError:
        print("JSON decode failed. Raw output:")
        print(raw)
        return []

    faqs: List[Dict[str, str]] = []
    if isinstance(data, list):
        for item in data:
            q = str(item.get("question", "")).strip()
            a = str(item.get("answer", "")).strip()
            if q and a:
                faqs.append({"question": q, "answer": a})
    return faqs


In [7]:
docs_dir = Path("documents")

doc_path = docs_dir / "LLM as a judge.pdf"  
text = read_pdf_file(doc_path, max_chars=20000)
print("Loaded text length:", len(text))

faqs = call_llm_generate_faqs_one_chunk(text)

for i, faq in enumerate(faqs, 1):
    print(f"\nQ{i}: {faq['question']}\nA{i}: {faq['answer']}\n")


Loaded text length: 20006

Q1: What is LLM-as-a-Judge?
A1: LLM-as-a-Judge refers to the use of Large Language Models (LLMs) to evaluate objects, actions, or decisions based on predefined rules, criteria, or preferences, encompassing roles such as graders, evaluators, critics, and verifiers.


Q2: What are the main challenges in implementing LLM-as-a-Judge systems?
A2: The main challenges include the absence of a systematic review leading to fragmented understanding and inconsistent practices, as well as ensuring the reliability of evaluations aligned with established standards.


Q3: How does the paper propose to enhance the reliability of LLM-as-a-Judge systems?
A3: The paper explores strategies to enhance reliability by improving consistency, mitigating biases, and adapting to diverse assessment scenarios, along with proposing methodologies for evaluating the reliability of these systems.


Q4: What methodologies are suggested for evaluating LLM-as-a-Judge systems?
A4: The paper exam

In [None]:
##another documents

In [10]:
from pathlib import Path
import textwrap, markdown

output_dir = Path("faq_outputs")
output_dir.mkdir(exist_ok=True)

def faqs_to_markdown(title: str, faqs: List[Dict[str, str]]) -> str:
    lines = [f"# FAQs for {title}", ""]
    for idx, faq in enumerate(faqs, start=1):
        lines.append(f"## Q{idx}. {faq['question']}")
        lines.append("")
        wrapped_answer = textwrap.fill(faq["answer"], width=80)
        lines.append(wrapped_answer)
        lines.append("")
    return "\n".join(lines)

def save_markdown_and_html(out_dir: Path, doc_name: str, faqs: List[Dict[str, str]]):
    out_dir.mkdir(parents=True, exist_ok=True)

    md_text = faqs_to_markdown(doc_name, faqs)
    md_path = out_dir / f"{doc_name}_faqs.md"
    md_path.write_text(md_text, encoding="utf-8")
    print("Markdown saved to:", md_path)

    html_body = markdown.markdown(md_text)
    html_template = f"""<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>FAQs for {doc_name}</title>
</head>
<body>
{html_body}
</body>
</html>
"""
    html_path = out_dir / f"{doc_name}_faqs.html"
    html_path.write_text(html_template, encoding="utf-8")
    print("HTML saved to:", html_path)


In [12]:
docs_dir = Path("documents")
output_dir = Path("faq_outputs")

doc_files = [
    "LLM as a judge.pdf",
    "Generative AI project ideas.pdf",
    "Module 7 Homework.pdf",
    "NeurIPS-2023-judging-llm-as-a-judge-with-mt-bench-and-chatbot-arena-Paper-Datasets_and_Benchmarks.pdf",
]

for fname in doc_files:
    doc_path = docs_dir / fname
    print(f"\n=== Processing: {fname} ===")
    text = read_pdf_file(doc_path, max_chars=20000)
    print("Loaded text length:", len(text))

    faqs = call_llm_generate_faqs_one_chunk(text)
    print(f"Generated {len(faqs)} FAQs.")

    save_markdown_and_html(output_dir, doc_path.stem, faqs)



=== Processing: LLM as a judge.pdf ===
Loaded text length: 20006
Generated 6 FAQs.
Markdown saved to: faq_outputs/LLM as a judge_faqs.md
HTML saved to: faq_outputs/LLM as a judge_faqs.html

=== Processing: Generative AI project ideas.pdf ===
Loaded text length: 13950
Generated 7 FAQs.
Markdown saved to: faq_outputs/Generative AI project ideas_faqs.md
HTML saved to: faq_outputs/Generative AI project ideas_faqs.html

=== Processing: Module 7 Homework.pdf ===
Loaded text length: 5385
Generated 7 FAQs.
Markdown saved to: faq_outputs/Module 7 Homework_faqs.md
HTML saved to: faq_outputs/Module 7 Homework_faqs.html

=== Processing: NeurIPS-2023-judging-llm-as-a-judge-with-mt-bench-and-chatbot-arena-Paper-Datasets_and_Benchmarks.pdf ===
Loaded text length: 20004
Generated 7 FAQs.
Markdown saved to: faq_outputs/NeurIPS-2023-judging-llm-as-a-judge-with-mt-bench-and-chatbot-arena-Paper-Datasets_and_Benchmarks_faqs.md
HTML saved to: faq_outputs/NeurIPS-2023-judging-llm-as-a-judge-with-mt-bench-an