# Prepare data

In [4]:
from data import Data, split, export_page_as_pdf
from pathlib import Path


# Directory that holds the dataset file and everything derived from it.
data_path = Path("./data")
data_path.mkdir(exist_ok=True)

# Read the JSON explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
data = Data.model_validate_json(
    (data_path / "data.json").read_text(encoding="utf-8")
)

# Only the second element returned by `split` is kept; it is used below as the
# validation set.
_, valid_data = split(data.examples, only_with_refs=True)

In [5]:
# Export pages of the validation dataset as PDFs.
# `pdfs_path` is where the source PDFs are read from, `valid_path` is where the
# exported single-page PDFs are written to.
pdfs_path, valid_path = data_path / "PLOS_1000", data_path / "pdfs_valid"

for directory in (pdfs_path, valid_path):
    directory.mkdir(exist_ok=True)

In [None]:
# For every validation example, export the page it refers to into its own PDF.
# The output file is named after the example's position, zero-padded to three
# digits ("000.pdf", "001.pdf", ...).
for index, sample in enumerate(valid_data):
    source_pdf = pdfs_path / sample.file / f"{sample.file}.pdf"
    export_page_as_pdf(source_pdf, sample.page, valid_path / f"{index:03d}.pdf")

In [6]:
# Directory that receives one XML file of extracted references per input PDF.
output_path = data_path.joinpath("gemini_valid")
output_path.mkdir(exist_ok=True)

# Inference

In [None]:
import pymupdf
from llamore import GeminiExtractor, References
import tempfile
from pathlib import Path
from rich.progress import track

# Read the API key from the first line of the local ".env" file, which is
# expected to look like NAME="value" (the quotes are optional).
with open(".env", "r", encoding="utf-8") as f:
    # Split on the first "=" only, so a value that itself contains "=" is
    # kept whole instead of being truncated.
    raw_key = f.readline().partition("=")[2]

GEMINI_API_KEY = raw_key.strip()

# Drop surrounding quotes only when a matching pair is really there; an
# unquoted value must not lose its first and last character.
if (
    len(GEMINI_API_KEY) >= 2
    and GEMINI_API_KEY[0] == GEMINI_API_KEY[-1]
    and GEMINI_API_KEY[0] in "\"'"
):
    GEMINI_API_KEY = GEMINI_API_KEY[1:-1]

# Fail early with a readable message instead of sending an empty key to the API.
if not GEMINI_API_KEY:
    raise ValueError(
        'No API key found on the first line of ".env" (expected NAME="value").'
    )

extractor = GeminiExtractor(api_key=GEMINI_API_KEY, model="gemini-2.5-flash")

In [8]:
# Collect the exported validation PDFs ordered by the integer value of their
# file stem ("000.pdf", "001.pdf", ...). Sorting numerically rather than by
# path string keeps the order correct beyond 999 files and matches how the
# XML results are ordered when they are loaded back in the Results section.
input_files = sorted(valid_path.glob("*.pdf"), key=lambda pdf: int(pdf.stem))
input_files;

In [None]:
# Run the extractor on every exported PDF and save the result as "<index>.xml"
# in `output_path`.
# The progress total comes from the sequence that is actually iterated, so the
# bar stays correct even if the number of PDFs on disk differs from the number
# of validation examples.
for i, file in track(enumerate(input_files), total=len(input_files)):
    refs = extractor(pdf=file.absolute())
    refs.to_xml(output_path / f"{i}.xml")

# Results

In [9]:
# Load the saved extraction results, ordered by the integer value of each
# file stem, producing one `References` object per XML file.
inputs = sorted(output_path.glob("*.xml"), key=lambda xml_file: int(xml_file.stem))
gemini_references = [References.from_xml(xml_file) for xml_file in inputs]

In [10]:
# Gold standard: one `References` object per validation example, built from
# that example's `refs`.
gold_references = []
for example in valid_data:
    gold_references.append(References(example.refs))

In [15]:
from llamore import F1

# Micro-averaged scores, with the extracted references passed first and the
# gold references second. The call is the cell's last expression, so its
# result is displayed.
micro_f1 = F1()
micro_f1.compute_micro_average(gemini_references, gold_references)

Output()

{'Reference.analytic_title': {'precision': 0.3280865715691097,
  'recall': 0.3280865715691097,
  'f1': 0.3280865715691097},
 'Reference.journal_title': {'precision': 0.6438356164383562,
  'recall': 0.5985849056603774,
  'f1': 0.6203862136396968},
 'Reference.authors.Person.first_name': {'precision': 0.8390296367112811,
  'recall': 0.8264861683343143,
  'f1': 0.8327106683271066},
 'Reference.authors.Person.surname': {'precision': 0.9619138300404666,
  'recall': 0.9231296402055968,
  'f1': 0.942122748732296},
 'Reference.editors.Person.first_name': {'precision': 0.125,
  'recall': 0.14545454545454545,
  'f1': 0.13445378151260504},
 'Reference.editors.Person.surname': {'precision': 0.140625,
  'recall': 0.16363636363636364,
  'f1': 0.15126050420168066},
 'Reference.publisher': {'precision': 0.38333333333333336,
  'recall': 0.4,
  'f1': 0.39148936170212767},
 'Reference.publication_date': {'precision': 0.849169741697417,
  'recall': 0.8663529411764705,
  'f1': 0.8576752853482413},
 'Refere

In [16]:
# Macro-averaged score, with the extracted references passed first and the
# gold references second. The call is the cell's last expression, so its
# result is displayed.
macro_f1 = F1()
macro_f1.compute_macro_average(gemini_references, gold_references)

Output()

0.6381822900623727