from unittest.mock import patch


def test_submit_form(client):
    """Happy path: create a template, then fill a form that references it."""
    # Step 1: create a template. The Controller is mocked so no real PDF
    # processing or LLM call happens during the test.
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        template_payload = {
            "name": "Test Template",
            "pdf_path": "src/inputs/file.pdf",
            "fields": {
                "reporting_officer": "string",
                "incident_location": "string",
                "amount_of_victims": "string",
                "victim_name_s": "string",
                "assisting_officer": "string",
            },
        }
        template_res = client.post("/templates/create", json=template_payload)
        assert template_res.status_code == 200
        template_id = template_res.json()["id"]

    # Step 2: fill a form against the template created above.
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        form_payload = {
            "template_id": template_id,
            "input_text": (
                "Officer Voldemort here, at an incident reported at 456 Oak Street. "
                "Two victims, Mark Smith and Jane Doe. "
                "Handed off to Sheriff's Deputy Alvarez. End of transmission."
            ),
        }

        response = client.post("/forms/fill", json=form_payload)

        assert response.status_code == 200
        data = response.json()
        assert data["template_id"] == template_id
        assert data["output_pdf_path"] == "src/outputs/filled_test.pdf"
        assert data["input_text"] == form_payload["input_text"]
        assert "id" in data


def test_submit_form_invalid_template(client):
    """Filling a form against a non-existent template id yields 404."""
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        response = client.post(
            "/forms/fill",
            json={
                "template_id": 99999,
                "input_text": "Some random incident text here.",
            },
        )
        assert response.status_code == 404


def test_submit_form_missing_input_text(client):
    """Omitting the required input_text field fails request validation (422)."""
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        response = client.post("/forms/fill", json={"template_id": 1})
        assert response.status_code == 422
from unittest.mock import patch


def test_create_template(client):
    """POST /templates/create returns 200 plus the stored template record."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "name": "Template 1",
                "pdf_path": "src/inputs/file.pdf",
                "fields": {
                    "Employee's name": "string",
                    "Employee's job title": "string",
                    "Employee's department supervisor": "string",
                    "Employee's phone number": "string",
                    "Employee's email": "string",
                    "Signature": "string",
                    "Date": "string",
                },
            },
        )

        assert response.status_code == 200
        data = response.json()
        assert data["name"] == "Template 1"
        # The stored pdf_path is the generated template, not the source PDF.
        assert data["pdf_path"] == "src/inputs/file_template.pdf"
        assert "id" in data


def test_create_template_missing_name(client):
    """A payload lacking the required name field fails validation (422)."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "pdf_path": "src/inputs/file.pdf",
                "fields": {"Employee's name": "string"},
            },
        )
        assert response.status_code == 422


def test_create_template_missing_fields(client):
    """A payload lacking the required fields mapping fails validation (422)."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "name": "Bad Template",
                "pdf_path": "src/inputs/file.pdf",
            },
        )
        assert response.status_code == 422
from your original function - t2j = llm.main_loop() + # Generate dictionary of answers from your original function. + # main_loop_batch() extracts all fields in a single LLM call instead of + # one call per field, significantly reducing latency for large forms. + # Falls back to the sequential main_loop() if the LLM returns invalid JSON. + t2j = llm.main_loop_batch() textbox_answers = t2j.get_data() # This is a dictionary answers_list = list(textbox_answers.values()) diff --git a/src/llm.py b/src/llm.py index 70937f9..2be2417 100644 --- a/src/llm.py +++ b/src/llm.py @@ -131,5 +131,95 @@ def handle_plural_values(self, plural_value): return values + def build_batch_prompt(self): + """ + Builds a single prompt that asks the LLM to extract ALL target fields + at once and return them as a JSON object. + This replaces N sequential API calls with a single round-trip. + """ + fields_list = json.dumps(list(self._target_fields.keys()), indent=2) + prompt = f""" +SYSTEM PROMPT: +You are an AI assistant that extracts structured data from incident transcriptions. +Extract values for ALL of the following JSON fields from the text below. +Return ONLY a valid JSON object with no extra explanation, commentary, or markdown fences. +If a field is plural and multiple values exist in the text, use a list of strings. +If a value cannot be found in the text, use null. + +FIELDS TO EXTRACT: +{fields_list} + +TEXT: +{self._transcript_text} + +OUTPUT FORMAT: +{{ + "field_name": "extracted value or null", + ... +}} +""" + return prompt + + def main_loop_batch(self): + """ + Single-call extraction — replaces the N sequential calls in main_loop(). + Sends one prompt containing all target fields and parses the JSON response. + Falls back to main_loop() if the LLM does not return valid JSON. 
+ """ + prompt = self.build_batch_prompt() + ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/") + ollama_url = f"{ollama_host}/api/generate" + + payload = { + "model": "mistral", + "prompt": prompt, + "stream": False, + } + + try: + response = requests.post(ollama_url, json=payload) + response.raise_for_status() + except requests.exceptions.ConnectionError: + raise ConnectionError( + f"Could not connect to Ollama at {ollama_url}. " + "Please ensure Ollama is running and accessible." + ) + except requests.exceptions.HTTPError as e: + raise RuntimeError(f"Ollama returned an error: {e}") + + raw = response.json()["response"].strip() + + # Strip markdown code fences if the model wrapped the output + if raw.startswith("```"): + parts = raw.split("```") + # parts[1] is the fenced block; drop a leading "json" language tag if present + raw = parts[1].lstrip("json").strip() + + try: + extracted = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"\t[WARN] main_loop_batch: LLM did not return valid JSON ({e}). " + "Falling back to sequential main_loop()." 
+ ) + return self.main_loop() + + # Populate self._json using the existing add_response_to_json logic + for field in self._target_fields.keys(): + value = extracted.get(field) + if value is None: + self.add_response_to_json(field, "-1") + elif isinstance(value, list): + self.add_response_to_json(field, "; ".join(str(v) for v in value)) + else: + self.add_response_to_json(field, str(value)) + + print("----------------------------------") + print("\t[LOG] Resulting JSON created from the input text (batch mode):") + print(json.dumps(self._json, indent=2)) + print("--------- extracted data ---------") + + return self + def get_data(self): return self._json diff --git a/tests/test_llm.py b/tests/test_llm.py new file mode 100644 index 0000000..bfd1b05 --- /dev/null +++ b/tests/test_llm.py @@ -0,0 +1,177 @@ +import json +from unittest.mock import patch, MagicMock +from src.llm import LLM + + +SAMPLE_TRANSCRIPT = ( + "Officer Voldemort here, at an incident reported at 456 Oak Street. " + "Two victims, Mark Smith and Jane Doe. " + "Handed off to Sheriff's Deputy Alvarez. End of transmission." 
import json
from unittest.mock import patch, MagicMock
from src.llm import LLM


SAMPLE_TRANSCRIPT = (
    "Officer Voldemort here, at an incident reported at 456 Oak Street. "
    "Two victims, Mark Smith and Jane Doe. "
    "Handed off to Sheriff's Deputy Alvarez. End of transmission."
)

SAMPLE_FIELDS = {
    "reporting_officer": "string",
    "incident_location": "string",
    "victim_name_s": "string",
    "assisting_officer": "string",
}


def _make_mock_response(payload: dict) -> MagicMock:
    """Helper: build a mock requests.Response that returns payload as JSON."""
    mock_resp = MagicMock()
    mock_resp.json.return_value = {"response": json.dumps(payload)}
    mock_resp.raise_for_status = MagicMock()
    return mock_resp


# ---------------------------------------------------------------------------
# build_batch_prompt
# ---------------------------------------------------------------------------

def test_build_batch_prompt_contains_all_fields():
    llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
    prompt = llm.build_batch_prompt()

    for field in SAMPLE_FIELDS:
        assert field in prompt, f"Expected field '{field}' in batch prompt"

    assert SAMPLE_TRANSCRIPT in prompt


def test_build_batch_prompt_contains_transcript():
    llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
    prompt = llm.build_batch_prompt()
    assert SAMPLE_TRANSCRIPT in prompt


# ---------------------------------------------------------------------------
# main_loop_batch — happy path
# ---------------------------------------------------------------------------

def test_main_loop_batch_single_api_call():
    """main_loop_batch must call the Ollama API exactly once, regardless of field count."""
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": ["Mark Smith", "Jane Doe"],
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)) as mock_post:
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm.main_loop_batch()

    assert mock_post.call_count == 1, (
        f"Expected exactly 1 API call, got {mock_post.call_count}. "
        "main_loop_batch should not loop per-field."
    )


def test_main_loop_batch_populates_all_fields():
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": None,  # missing value
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    assert result["reporting_officer"] == "Officer Voldemort"
    assert result["incident_location"] == "456 Oak Street"
    # FIX: main_loop_batch maps JSON null to the "-1" not-found sentinel
    # (matching the sequential flow); it never stores Python None.
    assert result["victim_name_s"] == "-1"
    assert result["assisting_officer"] == "Deputy Alvarez"


def test_main_loop_batch_handles_list_values():
    """Plural values returned as a JSON list should be joined into a '; ' separated string."""
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": ["Mark Smith", "Jane Doe"],
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    # FIX: assert the joined string the implementation actually produces,
    # as this test's own docstring describes (not the raw list).
    assert result["victim_name_s"] == "Mark Smith; Jane Doe"


# ---------------------------------------------------------------------------
# main_loop_batch — markdown code-fence stripping
# ---------------------------------------------------------------------------

def test_main_loop_batch_strips_markdown_fences():
    raw_with_fences = (
        "```json\n"
        + json.dumps({
            "reporting_officer": "Officer Voldemort",
            "incident_location": "456 Oak Street",
            "victim_name_s": None,
            "assisting_officer": "Deputy Alvarez",
        })
        + "\n```"
    )

    mock_resp = MagicMock()
    mock_resp.json.return_value = {"response": raw_with_fences}
    mock_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=mock_resp):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    assert result["reporting_officer"] == "Officer Voldemort"


# ---------------------------------------------------------------------------
# main_loop_batch — fallback to sequential main_loop on bad JSON
# ---------------------------------------------------------------------------

def test_main_loop_batch_falls_back_on_invalid_json():
    """If the LLM returns garbage instead of JSON, fall back to main_loop()."""
    bad_resp = MagicMock()
    bad_resp.json.return_value = {"response": "Sorry, I cannot help with that."}
    bad_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=bad_resp):
        with patch.object(LLM, "main_loop", return_value=MagicMock()) as mock_fallback:
            llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
            llm.main_loop_batch()
            mock_fallback.assert_called_once()


# ---------------------------------------------------------------------------
# main_loop_batch vs main_loop — call count comparison
# ---------------------------------------------------------------------------

def test_main_loop_batch_fewer_calls_than_main_loop():
    """
    Explicitly show that main_loop_batch makes 1 call while main_loop
    makes len(fields) calls — the core performance improvement.
    """
    n_fields = len(SAMPLE_FIELDS)
    llm_response = {k: "value" for k in SAMPLE_FIELDS}

    with patch("requests.post", return_value=_make_mock_response(llm_response)) as mock_post:
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm.main_loop_batch()
        batch_calls = mock_post.call_count

    single_resp = MagicMock()
    single_resp.json.return_value = {"response": "some value"}
    single_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=single_resp) as mock_post:
        llm2 = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm2.main_loop()
        sequential_calls = mock_post.call_count

    assert batch_calls == 1
    assert sequential_calls == n_fields
    assert batch_calls < sequential_calls
from datetime import datetime, timezone

from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel


def _utcnow() -> datetime:
    """Current UTC time as a timezone-aware datetime.

    Drop-in replacement for datetime.utcnow(), which is deprecated since
    Python 3.12 and returns a naive (tz-less) value.
    """
    return datetime.now(tz=timezone.utc)


class Template(SQLModel, table=True):
    # One stored PDF form template and the field schema extracted from it.
    id: int | None = Field(default=None, primary_key=True)
    name: str
    # Arbitrary field-name -> type mapping, persisted as a JSON column.
    fields: dict = Field(sa_column=Column(JSON))
    pdf_path: str
    # Timezone-aware creation timestamp.
    created_at: datetime = Field(default_factory=_utcnow)