from unittest.mock import patch


def test_submit_form(client):
    """Happy path: create a template, then fill a form that references it."""
    # Step 1: create a template. The Controller is mocked so no real PDF
    # processing or LLM call happens during the test.
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        template_payload = {
            "name": "Test Template",
            "pdf_path": "src/inputs/file.pdf",
            "fields": {
                "reporting_officer": "string",
                "incident_location": "string",
                "amount_of_victims": "string",
                "victim_name_s": "string",
                "assisting_officer": "string",
            },
        }
        template_res = client.post("/templates/create", json=template_payload)
        assert template_res.status_code == 200
        template_id = template_res.json()["id"]

    # Step 2: fill a form against the template created above.
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        form_payload = {
            "template_id": template_id,
            "input_text": (
                "Officer Voldemort here, at an incident reported at 456 Oak Street. "
                "Two victims, Mark Smith and Jane Doe. "
                "Handed off to Sheriff's Deputy Alvarez. End of transmission."
            ),
        }

        response = client.post("/forms/fill", json=form_payload)

        assert response.status_code == 200
        data = response.json()
        assert data["template_id"] == template_id
        assert data["output_pdf_path"] == "src/outputs/filled_test.pdf"
        assert data["input_text"] == form_payload["input_text"]
        assert "id" in data


def test_submit_form_invalid_template(client):
    """Filling a form against a non-existent template id yields 404."""
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        response = client.post(
            "/forms/fill",
            json={
                "template_id": 99999,
                "input_text": "Some random incident text here.",
            },
        )
        assert response.status_code == 404


def test_submit_form_missing_input_text(client):
    """Omitting the required input_text field fails request validation (422)."""
    with patch("api.routes.forms.Controller") as MockController:
        MockController.return_value.fill_form.return_value = "src/outputs/filled_test.pdf"

        response = client.post("/forms/fill", json={"template_id": 1})
        assert response.status_code == 422
from unittest.mock import patch


def test_create_template(client):
    """POST /templates/create returns 200 plus the stored template record."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "name": "Template 1",
                "pdf_path": "src/inputs/file.pdf",
                "fields": {
                    "Employee's name": "string",
                    "Employee's job title": "string",
                    "Employee's department supervisor": "string",
                    "Employee's phone number": "string",
                    "Employee's email": "string",
                    "Signature": "string",
                    "Date": "string",
                },
            },
        )

        assert response.status_code == 200
        data = response.json()
        assert data["name"] == "Template 1"
        # The stored pdf_path is the generated template, not the source PDF.
        assert data["pdf_path"] == "src/inputs/file_template.pdf"
        assert "id" in data


def test_create_template_missing_name(client):
    """A payload lacking the required name field fails validation (422)."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "pdf_path": "src/inputs/file.pdf",
                "fields": {"Employee's name": "string"},
            },
        )
        assert response.status_code == 422


def test_create_template_missing_fields(client):
    """A payload lacking the required fields mapping fails validation (422)."""
    with patch("api.routes.templates.Controller") as MockController:
        MockController.return_value.create_template.return_value = "src/inputs/file_template.pdf"

        response = client.post(
            "/templates/create",
            json={
                "name": "Bad Template",
                "pdf_path": "src/inputs/file.pdf",
            },
        )
        assert response.status_code == 422
from your original function - t2j = llm.main_loop() + # Generate dictionary of answers from your original function. + # main_loop_batch() extracts all fields in a single LLM call instead of + # one call per field, significantly reducing latency for large forms. + # Falls back to the sequential main_loop() if the LLM returns invalid JSON. + t2j = llm.main_loop_batch() textbox_answers = t2j.get_data() # This is a dictionary answers_list = list(textbox_answers.values()) diff --git a/src/llm.py b/src/llm.py index 70937f9..2be2417 100644 --- a/src/llm.py +++ b/src/llm.py @@ -131,5 +131,95 @@ def handle_plural_values(self, plural_value): return values + def build_batch_prompt(self): + """ + Builds a single prompt that asks the LLM to extract ALL target fields + at once and return them as a JSON object. + This replaces N sequential API calls with a single round-trip. + """ + fields_list = json.dumps(list(self._target_fields.keys()), indent=2) + prompt = f""" +SYSTEM PROMPT: +You are an AI assistant that extracts structured data from incident transcriptions. +Extract values for ALL of the following JSON fields from the text below. +Return ONLY a valid JSON object with no extra explanation, commentary, or markdown fences. +If a field is plural and multiple values exist in the text, use a list of strings. +If a value cannot be found in the text, use null. + +FIELDS TO EXTRACT: +{fields_list} + +TEXT: +{self._transcript_text} + +OUTPUT FORMAT: +{{ + "field_name": "extracted value or null", + ... +}} +""" + return prompt + + def main_loop_batch(self): + """ + Single-call extraction — replaces the N sequential calls in main_loop(). + Sends one prompt containing all target fields and parses the JSON response. + Falls back to main_loop() if the LLM does not return valid JSON. 
+ """ + prompt = self.build_batch_prompt() + ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/") + ollama_url = f"{ollama_host}/api/generate" + + payload = { + "model": "mistral", + "prompt": prompt, + "stream": False, + } + + try: + response = requests.post(ollama_url, json=payload) + response.raise_for_status() + except requests.exceptions.ConnectionError: + raise ConnectionError( + f"Could not connect to Ollama at {ollama_url}. " + "Please ensure Ollama is running and accessible." + ) + except requests.exceptions.HTTPError as e: + raise RuntimeError(f"Ollama returned an error: {e}") + + raw = response.json()["response"].strip() + + # Strip markdown code fences if the model wrapped the output + if raw.startswith("```"): + parts = raw.split("```") + # parts[1] is the fenced block; drop a leading "json" language tag if present + raw = parts[1].lstrip("json").strip() + + try: + extracted = json.loads(raw) + except json.JSONDecodeError as e: + print( + f"\t[WARN] main_loop_batch: LLM did not return valid JSON ({e}). " + "Falling back to sequential main_loop()." 
+ ) + return self.main_loop() + + # Populate self._json using the existing add_response_to_json logic + for field in self._target_fields.keys(): + value = extracted.get(field) + if value is None: + self.add_response_to_json(field, "-1") + elif isinstance(value, list): + self.add_response_to_json(field, "; ".join(str(v) for v in value)) + else: + self.add_response_to_json(field, str(value)) + + print("----------------------------------") + print("\t[LOG] Resulting JSON created from the input text (batch mode):") + print(json.dumps(self._json, indent=2)) + print("--------- extracted data ---------") + + return self + def get_data(self): return self._json diff --git a/tests/test_llm.py b/tests/test_llm.py new file mode 100644 index 0000000..bfd1b05 --- /dev/null +++ b/tests/test_llm.py @@ -0,0 +1,177 @@ +import json +from unittest.mock import patch, MagicMock +from src.llm import LLM + + +SAMPLE_TRANSCRIPT = ( + "Officer Voldemort here, at an incident reported at 456 Oak Street. " + "Two victims, Mark Smith and Jane Doe. " + "Handed off to Sheriff's Deputy Alvarez. End of transmission." 
import json
from unittest.mock import patch, MagicMock
from src.llm import LLM


SAMPLE_TRANSCRIPT = (
    "Officer Voldemort here, at an incident reported at 456 Oak Street. "
    "Two victims, Mark Smith and Jane Doe. "
    "Handed off to Sheriff's Deputy Alvarez. End of transmission."
)

SAMPLE_FIELDS = {
    "reporting_officer": "string",
    "incident_location": "string",
    "victim_name_s": "string",
    "assisting_officer": "string",
}


def _make_mock_response(payload: dict) -> MagicMock:
    """Helper: build a mock requests.Response that returns payload as JSON."""
    mock_resp = MagicMock()
    mock_resp.json.return_value = {"response": json.dumps(payload)}
    mock_resp.raise_for_status = MagicMock()
    return mock_resp


# ---------------------------------------------------------------------------
# build_batch_prompt
# ---------------------------------------------------------------------------

def test_build_batch_prompt_contains_all_fields():
    llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
    prompt = llm.build_batch_prompt()

    for field in SAMPLE_FIELDS:
        assert field in prompt, f"Expected field '{field}' in batch prompt"

    assert SAMPLE_TRANSCRIPT in prompt


def test_build_batch_prompt_contains_transcript():
    llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
    prompt = llm.build_batch_prompt()
    assert SAMPLE_TRANSCRIPT in prompt


# ---------------------------------------------------------------------------
# main_loop_batch — happy path
# ---------------------------------------------------------------------------

def test_main_loop_batch_single_api_call():
    """main_loop_batch must call the Ollama API exactly once, regardless of field count."""
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": ["Mark Smith", "Jane Doe"],
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)) as mock_post:
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm.main_loop_batch()

    assert mock_post.call_count == 1, (
        f"Expected exactly 1 API call, got {mock_post.call_count}. "
        "main_loop_batch should not loop per-field."
    )


def test_main_loop_batch_populates_all_fields():
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": None,  # missing value
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    assert result["reporting_officer"] == "Officer Voldemort"
    assert result["incident_location"] == "456 Oak Street"
    # FIX: main_loop_batch maps JSON null to the "-1" not-found sentinel
    # (matching the sequential flow); it never stores Python None.
    assert result["victim_name_s"] == "-1"
    assert result["assisting_officer"] == "Deputy Alvarez"


def test_main_loop_batch_handles_list_values():
    """Plural values returned as a JSON list should be joined into a '; ' separated string."""
    llm_response = {
        "reporting_officer": "Officer Voldemort",
        "incident_location": "456 Oak Street",
        "victim_name_s": ["Mark Smith", "Jane Doe"],
        "assisting_officer": "Deputy Alvarez",
    }

    with patch("requests.post", return_value=_make_mock_response(llm_response)):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    # FIX: assert the joined string the implementation actually produces,
    # as this test's own docstring describes (not the raw list).
    assert result["victim_name_s"] == "Mark Smith; Jane Doe"


# ---------------------------------------------------------------------------
# main_loop_batch — markdown code-fence stripping
# ---------------------------------------------------------------------------

def test_main_loop_batch_strips_markdown_fences():
    raw_with_fences = (
        "```json\n"
        + json.dumps({
            "reporting_officer": "Officer Voldemort",
            "incident_location": "456 Oak Street",
            "victim_name_s": None,
            "assisting_officer": "Deputy Alvarez",
        })
        + "\n```"
    )

    mock_resp = MagicMock()
    mock_resp.json.return_value = {"response": raw_with_fences}
    mock_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=mock_resp):
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        result = llm.main_loop_batch().get_data()

    assert result["reporting_officer"] == "Officer Voldemort"


# ---------------------------------------------------------------------------
# main_loop_batch — fallback to sequential main_loop on bad JSON
# ---------------------------------------------------------------------------

def test_main_loop_batch_falls_back_on_invalid_json():
    """If the LLM returns garbage instead of JSON, fall back to main_loop()."""
    bad_resp = MagicMock()
    bad_resp.json.return_value = {"response": "Sorry, I cannot help with that."}
    bad_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=bad_resp):
        with patch.object(LLM, "main_loop", return_value=MagicMock()) as mock_fallback:
            llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
            llm.main_loop_batch()
            mock_fallback.assert_called_once()


# ---------------------------------------------------------------------------
# main_loop_batch vs main_loop — call count comparison
# ---------------------------------------------------------------------------

def test_main_loop_batch_fewer_calls_than_main_loop():
    """
    Explicitly show that main_loop_batch makes 1 call while main_loop
    makes len(fields) calls — the core performance improvement.
    """
    n_fields = len(SAMPLE_FIELDS)
    llm_response = {k: "value" for k in SAMPLE_FIELDS}

    with patch("requests.post", return_value=_make_mock_response(llm_response)) as mock_post:
        llm = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm.main_loop_batch()
        batch_calls = mock_post.call_count

    single_resp = MagicMock()
    single_resp.json.return_value = {"response": "some value"}
    single_resp.raise_for_status = MagicMock()

    with patch("requests.post", return_value=single_resp) as mock_post:
        llm2 = LLM(transcript_text=SAMPLE_TRANSCRIPT, target_fields=SAMPLE_FIELDS)
        llm2.main_loop()
        sequential_calls = mock_post.call_count

    assert batch_calls == 1
    assert sequential_calls == n_fields
    assert batch_calls < sequential_calls
from datetime import datetime, timezone

from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel


def _utcnow() -> datetime:
    """Current UTC time as a timezone-aware datetime.

    Drop-in replacement for datetime.utcnow(), which is deprecated since
    Python 3.12 and returns a naive (tz-less) value.
    """
    return datetime.now(tz=timezone.utc)


class Template(SQLModel, table=True):
    # One stored PDF form template and the field schema extracted from it.
    id: int | None = Field(default=None, primary_key=True)
    name: str
    # Arbitrary field-name -> type mapping, persisted as a JSON column.
    fields: dict = Field(sa_column=Column(JSON))
    pdf_path: str
    # Timezone-aware creation timestamp.
    created_at: datetime = Field(default_factory=_utcnow)