From 923c119f6f0f92fdf8356429fa021fb368cf8983 Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Mon, 30 Mar 2026 00:58:50 +0530
Subject: [PATCH 01/10] Optimize fill_form by removing redundant get_template
 calls

---
 api/routes/forms.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/api/routes/forms.py b/api/routes/forms.py
index f3430ed..b318520 100644
--- a/api/routes/forms.py
+++ b/api/routes/forms.py
@@ -11,13 +11,17 @@
 
 @router.post("/fill", response_model=FormFillResponse)
 def fill_form(form: FormFill, db: Session = Depends(get_db)):
-    if not get_template(db, form.template_id):
-        raise AppError("Template not found", status_code=404)
-
     fetched_template = get_template(db, form.template_id)
 
+    if not fetched_template:
+        raise AppError("Template not found", status_code=404)
+
     controller = Controller()
-    path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)
+    path = controller.fill_form(
+        user_input=form.input_text,
+        fields=fetched_template.fields,
+        pdf_form_path=fetched_template.pdf_path
+    )
 
     submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
     return create_form(db, submission)

From 12697bfdf2231bcbe8745b6cbbb45a189db60a5f Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Mon, 30 Mar 2026 01:17:27 +0530
Subject: [PATCH 02/10] validate-input-text

---
 api/routes/forms.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/api/routes/forms.py b/api/routes/forms.py
index b318520..30ce798 100644
--- a/api/routes/forms.py
+++ b/api/routes/forms.py
@@ -11,18 +11,17 @@
 
 @router.post("/fill", response_model=FormFillResponse)
 def fill_form(form: FormFill, db: Session = Depends(get_db)):
+    if not form.input_text.strip():
+        raise AppError("Input text cannot be empty", status_code=400)
     fetched_template = get_template(db, form.template_id)
-
     if not fetched_template:
         raise AppError("Template not found", status_code=404)
-
     controller = Controller()
     path = controller.fill_form(
         user_input=form.input_text,
         fields=fetched_template.fields,
         pdf_form_path=fetched_template.pdf_path
     )
-
     submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
     return create_form(db, submission)
 

From 2f479ca6329851486348c39cf3aa4aba131bdbd8 Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Mon, 30 Mar 2026 02:31:11 +0530
Subject: [PATCH 03/10] refactor: move input_text validation to schema level

---
 api/routes/forms.py  | 2 --
 api/schemas/forms.py | 8 +++++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/api/routes/forms.py b/api/routes/forms.py
index 30ce798..5e18013 100644
--- a/api/routes/forms.py
+++ b/api/routes/forms.py
@@ -11,8 +11,6 @@
 
 @router.post("/fill", response_model=FormFillResponse)
 def fill_form(form: FormFill, db: Session = Depends(get_db)):
-    if not form.input_text.strip():
-        raise AppError("Input text cannot be empty", status_code=400)
     fetched_template = get_template(db, form.template_id)
     if not fetched_template:
         raise AppError("Template not found", status_code=404)
diff --git a/api/schemas/forms.py b/api/schemas/forms.py
index 3cce650..bf6957e 100644
--- a/api/schemas/forms.py
+++ b/api/schemas/forms.py
@@ -1,9 +1,15 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 
 class FormFill(BaseModel):
     template_id: int
     input_text: str
 
+    @field_validator("input_text")
+    def validate_input_text(cls, value):
+        if value and value.strip():  # at least one non-whitespace character
+            return value
+        raise ValueError("Input text cannot be empty")
+
 
 class FormFillResponse(BaseModel):
     id: int

From 1138766bf3ae89591eeb2ab6981f4483f58d68bb Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Tue, 31 Mar 2026 03:59:38 +0530
Subject: [PATCH 04/10] Add centralized error handling using FastAPI exception
 handlers

---
 api/main.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/api/main.py b/api/main.py
index d0b8c79..cec8000 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,7 +1,56 @@
 from fastapi import FastAPI
+from api.routes import templates, forms
+from fastapi import Request
+from fastapi.responses import JSONResponse
+from fastapi.exceptions import RequestValidationError
+from starlette.exceptions import HTTPException as StarletteHTTPException
+
 from api.routes import templates, forms
 
 app = FastAPI()
 
+
+@app.exception_handler(StarletteHTTPException)
+async def http_exception_handler(request: Request, exc: StarletteHTTPException):
+    """Render an HTTP exception in the API's ``{"error": {...}}`` envelope.
+
+    The response reuses the exception's status code and ``detail``.
+    """
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={
+            "error": {"type": "HTTPException", "message": exc.detail, "details": {}}
+        },
+    )
+
+
+@app.exception_handler(RequestValidationError)
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    return JSONResponse(
+        status_code=422,
+        content={
+            "error": {
+                "type": "ValidationError",
+                "message": "Invalid request data",
+                "details": exc.errors(),
+            }
+        },
+    )
+
+
+@app.exception_handler(Exception)
+async def general_exception_handler(request: Request, exc: Exception):
+    return JSONResponse(
+        status_code=500,
+        content={
+            "error": {
+                "type": "InternalServerError",
+                "message": "Internal server error",  # never echo str(exc) to clients
+                "details": {}
+            }
+        },
+    )
+
+
 app.include_router(templates.router)
 app.include_router(forms.router)
\ No newline at end of file

From c4d59ac242aca9af372765d5c72f931ae69ac449 Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Tue, 31 Mar 2026 04:13:00 +0530
Subject: [PATCH 05/10] add-request-id

---
 api/main.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/api/main.py b/api/main.py
index cec8000..7111bf7 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,3 +1,5 @@
+import uuid
+from starlette.middleware.base import BaseHTTPMiddleware
 from fastapi import FastAPI
 from api.routes import templates, forms
 from fastapi import Request
@@ -5,10 +7,19 @@
 from fastapi.exceptions import RequestValidationError
 from starlette.exceptions import HTTPException as StarletteHTTPException
 
-from api.routes import templates, forms
-
 app = FastAPI()
 
+class RequestIDMiddleware(BaseHTTPMiddleware):
+    """Tag each request with a fresh UUID4 and echo it as X-Request-ID."""
+    async def dispatch(self, request: Request, call_next):
+        request_id = request.state.request_id = str(uuid.uuid4())
+        response = await call_next(request)
+        response.headers["X-Request-ID"] = request_id
+        return response
+
+
+app.add_middleware(RequestIDMiddleware)
+
 
 @app.exception_handler(StarletteHTTPException)
 async def http_exception_handler(request: Request, exc: StarletteHTTPException):

From dad800ac15719f39ece66b6ac259da2ea3401d6f Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Wed, 1 Apr 2026 18:17:29 +0530
Subject: [PATCH 06/10] feat: flatten validation error details into
 field/issue/expected entries

---
 api/main.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/api/main.py b/api/main.py
index 7111bf7..62ba6a6 100644
--- a/api/main.py
+++ b/api/main.py
@@ -34,21 +34,33 @@ async def http_exception_handler(request: Request, exc: StarletteHTTPException):
         },
     )
 
-
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    formatted_errors = []
+
+    for err in exc.errors():
+        loc = err.get("loc", [])
+        field = loc[-1] if loc else "unknown"  
+        issue = err.get("msg", "Invalid value")
+        expected = err.get("type", "")
+
+        formatted_errors.append({
+            "field": field,
+            "issue": issue,
+            "expected": expected
+        })
+
     return JSONResponse(
         status_code=422,
         content={
             "error": {
                 "type": "ValidationError",
                 "message": "Invalid request data",
-                "details": exc.errors(),
+                "details": formatted_errors,
             }
         },
     )
 
-
 @app.exception_handler(Exception)
 async def general_exception_handler(request: Request, exc: Exception):
     return JSONResponse(

From 9bdb2b0e77a6a7e286719d440a42bd25320a9a52 Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Sat, 4 Apr 2026 00:43:11 +0530
Subject: [PATCH 07/10] Add prompt engineering layer for improved LLM
 extraction

---
 api/services/prompt_builder.py | 40 ++++++++++++++++++++++++++++++++++
 src/llm.py                     | 16 +++++++++++---
 2 files changed, 53 insertions(+), 3 deletions(-)
 create mode 100644 api/services/prompt_builder.py

diff --git a/api/services/prompt_builder.py b/api/services/prompt_builder.py
new file mode 100644
index 0000000..faa265f
--- /dev/null
+++ b/api/services/prompt_builder.py
@@ -0,0 +1,40 @@
+def build_extraction_prompt(input_text: str) -> str:
+    return f"""
+You are an AI system that extracts structured information from incident reports.
+
+Extract the following fields:
+- name
+- location
+- date (YYYY-MM-DD if possible)
+- incident_type
+- description
+
+Return ONLY valid JSON. Do not include any extra text.
+
+Format:
+{{
+  "name": "",
+  "location": "",
+  "date": "",
+  "incident_type": "",
+  "description": ""
+}}
+
+Example:
+
+Input:
+Fire reported near Central Park on Jan 5 involving a vehicle.
+
+Output:
+{{
+  "name": "",
+  "location": "Central Park",
+  "date": "2024-01-05",
+  "incident_type": "fire",
+  "description": "Fire involving a vehicle"
+}}
+
+Now extract from:
+
+{input_text}
+"""
\ No newline at end of file
diff --git a/src/llm.py b/src/llm.py
index 70937f9..8f8a848 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -1,7 +1,7 @@
 import json
 import os
 import requests
-
+from api.services.prompt_builder import build_extraction_prompt
 
 class LLM:
     def __init__(self, transcript_text=None, target_fields=None, json=None):
@@ -47,16 +47,26 @@ def build_prompt(self, current_field):
     def main_loop(self):
         # self.type_check_all()
         for field in self._target_fields.keys():
-            prompt = self.build_prompt(field)
             # print(prompt)
             # ollama_url = "http://localhost:11434/api/generate"
             ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
             ollama_url = f"{ollama_host}/api/generate"
 
+            base_prompt = build_extraction_prompt(self._transcript_text)
+
+            prompt = f"""
+            {base_prompt}
+
+            Focus specifically on extracting the value for this field:
+            {field}
+
+            Return only the extracted value as a plain string. Do not return JSON.
+            """
+            print("\n[DEBUG] Generated Prompt:\n", prompt)
             payload = {
                 "model": "mistral",
                 "prompt": prompt,
-                "stream": False,  # don't really know why --> look into this later.
+                "stream": False,  # streaming disabled; using single response mode
             }
 
             try:

From e763a723c3e71b06acc09c03cfc95fc9113378cc Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Sat, 4 Apr 2026 01:10:22 +0530
Subject: [PATCH 08/10] Remove debug print of the generated LLM prompt

---
 src/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llm.py b/src/llm.py
index 8f8a848..66004e0 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -62,7 +62,7 @@ def main_loop(self):
 
             Return only the extracted value as a plain string. Do not return JSON.
             """
-            print("\n[DEBUG] Generated Prompt:\n", prompt)
+
             payload = {
                 "model": "mistral",
                 "prompt": prompt,

From 68d69cc0b5666dadfd3a51b09e9fa752ccbbfb00 Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Sat, 11 Apr 2026 01:38:39 +0530
Subject: [PATCH 09/10] enhance: improve prompt consistency and reduce
 hallucinated LLM outputs

---
 api/services/prompt_builder.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/api/services/prompt_builder.py b/api/services/prompt_builder.py
index faa265f..843c7cd 100644
--- a/api/services/prompt_builder.py
+++ b/api/services/prompt_builder.py
@@ -1,6 +1,13 @@
 def build_extraction_prompt(input_text: str) -> str:
     return f"""
 You are an AI system that extracts structured information from incident reports.
+Your task is to extract ONLY information explicitly present in the input text.
+
+STRICT RULES:
+- Do NOT infer or guess missing information
+- If a field is not clearly mentioned, return an empty string ""
+- Do NOT add any extra fields beyond those specified
+- Do NOT modify or reinterpret values 
 
 Extract the following fields:
 - name
@@ -9,8 +16,8 @@ def build_extraction_prompt(input_text: str) -> str:
 - incident_type
 - description
 
-Return ONLY valid JSON. Do not include any extra text.
-
+Return ONLY valid JSON. Do not include any extra text, explanation, or formatting outside JSON.
+The output MUST be a valid JSON object and parsable by json.loads().
 Format:
 {{
   "name": "",
@@ -34,7 +41,22 @@ def build_extraction_prompt(input_text: str) -> str:
   "description": "Fire involving a vehicle"
 }}
 
-Now extract from:
+Negative Example (DO NOT DO THIS):
+
+Incorrect Output:
+(This output is incorrect because it includes inferred/assumed values)
+{{
+  "location": "Central Park (assumed)",
+  "date": "2024-01-05"
+}}
+
+Correct Output:
+{{
+  "location": "Central Park",
+  "date": ""
+}}
+
+Now extract strictly from the following input (follow all rules above):
 
 {input_text}
 """
\ No newline at end of file

From ceea4de2e75c2217ddb7020f3477b621dc269cac Mon Sep 17 00:00:00 2001
From: Lochit Vinay <lochitvinay111@gmail.com>
Date: Wed, 15 Apr 2026 23:22:17 +0530
Subject: [PATCH 10/10] feat: add fallback handling for unreliable LLM outputs

---
 src/llm.py | 60 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/llm.py b/src/llm.py
index 66004e0..025dede 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -3,6 +3,32 @@
 import requests
 from api.services.prompt_builder import build_extraction_prompt
 
+def safe_extract_value(response: str):
+    """Normalize a raw single-field LLM reply into a clean value, or None.
+
+    Only the first non-empty line is used; one leading ``label:`` prefix and
+    wrapping quotes are dropped. Returns None for empty replies, sentinel
+    answers (-1/none/null/not found) and values longer than 200 characters.
+    """
+    if not response:
+        return None
+    lines = [ln.strip() for ln in response.splitlines() if ln.strip()]
+    if not lines:
+        return None
+    # Pick the line first: splitting the whole reply on ":" returned the text
+    # after the LAST colon, i.e. often a trailing explanation line.
+    value = lines[0]
+    label, sep, rest = value.partition(":")
+    # Strip only a word-like label so "10:30" and "https://..." stay intact.
+    is_label = label.replace("_", " ").replace(" ", "").isalpha()
+    if sep and is_label and not rest.startswith("//"):
+        value = rest.strip()
+    # Drop wrapping quotes only; inner apostrophes ("O'Brien") are data.
+    value = value.strip("\"'").strip()
+    if not value or value.lower() in ("-1", "none", "null", "not found"):
+        return None
+    return value if len(value) <= 200 else None
+
 class LLM:
     def __init__(self, transcript_text=None, target_fields=None, json=None):
         if json is None:
@@ -82,7 +108,8 @@ def main_loop(self):
 
             # parse response
             json_data = response.json()
-            parsed_response = json_data["response"]
+            raw_response = json_data["response"]
+            parsed_response = safe_extract_value(raw_response)
             # print(parsed_response)
             self.add_response_to_json(field, parsed_response)
 
@@ -94,17 +121,18 @@ def main_loop(self):
         return self
 
     def add_response_to_json(self, field, value):
-        """
-        this method adds the following value under the specified field,
-        or under a new field if the field doesn't exist, to the json dict
-        """
-        value = value.strip().replace('"', "")
+        value = value.strip().replace('"', "") if value else None
         parsed_value = None
 
-        if value != "-1":
+        if value:
             parsed_value = value
+        else:
+            parsed_value = {
+                "value": None,
+                "requires_review": True
+            }
 
-        if ";" in value:
+        if value and ";" in value:
             parsed_value = self.handle_plural_values(value)
 
         if field in self._json.keys():
@@ -114,30 +142,20 @@ def add_response_to_json(self, field, value):
 
         return
 
+
     def handle_plural_values(self, plural_value):
         """
-        This method handles plural values.
-        Takes in strings of the form 'value1; value2; value3; ...; valueN'
-        returns a list with the respective values -> [value1, value2, value3, ..., valueN]
+         This method handles plural values.
         """
         if ";" not in plural_value:
             raise ValueError(
                 f"Value is not plural, doesn't have ; separator, Value: {plural_value}"
             )
 
-        print(
-            f"\t[LOG]: Formating plural values for JSON, [For input {plural_value}]..."
-        )
         values = plural_value.split(";")
 
-        # Remove trailing leading whitespace
         for i in range(len(values)):
-            current = i + 1
-            if current < len(values):
-                clean_value = values[current].lstrip()
-                values[current] = clean_value
-
-        print(f"\t[LOG]: Resulting formatted list of values: {values}")
+            values[i] = values[i].strip()
 
         return values