From 923c119f6f0f92fdf8356429fa021fb368cf8983 Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Mon, 30 Mar 2026 00:58:50 +0530 Subject: [PATCH 01/10] Optimize fill_form by removing redundant get_template calls --- api/routes/forms.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/api/routes/forms.py b/api/routes/forms.py index f3430ed..b318520 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -11,13 +11,17 @@ @router.post("/fill", response_model=FormFillResponse) def fill_form(form: FormFill, db: Session = Depends(get_db)): - if not get_template(db, form.template_id): - raise AppError("Template not found", status_code=404) - fetched_template = get_template(db, form.template_id) + if not fetched_template: + raise AppError("Template not found", status_code=404) + controller = Controller() - path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) + path = controller.fill_form( + user_input=form.input_text, + fields=fetched_template.fields, + pdf_form_path=fetched_template.pdf_path + ) submission = FormSubmission(**form.model_dump(), output_pdf_path=path) return create_form(db, submission) From 12697bfdf2231bcbe8745b6cbbb45a189db60a5f Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Mon, 30 Mar 2026 01:17:27 +0530 Subject: [PATCH 02/10] validate-input-text --- api/routes/forms.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/routes/forms.py b/api/routes/forms.py index b318520..30ce798 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -11,18 +11,17 @@ @router.post("/fill", response_model=FormFillResponse) def fill_form(form: FormFill, db: Session = Depends(get_db)): + if not form.input_text.strip(): + raise AppError("Input text cannot be empty", status_code=400) fetched_template = get_template(db, form.template_id) - if not fetched_template: raise AppError("Template not found", status_code=404) - controller = Controller() path = controller.fill_form( user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path ) - submission = FormSubmission(**form.model_dump(), output_pdf_path=path) return create_form(db, submission) From 2f479ca6329851486348c39cf3aa4aba131bdbd8 Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Mon, 30 Mar 2026 02:31:11 +0530 Subject: [PATCH 03/10] refactor: move input_text validation to schema level --- api/routes/forms.py | 2 -- api/schemas/forms.py | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/api/routes/forms.py b/api/routes/forms.py index 30ce798..5e18013 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -11,8 +11,6 @@ @router.post("/fill", response_model=FormFillResponse) def fill_form(form: FormFill, db: Session = Depends(get_db)): - if not form.input_text.strip(): - raise AppError("Input text cannot be empty", status_code=400) fetched_template = get_template(db, form.template_id) if not fetched_template: raise AppError("Template not found", status_code=404) diff --git a/api/schemas/forms.py b/api/schemas/forms.py index 3cce650..bf6957e 100644 --- a/api/schemas/forms.py +++ b/api/schemas/forms.py @@ -1,9 +1,15 @@ -from pydantic import BaseModel +from pydantic import BaseModel, field_validator class FormFill(BaseModel): template_id: int input_text: str + @field_validator("input_text") + def validate_input_text(cls, value): + if not value or not value.strip(): + raise ValueError("Input text cannot be empty") + return value + class FormFillResponse(BaseModel): id: int From 1138766bf3ae89591eeb2ab6981f4483f58d68bb Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Tue, 31 Mar 2026 03:59:38 +0530 Subject: [PATCH 04/10] Add centralized error handling using FastAPI exception handlers --- api/main.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/api/main.py b/api/main.py index d0b8c79..cec8000 100644 --- a/api/main.py +++ b/api/main.py @@ -1,7 +1,56 @@ from fastapi import FastAPI +from api.routes import templates, forms +from fastapi import Request +from fastapi.responses import JSONResponse +from fastapi.exceptions import RequestValidationError +from starlette.exceptions import HTTPException as StarletteHTTPException + from api.routes import templates, forms app = FastAPI() + +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + return JSONResponse( + status_code=exc.status_code, + content={ + "error": { + "type": "HTTPException", + "message": exc.detail, + "details": {} + } + }, + ) + + +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + return JSONResponse( + status_code=422, + content={ + "error": { + "type": "ValidationError", + "message": "Invalid request data", + "details": exc.errors(), + } + }, + ) + + +@app.exception_handler(Exception) +async def general_exception_handler(request: Request, exc: Exception): + return JSONResponse( + status_code=500, + content={ + "error": { + "type": "InternalServerError", + "message": str(exc), + "details": {} + } + }, + ) + + app.include_router(templates.router) app.include_router(forms.router) \ No newline at end of file From c4d59ac242aca9af372765d5c72f931ae69ac449 Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Tue, 31 Mar 2026 04:13:00 +0530 Subject: [PATCH 05/10] add-request-id --- api/main.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/api/main.py b/api/main.py index cec8000..7111bf7 100644 --- a/api/main.py +++ b/api/main.py @@ -1,3 +1,5 @@ +import uuid +from starlette.middleware.base import BaseHTTPMiddleware from fastapi import FastAPI from api.routes import templates, forms from fastapi import Request @@ -5,10 +7,19 @@ from fastapi.exceptions import RequestValidationError from starlette.exceptions import HTTPException as StarletteHTTPException -from api.routes import templates, forms - app = FastAPI() +class RequestIDMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + request_id = str(uuid.uuid4()) + request.state.request_id = request_id + response = await call_next(request) + response.headers["X-Request-ID"] = request_id + return response + + +app.add_middleware(RequestIDMiddleware) + @app.exception_handler(StarletteHTTPException) async def http_exception_handler(request: Request, exc: StarletteHTTPException): From dad800ac15719f39ece66b6ac259da2ea3401d6f Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Wed, 1 Apr 2026 18:17:29 +0530 Subject: [PATCH 06/10] feat: add request ID middleware and structured exception handling --- api/main.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/api/main.py b/api/main.py index 7111bf7..62ba6a6 100644 --- a/api/main.py +++ b/api/main.py @@ -34,21 +34,33 @@ async def http_exception_handler(request: Request, exc: StarletteHTTPException): }, ) - @app.exception_handler(RequestValidationError) async def validation_exception_handler(request: Request, exc: RequestValidationError): + formatted_errors = [] + + for err in exc.errors(): + loc = err.get("loc", []) + field = loc[-1] if loc else "unknown" + issue = err.get("msg", "Invalid value") + expected = err.get("type", "") + + formatted_errors.append({ + "field": field, + "issue": issue, + "expected": expected + }) + return JSONResponse( status_code=422, content={ "error": { "type": "ValidationError", "message": "Invalid request data", - "details": exc.errors(), + "details": formatted_errors, } }, ) - @app.exception_handler(Exception) async def general_exception_handler(request: Request, exc: Exception): return JSONResponse( From 9bdb2b0e77a6a7e286719d440a42bd25320a9a52 Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Sat, 4 Apr 2026 00:43:11 +0530 Subject: [PATCH 07/10] Add prompt engineering layer for improved LLM extraction --- api/services/prompt_builder.py | 40 ++++++++++++++++++++++++++++++++++ src/llm.py | 16 +++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 api/services/prompt_builder.py diff --git a/api/services/prompt_builder.py b/api/services/prompt_builder.py new file mode 100644 index 0000000..faa265f --- /dev/null +++ b/api/services/prompt_builder.py @@ -0,0 +1,40 @@ +def build_extraction_prompt(input_text: str) -> str: + return f""" +You are an AI system that extracts structured information from incident reports. + +Extract the following fields: +- name +- location +- date (YYYY-MM-DD if possible) +- incident_type +- description + +Return ONLY valid JSON. Do not include any extra text. + +Format: +{{ + "name": "", + "location": "", + "date": "", + "incident_type": "", + "description": "" +}} + +Example: + +Input: +Fire reported near Central Park on Jan 5 involving a vehicle. + +Output: +{{ + "name": "", + "location": "Central Park", + "date": "2024-01-05", + "incident_type": "fire", + "description": "Fire involving a vehicle" +}} + +Now extract from: + +{input_text} +""" \ No newline at end of file diff --git a/src/llm.py b/src/llm.py index 70937f9..8f8a848 100644 --- a/src/llm.py +++ b/src/llm.py @@ -1,7 +1,7 @@ import json import os import requests - +from api.services.prompt_builder import build_extraction_prompt class LLM: def __init__(self, transcript_text=None, target_fields=None, json=None): @@ -47,16 +47,26 @@ def build_prompt(self, current_field): def main_loop(self): # self.type_check_all() for field in self._target_fields.keys(): - prompt = self.build_prompt(field) # print(prompt) # ollama_url = "http://localhost:11434/api/generate" ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/") ollama_url = f"{ollama_host}/api/generate" + base_prompt = build_extraction_prompt(self._transcript_text) + + prompt = f""" + {base_prompt} + + Focus specifically on extracting the value for this field: + {field} + + Return only the extracted value as a plain string. Do not return JSON. + """ + print("\n[DEBUG] Generated Prompt:\n", prompt) payload = { "model": "mistral", "prompt": prompt, - "stream": False, # don't really know why --> look into this later. + "stream": False, # streaming disabled; using single response mode } try: From e763a723c3e71b06acc09c03cfc95fc9113378cc Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Sat, 4 Apr 2026 01:10:22 +0530 Subject: [PATCH 08/10] llm-extraction-final --- src/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm.py b/src/llm.py index 8f8a848..66004e0 100644 --- a/src/llm.py +++ b/src/llm.py @@ -62,7 +62,7 @@ def main_loop(self): Return only the extracted value as a plain string. Do not return JSON. """ - print("\n[DEBUG] Generated Prompt:\n", prompt) + payload = { "model": "mistral", "prompt": prompt, From 68d69cc0b5666dadfd3a51b09e9fa752ccbbfb00 Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Sat, 11 Apr 2026 01:38:39 +0530 Subject: [PATCH 09/10] enhance: improve prompt consistency and reduce hallucinated LLM outputs --- api/services/prompt_builder.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/api/services/prompt_builder.py b/api/services/prompt_builder.py index faa265f..843c7cd 100644 --- a/api/services/prompt_builder.py +++ b/api/services/prompt_builder.py @@ -1,6 +1,13 @@ def build_extraction_prompt(input_text: str) -> str: return f""" You are an AI system that extracts structured information from incident reports. +Your task is to extract ONLY information explicitly present in the input text. + +STRICT RULES: +- Do NOT infer or guess missing information +- If a field is not clearly mentioned, return an empty string "" +- Do NOT add any extra fields beyond those specified +- Do NOT modify or reinterpret values Extract the following fields: - name @@ -9,8 +16,8 @@ def build_extraction_prompt(input_text: str) -> str: - incident_type - description -Return ONLY valid JSON. Do not include any extra text. - +Return ONLY valid JSON. Do not include any extra text, explanation, or formatting outside JSON. +The output MUST be a valid JSON object and parsable by json.loads(). Format: {{ "name": "", @@ -34,7 +41,22 @@ def build_extraction_prompt(input_text: str) -> str: "description": "Fire involving a vehicle" }} -Now extract from: +Negative Example (DO NOT DO THIS): + +Incorrect Output: +(This output is incorrect because it includes inferred/assumed values) +{{ + "location": "Central Park (assumed)", + "date": "2024-01-05" +}} + +Correct Output: +{{ + "location": "Central Park", + "date": "" +}} + +Now extract strictly from the following input (follow all rules above): {input_text} """ \ No newline at end of file From ceea4de2e75c2217ddb7020f3477b621dc269cac Mon Sep 17 00:00:00 2001 From: Lochit Vinay Date: Wed, 15 Apr 2026 23:22:17 +0530 Subject: [PATCH 10/10] feat: add fallback handling for unreliable LLM outputs --- src/llm.py | 60 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/src/llm.py b/src/llm.py index 66004e0..025dede 100644 --- a/src/llm.py +++ b/src/llm.py @@ -3,6 +3,32 @@ import requests from api.services.prompt_builder import build_extraction_prompt +def safe_extract_value(response: str): + if not response: + return None + + response = response.strip() + + + response = response.replace('"', '').replace("'", "") + + + if ":" in response: + response = response.split(":")[-1].strip() + + + response = response.split("\n")[0] + + + if response.lower() in ["-1", "none", "null", "not found"]: + return None + + + if len(response) > 200: + return None + + return response + class LLM: def __init__(self, transcript_text=None, target_fields=None, json=None): if json is None: @@ -82,7 +108,8 @@ def main_loop(self): # parse response json_data = response.json() - parsed_response = json_data["response"] + raw_response = json_data["response"] + parsed_response = safe_extract_value(raw_response) # print(parsed_response) self.add_response_to_json(field, parsed_response) @@ -94,17 +121,18 @@ def main_loop(self): return self def add_response_to_json(self, field, value): - """ - this method adds the following value under the specified field, - or under a new field if the field doesn't exist, to the json dict - """ - value = value.strip().replace('"', "") + value = value.strip().replace('"', "") if value else None parsed_value = None - if value != "-1": + if value: parsed_value = value + else: + parsed_value = { + "value": None, + "requires_review": True + } - if ";" in value: + if value and ";" in value: parsed_value = self.handle_plural_values(value) if field in self._json.keys(): @@ -114,30 +142,20 @@ def add_response_to_json(self, field, value): return + def handle_plural_values(self, plural_value): """ - This method handles plural values. - Takes in strings of the form 'value1; value2; value3; ...; valueN' - returns a list with the respective values -> [value1, value2, value3, ..., valueN] + This method handles plural values. """ if ";" not in plural_value: raise ValueError( f"Value is not plural, doesn't have ; separator, Value: {plural_value}" ) - print( - f"\t[LOG]: Formating plural values for JSON, [For input {plural_value}]..." - ) values = plural_value.split(";") - # Remove trailing leading whitespace for i in range(len(values)): - current = i + 1 - if current < len(values): - clean_value = values[current].lstrip() - values[current] = clean_value - - print(f"\t[LOG]: Resulting formatted list of values: {values}") + values[i] = values[i].strip() return values