In [1]:
import os
import re
import json

from langchain.prompts import PromptTemplate

import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Load environment variables from a local .env file so the API key does not
# have to be hard-coded in the notebook.
load_dotenv()

# Read GROQ_API_KEY from the environment
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Fail fast with an actionable message instead of handing None (or an
    # empty string) to the client and failing later with an opaque error.
    raise EnvironmentError(
        "GROQ_API_KEY is not set. Add it to your environment or to a .env file."
    )

# Initialize the shared ChatGroq LLM used by the chain(s) in this notebook
llm = ChatGroq(model="Gemma2-9b-It", groq_api_key=groq_api_key)


In [None]:
# Prompt for the JSON validation agent.
# - The only template variable is `payload` (the raw JSON text to validate).
# - Literal braces in the example response are doubled ({{ and }}) so they are
#   emitted as plain braces instead of being parsed as template variables.
# - The model is asked to answer strictly in JSON with the keys
#   "status", "anomalies" and "action".
json_validation_prompt = PromptTemplate(
    input_variables=["payload"],
    template="""
You are a JSON Validation Agent responsible for validating structured JSON payloads received via webhook in a financial services context. Your job is to detect any schema violations or real-time business logic anomalies in the payload and classify their severity.

The types of documents you will receive are typically:
- Invoices
- Payslips
- Quotations

🎯 Your Task:
Validate Schema:
- Ensure required fields are present.
- Validate data types (e.g., string, number, date).
- Accept extra fields but flag them as minor anomalies.

Check Business Rules (Real-Time Anomalies):
- Dates should be realistic (no future issue_date, pay_period).
- due_date should not be before issue_date.
- net_pay should not exceed gross salary.
- Sum of line items should match the total_amount.
- Currency should be consistent across line items and summary.
- Detect duplicate invoice_id, quote_id, etc.

Classify Each Anomaly:
- critical: Requires immediate escalation (e.g., future-dated invoice, mismatched totals, duplicate ID).
- minor: Log-only, doesn't require immediate action (e.g., extra fields, date formatting, unused optional fields).

Response Format (on each payload):
Respond strictly in valid JSON with the following format:
{{
  "status": "valid" | "invalid",
  "anomalies": [
    {{
      "field": "due_date",
      "description": "Due date is before issue date.",
      "severity": "critical"
    }},
    {{
      "field": "note",
      "description": "Extra field not in schema.",
      "severity": "minor"
    }}
  ],
  "action": "log_only" | "trigger_alert"
}}

If no critical anomalies are found, set "status": "valid" and "action": "log_only".
If one or more critical anomalies are found, set "status": "invalid" and "action": "trigger_alert".

Here is the JSON payload to validate:
{payload}
"""
)


In [6]:

# Build the chain: the rendered validation prompt is piped into the shared LLM,
# so invoking the chain with {"payload": ...} returns the model's reply.
json_agent_chain = json_validation_prompt | llm

def extract_json_from_text(raw_text: str) -> dict:
    """
    Extract the first JSON object embedded in free-form text.

    LLM replies often wrap the JSON in Markdown fences and append an
    explanation. The object is decoded starting at the first '{' and the
    decoder stops at that object's matching '}', so any trailing prose is
    ignored even when it contains braces of its own.

    Args:
        raw_text: Text that may contain a JSON object.

    Returns:
        The decoded object as a dict, or an empty dict when the text is
        empty, contains no '{', or the text at the first '{' is not a
        complete, valid JSON object. Only the first '{' is tried, so a
        truncated reply never yields a nested fragment as the result.
    """
    start = raw_text.find('{') if raw_text else -1
    if start == -1:
        return {}

    try:
        # raw_decode parses exactly one JSON value and reports where it ended.
        parsed, end = json.JSONDecoder().raw_decode(raw_text, start)
    except json.JSONDecodeError as e:
        print(f"JSON decode error: {e}")
        return {}

    print(f"Extracted JSON String: {raw_text[start:end]}")  # Debugging output
    return parsed

def processJson(payload_text: str) -> dict:
    """
    Run the JSON validation chain on a payload and return the parsed verdict.

    Args:
        payload_text: The JSON payload to validate, as text.

    Returns:
        The dictionary parsed from the model's reply.

    Raises:
        ValueError: If no JSON object could be extracted from the reply.
    """
    # Ask the chain to validate the payload.
    response = json_agent_chain.invoke({"payload": payload_text})

    # Work on the whitespace-trimmed text content of the reply.
    reply_text = response.content.strip()
    print(f"LLM Output: {response}")  # Debugging output
    print(f"Output String: {reply_text}")  # Debugging output

    try:
        verdict = extract_json_from_text(reply_text)
        if verdict:
            return verdict
        # An empty dict means the extractor found nothing usable.
        raise ValueError("No valid JSON found in LLM output.")
    except Exception as e:
        raise ValueError(f"Error extracting JSON validation result: {e}")

In [7]:



# Manual smoke test: sends one sample payload through processJson and prints
# the parsed validation result. This performs a live call to the LLM.
if __name__ == "__main__":
    # Example JSON payload (as a string) to validate.
    # NOTE(review): this looks like a payment/checkout record rather than an
    # invoice, payslip or quotation as described in the prompt — confirm it is
    # the intended sample.
    test_payload = """
{
  "id": "fa4da2ff-dcda-4367-a97d-0c9445147b73",
  "items": [
    {
      "name": "Canvas Slip Ons",
      "code": "CVG-096732",
      "description": "Shoes",
      "quantity": "1",
      "amount": {
        "value": 1000
      },
      "totalAmount": {
        "value": 1000
      }
    }
  ],
  "requestReferenceNumber": "5fc10b93-bdbd-4f31-b31d-4575a3785009",
  "receiptNumber": "7fa0ff6fa5a6",
  "createdAt": "2021-07-13T15:25:45.000Z",
  "updatedAt": "2021-07-13T15:26:49.000Z",
  "paymentScheme": "master-card",
  "expressCheckout": true,
  "refundedAmount": "0",
  "canPayPal": false,
  "expiredAt": "2021-07-13T16:25:45.000Z",
  "status": "COMPLETED",
  "paymentStatus": "PAYMENT_SUCCESS",
  "paymentDetails": {
    "responses": {
      "efs": {
        "paymentTransactionReferenceNo": "0bea058b-ae8e-4bfc-90d0-7fa0ff6fa5a6",
        "status": "SUCCESS",
        "receipt": {
          "transactionId": "b8bb70e1-f44f-4db3-bcb9-a939d38455a4",
          "receiptNo": "7fa0ff6fa5a6",
          "approval_code": "00001234",
          "approvalCode": "00001234"
        },
        "payer": {
          "fundingInstrument": {
            "card": {
              "cardNumber": "542482******7140",
              "expiryMonth": 9,
              "expiryYear": "2023"
            }
          }
        },
        "amount": {
          "total": {
            "currency": "PHP",
            "value": 1000
          }
        },
        "created_at": "2021-07-13T15:26:49.514Z"
      }
    },
    "paymentAt": "2021-07-13T15:26:49.000Z",
    "3ds": false
  },
  "buyer": {
    "contact": {
      "phone": "+639086216587",
      "email": "juan.delacruz@paymaya.com"
    },
    "firstName": "Juan",
    "lastName": "Dela Cruz",
    "billingAddress": {
      "line1": "6F Launchpad",
      "line2": "Reliance Street",
      "city": "Mandaluyong City",
      "state": "Metro Manila",
      "zipCode": "1552",
      "countryCode": "PH"
    },
    "shippingAddress": {
      "line1": "6F Launchpad",
      "line2": "Reliance Street",
      "city": "Mandaluyong City",
      "state": "Metro Manila",
      "zipCode": "1552",
      "countryCode": "PH"
    }
  },
  "merchant": {
    "currency": "PHP",
    "email": "merchant@gmail.com",
    "locale": "en",
    "homepageUrl": "https://www.paymaya.com",
    "isEmailToMerchantEnabled": true,
    "isEmailToBuyerEnabled": true,
    "isPaymentFacilitator": false,
    "isPageCustomized": true,
    "supportedSchemes": [
      "Visa",
      "Mastercard",
      "JCB"
    ],
    "canPayPal": false,
    "payPalEmail": null,
    "payPalWebExperienceId": null,
    "expressCheckout": true,
    "name": "Omni Merchant via Mock Processor"
  },
  "totalAmount": {
    "value": "1000",
    "currency": "PHP",
    "details": {
      "discount": "100.00",
      "serviceCharge": "0.00",
      "shippingFee": "200.00",
      "tax": "120.00",
      "subtotal": "780.00"
    }
  },
  "redirectUrl": {
    "success": "https://www.merchantsite.com/success?id=5fc10b93-bdbd-4f31-b31d-4575a3785009",
    "failure": "https://www.mechantsite.com/failure?id=5fc10b93-bdbd-4f31-b31d-4575a3785009",
    "cancel": "https://www.merchantsite.com/cancel?id=5fc10b93-bdbd-4f31-b31d-4575a3785009"
  },
  "transactionReferenceNumber": "0bea058b-ae8e-4bfc-90d0-7fa0ff6fa5a6"
}
"""
    # Validate the sample; processJson raises ValueError if the reply has no usable JSON.
    result = processJson(test_payload)
    print("JSON Validation Result:")
    print(result)

LLM Output: content='```json\n{\n  "status": "valid",\n  "anomalies": [],\n  "action": "log_only"\n}\n```\n\n**Explanation:**\n\nThe provided JSON payload adheres to the schema and business rules defined. \n\n* **Schema Validation:** All required fields are present, data types are consistent, and there are no unexpected fields.\n* **Business Rule Validation:**\n    * Dates are realistic (no future dates).\n    * Due date is not before the issue date.\n    * Net pay is not exceeding gross salary (not applicable in this case as it\'s a quote).\n    * Sum of line items aligns with the total amount.\n    * Currency is consistent.\n    * Duplicate IDs are not detected. \n\n\n\nLet me know if you\'d like to test with another payload!\n' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 177, 'prompt_tokens': 1852, 'total_tokens': 2029, 'completion_time': 0.321818182, 'prompt_time': 0.09260311, 'queue_time': 0.298404532, 'total_time': 0.414421292}, 'model_name': 'Gem