In [None]:
from typing import List, Dict, Optional, Literal, Union

In [None]:
## Load configuration
from dotenv import dotenv_values
from src import utils

# Secrets come from ../.env; pipeline settings come from ../config.yaml.
DOT_ENV = dotenv_values("../.env")
config = utils.load_config("../config.yaml")

# Override the main data folder, then ask utils for this run's output directory
# (a unique run-id folder, per the original note on this cell).
file_locations = config["FILE_LOCATIONS"]
file_locations["MAIN_DATA_FOLDER"] = "../src/data"
output_directory = utils.make_output_directory(file_locations)

In [None]:
## Step 1: Preprocess the guide
from pathlib import Path
from src.components import incose

# Look up the config entries this step reads once, so the call below stays compact.
guide_path = Path(config['FILE_LOCATIONS']['INCOSE_GUIDE'])
guide_settings = config['INCOSE_GUIDE_SETTINGS']

incose_df = incose.preprocess_incose_guide(
    input_path=guide_path,
    output_path=Path(output_directory),
    # NOTE(review): hard-coded page window; presumably the pages holding the
    # Section 4 rules -- confirm against the guide edition in use.
    start_page=65,
    end_page=115,
    regex=guide_settings['SECTIONS_REGEX_PAT'],
    replace_tokens=guide_settings['REPLACE_TOKENS'],
    subpatterns=guide_settings['SUBPATTERNS'],
    replace_with=guide_settings['REPLACE_WITH'],
)
incose_df.head(5)

In [None]:
## Step 2: Tag every rule with the attribute group it belongs to (e.g., accuracy, concision)
from src.components import prompteval as pe

# NOTE(review): the module object `pe` is itself passed as the first argument --
# confirm that make_rule_group_config really expects it.
rule_group_config = pe.make_rule_group_config(pe, config['SECTION_4_RULE_GROUPS'])

# Map each rule number to its group; the result becomes the 'rule_group' column.
rule_groups = incose_df['rule_number'].map(rule_group_config)
incose_df['rule_group'] = rule_groups

# Show the distinct groups that were assigned.
print(set(rule_groups.values))

In [None]:
## View the accuracy rule group slice of the incose_df
is_accuracy_rule = incose_df['rule_group'] == 'Accuracy'
incose_df_accuracy = incose_df.loc[is_accuracy_rule]

# Render one text block per accuracy rule, then concatenate them in row order.
rule_blocks = [
    f"""
    {row['rule_number']}: {row['rule_title']} 
    Definition: {row['definition']}
    Elaboration: {row['elaboration']}
    Examples: {row['examples']}
    """
    for _, row in incose_df_accuracy.iterrows()
]
context_str = ''.join(rule_blocks)

print(context_str)

In [None]:
# Context string for the pre-warming prompt.
# This assignment replaces the rule text assembled in the previous cell with a
# hand-curated accuracy review method, followed by the JSON response format the
# generated prompt should request. The format block must not contain trailing
# commas, otherwise it models invalid JSON for the LLM to copy.
context_str = """
Here’s a practical, lightweight way to review a requirement for accuracy, aligned with the INCOSE Guide’s Section 4 rules and quality characteristics.

Preparation (what to have in hand)
- Source: stakeholder need, parent requirement, contract/standard, hazard/mission need.
- Context: operational scenarios/modes, interface definitions, architecture or allocation, performance budgets, environmental ranges.
- Verification approach: intended method (Test/Analysis/Inspection/Demonstration) and acceptance criteria.

Three-pass review method

Pass 1 — Language and structure (quick screen)
- Subject and shall: Does the statement name the item (e.g., “The system”) and use “shall” for the requirement?
- One per requirement: Is there only one enforceable statement (no and/or, chained clauses)?
- Positive, active, clear: Prefer positive statements with explicit trigger/condition (“When/If…”).
- Measurable: Are there concrete, measurable criteria with units, tolerances, and time bounds?
- Unambiguous wording: No vague terms (robust, user-friendly, minimize, as necessary), no open-ended “etc.” or “including but not limited to,” no undefined acronyms.

Pass 2 — Technical correctness and completeness (accuracy)
- Trace and necessity: Is it traceable to a valid source/parent? Is it actually needed at this level?
- Right level of abstraction: States “what” for this level (no premature design), or, if it must constrain design, is the rationale clear and approved?
- Context and conditions: Are operating modes, environmental conditions, and preconditions explicit (temp, voltage, state, load, network conditions)?
- Values and units sanity check:
  - Threshold vs. objective correct? Direction correct (min/max/<=/>=)?
  - Units consistent with parent budgets and interfaces; conversions checked.
  - Significant figures and tolerances realistic and testable.
- Alignment with allocations/budgets: Do numbers roll down correctly from parent performance budgets and constraints? Any double-counting or gaps?
- Feasibility: Technically achievable with current technology and margins; compatible with schedule/cost/risk.
- Interfaces precise: External items named with version/ID; data types, ranges, and protocols defined or referenced precisely.
- Standards/references: Cited by exact identifier and revision; referenced clauses match the intended behavior.
- Exceptions/faults: Does it define behavior under relevant faults or edge cases, or is there a linked requirement that does?

Pass 3 — Verifiability and conflict checks
- Pass/fail interpretation: Can a tester decide unambiguously if it passes? Sketch a test or analysis in one or two sentences.
- Verification method fit: Chosen method (T/A/I/D) is appropriate and feasible; required facilities/instruments exist.
- Boundary tests: Identify boundary values and worst-case conditions; ensure requirement can be verified there.
- Consistency: No conflict with peer requirements (same set and interfacing items); terminology and units consistent.
- Redundancy: Not duplicating another requirement; if similar, ensure they don’t diverge or contradict.
- TBD/TBR control: Any placeholders have owners, due dates, and resolution criteria.

Quick checklist you can apply to any single requirement
1) Who is the subject? (“The [item] shall …”)
2) Under what condition/mode? (“When/If/While …”)
3) What outcome/result is required? (state the “what,” not the “how”)
4) How much/how well, with units/tolerances/time bounds?
5) Where did this come from? (trace to source/parent; is it necessary?)
6) Is it feasible with current budgets/architecture?
7) Are interfaces/standards referenced precisely (name, ID, version)?
8) Is the wording free of ambiguity and open-ended terms?
9) What is the verification method, and what would the pass/fail test look like?
10) Any conflicts or duplicates with other requirements?

Simple accuracy aids
- Paraphrase test: Have two reviewers independently restate the requirement in their own words; differences reveal ambiguity/errors.
- EARS reframe: Restate as “When <trigger>, the <item> shall <response> within <time/criteria>” to expose missing conditions or measures.
- Boundary walkthrough: Check extremes of environment/inputs for hidden constraints.
- Unit/number sanity check: Recalculate conversions; compare to known performance and parent allocations.
- Adversarial read: Ask, “Could this be satisfied in an unintended way?” If yes, tighten it.

Example (condensed)
Original: “The controller shall save data quickly during power loss.”
Review outcome:
- Add subject/measure/condition: “When input voltage drops below 10.0 V for more than 2 ms, the controller shall commit the current configuration to non-volatile memory within 100 ms without data corruption.”
- Check accuracy: 10.0 V/2 ms threshold matches power budget and hold-up time; 100 ms aligns with memory write time plus margin; “data corruption” defined elsewhere; verification by test with controlled brownout; no conflicts with shutdown sequence requirement.

Deliverables from the review
- Marked-up requirement (or rewritten version)
- Recorded findings: defects, questions, TBDs, conflicts
- Proposed verification method and sketch of acceptance
- Traceability updates (source link, parent allocation)
- Decision: accept, revise, split, or defer with TBR/TBD controls

If you share a few of your requirements, I can run this review method on them and show the before/after along with the specific issues found.

Use the following JSON response format in the prompt:

{
  "requirements_review": [
    {
      "requirement_id": "<ID>",
      "original": "<original requirement>",
      "checks": {
        "<rule_id>": {"status": "pass|fail", "<rule description>": ["<issues>"], "explanation": "<brief>"}
      },
      "proposed_rewrite": "<single improved requirement that resolves all detected issues>",
      "split_recommendation": {
        "needed": true|false,
        "because": "<why>",
        "split_into": ["<Req A>", "<Req B>"]
      }
    }
  ]
}

"""

In [None]:
## Step 3: Generate a prompt for each attribute group
from pydantic import BaseModel, SecretStr, Field
from openai import OpenAI
import src
from src.components.promptrunner import ResponseClient

# dotenv_values() maps a key that is declared without a value to None, and
# str(None) would silently yield the bogus key "None". Fail fast with the same
# exception type a missing key already raised, instead of sending that to the API.
_raw_openai_api_key = DOT_ENV.get('OPENAI_API_KEY')
if not _raw_openai_api_key:
    raise KeyError("OPENAI_API_KEY is missing or empty in ../.env")
openai_api_key = str(_raw_openai_api_key)
# SecretStr masks the key if this variable is ever displayed or logged.
openai_api_key_secret = SecretStr(openai_api_key)

# Create OpenAI instance, then wrap it in the project's response client.
client = OpenAI(api_key=openai_api_key_secret.get_secret_value())
resp_client = ResponseClient(client=client, model="gpt-5")
# Structured-output schema for the generated prompt template: it is passed as
# `response_format` to resp_client.get_structured_response, and the parsed
# result is read back through `.parsed.system` / `.parsed.user`.
# Documented with comments rather than a class docstring on purpose: pydantic
# would fold a docstring into the model's JSON-schema description.
class PromptTemplate(BaseModel):
    # System-message half of the generated prompt template.
    system: str = Field(description="A prompt system message") 
    # User-message half of the generated prompt template.
    user: str = Field(description="A user message")

# System message for the prompt-generation call: it casts the model as a prompt
# engineer and asks for a reusable template in return.
# This is a plain (non-f) string, so `{{input_variable}}` below reaches the
# model with its double curly brackets intact.
system_message = """
You're a world-leading expert in AI prompt engineering.
You will be given a description of a task and your job is to create a prompt template to automate that task using prompt engineering best practices.

Prompt engineering is the process of discovering prompts which reliably yield useful or desired results.
Prompt Engineering best practices include:
1. Give Direction – Describe the desired style or persona in detail, or reference a relevant persona.
2. Specify Format – Define what rules to follow, and the required structure of the response.
3. Provide Examples – Insert a diverse set of test cases where the task was done correctly.

Your prompt template must take context from the user in the form of relevant input variables surrounded by curly brackets i.e. {{input_variable}}. These placeholders should be labelled in the template as they will be replaced with values when the prompt is used. Your prompt should provide multiple examples of different values the input variables might take, and what the expected responses would be in these test cases.

Respond only with your prompt template, and nothing else. Be creative.

DO NOT USE EMOJIS
"""

# User message for the prompt-generation call: the task statement, the review
# context (interpolated from context_str), and the success criteria.
user_message = f"""
Here is the task:
Generate reviews of requirements according to the provided Context.

Context
---
{context_str}

Rules
---
The prompt will be deemed successful if it matches the following criteria:
- Is the submission helpful, insightful, and appropriate?
- Are prompt engineering best practices being used?
- Relevant input variables are included in the prompt template

PROMPT TEMPLATE:
"""

# Echo both messages so they can be inspected before the model is called.
for label, text in (("System", system_message), ("User", user_message)):
    print(f"{label}:\n {text}")

In [None]:
## Run prompt
# Pair each chat role with its text, then expand the pairs into message dicts.
role_contents = (("system", system_message), ("user", user_message))
messages = [{"role": role, "content": content} for role, content in role_contents]

# Request a reply parsed into the PromptTemplate schema, then hand it to the
# client's own output check.
structured_response = resp_client.get_structured_response(
    messages=messages,
    response_format=PromptTemplate,
)
resp_client.check_structured_output(structured_response)

In [11]:
print(structured_response.parsed.system)


You are an INCOSE-trained senior systems engineer and requirements quality reviewer. Your mission is to review each provided requirement for accuracy and quality using a disciplined three-pass method and produce only the requested JSON output. Be precise, neutral, and actionable. Do not include extra commentary outside the JSON. If information is missing, flag it explicitly and proceed conservatively. Prefer EARS-style phrasing when proposing rewrites. Ensure all units, tolerances, and time bounds are explicit and testable. Obey the provided context and verification constraints. If a requirement contains multiple enforceable statements, recommend a split and propose the decomposed requirements.


In [12]:
print(structured_response.parsed.user)


Task
Review each requirement using the provided review context and produce a structured JSON evaluation with checks, a concise improved rewrite, and a split recommendation when applicable.

Follow this method (aligned to the supplied review context):
- Pass 1 — Language and structure
- Pass 2 — Technical correctness and completeness (accuracy)
- Pass 3 — Verifiability and conflict checks

Rule IDs and descriptions to use for checks
- L1: Subject and shall — Names the item and uses “shall”.
- L2: One per requirement — Only one enforceable statement; avoids and/or, chained clauses.
- L3: Positive, active, clear — Prefer positive active voice with explicit trigger/condition.
- L4: Measurable — Concrete, measurable criteria with units, tolerances, time bounds.
- L5: Unambiguous wording — No vague terms or open-ended phrases; acronyms defined.
- T1: Trace and necessity — Traceable to valid source/parent; needed at this level.
- T2: Right level of abstraction — States the “what”; design cons

In [None]:
## Generated system message
# Show the generated system message, then save it in this run's output folder.
generated_system_text = structured_response.parsed.system
print(generated_system_text)

system_message_path = f"{output_directory}/generated_system_message_accuracy.txt"
with open(system_message_path, mode='w', encoding="utf-8") as out_file:
    out_file.write(generated_system_text)

In [None]:
## Generated user message
# Show the generated user message, then save it in this run's output folder.
generated_user_text = structured_response.parsed.user
print(generated_user_text)

user_message_path = f"{output_directory}/generated_user_message_accuracy.txt"
with open(user_message_path, mode='w', encoding="utf-8") as out_file:
    out_file.write(generated_user_text)

In [None]:
system_="""
You are a senior systems engineering requirements quality analyst and technical editor. Apply the provided rules context rigorously, focusing on R2 Active Voice, R3 Appropriate Subject-Verb, R5 Definite Articles, R6 Common Units of Measure, R7 Vague Terms, R8 Escape Clauses, and R9 Open-Ended Clauses. Deliver precise, actionable critiques and high-quality rewrites that preserve the original intent. Do not introduce design beyond what is necessary to correct clarity or verifiability. If information is missing to comply with the rules, flag the ambiguity and propose placeholders rather than inventing facts. Be concise, direct, and consistent with the project’s glossary, entity scope, and measurement system.' user='Task\n- Review and improve the supplied requirements against the provided rules context.\n- Apply only the rules in {{rules_context}} unless explicitly told otherwise.\n- Maintain original intent; if intent is unclear, note ambiguity and propose a conservative rewrite with placeholders.\n- Align subjects to the declared entity scope and glossary.\n- Use the chosen measurement system consistently.\n- Write in {{output_language}}.\n\nInputs\n- rules_context: {{rules_context}}\n- entity_scope: {{entity_scope}}  (e.g., System level "The <SOI> shall ..."; Subsystem level "The <subsystem> shall ..."; Business management "The <business> shall ...")\n- glossary: {{glossary}}  (key defined entities and terms, including intended "User")\n- measurement_system: {{measurement_system}}  (e.g., Metric (SI) with degree Celsius, km, s; or US customary)\n- project_patterns: {{project_patterns}}  (optional catalog of agreed patterns)\n- requirements: {{requirements}}  (list of requirement statements to review)\n- review_depth: {{review_depth}}  (brief | standard | deep)\n- id_prefix: {{id_prefix}}  (optional, for numbering e.g., RQ)\n- tone: {{tone}}  (e.g., crisp, coaching)\n- output_language: {{output_language}}  (default English)\n\nOutput format\nProvide exactly the 
following structure:\n\n1) Review Summary\n- Scope: <echo entity_scope>\n- Rules Applied: <list rule IDs from rules_context>\n- Counts: <#requirements>, <#pass>, <#needs revision>\n- Top Issues Observed: <bulleted list>\n- Global Recommendations: <bulleted list>\n\n2) Per-Requirement Reviews\nFor each requirement, in input order, produce:\n- ID: <{{id_prefix}}-NN>\n- Original: <verbatim requirement>\n- Rule Checks\n  - R2 Active Voice: <Pass|Fail> – <reason; identify subject and voice; show passive if present>\n  - R3 Appropriate Subject-Verb: <Pass|Fail> – <is subject appropriate to entity_scope? verb appropriate to subject?>\n  - R5 Definite Articles: <Pass|Fail> – <indefinite articles detected/replacements>\n  - R6 Units: <Pass|Fail> – <units presence/consistency with measurement_system; property-element pairs>\n  - R7 Vague Terms: <Pass|Fail> – <list terms found or None>\n  - R8 Escape Clauses: <Pass|Fail> – <list phrases found or None>\n  - R9 Open-Ended Clauses: <Pass|Fail> – <list phrases found or None>\n- Overall Verdict: <Pass|Needs Revision>\n- Improved Rewrite(s)\n  - Minimal Change: <single minimally edited sentence>\n  - Best-Practice Rewrite: <fully aligned with entity_scope, active voice, definite terms, precise units>\n- Notes and Assumptions: <ambiguities, placeholders used, rationale for choices>\n- Confidence: <High|Medium|Low>\n\n3) Red-Flag Index\n- Vague Terms Found: <unique list>\n- Escape Clauses Found: <unique list>\n- Open-Ended Clauses Found: <unique list>\n- Units At Risk: <elements needing units or unit harmonization>\n\nConstraints and Guidance\n- R2: Use active voice. Identify the responsible entity as the grammatical subject; avoid "shall be <past participle>". Prefer "The <entity> shall <verb> <object>".\n- R3: Ensure the subject matches the declared entity scope. Avoid subjects like "The User" for system requirements; use the entity from glossary (e.g., The <SOI>). 
Ensure verbs are concrete and verifiable.\n- R5: Prefer the definite article "the" for defined entities. Replace "a/an" with "the" when referring to defined entities or roles per glossary.\n- R6: State explicit units consistent with measurement_system. Do not mix systems. Keep consistent property-element unit pairs. Preserve precision when implying conversions; prefer placeholders if uncertain.\n- R7: Remove vague qualifiers and adverbs. Replace with measurable, testable criteria.\n- R8: Remove escape clauses like "where possible"; make the requirement unconditional or specify exact conditions.\n- R9: Remove open-ended phrases like "including but not limited to", "etc."; enumerate cases as separate requirements if needed.\n- Do not alter domain meaning. If you must choose values to eliminate vagueness and none are provided, insert placeholders like <value>, <range>, <standard id> and flag in Notes.\n\nProcess\n1) Parse {{requirements}} into individual statements; preserve order.\n2) For each statement, test against each rule in {{rules_context}}.\n3) Propose two improved rewrites (Minimal Change, Best-Practice) that comply with all applicable rules and align with {{entity_scope}}, {{glossary}}, and {{measurement_system}}.\n4) Summarize systemic issues and recommend global fixes.\n\nExamples\nExample A – System level\n- Example Inputs\n  - rules_context: R2, R3, R5, R6, R7, R8, R9\n  - entity_scope: System – The Autopilot_System shall ...\n  - glossary: User = certified pilot; Autopilot_System = SOI; Aircraft_Altitude, Cruise_Mode defined\n  - measurement_system: Metric (SI); seconds (s), minutes (min), kilometres (km), degree Celsius (°C)\n  - requirements:\n    1. The User shall be notified of altitude deviations.\n    2. While in Cruise_Mode, altitude shall be recorded every 5 min.\n    3. 
The Autopilot_System shall display engine data, etc.\n- Expected Output (abbreviated)\n  1) Review Summary\n  - Scope: System – The Autopilot_System shall ...\n  - Rules Applied: R2, R3, R5, R6, R7, R8, R9\n  - Counts: 3 total, 0 pass, 3 need revision\n  - Top Issues Observed:\n    - Passive voice and incorrect subject (R2, R3)\n    - Missing definite articles and units precision (R5, R6)\n    - Open-ended clause "etc." (R9)\n  - Global Recommendations:\n    - Use "The Autopilot_System" as subject for system requirements\n    - Replace "etc." with explicit items or separate requirements\n  2) Per-Requirement Reviews\n  - ID: RQ-01\n    Original: The User shall be notified of altitude deviations.\n    Rule Checks\n    - R2 Active Voice: Fail – Passive construction "shall be notified"; subject is "The User" receiving action\n    - R3 Appropriate Subject-Verb: Fail – Subject should be The Autopilot_System; verb should describe system action (notify)\n    - R5 Definite Articles: Pass – Uses "The"; but "altitude deviations" lacks defined object naming\n    - R6 Units: Fail – No thresholds/units for deviation magnitude and time to notify\n    - R7 Vague Terms: Fail – "deviations" undefined magnitude\n    - R8 Escape Clauses: Pass – None\n    - R9 Open-Ended Clauses: Pass – None\n    Overall Verdict: Needs Revision\n    Improved Rewrite(s)\n    - Minimal Change: The Autopilot_System shall notify the Pilot of Aircraft_Altitude deviations greater than <Δaltitude_threshold> metres within <t_notify> seconds of detection.\n    - Best-Practice Rewrite: The Autopilot_System shall notify the Pilot within <t_notify> s when the Aircraft_Altitude deviates by more than <Δaltitude_threshold> m from the Selected_Altitude while in the Cruise_Mode.\n    Notes and Assumptions: Thresholds and time are placeholders pending stakeholder agreement.\n    Confidence: Medium\n  - ID: RQ-02\n    Original: While in Cruise_Mode, altitude shall be recorded every 5 min.\n    Rule Checks\n    - R2: 
Fail – Missing responsible subject; passive implied\n    - R3: Fail – Subject should be The Autopilot_System; verb should be "record"\n    - R5: Fail – "altitude" should reference defined object (Aircraft_Altitude) with definite article\n    - R6: Pass – Time unit provided (min) consistent with SI-accepted units\n    - R7: Pass – No vague terms\n    - R8: Pass – None\n    - R9: Pass – None\n    Overall Verdict: Needs Revision\n    Improved Rewrite(s)\n    - Minimal Change: While in the Cruise_Mode, the Autopilot_System shall record the Aircraft_Altitude every 5 min.\n    - Best-Practice Rewrite: While in the Cruise_Mode, the Autopilot_System shall record the Aircraft_Altitude at a period of 5 min with a measurement precision of <precision> m.\n    Notes and Assumptions: Added measurement precision placeholder to support verification.\n    Confidence: High\n  - ID: RQ-03\n    Original: The Autopilot_System shall display engine data, etc.\n    Rule Checks\n    - R2: Pass – Active voice with correct subject\n    - R3: Pass – Subject appropriate; verb concrete (display)\n    - R5: Fail – "engine data" not specific/defined; no definite references\n    - R6: Fail – No units for displayed quantities\n    - R7: Fail – "data" unspecified (vague)\n    - R8: Pass – None\n    - R9: Fail – Contains "etc."\n    Overall Verdict: Needs Revision\n    Improved Rewrite(s)\n    - Minimal Change: The Autopilot_System shall display the Engine_RPM, Engine_Torque, and Engine_Oil_Temperature.\n    - Best-Practice Rewrite: The Autopilot_System shall display the Engine_RPM (rev/min), Engine_Torque (N·m), and Engine_Oil_Temperature (°C) in accordance with <Display_Standard_ID>.\n    Notes and Assumptions: Removed open-ended clause; enumerated key parameters. 
Add standard ID when known.\n    Confidence: High\n  3) Red-Flag Index\n  - Vague Terms Found: data, deviations\n  - Escape Clauses Found: None\n  - Open-Ended Clauses Found: etc.\n  - Units At Risk: deviation magnitude (m), notification time (s), display units for engine parameters\n\nExample B – Subsystem level\n- Example Inputs\n  - entity_scope: Subsystem – The Engine shall ...\n  - glossary: Engine = Turboprop_Engine; Shaft_Power defined\n  - measurement_system: Metric (SI)\n  - requirements:\n    1. An engine shall provide sufficient power.\n    2. The Engine shall operate at temperatures less than 800 degrees.\n- Expected Output (abbreviated)\n  - RQ-01\n    R2: Pass – Active; subject present\n    R3: Fail – Indefinite subject ("An engine") for subsystem set; use "The Engine"\n    R5: Fail – Indefinite article; vague noun phrase\n    R6: Fail – No units for power\n    R7: Fail – "sufficient" is vague\n    R8: Pass – None\n    R9: Pass – None\n    Overall: Needs Revision\n    Minimal Change: The Engine shall provide a Shaft_Power of at least <P_min> kW at <operating_condition>.\n    Best-Practice: The Engine shall provide Shaft_Power ≥ <P_min> kW at Sea_Level ISA conditions and ≤ <altitude> m, <temperature> °C.\n    Notes: Insert placeholders for testable thresholds and conditions.\n  - RQ-02\n    R2: Pass; R3: Pass\n    R5: Pass – Uses "The Engine"\n    R6: Fail – "degrees" missing scale\n    R7: Pass\n    R8: Pass\n    R9: Pass\n    Overall: Needs Revision\n    Minimal Change: The Engine shall operate at temperatures less than or equal to 800 °C.\n    Best-Practice: The Engine shall maintain Turbine_Inlet_Temperature ≤ 800 °C for a duration of <t> s at <power_setting>.\n\nExample C – Business management level\n- Example Inputs\n  - entity_scope: Business – The ACME_Company shall ...\n  - glossary: ACME_Company defined; LTIFR defined\n  - measurement_system: Metric (SI)\n  - requirements:\n    1. ACME shall be safe.\n    2. 
The ACME_Company shall, where possible, reduce incidents.\n- Expected Output (abbreviated)\n  - RQ-01\n    R2: Fail – Passive "shall be"; vague predicate\n    R3: Pass – Subject aligns to business; verb inappropriate\n    R5: Fail – Uses "ACME" inconsistently; prefer defined entity name\n    R6: Pass – Not applicable, but measurable KPI missing\n    R7: Fail – "safe" is vague\n    R8: Pass\n    R9: Pass\n    Overall: Needs Revision\n    Minimal Change: The ACME_Company shall achieve a Lost_Time_Injury_Frequency_Rate of ≤ <target> per 200,000 work hours by <date>.\n    Best-Practice: The ACME_Company shall achieve LTIFR ≤ <target> per 200,000 work hours and Total_Recordable_Incident_Rate ≤ <target2> per 200,000 work hours by <date>.\n  - RQ-02\n    R2: Pass – Active\n    R3: Pass – Subject appropriate\n    R5: Pass\n    R6: Pass – Not applicable\n    R7: Pass – "reduce" measurable if quantified\n    R8: Fail – Contains "where possible"\n    R9: Pass\n    Overall: Needs Revision\n    Minimal Change: The ACME_Company shall reduce Safety_Incidents by at least <percentage>% year-over-year.\n    Best-Practice: The ACME_Company shall reduce Safety_Incidents by ≥ <percentage>% year-over-year from the <baseline_year> baseline, measured quarterly.\n\nNow perform the review using the provided Inputs and produce the Output format exactly as specified.
"""

In [None]:
## Generated system message
"""
You are a Senior Requirements Quality Analyst and technical editor. You specialize in detecting and fixing requirement defects using authoritative quality rules. Be rigorous, consistent, and concise. Maintain the author’s technical intent while removing ambiguity. Do not add new functionality. Ask targeted clarification questions when needed.

Authoritative rules to enforce (from the provided Context):
- R7 Vague Terms: identify and replace vague quantifiers, adjectives, and adverbs with measurable, testable, and verifiable criteria.
- R9 Open-Ended Clauses: remove open-ended phrases (e.g., “including but not limited to”, “etc.”, “and so on”); require explicit enumerations; recommend splitting into multiple atomic requirements if needed.
- R12 Correct Grammar: ensure correct grammar, syntax, and punctuation to avoid ambiguity; correct tense/voice, subject-verb agreement, and modifier placement.
- R38 Abbreviations: avoid ambiguous abbreviations; if an abbreviation is in the provided glossary with a single meaning, it may be retained (prefer first-use expansion); otherwise fully spell out.

Style and constraints:
- Output must strictly follow the Response Format specified below. Do not use Markdown or tables.
- Keep wording precise, testable, and verifiable. Prefer active voice, singular characteristic per requirement.
- If a numeric threshold is missing, use any provided quantitative defaults; otherwise mark as TBD and add a clarification question.
- If input items lack IDs, auto-assign REQ-001, REQ-002, ... in order.
- Do not remove domain terms; capitalize defined entities consistently.
- Be self-consistent across all rewrites.

Response Format (produce exactly this JSON structure):
{
  "review_metadata": {
    "domain": "<echo {{domain}}>",
    "rules_applied": ["R7", "R9", "R12", "R38"],
    "assumptions": ["<list any assumptions made>"]
  },
  "compliance_summary": {
    "total_requirements": <int>,
    "pass_count": <int>,
    "fail_count": <int>,
    "issues_by_rule": {
      "R7": <int>, "R9": <int>, "R12": <int>, "R38": <int>
    }
  },
  "requirements_review": [
    {
      "id": "<ID>",
      "original": "<original requirement>",
      "checks": {
        "R7": {"status": "pass|fail", "vague_terms": ["<terms>"], "explanation": "<brief>"},
        "R9": {"status": "pass|fail", "open_ended_phrases": ["<phrases>"], "explanation": "<brief>"},
        "R12": {"status": "pass|fail", "grammar_issues": ["<issues>"], "explanation": "<brief>"},
        "R38": {"status": "pass|fail", "abbreviations_found": ["<abbr>"], "actions": "<expand/retain/remove>", "explanation": "<brief>"}
      },
      "proposed_rewrite": "<single improved requirement that resolves all detected issues>",
      "split_recommendation": {
        "needed": true|false,
        "because": "<why>",
        "split_into": ["<Req A>", "<Req B>"]
      },
      "clarifying_questions": ["<question 1>", "<question 2>"]
    }
  ]
}

Evaluation method:
1) Parse inputs and normalize IDs. 2) For each requirement, test R7, R9, R12, R38. 3) Explain each failure succinctly. 4) Rewrite to a single, verifiable sentence unless a split is recommended. 5) Apply glossary rules for abbreviations; on first use of allowed abbreviations, prefer the expanded form with abbreviation in parentheses. 6) If required numbers are missing and no defaults are provided, use TBD placeholders and ask explicit questions to resolve them. 7) Summarize compliance.

Important: If {{requirements}} is empty, respond with a single clarifying question requesting requirements to review and stop.
"""

## Generated user message
"""
Task: Review and improve the following requirement statements using the provided Context and variables.

Context (authoritative rules):
{{context}}

Variables:
- Domain: {{domain}}
- Requirements (list or newline-separated; may include IDs):
{{requirements}}
- Project Glossary (abbreviations and definitions; JSON or list):
{{glossary}}
- Quantitative Defaults (JSON mapping of common thresholds; optional, e.g., {"availability_threshold":"99.9%","availability_window_hours":"720","latency_seconds":"5","proximity_km":"20"}):
{{quantitative_defaults}}
- House Style Notes (nomenclature, capitalization, naming; optional):
{{house_style}}
- Max alternative rewrites per requirement (integer; default 1): {{max_rewrites_per_requirement}}
- Enable split recommendations (true|false; default true): {{enable_split}}

Produce output strictly in the Response Format JSON. Do not use Markdown.

Examples

Example 1 — Aviation domain with minimal glossary
Input variables:
- Domain: Aviation
- Requirements:
  REQ-1: The SOI shall usually be online.
  REQ-2: The Flight_Information_System shall display per Display Standard xyz the Tracking_Information for relevant aircraft within 5 seconds of detection.
  REQ-3: The op shall log events promptly.
- Project Glossary:
  ATC: Air Traffic Control
- Quantitative Defaults:
  {"availability_threshold":"99.95%","availability_window_hours":"720","latency_seconds":"5","proximity_km":"20"}
- Enable split recommendations: true

Expected output (abbreviated):
{
  "review_metadata": {"domain": "Aviation", "rules_applied": ["R7","R9","R12","R38"], "assumptions": []},
  "compliance_summary": {"total_requirements": 3, "pass_count": 0, "fail_count": 3, "issues_by_rule": {"R7": 3, "R9": 1, "R12": 0, "R38": 1}},
  "requirements_review": [
    {"id": "REQ-1", "original": "The SOI shall usually be online.",
     "checks": {"R7": {"status":"fail","vague_terms":["usually"],"explanation":"Frequency not verifiable."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The System_of_Interest (SOI) shall have an Availability of at least 99.95% measured over any rolling 720-hour period.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": []},

    {"id": "REQ-2", "original": "The Flight_Information_System shall display per Display Standard xyz the Tracking_Information for relevant aircraft within 5 seconds of detection.",
     "checks": {"R7": {"status":"fail","vague_terms":["relevant"],"explanation":"Scope of aircraft is undefined."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":"Word order acceptable but can be improved."},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Flight_Information_System shall display, in accordance with Display Standard xyz, the Tracking_Information of each Aircraft located within 20 kilometers of the Airfield within 5 seconds of detection.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": ["Confirm whether the proximity criterion is distance from Airfield, ATC Sector bounds, or another defined Control_Area."]},

    {"id": "REQ-3", "original": "The op shall log events promptly.",
     "checks": {"R7": {"status":"fail","vague_terms":["promptly"],"explanation":"No latency target."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"fail","abbreviations_found":["op"],"actions":"expand","explanation":"'op' not defined in glossary."}},
     "proposed_rewrite": "The Operator shall record each Event within 5 seconds of occurrence.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": ["Confirm whether 5 seconds is acceptable for event logging latency."]}
  ]
}

Example 2 — Banking domain with defined abbreviations and open-ended clause
Input variables:
- Domain: Banking
- Requirements:
  1) The ATM shall display the Customer Account_Number, Account_Balance, and so on per Display Standard xyz.
  2) The Weapon_System shall storing the location of all ordnance.
  3) The website shall approximately load quickly.
- Project Glossary:
  ATM: Automated Teller Machine
  PIN: Personal Identification Number
- Quantitative Defaults:
  {"page_load_seconds":"2"}
- Enable split recommendations: true

Expected output (abbreviated):
{
  "review_metadata": {"domain": "Banking", "rules_applied": ["R7","R9","R12","R38"], "assumptions": ["Treat numbered list as ordered requirements; auto-assign IDs."]},
  "compliance_summary": {"total_requirements": 3, "pass_count": 0, "fail_count": 3, "issues_by_rule": {"R7": 2, "R9": 1, "R12": 1, "R38": 0}},
  "requirements_review": [
    {"id": "REQ-001", "original": "The ATM shall display the Customer Account_Number, Account_Balance, and so on per Display Standard xyz.",
     "checks": {"R7": {"status":"pass","vague_terms":[],"explanation":""},
                 "R9": {"status":"fail","open_ended_phrases":["and so on"],"explanation":"Non-specific list."},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":["ATM"],"actions":"retain","explanation":"Defined in glossary; first use expanded."}},
     "proposed_rewrite": "The Automated Teller Machine (ATM) shall display the Customer Account_Number and Account_Balance in accordance with Display Standard xyz.",
     "split_recommendation": {"needed": true, "because": "Multiple display elements and open-ended enumeration.", "split_into": ["The Automated Teller Machine (ATM) shall display the Customer Account_Number in accordance with Display Standard xyz.", "The Automated Teller Machine (ATM) shall display the Customer Account_Balance in accordance with Display Standard xyz."]},
     "clarifying_questions": ["List all additional fields to be displayed to replace 'and so on' (e.g., Account_Type, Overdraft_Limit)."]},

    {"id": "REQ-002", "original": "The Weapon_System shall storing the location of all ordnance.",
     "checks": {"R7": {"status":"pass","vague_terms":[],"explanation":""},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"fail","grammar_issues":["incorrect verb form: 'shall storing'"],"explanation":"Use base verb after 'shall'."},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Weapon_System shall store the location of all Ordnance.",
     "split_recommendation": {"needed": false, "because": "Single action.", "split_into": []},
     "clarifying_questions": []},

    {"id": "REQ-003", "original": "The website shall approximately load quickly.",
     "checks": {"R7": {"status":"fail","vague_terms":["approximately","quickly"],"explanation":"Not measurable."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Website shall complete initial page load within 2 seconds for 95% of requests measured over any 24-hour period.",
     "split_recommendation": {"needed": false, "because": "Single measurable characteristic.", "split_into": []},
     "clarifying_questions": ["Confirm the percentile (e.g., 95% vs. 99%) and measurement window."]}
  ]
}

Now perform the review on the provided inputs and return only the Response Format JSON.
"""

In [None]:
## Modify the prompt to return a simpler structure
# System prompt for the simplified review. It is sent verbatim (no placeholder
# substitution is applied to it), so it must not contain template tokens such
# as {requirements}; those exist only in the user message.
system_message_modified = """
You are a Senior Requirements Quality Analyst and technical editor. You specialize in detecting and fixing requirement defects using authoritative quality rules. Be rigorous, consistent, and concise. Maintain the author’s technical intent while removing ambiguity. Do not add new functionality. Ask targeted clarification questions when needed.

Authoritative rules to enforce (from the provided Context):
- R7 Vague Terms: identify and replace vague quantifiers, adjectives, and adverbs with measurable, testable, and verifiable criteria.
- R9 Open-Ended Clauses: remove open-ended phrases (e.g., “including but not limited to”, “etc.”, “and so on”); require explicit enumerations; recommend splitting into multiple atomic requirements if needed.
- R12 Correct Grammar: ensure correct grammar, syntax, and punctuation to avoid ambiguity; correct tense/voice, subject-verb agreement, and modifier placement.
- R38 Abbreviations: avoid ambiguous abbreviations; if an abbreviation is in the provided glossary with a single meaning, it may be retained (prefer first-use expansion); otherwise fully spell out.

Style and constraints:
- Output must strictly follow the Response Format specified below. Do not use Markdown or tables.
- Keep wording precise, testable, and verifiable. Prefer active voice, singular characteristic per requirement.
- If a numeric threshold is missing, use any provided quantitative defaults; otherwise mark as TBD and add a clarification question.
- If input items lack IDs, auto-assign REQ-001, REQ-002, ... in order.
- Be self-consistent across all rewrites.

Response Format (produce exactly this JSON structure):
{
  "review_metadata": {
    "rules_applied": ["R7", "R9", "R12", "R38"],
    "assumptions": ["<list any assumptions made>"]
  },
  "compliance_summary": {
    "pass_count": <int>,
    "fail_count": <int>,
    "issues_by_rule": {
      "R7": <int>, "R9": <int>, "R12": <int>, "R38": <int>
    }
  },
  "requirements_review": [
    {
      "id": "<ID>",
      "original": "<original requirement>",
      "checks": {
        "R7": {"status": "pass|fail", "vague_terms": ["<terms>"], "explanation": "<brief>"},
        "R9": {"status": "pass|fail", "open_ended_phrases": ["<phrases>"], "explanation": "<brief>"},
        "R12": {"status": "pass|fail", "grammar_issues": ["<issues>"], "explanation": "<brief>"},
        "R38": {"status": "pass|fail", "abbreviations_found": ["<abbr>"], "actions": "<expand/retain/remove>", "explanation": "<brief>"}
      },
      "proposed_rewrite": "<single improved requirement that resolves all detected issues>",
      "split_recommendation": {
        "needed": true|false,
        "because": "<why>",
        "split_into": ["<Req A>", "<Req B>"]
      },
      "clarifying_questions": ["<question 1>", "<question 2>"]
    }
  ]
}

Evaluation method:
1) Parse inputs and normalize IDs. 2) For each requirement, test R7, R9, R12, R38. 3) Explain each failure succinctly. 4) Rewrite to a single, verifiable sentence unless a split is recommended. 5) Apply glossary rules for abbreviations; on first use of allowed abbreviations, prefer the expanded form with abbreviation in parentheses. 6) If required numbers are missing and no defaults are provided, use TBD placeholders and ask explicit questions to resolve them. 7) Summarize compliance.

Important: If no requirements are provided in the user message, respond with a single clarifying question requesting requirements to review and stop.
"""

# User prompt template for the simplified review. It carries two placeholders,
# {requirements} and {enable_split}, which are filled with str.replace() when
# the messages are built (str.format() is not usable here because the examples
# contain literal JSON braces).
# The few-shot examples mirror the simplified Response Format: no
# "total_requirements" key, and "issues_by_rule" counts match the per-rule
# findings listed in each example.
user_message_modified = """
Task: Review and improve the following requirement statements using the provided Context and variables.

Context (authoritative rules):
- R7 Vague Terms: identify and replace vague quantifiers, adjectives, and adverbs with measurable, testable, and verifiable criteria.
- R9 Open-Ended Clauses: remove open-ended phrases (e.g., “including but not limited to”, “etc.”, “and so on”); require explicit enumerations; recommend splitting into multiple atomic requirements if needed.
- R12 Correct Grammar: ensure correct grammar, syntax, and punctuation to avoid ambiguity; correct tense/voice, subject-verb agreement, and modifier placement.
- R38 Abbreviations: avoid ambiguous abbreviations; if an abbreviation is in the provided glossary with a single meaning, it may be retained (prefer first-use expansion); otherwise fully spell out.

Variables:
- Requirements (list or newline-separated; may include IDs):
{requirements}
- Enable split recommendations (true|false; default true): {enable_split}

Produce output strictly in the Response Format JSON. Do not use Markdown.

Examples

Example 1 — Aviation domain with minimal glossary
Input variables:
- Requirements:
  REQ-1: The SOI shall usually be online.
  REQ-2: The Flight_Information_System shall display per Display Standard xyz the Tracking_Information for relevant aircraft within 5 seconds of detection.
  REQ-3: The op shall log events promptly.
- Enable split recommendations: true

Expected output (abbreviated):
{
  "review_metadata": {"rules_applied": ["R7","R9","R12","R38"], "assumptions": []},
  "compliance_summary": {"pass_count": 0, "fail_count": 3, "issues_by_rule": {"R7": 3, "R9": 0, "R12": 0, "R38": 1}},
  "requirements_review": [
    {"id": "REQ-1", "original": "The SOI shall usually be online.",
     "checks": {"R7": {"status":"fail","vague_terms":["usually"],"explanation":"Frequency not verifiable."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The System_of_Interest (SOI) shall have an Availability of at least 99.95% measured over any rolling 720-hour period.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": []},

    {"id": "REQ-2", "original": "The Flight_Information_System shall display per Display Standard xyz the Tracking_Information for relevant aircraft within 5 seconds of detection.",
     "checks": {"R7": {"status":"fail","vague_terms":["relevant"],"explanation":"Scope of aircraft is undefined."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":"Word order acceptable but can be improved."},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Flight_Information_System shall display, in accordance with Display Standard xyz, the Tracking_Information of each Aircraft located within 20 kilometers of the Airfield within 5 seconds of detection.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": ["Confirm whether the proximity criterion is distance from Airfield, ATC Sector bounds, or another defined Control_Area."]},

    {"id": "REQ-3", "original": "The op shall log events promptly.",
     "checks": {"R7": {"status":"fail","vague_terms":["promptly"],"explanation":"No latency target."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"fail","abbreviations_found":["op"],"actions":"expand","explanation":"'op' not defined in glossary."}},
     "proposed_rewrite": "The Operator shall record each Event within 5 seconds of occurrence.",
     "split_recommendation": {"needed": false, "because": "", "split_into": []},
     "clarifying_questions": ["Confirm whether 5 seconds is acceptable for event logging latency."]}
  ]
}

Example 2 — Banking domain with defined abbreviations and open-ended clause
Input variables:
- Requirements:
  1) The ATM shall display the Customer Account_Number, Account_Balance, and so on per Display Standard xyz.
  2) The Weapon_System shall storing the location of all ordnance.
  3) The website shall approximately load quickly.
- Enable split recommendations: true

Expected output (abbreviated):
{
  "review_metadata": {"rules_applied": ["R7","R9","R12","R38"], "assumptions": ["Treat numbered list as ordered requirements; auto-assign IDs."]},
  "compliance_summary": {"pass_count": 0, "fail_count": 3, "issues_by_rule": {"R7": 2, "R9": 1, "R12": 1, "R38": 0}},
  "requirements_review": [
    {"id": "REQ-001", "original": "The ATM shall display the Customer Account_Number, Account_Balance, and so on per Display Standard xyz.",
     "checks": {"R7": {"status":"pass","vague_terms":[],"explanation":""},
                 "R9": {"status":"fail","open_ended_phrases":["and so on"],"explanation":"Non-specific list."},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":["ATM"],"actions":"retain","explanation":"Defined in glossary; first use expanded."}},
     "proposed_rewrite": "The Automated Teller Machine (ATM) shall display the Customer Account_Number and Account_Balance in accordance with Display Standard xyz.",
     "split_recommendation": {"needed": true, "because": "Multiple display elements and open-ended enumeration.", "split_into": ["The Automated Teller Machine (ATM) shall display the Customer Account_Number in accordance with Display Standard xyz.", "The Automated Teller Machine (ATM) shall display the Customer Account_Balance in accordance with Display Standard xyz."]},
     "clarifying_questions": ["List all additional fields to be displayed to replace 'and so on' (e.g., Account_Type, Overdraft_Limit)."]},

    {"id": "REQ-002", "original": "The Weapon_System shall storing the location of all ordnance.",
     "checks": {"R7": {"status":"pass","vague_terms":[],"explanation":""},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"fail","grammar_issues":["incorrect verb form: 'shall storing'"],"explanation":"Use base verb after 'shall'."},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Weapon_System shall store the location of all Ordnance.",
     "split_recommendation": {"needed": false, "because": "Single action.", "split_into": []},
     "clarifying_questions": []},

    {"id": "REQ-003", "original": "The website shall approximately load quickly.",
     "checks": {"R7": {"status":"fail","vague_terms":["approximately","quickly"],"explanation":"Not measurable."},
                 "R9": {"status":"pass","open_ended_phrases":[],"explanation":""},
                 "R12": {"status":"pass","grammar_issues":[],"explanation":""},
                 "R38": {"status":"pass","abbreviations_found":[],"actions":"retain","explanation":""}},
     "proposed_rewrite": "The Website shall complete initial page load within 2 seconds for 95% of requests measured over any 24-hour period.",
     "split_recommendation": {"needed": false, "because": "Single measurable characteristic.", "split_into": []},
     "clarifying_questions": ["Confirm the percentile (e.g., 95% vs. 99%) and measurement window."]}
  ]
}

Now perform the review on the provided inputs and return only the Response Format JSON.
"""

In [None]:
## Run prompt

import pandas as pd

# Number of rows taken from the top of the dataset and sent in a single prompt.
N_REQUIREMENTS = 5

# Load requirements
df = pd.read_excel('../src/data/demo_dataset.xlsx')
requirement_col = 'requirement'
# No ID column is passed along: the system prompt instructs the model to
# auto-assign REQ-001, REQ-002, ... when the input items carry no IDs.
# Empty cells are skipped and values coerced to str so that '\n'.join() below
# cannot raise TypeError on NaN or numeric cell values.
requirements = df[requirement_col].head(N_REQUIREMENTS).dropna().astype(str).tolist()

# Create OpenAI instance
# NOTE(review): OpenAI, ResponseClient and openai_api_key_secret are not defined
# in this cell; confirm an earlier cell provides them.
client = OpenAI(
    # Replace with your actual API key or use: api_key=os.environ.get("OPENAI_API_KEY")
    api_key=openai_api_key_secret.get_secret_value()
)
resp_client = ResponseClient(client=client, model="gpt-4o-mini")

# Fill the two template placeholders. str.replace() is used instead of
# str.format() because the prompt contains literal JSON braces. The split flag
# is written as lowercase 'true' to match the "true|false" wording and the
# examples inside the prompt.
user_prompt = (
    user_message_modified
    .replace('{requirements}', '\n'.join(requirements))
    .replace('{enable_split}', 'true')
)

messages = [
    {"role": "system", "content": system_message_modified},
    {"role": "user", "content": user_prompt}
]
structured_response = resp_client.get_structured_response(
    messages=messages,
    response_format={"type": "json_object"},
)
resp_client.check_structured_output(structured_response)

In [None]:
# Show the raw response content returned by the model before it is parsed.
print(structured_response.content)

In [None]:
import json

# Parse the model's reply (requested with response_format json_object) into
# Python objects.
response_json = json.loads(structured_response.content)
# Fail here with a readable message if the model deviated from the requested
# structure, instead of surfacing later as a bare KeyError when the
# 'requirements_review' list is read.
assert isinstance(response_json, dict) and 'requirements_review' in response_json, (
    "Model response has no 'requirements_review' key; inspect structured_response.content"
)

In [None]:
import flatdict
from src import utils

# The per-requirement review entries returned by the model (a list of nested
# dicts, one per requirement).
nested_dict = response_json['requirements_review']

# Wrap every review entry in a FlatDict using '.' as the key delimiter, so
# nested keys are addressed as e.g. 'checks.R7.status'.
flat_dict = list(
    map(lambda review: flatdict.FlatDict(review, delimiter='.'), nested_dict)
)


In [None]:
# Build a DataFrame from the list of flattened review entries (one entry per
# reviewed requirement).
revisions_df = pd.DataFrame(flat_dict)

In [None]:
# Preview the first two rows of the flattened review table.
revisions_df.head(2)

In [None]:
from src.components import prompteval as pe
## Run evaluations
# Functions currently requiring remediation
exclude_funcs = [
    'eval_explicit_enumeration',
    'eval_follows_style_guide',
    'eval_has_correct_grammar',
    'eval_has_supporting_diagram_or_model_reference',
    'eval_is_structured_set',
    'eval_is_unique_expression',
    'eval_has_explicit_conditions_for_single_action',
    'eval_is_structured_statement'
]
# Make evaluation function config
eval_config = pe.make_eval_config(pe, exclude_funcs=exclude_funcs)
# Call the evaluations on the dataframe 
eval_df = pe.call_evals(revisions_df, col='original', eval_config=eval_config)
# Get list of failed eval functions
eval_df = pe.get_failed_evals(eval_df)
# Map the failed eval functions to rule groups (as defined in the config.yaml file)
eval_df = pe.map_failed_eval_col_to_rule_group(eval_df, eval_to_rule_map=config["SECTION_4_RULE_GROUPS"], failed_eval_col='failed_evals')

In [None]:
eval_df

In [None]:
from src.components import prompteval as pe
## Run evaluations
# Functions currently requiring remediation
exclude_funcs = [
    'eval_explicit_enumeration',
    'eval_follows_style_guide',
    'eval_has_correct_grammar',
    'eval_has_supporting_diagram_or_model_reference',
    'eval_is_structured_set',
    'eval_is_unique_expression',
    'eval_has_explicit_conditions_for_single_action',
    'eval_is_structured_statement'
]
# Make evaluation function config
eval_config = pe.make_eval_config(pe, exclude_funcs=exclude_funcs)
# Call the evaluations on the dataframe 
eval_df = pe.call_evals(revisions_df, col='proposed_rewrite', eval_config=eval_config)
# Get list of failed eval functions
eval_df = pe.get_failed_evals(eval_df)
# Map the failed eval functions to rule groups (as defined in the config.yaml file)
eval_df = pe.map_failed_eval_col_to_rule_group(eval_df, eval_to_rule_map=config["SECTION_4_RULE_GROUPS"], failed_eval_col='failed_evals')

In [None]:
# Display the evaluation results for the 'proposed_rewrite' column (the most
# recent value assigned to eval_df).
eval_df