In [28]:
import os
import json
import ast
import asyncio
from dotenv import dotenv_values

import pandas as pd
import flatdict
from pydantic import BaseModel, Field, SecretStr

from src import utils
from src.components import prompteval as pe
from src.components.promptrunner import RateLimitOpenAIClient

In [2]:
# Load config settings
# The path is relative, so it resolves against the kernel's working directory.
config = utils.load_config("../config.yaml")
# "." is the current working directory. This name is not referenced in the
# visible cells -- presumably consumed by later output-writing code (TODO confirm).
output_directory = "."

In [3]:
# Names of all rule groups (not referenced by the other visible cells).
all_rule_groups = [
    'Conditions', 'Singularity', 'Uniformity_Of_Language', 'Concision',
    'Modularity', 'Non_Ambiguity', 'Tolerance', 'Quantifiers',
    'Quantification', 'Completeness', 'Accuracy', 'Abstraction', 'Realism',
]

# Functions currently requiring remediation, each paired with the weight it
# contributes to the "weighted_value" column. Keeping name and weight side by
# side prevents the two derived lists from drifting out of step.
_EVAL_WEIGHT_BY_FUNC = {
    'eval_avoids_vague_terms': 0.35,
    'eval_definite_articles_usage': 0.05,
    'eval_has_appropriate_subject_verb': 0.15,
    'eval_has_common_units_of_measure': 0.05,
    'eval_has_escape_clauses': 0.10,
    'eval_has_no_open_ended_clauses': 0.10,
    'eval_is_active_voice': 0.20,
}

# Dicts preserve insertion order, so both lists share the same ordering.
eval_funcs = list(_EVAL_WEIGHT_BY_FUNC)
eval_weights = list(_EVAL_WEIGHT_BY_FUNC.values())

# Make eval config, restricted to the functions listed above
eval_config = pe.make_eval_config(pe, include_funcs=eval_funcs)

In [None]:
from dotenv import dotenv_values
from src.components.promptrunner import RateLimitOpenAIClient

# Instantiate the openai client
# The API key is read from ../.env (path relative to the working directory)
# rather than being hardcoded; a missing OPENAI_API_KEY entry raises KeyError
# on the subscript below.
DOT_ENV = dotenv_values("../.env")
OPENAI_API_KEY = DOT_ENV['OPENAI_API_KEY']
# Client object handed to the async review helpers as `openai_client`.
rl_openai_client = RateLimitOpenAIClient(api_key=OPENAI_API_KEY)

In [None]:
import asyncio
import json
import flatdict
from typing import Any, Dict, List, Optional

async def process_json_responses(
    responses, ids, prompt_type, json_key: str = "requirements_review"
    ) -> List[Dict[str, Any]]:
    """Flatten chat-completion responses into one record (dict) per response.

    Each record starts as ``dict(response)`` and is then updated, in this
    order, with:

    1. the review object(s) found under ``json_key`` in the JSON message
       content, flattened to dot-delimited keys with ``flatdict.FlatDict``;
    2. the token-usage fields, including the nested completion/prompt
       token details when they are present;
    3. the fields of ``message.parsed`` when present;
    4. ``requirement_id`` (``ids[i]``) and ``prompt_type``.

    Later updates overwrite earlier keys of the same name.

    The function is a coroutine only so that callers can ``await`` it; it
    performs no awaits itself.

    Args:
        responses: Completion objects that support ``dict(response)`` and
            expose ``choices`` and ``usage`` attributes.
        ids: Identifiers aligned by position with ``responses``.
        prompt_type: Value stored under ``"prompt_type"`` in every record.
        json_key: Top-level key of the JSON content that holds the review
            object(s).

    Returns:
        The list of records. Responses with no choices or no message are
        skipped, so the result can be shorter than ``responses``. When the
        message content cannot be parsed, the raw content is stored under
        ``"json_parse_error"``.
    """
    processed = []

    for i, response in enumerate(responses):
        output = dict(response)

        # An empty or missing ``choices`` list is skipped the same way as a
        # missing message instead of raising IndexError.
        choices = getattr(response, "choices", None) or []
        message = getattr(choices[0], "message", None) if choices else None
        if not message:
            continue

        # Parse structured JSON content if available
        content = getattr(message, "content", None)
        if content:
            try:
                response_json = json.loads(content)
                if json_key in response_json:
                    reviews = response_json[json_key]
                    # Accept a single review object as well as a list of them;
                    # iterating a bare dict would yield its keys, not reviews.
                    if isinstance(reviews, dict):
                        reviews = [reviews]
                    # Flatten everything first so a failure leaves `output`
                    # untouched by partial review data.
                    flat_reviews = [
                        flatdict.FlatDict(review, delimiter=".") for review in reviews
                    ]
                    for flat_review in flat_reviews:
                        output.update(flat_review)
            except (json.JSONDecodeError, TypeError):
                output["json_parse_error"] = content

        # Include usage info
        usage = getattr(response, "usage", None)
        if usage:
            usage_fields = dict(usage)
            for details_name in ("completion_tokens_details", "prompt_tokens_details"):
                # The attribute can exist with a value of None, in which case
                # a getattr default would not apply and dict(None) would raise.
                details = getattr(usage, details_name, None)
                if details:
                    usage_fields.update(dict(details))
            output.update(usage_fields)

        # Include parsed content if provided
        parsed = getattr(message, "parsed", None)
        if parsed:
            output.update(dict(parsed))

        output.update(
            {
                "requirement_id": ids[i],
                "prompt_type": prompt_type,
            }
        )
        processed.append(output)
    return processed


async def run_requirement_review(
    openai_client,
    prompt_messages: List[str],
    requirements: List[str],
    ids: Optional[List[Any]] = None,
    model: str = "gpt-4o-mini",
    json_key: str = "requirements_review",
    ) -> List[Dict[str, Any]]:
    """Send one review request per requirement concurrently and flatten the results.

    Args:
        openai_client: Object exposing an awaitable
            ``chat_completion_parse(model=..., messages=..., response_format=...)``.
        prompt_messages: ``[system_prompt, user_prompt_template, ...]``. The
            user template has its ``{requirements}`` placeholder replaced by
            ``"<id>: <requirement>"`` and ``{enable_split}`` replaced by
            ``"True"``. The last element is recorded as ``prompt_type``.
        requirements: Requirement statements to review, one request each.
        ids: Identifiers aligned by position with ``requirements``; defaults
            to ``0..len(requirements) - 1``.
        model: Model name forwarded to the client.
        json_key: Key of the review payload, forwarded to
            ``process_json_responses``.

    Returns:
        The records produced by ``process_json_responses``.

    Raises:
        ValueError: If ``ids`` is given but its length differs from
            ``requirements``.
    """
    if ids is None:
        ids = list(range(len(requirements)))
    elif len(ids) != len(requirements):
        # zip() would silently truncate to the shorter input, dropping
        # requirements or mislabeling records; fail before any request is made.
        raise ValueError(
            f"ids has {len(ids)} entries but requirements has {len(requirements)}"
        )

    system_prompt = prompt_messages[0]
    user_template = prompt_messages[1]

    # Build concurrent tasks
    tasks = [
        openai_client.chat_completion_parse(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": user_template
                    .replace("{requirements}", f"{req_id}: {req}")
                    .replace("{enable_split}", "True"),
                },
            ],
            response_format={"type": "json_object"},
        )
        for req, req_id in zip(requirements, ids)
    ]
    # Run all requests concurrently; gather preserves the task order, which
    # keeps responses aligned with `ids`.
    responses = await asyncio.gather(*tasks)

    # Process structured JSON responses
    return await process_json_responses(responses, ids, prompt_messages[-1], json_key)

In [None]:
# Define function to perform AI requirement reviews
async def run_reviewer_prompts(openai_client, prompt_messages, requirements, ids, model: str = 'gpt-4o-mini', verbose: bool = True):
    """Review each requirement with one concurrent request and flatten the results.

    Self-contained counterpart of ``run_requirement_review`` that can also
    print intermediate values while processing.

    Args:
        openai_client: Object exposing an awaitable
            ``chat_completion_parse(model=..., messages=..., response_format=...)``.
        prompt_messages: ``[system_prompt, user_prompt_template, ...]``. The
            user template has ``{requirements}`` replaced by
            ``"<id>: <requirement>"`` and ``{enable_split}`` replaced by
            ``"True"``. The last element is stored as ``prompt_type``.
        requirements: Requirement statements to review, one request each.
        ids: Identifiers aligned by position with ``requirements``; ``None``
            means ``0..len(requirements) - 1``.
        model: Model name forwarded to the client.
        verbose: When true (the default), print the message, the
            intermediate structures and each finished record.

    Returns:
        One dict per response: ``dict(response)`` updated with the flattened
        ``requirements_review`` entries, the usage fields, the parsed fields,
        ``requirement_id`` and ``prompt_type``. Content that is not valid JSON
        is stored under ``"json_parse_error"`` instead of raising.

    Raises:
        ValueError: If ``ids`` is given but its length differs from
            ``requirements``.
    """
    if ids is None:
        ids = list(range(len(requirements)))
    elif len(ids) != len(requirements):
        # zip() would silently truncate to the shorter input.
        raise ValueError(
            f"ids has {len(ids)} entries but requirements has {len(requirements)}"
        )

    # One request per (requirement, id) pair, all awaited together below.
    tasks = [
        openai_client.chat_completion_parse(
            model=model,
            messages=[
                {"role": "system", "content": prompt_messages[0]},
                {"role": "user", "content": prompt_messages[1].replace('{requirements}', f'{req_id}: {req}').replace('{enable_split}', 'True')}
            ],
            response_format={"type": "json_object"}
        )
        for req, req_id in zip(requirements, ids)
    ]

    # Wait for all requests to complete (results keep the task order)
    responses = await asyncio.gather(*tasks)

    # Process responses
    processed_responses = []
    for i, response in enumerate(responses):

        output = dict(response)
        message = response.choices[0].message
        if verbose:
            print(message)

        content = getattr(message, "content", None)
        if content:
            try:
                response_json = json.loads(content)
            except (json.JSONDecodeError, TypeError):
                # Keep the raw text so one malformed reply does not abort the batch.
                output["json_parse_error"] = content
                response_json = None
            # Valid JSON may still be a list or scalar rather than an object.
            if isinstance(response_json, dict) and 'requirements_review' in response_json:
                nested_dict = response_json['requirements_review']
                if verbose:
                    print(f"Nested dict: {type(nested_dict)}, {len(nested_dict)}")
                # Only set to work if a single requirement is passed to prompt
                flat_dict = [flatdict.FlatDict(n, delimiter='.') for n in nested_dict]
                if verbose:
                    print(f"Flat dict: {type(flat_dict)}, {len(flat_dict)}")
                    print(flat_dict)
                for d in flat_dict:
                    output.update(d)

        usage = getattr(response, "usage", None)
        if usage:
            output.update(dict(usage))
            for details_name in ("completion_tokens_details", "prompt_tokens_details"):
                # These detail objects can be None; dict(None) would raise TypeError.
                details = getattr(usage, details_name, None)
                if details:
                    output.update(dict(details))

        parsed = getattr(message, "parsed", None)
        if parsed:
            output.update(dict(parsed))

        output.update({'requirement_id': ids[i]})
        output.update({'prompt_type': prompt_messages[-1]})

        if verbose:
            print(f"\nResponse {i+1}:")
            print(output)
        processed_responses.append(output)

    return processed_responses


In [None]:
# System message for the review requests. It is sent verbatim (no placeholder
# substitution is applied to it), so it must not contain template variables.
SYSTEM_PROMPT = """
You are a Senior Requirements Quality Analyst and technical editor. 
You specialize in detecting and fixing requirement defects using authoritative quality rules. 
Be rigorous, consistent, and concise. Maintain the author's technical intent while removing ambiguity. 
Do not add new functionality. Ask targeted clarification questions when needed.

Response Format (produce exactly this JSON structure):
{
  "requirements_review": [
    {
      "requirement_id": "<ID>",
      "original": "<original requirement>",
      "checks": {
        "R2": {"status": "pass|fail", "active_voice": ["<issues>"], "explanation": "<brief>"},
        "R3": {"status": "pass|fail", "appropriate_subj_verb": ["<issues>"], "explanation": "<brief>"},
        "R5": {"status": "pass|fail", "definite_articles": ["<issues>"], "explanation": "<brief>"},
        "R6": {"status": "pass|fail", "units": ["<issues>"], "explanation": "<brief>"},
        "R7": {"status": "pass|fail", "vague terms": ["<issues>"], "explanation": "<brief>"},
        "R8": {"status": "pass|fail", "escape_clauses": ["<issues>"], "explanation": "<brief>"},
        "R9": {"status": "pass|fail", "open_ended_clauses": ["<issues>"], "explanation": "<brief>"}
      },
      "proposed_rewrite": "<single improved requirement that resolves all detected issues>",
      "split_recommendation": {
        "needed": true|false,
        "because": "<why>",
        "split_into": ["<Req A>", "<Req B>"]
      }
    }
  ]
}

Evaluation method:
1) Parse inputs and normalize IDs. 
2) For each requirement, test R2, R3, R5, R6, R7, R8, R9. 
3) Explain each failure succinctly. 
4) Rewrite to a single, verifiable sentence unless a split is recommended. 
5) Apply glossary rules for abbreviations; on first use of allowed abbreviations, prefer the expanded form with abbreviation in parentheses. 
6) If required numbers are missing and no defaults are provided, use TBD placeholders and ask explicit questions to resolve them. 
7) Summarize compliance.

Important: If the Requirements variable is empty, respond with a single clarifying question requesting requirements to review and stop.
"""

# User message template. The review helpers fill it with plain str.replace():
# "{requirements}" becomes "<id>: <requirement>" and "{enable_split}" becomes
# "True". Keep both placeholders spelled exactly as below.
USER_PROMPT = """
Task: Review and improve the following requirement statements using the provided variables.

Variables:
- Requirements (list or newline-separated; may include IDs):
  {requirements}
- Enable split recommendations (true|false; default true): {enable_split}

Produce output strictly in the Response Format JSON. Do not use Markdown.

Now perform the review on the provided inputs and return only the Response Format JSON.
"""

In [6]:
# Model name passed to run_requirement_review when the revisions are generated.
MODEL = 'gpt-4o-mini'

In [10]:
# Sample requirement statements (text kept verbatim, including the double
# spaces) and their identifiers, aligned by position.
requirements = [
    (
        "If projected the data must be readable.  "
        "On a 10x10 projection screen  90% of viewers must be able to read "
        "Event / Activity data from a viewing distance of 30"
    ),
    (
        "The product shall ensure that it can only be accessed by authorized users.  "
        "The product will be able to distinguish between authorized and "
        "unauthorized users in all access attempts"
    ),
    (
        "All business rules specified in the Disputes System shall be in "
        "compliance to the guidelines of Regulation E and Regulation Z"
    ),
]

ids = ["REQ-001", "REQ-002", "REQ-003"]

# One row per requirement; column order is 'requirements', then 'ids'.
df = pd.DataFrame(dict(requirements=requirements, ids=ids))

In [None]:
# Get pre-revision Accuracy Score: evaluate a copy of the original
# requirement text so `df` itself is left untouched.
initial_df = pe.get_failed_evals(
    pe.call_evals(df.copy(), col='requirements', eval_config=eval_config)
)
# Last expression of the cell, so its return value is shown as the cell output.
pe.add_weighted_column(initial_df, eval_funcs, eval_weights, "weighted_value")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


0    0.65
1    0.80
2    0.80
Name: weighted_value, dtype: float64

In [None]:
# Run revisions and cast to dataframe
# The last element of `prompt` is also what the review helper records as
# "prompt_type" in every result row.
prompt = [SYSTEM_PROMPT, USER_PROMPT]
# Pass the REQ-00x identifiers defined with the requirements so the prompts
# and the resulting rows carry the real IDs rather than positional 0..n-1.
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running (as in a Jupyter kernel) unless nested loops are enabled elsewhere --
# confirm; `await run_requirement_review(...)` is the notebook-native form.
revisions = asyncio.run(
    run_requirement_review(rl_openai_client, prompt, requirements, ids=ids, model=MODEL)
)
final_df = pd.DataFrame(revisions)

ParsedChatCompletionMessage[NoneType](content='{\n  "requirements_review": [\n    {\n      "requirement_id": "REQ-001",\n      "original": "If projected the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30",\n      "checks": {\n        "R2": {"status": "fail", "active_voice": ["Subject is missing from main clause."], "explanation": "The requirement does not specify who must ensure the data is readable."},\n        "R3": {"status": "fail", "appropriate_subj_verb": ["Subject-verb agreement is unclear due to lack of subject."], "explanation": "The requirement lacks a clear subject for the action."},\n        "R5": {"status": "fail", "definite_articles": ["Missing article before \'10x10 projection screen\'."], "explanation": "Should specify \'the 10x10 projection screen\'."},\n        "R6": {"status": "fail", "units": ["Distance unit is missing (assumed meters or feet)."], "explanation": "The unit 

In [29]:
# Get post-revision Accuracy Score: run the same evaluations on the
# model's 'proposed_rewrite' column.
final_df = pe.get_failed_evals(
    pe.call_evals(final_df.copy(), col='proposed_rewrite', eval_config=eval_config)
)
# Last expression of the cell, so its return value is shown as the cell output.
pe.add_weighted_column(final_df, eval_funcs, eval_weights, "weighted_value")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Daniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


0    0.65
1    0.85
2    1.00
Name: weighted_value, dtype: float64