In [1]:
from typing import List, Dict, Optional, Literal, Union

In [2]:
## Load configuration
from dotenv import dotenv_values
from src import utils

# Load config settings
DOT_ENV = dotenv_values("../.env")
config = utils.load_config("../config.yaml")

# Create a unique run-id folder to store outputs
config["FILE_LOCATIONS"]["MAIN_DATA_FOLDER"] = "../src/data"
output_directory = utils.make_output_directory(config["FILE_LOCATIONS"])

In [17]:
## Step 3: Generate a prompt for each attribute group
from pydantic import BaseModel, SecretStr, Field
from openai import OpenAI
import aiswre
from aiswre.components.promptrunner import ResponseClient

openai_api_key = str(DOT_ENV['OPENAI_API_KEY'])
openai_api_key_secret = SecretStr(openai_api_key)
# Create OpenAI instance
client = OpenAI(
    # Replace with your actual API key or use: api_key=os.environ.get("OPENAI_API_KEY")
    api_key=openai_api_key_secret.get_secret_value()
)
resp_client = ResponseClient(client=client, model="gpt-4o")
# Create a prompt using this formatted checklist
class TestCase(BaseModel):
    testcase_id: int = Field(description="sequential ID") 
    steps: List[str] = Field(description="a list of actions to be performed by the test case")
    expectedResults: List[str] = Field(description="a list of expected results for each step performed")
    prerequisites: str = Field(description="description of dependencies or environmental considerations to successfully run the test case")

class TestSuite(TestCase):
    testcases: List[TestCase] = Field(description="A List of Test Cases per the TestCase JSON format")

system_message = """
I want you to act as a software quality assurance tester for a new software application. 

Your job is write tests for the functionality and performance of the system to ensure it meets the specified requirement. Do not include any personal opinions or subjective evaluations in your test case creation.
"""

user_message = """
Given the Requirement, generate a list of test cases and each test case, produce the output format as described in the Response Format JSON. Do not use Markdown.

## Example - Not Related to Inputs

Example - Input
- Requirement: The System shall implement multi-threading and load balancing.

Example - Output
- Test Cases:
{
'testcase-id': '1',
'step': ['Establish a baseline of system performance under normal conditions', 'Simulate load by running a single instance of a task', 'Record the system performance', 'Increase the load by adding more instances of the task', 'Record the system performance', 'Compare the recorded performance with the baseline', 'Analyze the comparison data', 'Identify any significant performance degradation', 'Repeat the steps by adding more instances each time', 'Record and analyze the findings'],
'expectedResult': ['Establish a baseline performance for reference', 'System performance is logged', 'Performance under single instance load is recorded', 'System performance under increased load is recorded', 'Comparison data is obtained', 'Data analysis reveals any significant performance change', 'If there is degradation, system may have issue with multi-threading', 'No significant degradation indicates system is handling multi-threading correctly', 'Performance remains steady indicates good load balancing', 'System’s capability to handle multiple threads is confirmed'],
'prerequisites': 'Simulated task must be representative of a real task and the system performance should be recorded and analyzed to check the consistency and stability of the system.',

},

{
'testcase-id': '2',
'step': ['Create a light load scenario', 'Record the system response and performance', 'Increase the load gradually', 'Record the system’s response and performance', 'Repeat the increase in load and record the system’s response', 'Create a maximum load scenario', 'Record the system’s response and performance', 'Create a beyond-capacity load scenario', 'Record the system’s response', 'Analyze the recorded data to determine how it handles high loads'],
'expectedResult': ['System should handle light load with negligible impact on performance', 'Response and performance under light load is recorded', 'System should handle gradual increase in load', 'Response and performance under gradual increase in load is recorded', 'System’s ability to handle increasing load is validated', 'System should handle maximum load', 'Response and performance at maximum load is recorded', 'System may show some performance degradation', 'Response under beyond-capacity load is recorded', 'Analysis shows system’s ability to handle high loads'],
'prerequisites': 'Understanding of the potential maximum load the system can handle. Analytical tools ready to record the system response and performance metrics.',

},

{
'testcase-id': '3',
'step': ['Simulate high load scenario', 'Disconnect some threads while system is handling multiple tasks', 'Record the immediate system response', 'Monitor system performance over a period of time', 'Identify if the remaining threads pick up the tasks', 'Check for any error messages', 'Note the system’s recovery time', 'Repeat the disconnect-reconnect cycle few times', 'Record system response each time', 'Analyze the recorded data'],
'expectedResult': ['High load scenario is replicated', 'Threads are disconnected', 'Immediate system response is recorded', 'System performance is monitored', 'Remaining threads pick up tasks is validated', 'Error messages should be decipherable and actionable', 'Recovery time is noted', 'System shows consistency in handling disconnect-reconnect cycles', 'Response to each cycle is recorded', 'Analysis shows the system’s resilience and ability to recover in disrupted situations'],
'prerequisites': 'Knowledge of how to simulate high load and disconnect threads. Performance monitoring tools to record system response and analyze them.',

},

{
'testcase-id': '4',
'step': ['Simulate high load scenario', 'Reconnect threads back to the system while handling multiple tasks', 'Monitor the immediate system response', 'Watch the system performance over a period of time', 'Confirm if the reconnected threads start picking tasks', 'Check for any error messages', 'Note the system’s recovery time', 'Repeat the disconnect-reconnect cycle few times', 'Record system response each time', 'Analyze the recorded data'],
'expectedResult': ['High load scenario is replicated', 'Threads are reconnected', 'Immediate system response is recorded', 'System performance is monitored', 'Reconnected threads start picking up tasks', 'Error messages should be decipherable and actionable', 'Recovery time is noted', 'System shows consistency in handling reconnect cycles', 'Response to each cycle is recorded', 'Analysis shows the system’s resilience and handling of thread reconnections'],
'prerequisites': 'Knowledge of how to simulate high load and reconnect threads. Performance monitoring tools to record system response.',

},

{
'testcase-id': '5',
'step': ['Establish system’s performance under normal load', 'Generate a sudden increase in load', 'Monitor the immediate system response', 'Note the system's performance over a period of time', 'Increase the number of threads', 'Monitor the system’s response to increased threads', 'Check if the load is distributed to new threads', 'Continue to monitor system’s performance over a period of time', 'Gradually decrease the load', 'Continue to monitor the system’s performance and see if it reverts back to normal'],
'expectedResult': ['System performance under normal load is recorded', 'Sudden increase in load is triggered', 'Immediate system response after load increase is recorded', 'System’s performance during high load is monitored', 'Threads are increased successfully', 'System’s response to increased threads is monitored', 'Load is distributed to all available threads', 'System's performance under high load and increased threads is monitored', 'Load is decreased gradually', 'System’s performance reverts back to normal indicating good scalability'],
'prerequisites': 'To execute these tests, knowledge of system’s performance, ability to increase threads and load, and performance monitoring tools are required.'
}


## Inputs
Requirement: {requirement}

## Outputs
Test Cases:
"""

print(f"System:\n {system_message}")

print(f"User:\n {user_message}")

System:
 
I want you to act as a software quality assurance tester for a new software application. 

Your job is write tests for the functionality and performance of the system to ensure it meets the specified requirement. Do not include any personal opinions or subjective evaluations in your test case creation.

User:
 
Given the Requirement, generate a list of test cases and each test case, produce the output format as described in the Response Format JSON. Do not use Markdown.

## Example - Not Related to Inputs

Example - Input
- Requirement: The System shall implement multi-threading and load balancing.

Example - Output
- Test Cases:
{
'testcase-id': '1',
'step': ['Establish a baseline of system performance under normal conditions', 'Simulate load by running a single instance of a task', 'Record the system performance', 'Increase the load by adding more instances of the task', 'Record the system performance', 'Compare the recorded performance with the baseline', 'Analyze the comp

In [18]:
requirement = "The System shall identify each User as authorized or unauthorized for each access attempt."

## Run prompt
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message.replace("{requirement}",requirement)}
]
structured_response = resp_client.get_structured_response(
    messages=messages,
    response_format=TestSuite,
)
resp_client.check_structured_output(structured_response)

testcase_id=1 steps=['Create a list of authorized users and unauthorized users in the system database', 'Attempt to access the system as an authorized user', 'Observe the system response', 'Attempt to access the system as an unauthorized user', 'Observe the system response', 'Repeat the access attempt for both user types multiple times'] expectedResults=['Authorized users are correctly identified and granted access', 'System allows access for authorized users', 'Unauthorized users are correctly identified and denied access', 'System denies access for unauthorized users', 'The system consistently identifies users as either authorized or unauthorized in subsequent attempts'] prerequisites="A list of users with clearly defined authorization statuses must exist in the system's database. The testing environment should simulate actual user access scenarios." testcases=[TestCase(testcase_id=2, steps=['Simulate multiple access attempts from an authorized user within a short timeframe', "Monito

In [21]:
print(structured_response.parsed.testcase_id)
print(structured_response.parsed.steps)
print(structured_response.parsed.expectedResults)
print(structured_response.parsed.prerequisites)


1
['Create a list of authorized users and unauthorized users in the system database', 'Attempt to access the system as an authorized user', 'Observe the system response', 'Attempt to access the system as an unauthorized user', 'Observe the system response', 'Repeat the access attempt for both user types multiple times']
['Authorized users are correctly identified and granted access', 'System allows access for authorized users', 'Unauthorized users are correctly identified and denied access', 'System denies access for unauthorized users', 'The system consistently identifies users as either authorized or unauthorized in subsequent attempts']
A list of users with clearly defined authorization statuses must exist in the system's database. The testing environment should simulate actual user access scenarios.


In [None]:
## Generated system message
print(structured_response.parsed.system)
with open(f"{output_directory}/generated_system_message_accuracy.txt", 'w', encoding="utf-8") as f:
    f.write(structured_response.parsed.system)

In [None]:
## Generated user message
print(structured_response.parsed.user)
with open(f"{output_directory}/generated_user_message_accuracy.txt", 'w', encoding="utf-8") as f:
    f.write(structured_response.parsed.user)