### Build testgen Python Package

In [69]:
# !pip -q install -e ../.

### Imports

In [70]:
import pandas as pd
import json
import os
from tqdm.notebook import tqdm
from testgen.utils import *
from pathlib import Path
from datetime import datetime as dt

### EDA

In [71]:
# Parent of the current working directory; the data/ and results/ paths used by this
# notebook are resolved against it.
base_path = Path.cwd().parent

In [72]:
# Load the requirements sheet and the examples sheet from <base_path>/data.
requirements_xlsx = base_path / "data/requirements.xlsx"
examples_xlsx = base_path / "data/examples.xlsx"

df = pd.read_excel(requirements_xlsx)
df_examples = pd.read_excel(examples_xlsx)

In [73]:
# Every column after the first must be a 0/1 indicator. Counting distinct values alone
# would also accept two arbitrary values (e.g. 3 and 7) or a constant column that contains
# NaN, so compare against the exact value set and name the offending column on failure.
for c in df.columns[1:]:
    observed_values = set(df[c].unique())
    assert observed_values == {0, 1}, (
        f"column {c!r} is not binary: found values {sorted(observed_values, key=str)}"
    )

# Find Examples

In [74]:
# Example count handed to get_examples_from_df (also recorded in the results file name).
N_EXAMPLES = 1

example_selection = get_examples_from_df(df_examples, N_EXAMPLES)
indexes_to_drop, examples = example_selection

In [75]:
# Render each example as a "Requirement / Vector / Target Sensor/s" stanza and
# concatenate the stanzas into the text block that is passed to the prompt.
sensor_columns = df.columns[1:]
stanzas = []

for example_group in examples.values():
    for example in example_group:
        requirement, vector = example[0], example[1]
        # The vector is a string such as "[0,1,0]": strip the brackets and flag the 1s.
        is_target = [int(flag) == 1 for flag in vector[1:-1].split(',')]
        target_sensors = ' and '.join(sensor_columns[is_target])
        stanzas.append(
            f"Requirement: {requirement}\n"
            f"Vector: {vector}\n"
            f"Target Sensor/s: {target_sensors}\n"
            "\n"
        )

examples_txt = "".join(stanzas)

In [76]:
# Show the last nine newline-separated pieces of the examples text as a quick check.
print(*examples_txt.split("\n")[-9:], sep="\n")

Requirement: Power steering systems, whether hydraulic or electronic, must operate effectively in a wide range of environmental conditions, including extreme temperatures
Vector: [0,0,0,0,1]
Target Sensor/s: steering_torque

Requirement: Upon detection of lift-off oversteer, the system shall regain control and prevent spin-out based on the steering angle input and modulate the acceleration pedal 
Vector: [1,1,0,0,0]
Target Sensor/s: acceleration_pedal and wheel_steering_angle




# LLM

In [77]:
from testgen.prompts import SystemPrompt
from testgen.prompts import Sensors
from testgen.prompts import UserPromptBulk

In [78]:
# Model identifiers available per provider, keyed by provider name.
llm_models = dict(
    azure=["gpt-4o-mini"],
    groq=[
        "llama-3.2-3b-preview",
        "llama-3.2-90b-text-preview",
        "mixtral-8x7b-32768",
    ],
)

# Keyword arguments for building each provider's client; secrets are read from the
# environment and are None when the variable is not set.
endpoint_attrs = dict(
    azure=dict(
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version="2024-08-01-preview",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    ),
    groq=dict(
        api_key=os.getenv("GROQ_API_KEY"),
    ),
)

In [79]:
# Choose the provider, take the first model listed for it, and build the client
# from that provider's keyword arguments.
endpoint_name = "azure"
provider_models = llm_models[endpoint_name]
model_name = provider_models[0]

client_kwargs = endpoint_attrs[endpoint_name]
client = llm_client(endpoint_name, **client_kwargs)

In [80]:
# t = client.chat.completions.create(
#     model=model_name,
#     messages=[{"role": "user", "content": "Respond: Here"}]
# )
# t.choices[0].message.content

### Get Requirements for multi prediction

In [81]:
# Batching settings: N_REQS and the sampling mode are passed straight to get_batches.
# NOTE(review): "random" sampling with no visible seed - confirm whether runs are reproducible.
N_REQS = 2
SAMPLE_TYPE = "random"

# Build the batches, then derive the per-batch requirement text for the prompt.
batches, req_texts = requirement_text_bulk(get_batches(df, SAMPLE_TYPE, N_REQS))

Number of Batches: 68
Number of Instances left: 1


In [82]:
# Run the model once per batch and collect one result per batch.
results = []

# Materialise the (batch, requirement text) pairs first: a bare zip object has no length,
# so tqdm could only show a running count ("0it") without a total, percentage or ETA.
batch_inputs = list(zip(batches, req_texts))

for batch, req_text in tqdm(batch_inputs):
    result = invoke_bulk(model_name, client, batch,
                         SystemPrompt, Sensors, examples_txt, UserPromptBulk, req_text)
    results.append(result)

0it [00:00, ?it/s]

In [83]:
# Aggregate the per-batch results into run-level metrics.
# `results` holds one entry per batch, and every batch is counted as N_REQS instances.
number_of_reqs = len(results)
total_number_of_instances = number_of_reqs * N_REQS

# Running totals across all batches.
accuracy = total_tokens = total_completion_tokens = total_time = 0

for batch_result in results:
    accuracy += sum(batch_result["accuracy"])
    total_tokens += batch_result["total_tokens"]
    total_completion_tokens += batch_result["completion_tokens"]
    total_time += batch_result['response_time']

# Normalise every total by the instance count.
accuracy = accuracy / total_number_of_instances
avg_time_per_req = round(total_time / total_number_of_instances, 6)
avg_token_per_req = total_tokens / total_number_of_instances
avg_completion_token_per_req = total_completion_tokens / total_number_of_instances

In [84]:
# Print the run-level metrics, one "label: value" line each.
metric_lines = (
    ("number_of_reqs:", number_of_reqs),
    ("accuracy:", accuracy),
    ("avg_time_per_req:", avg_time_per_req),
    ("avg_token_per_req:", avg_token_per_req),
    ("avg_completion_token_per_req:", avg_completion_token_per_req),
)

for label, value in metric_lines:
    print(label, value)

number_of_reqs: 68
accuracy: 0.8897058823529411
avg_time_per_req: 0.476348
avg_token_per_req: 436.375
avg_completion_token_per_req: 15.5


# Save conversation

In [85]:
# save results
time = dt.now()

results_path = "results/bulk-{bulk}_{model}_n-{examples}_acc-{accuracy}_{time}.json"
results_path = results_path.format(
    model=model_name,
    examples=N_EXAMPLES,
    bulk=N_REQS,
    time=time.strftime('%m.%d.%Y-%H:%M:%S'),
    accuracy=round(accuracy, 3)
)

results_file = base_path / results_path
results_file.parent.mkdir(exist_ok=True)
results_file.touch()

with results_file.open("w") as f:
    json.dump({"accuracy": accuracy,
        "number_of_reqs": number_of_reqs,
        "total_tokens": total_tokens,
        "total_completion_tokens": total_completion_tokens,
        "avg_token_per_req": avg_token_per_req,
        "avg_completion_token_per_req": avg_completion_token_per_req,
        "avg_time_per_req": avg_time_per_req,
        "examples": examples,
        "responses": results}, f, indent=4)