### Build testgen Python Package

In [24]:
!pip -q install -e ../.

### Imports

In [25]:
import pandas as pd
import json
import os
from tqdm.notebook import tqdm
from testgen.utils import *
from pathlib import Path
from datetime import datetime as dt
from dotenv import load_dotenv

### EDA

In [26]:
# The project root is the parent of the directory the notebook is run from.
base_path = Path.cwd().parent

# Pull variables from a .env file into the process environment (the API keys are read from it later).
load_dotenv()

True

In [27]:
# Load the requirements to evaluate and the examples used to build the prompt.
requirements_file = base_path / "data/requirements.xlsx"
examples_file = base_path / "data/examples.xlsx"

df = pd.read_excel(requirements_file)
df_examples = pd.read_excel(examples_file)

### Drop Sensor

In [28]:
from testgen.prompts import Sensors

# Sensor abbreviations:
#   Acc: Acceleration Pedal
#   WSA: Wheel Steering Angle
#   WS:  Wheel Speed
#   YR:  Yaw Rate
#   ST:  Steering Torque
target_sensor = ["Wheel Steering Angle", "Steering Torque"]

# A description line belongs to a targeted sensor when it contains "<sensor name> (".
dropped_markers = [f"{name} (" for name in target_sensor]

# Keep only the description lines of the sensors that are NOT targeted.
Sensors = "\n".join(
    line
    for line in Sensors.split("\n")
    if not any(marker in line for marker in dropped_markers)
)

print(Sensors)

Acceleration Pedal (Acc): Measures the amount of pressure applied to the accelerator pedal, indicating the driver's desired acceleration.
Wheel Speed (WS): Measures the rotational speed of the vehicle's wheels, providing information on the vehicle's speed and potential wheel slippage.
Yaw Rate (YR): Measures the rate of rotation around the vertical axis of the vehicle, indicating its turning behavior and stability.


In [29]:
# Column names are the snake_case form of the sensor names; remove the
# targeted sensors' columns from both the requirements and the examples.
target_columns = [name.lower().replace(" ", "_") for name in target_sensor]

df = df.drop(columns=target_columns)
df_examples = df_examples.drop(columns=target_columns)

### Find Examples

In [30]:
N_EXAMPLES = 1

if N_EXAMPLES < 1:
    # Without this guard `examples` would never be defined and a later cell
    # would fail with an unrelated-looking NameError.
    raise ValueError(f"N_EXAMPLES must be >= 1, got {N_EXAMPLES}")
elif N_EXAMPLES == 1:
    # With a single example, use the pre-selected row(s) flagged in the "selected" column.
    df_examples_n1 = df_examples[df_examples["selected"] == 1].drop(columns=["selected"])
    indexes_to_drop, examples = get_examples_from_df(df_examples_n1, 1)
else:
    # Otherwise let the helper pick N_EXAMPLES from the whole pool
    # (the "selected" flag is not part of the example data).
    indexes_to_drop, examples = get_examples_from_df(
        df_examples.drop(columns=["selected"]), N_EXAMPLES
    )

In [31]:
Negative_N_EXAMPLES = 2
# Fixed seed so a re-run draws the same negative examples and therefore
# builds the same prompt (keeps the reported accuracy reproducible).
NEGATIVE_SAMPLE_SEED = 42

if Negative_N_EXAMPLES > 0:
    df_examples_t = df_examples.drop(columns=["selected"])

    # A negative example is a row whose columns after the first one sum to 0.
    is_negative = df_examples_t.iloc[:, 1:].sum(axis=1) == 0
    negative_examples = df_examples_t[is_negative].sample(
        Negative_N_EXAMPLES, random_state=NEGATIVE_SAMPLE_SEED
    )

    # Serialise the columns after the first one as a "[a,b,c]" string.
    negative_examples["vector"] = negative_examples.iloc[:, 1:].apply(
        lambda x: "[" + ",".join(map(str, x.to_list())) + "]", axis=1
    )

    # Stored as a list of [requirement, vector] pairs under the "negative" key.
    examples["negative"] = negative_examples[["requirement", "vector"]].values.tolist()

In [32]:
# Render every example as a text stanza (requirement, vector, target sensors)
# and concatenate them into the block that is added to the prompt.
sensor_columns = df.columns[1:]
stanza_parts = []

for example_group in examples.values():
    for example in example_group:
        # example[1] is a string like "[0,1,0]"; positions equal to 1 mark the target sensors.
        is_target = [int(flag) == 1 for flag in example[1][1:-1].split(",")]
        target_names = " and ".join(sensor_columns[is_target])

        stanza_parts.append(f"Requirement: {example[0]}\n")
        stanza_parts.append(f"Vector: {example[1]}\n")
        stanza_parts.append(f"Target Sensor/s: {target_names}\n")
        stanza_parts.append("\n")

examples_txt = "".join(stanza_parts)

In [33]:
# Preview the last 9 lines of the examples text.
tail_lines = examples_txt.split("\n")[-9:]
print("\n".join(tail_lines))

Requirement: The steering system must provide consistent and predictable torque feedback to the driver under all driving conditions
Vector: [0,0,0]
Target Sensor/s: 

Requirement: The power steering system must adapt the torque levels in response to detected road surface conditions (e.g., ice, water, gravel)
Vector: [0,0,0]
Target Sensor/s: 




# LLM

In [34]:
from testgen.prompts import SystemPrompt
from testgen.prompts import UserPrompt

In [35]:
# Model names that can be selected for each provider.
llm_models = dict(
    azure=["gpt-4o-mini"],
    groq=[
        "llama-3.2-3b-preview",
        "llama-3.2-90b-text-preview",
        "mixtral-8x7b-32768",
    ],
)

# Keyword arguments used to build each provider's client. Secrets are read
# from the environment and are None when the variable is not set.
endpoint_attrs = dict(
    azure=dict(
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        api_version="2024-08-01-preview",
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    ),
    groq=dict(
        api_key=os.environ.get("GROQ_API_KEY"),
    ),
)

In [36]:
# Select the provider and take the first model listed for it.
endpoint_name = "azure"
available_models = llm_models[endpoint_name]
model_name = available_models[0]

# Build the client from that provider's connection settings.
client_kwargs = endpoint_attrs[endpoint_name]
client = llm_client(endpoint_name, **client_kwargs)

In [37]:
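# Optional smoke test: uncomment to check that the client and credentials work
# before running the full evaluation.
# t = client.chat.completions.create(
#     model=model_name,
#     messages=[{"role": "user", "content": "Respond: Here"}]
# )
# t.choices[0].message.content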

# Run for all Requirements

In [38]:
# One entry per requirement, in the order of `df`.
results = []
responses = []

# `iterrows()` is a generator with no length, so tqdm needs `total` to be
# able to show progress and an ETA instead of a bare iteration counter.
for instance in tqdm(df.iterrows(), total=len(df)):
    # `instance` is the (index, row) pair produced by `iterrows()`.
    result, response = client_invoke(
        client,
        model_name,
        SystemPrompt,
        Sensors,
        examples_txt,
        UserPrompt,
        instance,
    )

    results.append(result)
    responses.append(response)

0it [00:00, ?it/s]

In [39]:
number_of_reqs = len(results)
if number_of_reqs == 0:
    # Fail with a clear message instead of a ZeroDivisionError in the averages below.
    raise ValueError(
        "No results to aggregate: run the inference cell first and check that `df` is not empty."
    )

# Running totals over the per-requirement result dicts.
accuracy = 0
total_tokens = 0
total_completion_tokens = 0
total_time = 0

for r in results:
    accuracy += r["accuracy"]
    total_tokens += r["total_tokens"]
    total_completion_tokens += r["completion_tokens"]
    total_time += r['response_time']

# Turn the totals into per-requirement averages.
accuracy /= number_of_reqs
avg_time_per_req = round(total_time / number_of_reqs, 6)
avg_token_per_req = total_tokens / number_of_reqs
avg_completion_token_per_req = total_completion_tokens / number_of_reqs

In [40]:
# save results
time = dt.now()

# Initials of every dropped sensor, e.g. "Wheel Steering Angle" -> "WSA",
# joined with "_" and wrapped in parentheses.
dropped_sensors = "("+"_".join([''.join([x[0].upper() for x in s.split(' ')]) for s in target_sensor])+")"

results_path = "results/single_dropped_{model}_n-{examples}-neg-{negative}_acc-{accuracy}_{ds}_{time}.json"
results_path = results_path.format(
    model=model_name,
    examples=N_EXAMPLES,
    negative=Negative_N_EXAMPLES,
    ds=dropped_sensors,
    # "." instead of ":" between the time fields: ":" is not allowed in
    # Windows file names, so the previous format could not be saved there.
    time=time.strftime('%m.%d.%Y-%H.%M.%S'),
    accuracy=round(accuracy, 3)
)

results_file = base_path / results_path
# Create the results directory (and any missing parents) on first use;
# opening the file in "w" mode below creates the file itself.
results_file.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the "responses" key stores `results` (the per-requirement
# result dicts), not the `responses` list collected during inference —
# confirm this is intended.
with results_file.open("w", encoding="utf-8") as f:
    json.dump({"accuracy": accuracy,
        "number_of_reqs": number_of_reqs,
        "total_tokens": total_tokens,
        "total_completion_tokens": total_completion_tokens,
        "avg_token_per_req": avg_token_per_req,
        "avg_completion_token_per_req": avg_completion_token_per_req,
        "avg_time_per_req": avg_time_per_req,
        "examples": examples,
        "responses": results}, f, indent=4)