In [1]:
!pip install groq -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m122.9/129.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.6/129.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import json
import os
import csv
import time
from typing import Dict, Any, List, Optional
from groq import Groq, Client
from tqdm import tqdm
import groq
import pandas as pd
import random
import re

In [3]:
def get_api_key() -> str:
    """Get the API key from environment variable or ask the user."""
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        api_key = input("Please enter your Groq API key: ")
    return api_key

def load_csv_file(file_path: str) -> pd.DataFrame:
    """Load the CSV file containing technical analyses."""
    try:
        df = pd.read_csv(file_path, encoding="utf-8")
        print(f"File {file_path} loaded successfully. {len(df)} rows found.")
        return df
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        return pd.DataFrame()

def get_style_and_length_configurations():
    """Define all style and length combinations."""
    base_styles = [
        {
            "style": "SysML/INCOSE",
            "perspective": "systems engineering",
            "tone": "formal and precise"
        },
        {
            "style": "descriptive overview",
            "perspective": "informational content",
            "tone": "neutral and factual"
        },
        {
            "style": "comprehensive analysis",
            "perspective": "technical literature",
            "tone": "detailed and objective"
        },
        {
            "style": "instructional/how-to",
            "perspective": "user guidance",
            "tone": "clear and direct"
        }
    ]

    length_configs = [
        {
            "length": "short",
            "description": "1 concise phrase",
            "max_tokens": 150,
            "instruction": "Keep the description very brief, limited to 1 or 2 clear and concise sentences."
        },
        {
            "length": "medium",
            "description": "3-4 sentences",
            "max_tokens": 450,
            "instruction": "Provide a description consisting of 3 to 4 sentences, covering key aspects clearly."
        },
        {
            "length": "long",
            "description": "5-6 sentences",
            "max_tokens": 800,
            "instruction": "Provide a detailed description consisting of 5 to 6 sentences, covering all key aspects."
        }
    ]

    style_length_combinations = []
    for style in base_styles:
        for length in length_configs:
            combination = {
                "style": style["style"],
                "perspective": style["perspective"],
                "tone": style["tone"],
                "length": length["length"],
                "length_description": length["description"],
                "max_tokens": length["max_tokens"],
                "length_instruction": length["instruction"]
            }
            style_length_combinations.append(combination)

    return style_length_combinations

In [4]:
def parse_technical_analysis(technical_analysis: str) -> Dict[str, str]:
    """Parses the technical analysis into sections."""
    sections = {}
    pattern = re.compile(r"#\s*(.*?)\s*:")
    matches = pattern.findall(technical_analysis)

    for i, match in enumerate(matches):
        start = technical_analysis.find(f"# {match}:")
        if i == len(matches) - 1:
            end = len(technical_analysis)
        else:
            end = technical_analysis.find(f"# {matches[i + 1]}:")
        content = technical_analysis[start + len(f"# {match}: "):end].strip()
        sections[match] = content

    return sections

def check_description_quality(generated_description: str, technical_analysis_sections: Dict[str, str]) -> Dict[str, bool]:
    """Checks if the generated description covers all expected sections."""
    coverage = {}
    description_lower = generated_description.lower()

    # Define section-specific keywords and concepts
    section_keywords = {
        "Technical Details": [
            "radar", "camera", "sensor", "77 ghz", "200 meters", "monocular", "field of view",
            "powertrain", "braking system", "can bus", "engine torque", "transmission",
            "brake pressure", "software architecture", "modular", "matlab", "simulink",
            "algorithm", "processing", "tracking", "control"
        ],
        "Operational Steps": [
            "initialization", "initialized", "started", "acc mode", "data collection",
            "sensor data", "target vehicle detection", "detects", "tracks", "filter",
            "false targets", "accurate tracking", "operational", "steps", "process"
        ],
        "Interactions with Components and Systems": [
            "braking system", "brake pressure", "suspension system", "damping rates",
            "vehicle stability", "steering system", "steering corrections", "lane centering",
            "infotainment system", "visual feedback", "auditory feedback", "interfaces",
            "interaction", "components", "systems"
        ],
        "Necessary Conditions": [
            "traffic conditions", "traffic density", "moderate", "low traffic",
            "driver attention", "attentive", "alert", "hands on wheel", "eyes on road",
            "conditions", "requirements", "necessary"
        ],
        "User Interface and Controls": [
            "acc button", "steering wheel", "activated", "deactivated", "distance setting",
            "desired distance", "target vehicle", "visual feedback", "display",
            "instrument cluster", "auditory feedback", "warning tone", "voice prompt",
            "interface", "controls", "button", "feedback"
        ],
        "Fault Detection and Management": [
            "sensor fault", "fault detection", "radar sensor", "camera sensor",
            "ecu fault", "software failures", "hardware failures", "powertrain fault",
            "engine failures", "transmission failures", "braking system fault",
            "brake pad wear", "abs malfunctions", "redundancy", "backup",
            "fault", "detection", "management", "diagnosis"
        ],
        "Performance Metrics": [
            "speed error", "vehicle speed", "target speed", "system availability",
            "percentage", "operational", "available", "performance", "metrics",
            "measurement", "error", "availability"
        ],
        "Regulatory Compliance": [
            "unece regulations", "fmvss regulations", "advanced driver assistance",
            "iso 26262", "functional safety", "cybersecurity standards",
            "automotive systems", "type approval", "regulatory authorities",
            "compliance", "standards", "regulations", "safety"
        ],
        "Integration with Advanced Features": [
            "lane centering", "highway pilot", "comprehensive", "driver assistance",
            "package", "integration", "advanced features", "integrates",
            "advanced", "features", "pilot", "centering"
        ]
    }

    for section, keywords in section_keywords.items():
        section_content = technical_analysis_sections.get(section, "")

        if section_content.strip() == "":
            coverage[section] = any(keyword in description_lower for keyword in keywords)
        else:
            keyword_matches = sum(1 for keyword in keywords if keyword in description_lower)
            section_name_variants = [
                section.lower(),
                section.lower().replace(" ", ""),
                section.lower().replace(" ", "_"),
            ]

            name_match = any(variant in description_lower for variant in section_name_variants)

            section_lower = section_content.lower()
            important_terms = []
            quoted_terms = re.findall(r'\\(.?)\\*', section_content)
            for term in quoted_terms:
                important_terms.append(term.lower())

            numbers_specs = re.findall(r'\d+\s*(?:ghz|meters|degree|%)', section_lower)
            important_terms.extend(numbers_specs)

            content_match = any(term in description_lower for term in important_terms)

            coverage[section] = (
                keyword_matches >= 2 or
                name_match or
                content_match or
                keyword_matches >= 1 and len([k for k in keywords if k in description_lower and len(k) > 4]) >= 1
            )

    return coverage

def generate_single_user_description(client: Groq, technical_analysis: str,
                                   functionality_name: str,
                                   model: str = "llama3-70b-8192",
                                   temperature: float = 0.7) -> Dict[str, Any]:
    """Generate a single user description from technical analysis with a random style and length."""
    style_length_combinations = get_style_and_length_configurations()

    config = random.choice(style_length_combinations)
    system_prompt = f"You are tasked with creating user-friendly descriptions of {functionality_name} systems from technical analyses."
    user_prompt = f"""\
    Based on the following comprehensive technical analysis of {functionality_name}, create a user-friendly description using the specified parameters:

    TECHNICAL ANALYSIS:
    {technical_analysis}

    DESCRIPTION PARAMETERS:
    STYLE: {config["style"]}
    PERSPECTIVE: {config["perspective"]}
    TONE: {config["tone"]}
    LENGTH: {config["length"]} ({config["length_description"]})"""

    if config["style"].lower() == "sysml/incose":
        user_prompt += f"""\
        Your description should adhere to the following guidelines:
        - Use formal and unambiguous language.
        - Structure the requirements using "shall" statements to denote mandatory system behaviors.
        - Focus on the system's functionalities, behaviors, and interactions.
        - Avoid implementation details unless they are necessary for understanding the requirement.
        - Ensure that each requirement is clear, concise, and verifiable.
        - Organize the requirements logically, separating different aspects of the system.

        Example of a SysML/INCOSE requirement:
        The ACC system shall allow the driver to deactivate the system by pressing the OFF button on the steering wheel. Upon deactivation, the system shall maintain the vehicle's current speed and transition control to the driver instantaneously."""

    elif config["style"].lower() == "descriptive overview":
        user_prompt += f"""\
        Your description should adhere to the following guidelines:
        - Provide a clear and concise overview of the system.
        - Focus on the main features and functionalities.
        - Use neutral and factual language.
        - Avoid technical jargon unless necessary.
        - Ensure the description is accessible to a general audience.
        """
    elif config["style"].lower() == "comprehensive analysis":
        user_prompt += f"""\
        Your description should adhere to the following guidelines:
        - Offer an in-depth analysis of the system.
        - Cover all key aspects, including technical details, operational principles, and integration with other systems.
        - Use detailed and objective language.
        - Include relevant data and examples where possible.
        - Ensure the analysis is thorough and well-structured.
        """
    elif config["style"].lower() == "instructional/how-to":
        user_prompt += f"""\
        Your description should adhere to the following guidelines:
        - Provide step-by-step instructions on how to use the system.
        - Focus on user actions and system responses.
        - Use clear and direct language.
        - Include any necessary warnings or cautions.
        - Ensure the instructions are easy to follow and understand.
        """
    else:
        user_prompt += f"""\
        Create a {config["length"]} user description that translates the technical information into accessible language while maintaining the specified style and tone.

        IMPORTANT GUIDELINES:
        - Write in third person or neutral descriptive language.
        - Avoid personal pronouns (you, your, we, us, I) unless necessary for the style.
        - Use objective, factual descriptions.
        - Focus on what the system does, how it works, and its characteristics.
        - Write as if describing the technology for an encyclopedia or technical reference.
        - Make the technical information accessible to users without deep technical background.
        - Adhere to the specified length requirements.
        """

    user_prompt += "\n\nRETURN ONLY THE USER DESCRIPTION TEXT WITHOUT ANY ADDITIONAL FORMATTING OR INTRODUCTORY SENTENCES."

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature,
            max_tokens=config["max_tokens"]
        )

        description_text = response.choices[0].message.content.strip()

        user_description = {
            "case_id": f"USER_DESC_{int(time.time())}_{random.randint(1000, 9999)}",
            "description_type": "user description",
            "style": config["style"],
            "perspective": config["perspective"],
            "tone": config["tone"],
            "length": config["length"],
            "original_technical_analysis": technical_analysis,
            "functionality_name": functionality_name,
            "user_description": description_text
        }

        technical_analysis_sections = parse_technical_analysis(technical_analysis)
        quality_check = check_description_quality(description_text, technical_analysis_sections)
        user_description["quality_check"] = quality_check

        time.sleep(0.2)
        return user_description

    except Exception as e:
        print(f"Error generating user description: {e}")
        return None

In [10]:
def save_user_descriptions(functionality_name: str, all_descriptions: List[Dict], output_file: str) -> None:
    """Save user descriptions to CSV file."""
    rows = []

    for desc in all_descriptions:
        if desc is not None:
            rows.append({
                "Functionality": functionality_name,
                "User_Description": desc["user_description"]
            })

    try:
        with open(output_file, "w", newline='', encoding="utf-8") as csvfile:
            fieldnames = ["Functionality", "User_Description"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for row in rows:
                writer.writerow(row)

        print(f"User descriptions saved to {output_file}")

    except Exception as e:
        print(f"Error saving CSV file: {e}")

def save_quality_checks(all_quality_checks: List[Dict], output_file: str) -> None:
    """Saves the quality check results to a CSV file with numeration."""
    rows = []
    for idx, check in enumerate(all_quality_checks, 1):
        row = {"ID": idx}
        row.update({section: covered for section, covered in check.items()})
        rows.append(row)

    try:
        with open(output_file, "w", newline='', encoding="utf-8") as csvfile:
            fieldnames = ["ID"] + [
                "Technical Details",
                "Operational Steps",
                "Interactions with Components and Systems",
                "Necessary Conditions",
                "User Interface and Controls",
                "Fault Detection and Management",
                "Performance Metrics",
                "Regulatory Compliance",
                "Integration with Advanced Features"
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for row in rows:
                writer.writerow(row)

        print(f"Quality checks saved to {output_file}")

    except Exception as e:
        print(f"Error saving quality checks: {e}")

def main():
    print(f"=== ACC User Description Generator from Technical Analysis ===")
    print("Configuration: 1 random user description per technical analysis")
    print("="*80)

    api_key = get_api_key()
    client = Groq(api_key=api_key)
    model_name = "llama3-70b-8192"
    temperature_generation = 0.7
    csv_file_path = "/content/adas_systems_step-01.csv"
    df = load_csv_file(csv_file_path)
    if df.empty:
        print("No data found in CSV file. Exiting.")
        return

    if "System" not in df.columns:
        print("Error: 'System' column not found in CSV file.")
        print(f"Available columns: {list(df.columns)}")
        return

    if "Description Modified" not in df.columns:
        print("Error: 'Description Modified' column not found in CSV file.")
        print(f"Available columns: {list(df.columns)}")
        return

    start_row = 2747
    end_row = 2750

    df_filtered = df.iloc[start_row:end_row]

    if df_filtered.empty:
        print("No valid technical analyses found. Exiting.")
        return

    print(f"Found {len(df_filtered)} rows with technical analyses to process.")

    style_length_combinations = get_style_and_length_configurations()
    print(f"\nAvailable Style and Length Combinations ({len(style_length_combinations)} total):")
    for i, config in enumerate(style_length_combinations, 1):
        print(f"  {i}. {config['style']} - {config['length']}")

    print(f"\nTotal user descriptions to generate: {len(df_filtered)}")
    print(f"(1 randomly selected variation per technical analysis)")

    proceed = input("\nProceed with generation? (y/n): ").lower()
    if proceed != 'y':
        print("Generation cancelled.")
        return

    all_user_descriptions = []
    all_quality_checks = []

    for index, row in tqdm(df_filtered.iterrows(), total=len(df_filtered), desc="Processing technical analyses"):
        technical_analysis = row["Description Modified"]
        functionality_name = row["System"]

        print(f"\nProcessing row {index + 1}/{len(df_filtered)}")
        print(f"Technical Analysis preview: {technical_analysis[:100]}...")

        technical_analysis_sections = parse_technical_analysis(technical_analysis)

        user_description = generate_single_user_description(
            client, technical_analysis, functionality_name,
            model=model_name, temperature=temperature_generation
        )

        if user_description:
            all_user_descriptions.append(user_description)
            print(f"Generated 1 user description (Style: {user_description['style']}, Length: {user_description['length']})")

            quality_check = check_description_quality(user_description["user_description"], technical_analysis_sections)
            all_quality_checks.append(quality_check)

            print("Quality Check Summary:")
            for section, covered in quality_check.items():
                status = "Covered" if covered else "Not Covered"
                print(f"  - {section}: {status}")
        else:
            print("Failed to generate user description for this row")
            all_quality_checks.append({section: False for section in technical_analysis_sections.keys()})

    print(f"\nSuccessfully generated {len(all_user_descriptions)} user descriptions")

    output_file = f"user_descriptions_single_per_analysis.csv"
    save_user_descriptions(functionality_name, all_user_descriptions, output_file)

    quality_output_file = "quality_checks.csv"
    save_quality_checks(all_quality_checks, quality_output_file)

    style_summary = {}
    length_summary = {}
    quality_summary = {}

    for desc in all_user_descriptions:
        style = desc.get("style", "Unknown")
        length = desc.get("length", "Unknown")
        quality = desc.get("quality_check", {})

        style_summary[style] = style_summary.get(style, 0) + 1
        length_summary[length] = length_summary.get(length, 0) + 1

        for section, covered in quality.items():
            quality_summary[section] = quality_summary.get(section, 0) + (1 if covered else 0)

    print(f"\n" + "="*80)
    print(f"GENERATION SUMMARY")
    print(f"="*80)
    print(f"Technical analyses processed: {len(df_filtered)}")
    "User descriptions generated: {len(all_user_descriptions)}"
    print(f"Generation ratio: 1:1 (1 description per technical analysis)")

    print(f"\nRandomly selected style distribution:")
    for style, count in style_summary.items():
        percentage = (count / len(all_user_descriptions)) * 100
        print(f"  - {style}: {count} descriptions ({percentage:.1f}%)")

    print(f"\nRandomly selected length distribution:")
    for length, count in length_summary.items():
        percentage = (count / len(all_user_descriptions)) * 100
        print(f"  - {length}: {count} descriptions ({percentage:.1f}%)")

    print(f"\nQuality Check Summary:")
    for section, count in quality_summary.items():
        percentage = (count / len(all_user_descriptions)) * 100
        print(f"  - {section}: {count} covered ({percentage:.1f}%)")

    print(f"\nInput file: {csv_file_path}")
    print(f"Output file: {output_file}")
    print(f"Quality check file: {quality_output_file}")
    print(f"="*80)

if __name__ == "__main__":
    main()

=== ACC User Description Generator from Technical Analysis ===
Configuration: 1 random user description per technical analysis
Please enter your Groq API key: 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
File /content/adas_systems_step-01.csv loaded successfully. 3000 rows found.
Found 3 rows with technical analyses to process.

Available Style and Length Combinations (12 total):
  1. SysML/INCOSE - short
  2. SysML/INCOSE - medium
  3. SysML/INCOSE - long
  4. descriptive overview - short
  5. descriptive overview - medium
  6. descriptive overview - long
  7. comprehensive analysis - short
  8. comprehensive analysis - medium
  9. comprehensive analysis - long
  10. instructional/how-to - short
  11. instructional/how-to - medium
  12. instructional/how-to - long

Total user descriptions to generate: 3
(1 randomly selected variation per technical analysis)

Proceed with generation? (y/n): y


Processing technical analyses:   0%|          | 0/3 [00:00<?, ?it/s]


Processing row 2748/3
Technical Analysis preview: # Technical Details


**Sensor Suite**: The BSM system utilizes a combination of sensors, including:...


Processing technical analyses:  33%|███▎      | 1/3 [00:01<00:02,  1.17s/it]

Generated 1 user description (Style: instructional/how-to, Length: short)
Quality Check Summary:
  - Technical Details: Covered
  - Operational Steps: Covered
  - Interactions with Components and Systems: Not Covered
  - Necessary Conditions: Covered
  - User Interface and Controls: Covered
  - Fault Detection and Management: Covered
  - Performance Metrics: Not Covered
  - Regulatory Compliance: Not Covered
  - Integration with Advanced Features: Not Covered

Processing row 2749/3
Technical Analysis preview: # Technical Details


**Sensor Suite**: The BSM system utilizes a combination of sensors, including:...


Processing technical analyses:  67%|██████▋   | 2/3 [00:02<00:00,  1.01it/s]

Generated 1 user description (Style: descriptive overview, Length: medium)
Quality Check Summary:
  - Technical Details: Covered
  - Operational Steps: Not Covered
  - Interactions with Components and Systems: Covered
  - Necessary Conditions: Covered
  - User Interface and Controls: Not Covered
  - Fault Detection and Management: Covered
  - Performance Metrics: Not Covered
  - Regulatory Compliance: Covered
  - Integration with Advanced Features: Covered

Processing row 2750/3
Technical Analysis preview: # Technical Details


**Sensor Suite**: The BSM system utilizes a combination of sensors, including:...


Processing technical analyses: 100%|██████████| 3/3 [00:02<00:00,  1.03it/s]

Generated 1 user description (Style: comprehensive analysis, Length: medium)
Quality Check Summary:
  - Technical Details: Covered
  - Operational Steps: Covered
  - Interactions with Components and Systems: Not Covered
  - Necessary Conditions: Not Covered
  - User Interface and Controls: Not Covered
  - Fault Detection and Management: Covered
  - Performance Metrics: Covered
  - Regulatory Compliance: Covered
  - Integration with Advanced Features: Covered

Successfully generated 3 user descriptions
User descriptions saved to user_descriptions_single_per_analysis.csv
Quality checks saved to quality_checks.csv

GENERATION SUMMARY
Technical analyses processed: 3
Generation ratio: 1:1 (1 description per technical analysis)

Randomly selected style distribution:
  - instructional/how-to: 1 descriptions (33.3%)
  - descriptive overview: 1 descriptions (33.3%)
  - comprehensive analysis: 1 descriptions (33.3%)

Randomly selected length distribution:
  - short: 1 descriptions (33.3%)
  - me


