# Generate Full Description of Systems

In [None]:
pip install groq -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m122.9/129.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import pandas as pd
import csv
from groq import Groq, Client
import time
from tqdm import tqdm


In [None]:
# ADAS systems for which detailed descriptions are generated.
systems = ["Adaptive Cruise Control", "Lane Keeping Assist", "Blind Spot Monitoring"]

def get_api_key() -> str:
    """
    Retrieves the Groq API key from the environment, or prompts the user for it.

    The ``GROQ_API_KEY`` environment variable takes precedence. When it is unset
    or empty, the key is requested interactively through ``getpass`` so that the
    secret is not echoed back to the terminal or stored in notebook cell output.

    Returns:
        str: The API key.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        # Imported locally so the function stays self-contained.
        from getpass import getpass

        # getpass hides the typed value; input() would leave the key visible
        # in the saved output of the cell.
        api_key = getpass("Please enter your Groq API key: ")
    return api_key

def load_existing_descriptions(output_file: str) -> dict:
    """
    Reads previously saved descriptions back from a CSV file.

    Returns a dict that maps each "System" value to its "Description Complete"
    value. A missing file yields an empty dict. If reading fails part-way, the
    error is printed and whatever was read up to that point is returned.
    """
    loaded = {}
    if not os.path.exists(output_file):
        return loaded

    try:
        with open(output_file, "r", encoding="utf-8", newline='') as handle:
            # Filled row by row so a failure keeps the rows already read.
            for record in csv.DictReader(handle):
                loaded[record["System"]] = record["Description Complete"]
        print(f"Loaded {len(loaded)} existing descriptions.")
    except Exception as e:
        print(f"Error loading existing descriptions: {e}")
    return loaded

def generate_detailed_descriptions(
    api_key: str,
    systems: list,
    output_file: str = "adas_systems.csv",
    model: str = "llama3-70b-8192",
    temperature: float = 0.5,
    max_tokens: int = 8100,
    delay: float = 1.0,
    batch_size: int = 1
) -> dict:
    """
    Generates detailed descriptions for each ADAS system, including only the specified sections.

    Saves the results regularly and can resume processing if interrupted:
    descriptions already present in ``output_file`` are reused, while systems
    that are missing or whose stored value is a "Generation error: ..."
    placeholder from an earlier failed run are (re)generated.

    Args:
        api_key (str): Groq API key.
        systems (list): List of ADAS systems.
        output_file (str): CSV file to save the results.
        model (str): Model to use for generation.
        temperature (float): Creativity parameter (0.0-1.0).
        max_tokens (int): Maximum number of tokens for the generated description.
        delay (float): Delay in seconds between API requests.
        batch_size (int): Number of systems to process before saving.
            Values below 1 are treated as 1.

    Returns:
        dict: Dictionary containing the detailed descriptions. A system whose
        request failed maps to a "Generation error: ..." message.
    """
    error_prefix = "Generation error:"
    # A non-positive batch size would make the modulo below raise (or never
    # trigger a save), so fall back to saving after every system.
    batch_size = max(1, batch_size)

    descriptions = load_existing_descriptions(output_file)
    # Stored error placeholders count as "not done", otherwise a failed system
    # would never be retried when the run is resumed.
    remaining_systems = [
        s for s in systems
        if s not in descriptions or str(descriptions[s]).startswith(error_prefix)
    ]

    if not remaining_systems:
        print("All systems have already been processed!")
        return descriptions

    print(f"Generating descriptions for {len(remaining_systems)} remaining systems...")

    # The client is only needed once there is actual work to do.
    client = Client(api_key=api_key)
    system_prompt = "You are an automotive engineering expert specializing in Advanced Driver Assistance Systems (ADAS)."
    total = len(remaining_systems)

    for i, system in enumerate(tqdm(remaining_systems)):
        user_prompt = f"""
        Please provide a detailed and technical description of the ADAS system '{system}'. The description should include the following sections:

        # Technical Details
            - [Technical detail 1]
            - [Technical detail 2]
            - [Technical detail 3]
            - [Technical detail 4]
            - [Technical detail 5]

        # Operational Steps
            - [Operational step 1]
            - [Operational step 2]
            - [Operational step 3]
            - [Operational step 4]
            - [Operational step 5]

        # Interactions with Components and Systems
            - [Interaction 1]
            - [Interaction 2]
            - [Interaction 3]
            - [Interaction 4]
            - [Interaction 5]

        # Necessary Conditions
            - [Condition 1]
            - [Condition 2]
            - [Condition 3]
            - [Condition 4]
            - [Condition 5]

        # User Interface and Controls
            - [Interface element 1]
            - [Interface element 2]
            - [Interface element 3]
            - [Interface element 4]
            - [Interface element 5]

        # Fault Detection and Management
            - [Fault management 1]
            - [Fault management 2]
            - [Fault management 3]
            - [Fault management 4]
            - [Fault management 5]

        # Performance Metrics
            - [Performance metric 1]
            - [Performance metric 2]
            - [Performance metric 3]
            - [Performance metric 4]
            - [Performance metric 5]

        # Regulatory Compliance
            - [Regulatory aspect 1]
            - [Regulatory aspect 2]
            - [Regulatory aspect 3]
            - [Regulatory aspect 4]
            - [Regulatory aspect 5]

        # Integration with Advanced Features
            - [Integration 1]
            - [Integration 2]
            - [Integration 3]
            - [Integration 4]
            - [Integration 5]

        Please ensure the description is detailed, technical, and comprehensive. Use the following delimiters to mark the start and end of the description:
        START_DESCRIPTION
        [Your detailed description here]
        END_DESCRIPTION
        Do not include the delimiters in the actual description.
        """

        # Only the API call is guarded: a failure while saving must not be
        # recorded as a generation error for this system.
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=temperature,
                max_tokens=max_tokens
            )
            description = response.choices[0].message.content
        except Exception as e:
            print(f"Error generating description for '{system}': {e}")
            descriptions[system] = f"Generation error: {e}"
            save_descriptions_to_csv(descriptions, output_file)
        else:
            descriptions[system] = description

            # Checkpoint before waiting so an interruption during the delay
            # does not lose the description that was just generated.
            if (i + 1) % batch_size == 0 or i == total - 1:
                save_descriptions_to_csv(descriptions, output_file)
                print(f"Saved {i + 1}/{total} systems processed.")

        # Throttle between requests (also after a failure); no wait is needed
        # once the last system has been handled.
        if delay > 0 and i < total - 1:
            time.sleep(delay)

    return descriptions

def save_descriptions_to_csv(descriptions: dict, output_file: str) -> None:
    """
    Saves the descriptions to a CSV file.

    The file has the columns "System" and "Description Complete", one row per
    entry of ``descriptions``. The data is first written to a temporary file
    next to ``output_file`` and then moved into place, so an interruption
    during the write cannot leave a truncated checkpoint behind.

    Args:
        descriptions (dict): Mapping of system name to description text.
        output_file (str): Path of the CSV file to (over)write.
    """
    fieldnames = ["System", "Description Complete"]
    temp_file = f"{output_file}.tmp"

    try:
        with open(temp_file, "w", newline='', encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(
                {"System": system, "Description Complete": description}
                for system, description in descriptions.items()
            )
        # Swap the finished file in; the previous checkpoint stays intact
        # until this point.
        os.replace(temp_file, output_file)
    finally:
        # After a successful replace the temporary file is gone; this only
        # cleans up after a failed or interrupted write.
        if os.path.exists(temp_file):
            os.remove(temp_file)

def main():
    """
    Runs the description generation for the module-level ``systems`` list with
    the notebook's fixed settings and prints a completion summary.
    """
    output_file = "adas_systems.csv"
    descriptions = generate_detailed_descriptions(
        api_key=get_api_key(),
        systems=systems,
        output_file=output_file,
        model="llama3-70b-8192",
        temperature=0.5,
        max_tokens=8100,
        batch_size=1,
    )

    print(f"\nCompleted! {len(descriptions)} descriptions generated and saved to {output_file}")

# Run the generation pipeline only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Please enter your Groq API key: [REDACTED]
Generating descriptions for 3 remaining systems...


 33%|███▎      | 1/3 [00:05<00:10,  5.24s/it]

Saved 1/3 systems processed.


 67%|██████▋   | 2/3 [00:10<00:05,  5.42s/it]

Saved 2/3 systems processed.


100%|██████████| 3/3 [00:15<00:00,  5.28s/it]

Saved 3/3 systems processed.

Completed! 3 descriptions generated and saved to adas_systems.csv





# Generate modified descriptions and their missing rows

In [None]:
import random
import re

In [None]:
# `output_file` exists only as a local variable inside main(), so it is not
# available at cell level; name the generated CSV explicitly here.
output_file = "adas_systems.csv"

# Load the generated descriptions (the /content/ prefix is kept from the original cell).
df = pd.read_csv(f'/content/{output_file}')
print(df.shape)

df.head()

(3, 2)


Unnamed: 0,System,Description Complete
0,Adaptive Cruise Control,START_DESCRIPTION\n\n**Adaptive Cruise Control...
1,Lane Keeping Assist,START_DESCRIPTION\n\n**Lane Keeping Assist (LK...
2,Blind Spot Monitoring,START_DESCRIPTION\n\n**Blind Spot Monitoring (...


In [None]:
# Preview the raw description stored for the first row (index label 0).
df.loc[0, 'Description Complete']



**Clean and load description**

In [None]:
def clean_description(row):
    """
    Extracts the text between the START_DESCRIPTION / END_DESCRIPTION markers.

    Reads ``row['Description Complete']``. When the start marker is present,
    the text following it (up to the end marker, if any) is taken, its first
    two characters are dropped (e.g. a leading '##' or '**') and "# " is
    prepended. When the start marker is absent, the value is returned unchanged.
    """
    raw_text = row['Description Complete']

    pieces = raw_text.split('START_DESCRIPTION\n\n')
    if len(pieces) < 2:
        # Start marker not found: leave the text as it is.
        return raw_text

    body = pieces[1].split('\n\nEND_DESCRIPTION')[0]
    return "# " + body[2:]

In [None]:
# Apply the cleaning function to each row
# Run the cleaning function over every row and store the result back.
cleaned_column = df.apply(clean_description, axis=1)
df['Description Complete'] = cleaned_column

# Preview the cleaned description of the second row.
cleaned_column[1]



In [None]:
def process_description(description):
    """
    Drops the first line of a description and turns bold titles into headers.

    Every remaining line that both starts and ends with '**' is replaced by
    '# <title>' followed by an extra newline, where <title> is the text between
    the markers with surrounding whitespace stripped. All other lines are kept
    as they are. The lines are then re-joined with newlines.
    """
    def to_header(text):
        # A line wrapped in '**' is a section title; anything else passes through.
        if text.startswith('**') and text.endswith('**'):
            return f'# {text[2:-2].strip()}\n'
        return text

    # Everything after the first line of the description.
    remaining = description.split('\n')[1:]
    return '\n'.join(to_header(text) for text in remaining)

# Convert the bold section titles of every description into Markdown headers.
processed_column = df['Description Complete'].apply(process_description)
df['Description Complete'] = processed_column

# Print the processed description of the second row as a spot check.
print(processed_column[1])


# Technical Details


1. **Camera System**: The LKA system uses a monocular camera mounted on the windshield, typically behind the rearview mirror, to capture images of the road ahead. The camera is equipped with a wide-angle lens and a high-dynamic-range (HDR) image sensor to capture clear images in various lighting conditions.
2. **Image Processing Unit (IPU)**: The camera sends the captured images to the IPU, which is a dedicated computer that processes the images in real-time to detect the lane markings, road edges, and other relevant features.
3. **Lane Detection Algorithm**: The IPU runs a sophisticated lane detection algorithm that analyzes the images to detect the lane markings, including solid and dashed lines, and determines the vehicle's position within the lane.
4. **Electric Power Steering (EPS) System**: The LKA system interfaces with the EPS system to provide gentle steering corrections to the driver. The EPS system is typically equipped with a motor, gearbox, and senso

**Remove random rows from each section**

In [None]:
def remove_numbering(line):
    """
    Strips a leading list number such as "1. " from the given text.

    The prefix must sit at the very start: one or more digits, a dot, and at
    least one whitespace character. For example, "1. Camera System: ..."
    becomes "Camera System: ...". Text without such a prefix is returned
    unchanged.
    """
    prefix = re.match(r'\d+\.\s+', line)
    if prefix is None:
        return line
    return line[prefix.end():]

In [None]:
def generate_samples(system, description, num_samples=1000):
    """
    Builds randomized variants of a description with some rows removed.

    The description is split into sections at lines starting with '# '. For
    every sample, between 1 and 4 content lines are removed at random from each
    section that has at least two content lines (at least one content line is
    always kept). Blank lines are never chosen for removal, so every tracked
    missing row is an actual piece of content. Leading list numbering is
    stripped from all retained lines so that gaps in the numbering do not
    reveal which rows were removed.

    Args:
        system: Name of the system, copied into every sample.
        description: Description text with '# ' section headers.
        num_samples: Number of variants to generate.

    Returns:
        list: One dict per sample with the keys 'System',
        'Description Modified' (the rebuilt text) and 'Missing Rows' (a list
        of {'section', 'missing_line'} dicts; removed lines keep their original
        text and are listed in document order).
    """
    # Group the lines under their '# ' header; lines before the first header
    # belong to no section and are dropped.
    sections = []
    for line in description.split('\n'):
        if line.startswith('# '):
            sections.append({'title': line, 'lines': []})
        elif sections:
            sections[-1]['lines'].append(line)

    def remove_random_rows(section):
        """Returns a copy of the section with random content lines removed."""
        lines = section['lines']
        # Only lines with visible content are candidates; removing a blank
        # line would record an empty string as a "missing row".
        candidates = [idx for idx, line in enumerate(lines) if line.strip()]

        removed = set()
        if len(candidates) > 1:
            # Remove between 1 and 4 rows, always keeping at least one.
            num_remove = random.randint(1, min(4, len(candidates) - 1))
            removed = set(random.sample(candidates, num_remove))

        # The result always has a 'missing' key, even when nothing could be
        # removed, so callers can read it unconditionally.
        return {
            'title': section['title'],
            'lines': [
                remove_numbering(line)
                for idx, line in enumerate(lines)
                if idx not in removed
            ],
            'missing': [lines[idx] for idx in sorted(removed)],
        }

    samples = []
    for _ in range(num_samples):
        processed_sections = [remove_random_rows(section) for section in sections]
        missing_tracking = [
            {'section': processed['title'], 'missing_line': missing_line}
            for processed in processed_sections
            for missing_line in processed['missing']
        ]
        # Reconstruct the description from the reduced sections.
        new_description = '\n'.join(
            processed['title'] + '\n' + '\n'.join(processed['lines'])
            for processed in processed_sections
        )
        samples.append({
            'System': system,
            'Description Modified': new_description,
            'Missing Rows': missing_tracking
        })

    return samples

**Generate the dataset**

In [None]:
all_samples = []

# Generate 1000 randomized variants for every system description.
for _, row in df.iterrows():
    # The callee name was missing here and the call was split across two
    # lines, which is a syntax error; call generate_samples explicitly.
    samples = generate_samples(row['System'], row['Description Complete'], num_samples=1000)
    all_samples.extend(samples)

# Create a new DataFrame for the samples
samples_df = pd.DataFrame(all_samples)

# Display the first few samples
print("Number of samples generated:", len(samples_df))
print("\nSample 1:\n")
print(samples_df['Description Modified'][0])
print("\nMissing Rows for Sample 1:\n")
for missing in samples_df['Missing Rows'][0]:
    print(f"Section: {missing['section']}\nMissing Line: {missing['missing_line']}\n")

Number of samples generated: 3000

Sample 1:

# Technical Details
**Radar and Camera Sensors**: The ACC system employs a combination of radar and camera sensors to detect and track the distance, speed, and trajectory of the preceding vehicle. The radar sensor operates at a frequency of 77 GHz and has a range of up to 200 meters, while the camera sensor is a high-resolution, monocular camera with a 30-degree field of view.
**Powertrain and Braking System Interface**: The ACC system interfaces with the vehicle's powertrain and braking systems through a dedicated CAN bus. The system can adjust engine torque, transmission gear shifts, and brake pressure to maintain a safe distance from the preceding vehicle.
**Software Architecture**: The ACC system's software architecture is based on a modular, layered design, with separate modules for sensor processing, tracking, and control. The system uses a model-based design approach, with algorithms developed using MATLAB and Simulink.

# Operationa

In [None]:
# Display the DataFrame built from all generated samples.
samples_df

Unnamed: 0,System,Description Modified,Missing Rows
0,Adaptive Cruise Control,# Technical Details\n**Radar and Camera Sensor...,"[{'section': '# Technical Details', 'missing_l..."
1,Adaptive Cruise Control,# Technical Details\n\n\n**Electronic Control ...,"[{'section': '# Technical Details', 'missing_l..."
2,Adaptive Cruise Control,# Technical Details\n\n\n**Radar and Camera Se...,"[{'section': '# Technical Details', 'missing_l..."
3,Adaptive Cruise Control,# Technical Details\n\n\n**Radar and Camera Se...,"[{'section': '# Technical Details', 'missing_l..."
4,Adaptive Cruise Control,# Technical Details\n\n**Radar and Camera Sens...,"[{'section': '# Technical Details', 'missing_l..."
...,...,...,...
2995,Blind Spot Monitoring,# Technical Details\n\n\n**Sensor Suite**: The...,"[{'section': '# Technical Details', 'missing_l..."
2996,Blind Spot Monitoring,# Technical Details\n\n**Sensor Suite**: The B...,"[{'section': '# Technical Details', 'missing_l..."
2997,Blind Spot Monitoring,# Technical Details\n\n\n\t* Radar sensors (24...,"[{'section': '# Technical Details', 'missing_l..."
2998,Blind Spot Monitoring,# Technical Details\n\n**Sensor Suite**: The B...,"[{'section': '# Technical Details', 'missing_l..."


In [None]:
# Write the samples to CSV without the index column.
# NOTE(review): 'Missing Rows' holds lists of dicts; presumably these end up as
# their string form in the CSV — confirm how the column is parsed when read back.
samples_df.to_csv('adas_systems_step-01.csv', index=False)