In [None]:
pip install groq -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/130.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import datetime
import json
import os
import re
import sys
import time
from collections import deque

import pandas as pd
import requests

In [None]:
# Groq API key used for the "Authorization: Bearer ..." header.
# Read from the GROQ_API_KEY environment variable so the secret does not have
# to be stored in the notebook; falls back to the original placeholder if unset.
api_key = os.environ.get("GROQ_API_KEY", "**")

## Function for extracting sections

In [None]:
def extract_sections(text, sections_to_extract):
    """Collect the body lines of selected sections from a markdown-like text.

    A section header is a line of the form ``# <name>`` or ``**<name>``
    (optionally followed by ``:`` and a closing ``*``/``**``); the name is
    matched case-insensitively against ``sections_to_extract``.

    Args:
        text: The full text to scan, with lines separated by ``\\n``.
        sections_to_extract: Iterable of section names to look for.

    Returns:
        dict mapping each section name that was found (spelled as given in
        ``sections_to_extract``) to the list of its stripped, non-blank body
        lines. Lines that themselves start with ``# `` or ``**`` are never
        stored. If a header occurs more than once, the later occurrence
        replaces the earlier content.

    Note:
        A ``# `` heading that is not one of the requested sections closes the
        current section, so text belonging to an unrequested section is not
        attributed to the previous one. Lines starting with ``**`` that are not
        a requested header are skipped without closing the section, since they
        are frequently inline emphasis rather than headings.
    """
    heading_prefix = r'^\s*((?:#\s+)|(?:\*\*))'

    # Compile every pattern once per call instead of once per (line, section).
    # Matches both "# Section" and "**Section:**".
    section_patterns = [
        (
            section,
            re.compile(
                heading_prefix + re.escape(section) + r'\s*[:]?(\*\*?|$)',
                re.IGNORECASE,
            ),
        )
        for section in sections_to_extract
    ]
    marker_re = re.compile(heading_prefix)
    hash_heading_re = re.compile(r'^\s*#\s+')

    extracted_sections = {}
    current_section = None

    for line in text.split('\n'):
        matched = next(
            (section for section, pattern in section_patterns if pattern.match(line)),
            None,
        )
        if matched is not None:
            # Header of a requested section: start (or restart) collecting.
            current_section = matched
            extracted_sections[current_section] = []
            continue

        if hash_heading_re.match(line):
            # Heading of a section nobody asked for: stop collecting so its
            # body does not leak into the previously opened section.
            current_section = None
            continue

        if current_section and line.strip() != '' and not marker_re.match(line):
            extracted_sections[current_section].append(line.strip())

    return extracted_sections

In [None]:
class RateLimiter:
    """Sliding-window throttle that keeps calls under a per-minute request cap for the Groq API."""

    def __init__(self, requests_per_minute=30):
        # The deque holds at most one timestamp per allowed request; once it is
        # full, appending drops the oldest entry automatically.
        self.requests_per_minute = requests_per_minute
        self.request_timestamps = deque(maxlen=requests_per_minute)

    def wait_if_needed(self):
        """Sleep until a request slot is free, then record the current call."""
        now = time.time()
        window_full = len(self.request_timestamps) >= self.requests_per_minute

        if window_full:
            elapsed = now - self.request_timestamps[0]
            if elapsed < 60:
                # Wait out the rest of the minute, plus a small safety margin.
                wait_time = 60 - elapsed + 0.1
                print(f"Rate limit reached, waiting for {wait_time:.2f} seconds...")
                time.sleep(wait_time)

        # Timestamp is taken after any sleep so it reflects the real send time.
        self.request_timestamps.append(time.time())

## Function for generating the sequence diagram



In [None]:
def generate_sequence_diagram(description_dict, rate_limiter, max_retries=3, timeout=60):
    """Ask the Groq chat-completions endpoint for a PlantUML sequence diagram.

    Args:
        description_dict: Mapping of section name -> list of text lines; each
            section is appended to the prompt as a ``# <section>`` block.
        rate_limiter: Object exposing ``wait_if_needed()``; it is called before
            every HTTP attempt (including retries) so retries also count
            against the request budget.
        max_retries: Maximum number of HTTP attempts.
        timeout: Seconds to wait for the HTTP request before treating the
            attempt as failed; without it a stalled connection would block
            the whole batch.

    Returns:
        str: The model's reply text on success. On failure a human-readable
        message is returned instead of raising: ``"UML generation error: ..."``
        when the response has no choices, or ``"Failed after N attempts: ..."``
        when every attempt raised a ``requests`` exception.

    Uses the module-level ``api_key`` for the bearer token.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"

    # NOTE: PlantUML quotes participant names with double quotes; a single
    # quote begins a comment, so the prompt must not ask for single quotes.
    prompt = """You are an expert in UML modeling, specializing in sequence diagrams for automotive systems, particularly ADAS (Advanced Driver Assistance Systems).

    TASK:
    1. Carefully analyze the technical description below, which is divided into sections.
    2. Extract all necessary elements for a complete UML sequence diagram:
       - Participants (driver, ADAS system components, sensors, interfaces)
       - Messages exchanged and their precise chronology
       - Conditions, loops, and alternative paths
    3. Generate valid and complete PlantUML code that accurately represents the system's interaction flow, using double quotes for participant names that contain spaces or special characters.

    TECHNICAL DESCRIPTION:
    """

    for section, content in description_dict.items():
        prompt += f"\n# {section}\n"
        for line in content:
            prompt += f"{line}\n"

    # Additional instructions to handle participant naming
    prompt += """

    IMPORTANT NOTES:
    - Use double quotes for participant names that contain spaces or special characters. For example:
        participant "Powertrain System" as Powertrain
      Alternatively, if possible, use CamelCase or underscores to avoid spaces:
        participant PowertrainSystem as Powertrain
    - Ensure that the PlantUML code starts with @startuml and ends with @enduml.
    - Never wrap participant names in single quotes; PlantUML treats a single quote as the start of a comment.
    """

    payload = {
        "model": "mistral-saba-24b",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
        "max_tokens": 2048
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    last_error = "no attempt was made"
    for attempt in range(max_retries):
        # Every attempt is a real API call, so each one goes through the limiter.
        rate_limiter.wait_if_needed()
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
            response.raise_for_status()

            print(f"Status code: {response.status_code}")
            response_json = response.json()

            if "choices" in response_json and len(response_json["choices"]) > 0:
                return response_json["choices"][0]["message"]["content"]

            print(f"Unusual response: {response_json}")
            if "error" in response_json:
                print(f"Error: {response_json['error']}")
            return f"UML generation error: {response_json}"

        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt+1} failed: {e}")
            last_error = str(e)
            if attempt >= max_retries - 1:
                return f"Failed after {max_retries} attempts: {str(e)}"

            # e.response is None for connection-level failures.
            status_code = getattr(e.response, "status_code", None)
            if status_code == 429:
                # Prefer the server's Retry-After hint (seconds) when it is a
                # sane number; otherwise wait a full minute plus backoff.
                try:
                    hinted = float(e.response.headers.get("Retry-After"))
                except (TypeError, ValueError):
                    hinted = None
                if hinted is not None and 0 <= hinted <= 600:
                    wait_time = hinted + 1
                else:
                    wait_time = 60 + 2 ** attempt
                print(f"Rate limit error. Waiting for {wait_time} seconds...")
            else:
                wait_time = 2 ** attempt
                print(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)

    # Only reachable when max_retries <= 0; keep the return type a string.
    return f"Failed after {max_retries} attempts: {last_error}"

## Process The Output

In [None]:
def _extract_plantuml_code(uml_content):
    """Return the PlantUML source embedded in a model reply, or "" if none is found."""
    # Preferred: the diagram itself, wherever it sits in the reply. This also
    # drops a fence language tag such as "plantuml" and works without fences.
    diagram = re.search(r'@startuml.*?@enduml', uml_content, re.DOTALL)
    if diagram:
        return diagram.group(0).strip()

    # Fallback: first complete ``` fenced block, minus a leading language tag.
    parts = uml_content.split("```")
    if len(parts) > 2:
        fenced = parts[1].strip()
        return re.sub(r'^(?:plantuml|puml|uml)[ \t]*\n', '', fenced, flags=re.IGNORECASE).strip()
    return ""


def _load_previous_results(output_file, start_idx, end_idx):
    """Load rows saved by earlier batches, excluding the range about to be reprocessed."""
    try:
        previous = pd.read_csv(output_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return None
    if 'index' in previous.columns:
        outside_batch = (previous['index'] < start_idx) | (previous['index'] >= end_idx)
        previous = previous[outside_batch]
    return previous


def _save_results(results, previous_results, output_file):
    """Write earlier rows (if any) followed by this run's rows to output_file."""
    frame = pd.DataFrame(results)
    if previous_results is not None and not previous_results.empty:
        frame = pd.concat([previous_results, frame], ignore_index=True)
    frame.to_csv(output_file, index=False)


def process_csv_batch(file_path, batch_size=20, start_from=0,
                      output_file="uml_sequence_diagrams.csv"):
    """Processes the CSV in batches without creating a progression file.

    For each row in ``[start_from, start_from + batch_size)`` the 'Output'
    column is split into sections, sent to ``generate_sequence_diagram`` and
    the extracted PlantUML code is recorded.

    Args:
        file_path: Path of the input CSV; it must contain an 'Output' column
            and may contain an 'ID' column, which is copied to the results.
        batch_size: Maximum number of rows to process in this call.
        start_from: Positional index of the first row to process; negative
            values are treated as 0.
        output_file: CSV that receives the results. When resuming
            (``start_from > 0``) rows already present in this file are kept
            and the new rows are appended, so earlier batches are not lost.
            When starting from 0 the file is overwritten.

    Returns:
        tuple ``(next_index, total_rows)``. ``(start_from, 0)`` is returned
        if the input could not be loaded or another unexpected error stops
        the batch.

    Per-row failures are printed, appended to ``errors_processing.log`` and
    skipped; they do not abort the batch.
    """

    try:
        df = pd.read_csv(file_path)
        total_rows = len(df)

        print(f"File loaded successfully: {total_rows} entries found")
        rate_limiter = RateLimiter(requests_per_minute=25)
        start_idx = max(start_from, 0)
        end_idx = min(start_idx + batch_size, total_rows)
        print(f"Processing lines {start_idx} to {end_idx-1}")

        # On resume, keep what previous batches already wrote.
        previous_results = None
        if start_idx > 0:
            previous_results = _load_previous_results(output_file, start_idx, end_idx)
            if previous_results is not None:
                print(f"Resuming: keeping {len(previous_results)} rows already saved in '{output_file}'")

        results = []
        sections_to_extract = [
            "Technical Details",
            "Operational Steps",
            "Interactions with Components and Systems",
            "Necessary Conditions",
            "User Interface and Controls",
            "Fault Detection and Management"
        ]

        for index in range(start_idx, end_idx):
            row = df.iloc[index]
            try:
                print(f"Processing line {index}...")
                if 'Output' not in row:
                    print(f"Error: 'Output' column not found in line {index}")
                    print(f"Available columns: {row.index.tolist()}")
                    continue

                description = row['Output']
                if pd.isna(description) or not description.strip():
                    print(f"Line {index}: empty or invalid description, skipped")
                    continue

                sections = extract_sections(description, sections_to_extract=sections_to_extract)

                # Human-readable copy of exactly what was extracted for this row.
                structured_description = ""
                for section, content in sections.items():
                    structured_description += f"# {section}\n"
                    for line in content:
                        structured_description += f"{line}\n"
                    structured_description += "\n"

                uml_content = generate_sequence_diagram(sections, rate_limiter)
                uml_code = _extract_plantuml_code(uml_content)

                result_dict = {
                    'index': index,
                    'description': structured_description.strip(),
                    'uml_code': uml_code
                }

                if 'ID' in row:
                    result_dict['ID'] = row['ID']

                results.append(result_dict)
                # Checkpoint regularly so an interruption loses little work.
                if index % 5 == 0 or index == end_idx - 1:
                    _save_results(results, previous_results, output_file)
                    print(f"Progress saved: {len(results)}/{end_idx-start_idx} lines processed")

            except Exception as e:
                print(f"Error processing line {index}: {str(e)}")
                with open("errors_processing.log", "a") as log:
                    log.write(f"{datetime.datetime.now()} - Error line {index}: {str(e)}\n")
                continue

        if results:
            # Final write covers the case where the last row failed and the
            # in-loop checkpoint was therefore skipped.
            _save_results(results, previous_results, output_file)
            print(f"Results saved in '{output_file}'")
        else:
            print("No results to save")

        return end_idx, total_rows

    except Exception as e:
        print(f"Critical error processing the CSV: {str(e)}")
        return start_from, 0

In [None]:
if __name__ == "__main__":
    FILE_PATH = 'Dataset_ACC.csv'
    BATCH_SIZE = 500
    # Row to start from when this cell is run in a notebook; edit to resume.
    START_FROM = 0

    # Under a Jupyter/Colab kernel, sys.argv carries the kernel's own flags
    # (e.g. "-f <connection file>"), not user arguments, so it is only parsed
    # when running as a plain script.
    in_notebook = "ipykernel" in sys.modules

    start_from = START_FROM
    if not in_notebook and len(sys.argv) > 1:
        try:
            start_from = int(sys.argv[1])
        except ValueError:
            print(f"Invalid argument for starting point: {sys.argv[1]}, using 0 by default.")
            start_from = 0

    current_position, total = process_csv_batch(FILE_PATH, BATCH_SIZE, start_from)

    if total > 0:
        print(f"Processing completed: {current_position}/{total} entries ({current_position/total*100:.1f}%)")

        if current_position < total:
            if in_notebook:
                print(f"\nTo continue processing from the next entry, set START_FROM = {current_position} and re-run this cell.")
            else:
                print(f"\nTo continue processing from the next entry, run:")
                print(f"python {sys.argv[0]} {current_position}")

Invalid argument for starting point: -f, using 0 by default.
File loaded successfully: 1000 entries found
Processing lines 0 to 499
Processing line 0...
Status code: 200
Progress saved: 1/500 lines processed
Processing line 1...
Status code: 200
Processing line 2...
Attempt 1 failed: 429 Client Error: Too Many Requests for url: https://api.groq.com/openai/v1/chat/completions
Rate limit error. Waiting for 61 seconds...
Status code: 200
Processing line 3...
Status code: 200
Processing line 4...
Status code: 200
Processing line 5...
Attempt 1 failed: 429 Client Error: Too Many Requests for url: https://api.groq.com/openai/v1/chat/completions
Rate limit error. Waiting for 61 seconds...
Status code: 200
Progress saved: 6/500 lines processed
Processing line 6...
Status code: 200
Processing line 7...
Status code: 200
Processing line 8...
Attempt 1 failed: 429 Client Error: Too Many Requests for url: https://api.groq.com/openai/v1/chat/completions
Rate limit error. Waiting for 61 seconds...
St