In [None]:
import sys
import os

sys.path.append(os.path.abspath('..'))
from large_prompts.master_prompt import master_prompt

In [None]:
def prepare_AI_Act_prompt(master_prompt, single_use_case):
    """Build the full classification prompt for one AI use case.

    Args:
        master_prompt: Base prompt text that the instructions are appended to.
        single_use_case: Use-case text to classify; it is formatted into the
            prompt after the output-format instructions.

    Returns:
        ``master_prompt`` followed by the output-format instructions and the
        use-case text, as one string.
    """
    # Output-format instructions placed between the master prompt and the use case.
    output_rules = (
        "\nDo not give any intros or outros. "
        "Respond in plain text string only (no unusual arrays), "
        "without any formatting f.e. no bold or ## headings or numbered headings. "
        "The following is the AI Use case of a startup you have to classify:\n\n"
    )
    return master_prompt + f"{output_rules}{single_use_case}"

In [None]:
# Sample use case (name + description) that the single-model cells pass to
# prepare_AI_Act_prompt() as the text to classify.
single_use_case = """
AI Use Case: Industrial Process Automation,
Use Case Description: 3dvisionlabs employs 3D computer vision and artificial intelligence to automate tasks and enhance efficiency in industrial and logistics sectors. Their HemiStereo technology provides real-time depth perception, enabling precise analytics in complex environments. The intended purpose is to streamline industrial processes by automating tasks and reducing inefficiencies. It is deployed in the industry and logistics sectors, including manufacturing and supply chain operations. The system operates with a high level of autonomy to monitor and analyze processes without human intervention. It enhances worker productivity and safety by automating repetitive tasks and providing real-time insights. The system uses 3D depth data and visual information captured by HemiStereo cameras. Industrial operators and managers are the target users who aim to optimize processes. The AI system can adapt to various industrial environments and learn from data to improve performance over time. While the safety-critical nature of the system is moderate, accurate operation is essential to prevent potential hazards in industrial settings.
"""

In [None]:
# import ollama

# client = ollama.Client()

# model = "llama3.2" # context length = 131072

# input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)
# options={
#     'num_ctx': 100,
#     'num_predict': 4096
# }

# # Send query to the model
# response = client.generate(model=model, prompt="Hi", options=options)

# print(response.response)

In [None]:
import ollama

# Author's note: model is fine-tuned for some other task.
# Ollama model tag to query (author's note: context length = 131072).
model = "gemma3:27b"

# Generation limits forwarded to Ollama.
options = dict(
    num_ctx=8192,      # context length: input tokens + tokens generated so far must fit in here
    num_predict=4096,  # cap on the number of generated tokens (a finite limit, not unlimited)
)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "qwq:32b"

# Generation limits forwarded to Ollama.
options = dict(
    num_ctx=8192,      # context length: input tokens + tokens generated so far must fit in here
    num_predict=4096,  # cap on the number of generated tokens (a finite limit, not unlimited)
)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "deepseek-r1:32b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "qwen2.5:32b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "command-r:35b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "mistral-small:24b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
# Author's note: suspicious model.

import ollama

# Ollama model tag to query.
model = "yi:34b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama

# Ollama model tag to query.
model = "exaone-deep:32b"

# Generation limits forwarded to Ollama: context window size and output-token cap.
options = dict(num_ctx=8192, num_predict=4096)

client = ollama.Client()
input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

# Send the classification prompt to the model and show its raw answer.
response = client.generate(model=model, prompt=input_prompt, options=options)
print(response.response)

In [None]:
import ollama
import itertools

def ollama_generate_classification(model, single_use_case, options=None):
    """Ask one Ollama model to classify a single AI use case.

    The prompt is built by ``prepare_AI_Act_prompt`` from the module-level
    ``master_prompt`` and the given use-case text.

    Args:
        model: Ollama model tag, e.g. ``"gemma3:27b"``.
        single_use_case: Use-case text to classify.
        options: Optional dict of Ollama generation options. Its entries
            override the defaults below (``num_ctx=8192``,
            ``num_predict=4096``); omit it to get the original behaviour.

    Returns:
        The ``response`` attribute of the result returned by
        ``client.generate`` (the generated text).
    """
    # Defaults: context window size and cap on generated tokens.
    generation_options = {
        'num_ctx': 8192,
        'num_predict': 4096
    }
    if options:
        generation_options.update(options)

    client = ollama.Client()
    input_prompt = prepare_AI_Act_prompt(master_prompt, single_use_case)

    # Send query to the model
    response = client.generate(model=model, prompt=input_prompt, options=generation_options)

    return response.response

# Canonical field headers expected in a model answer, in output order.
target_tags = [
    # Fields describing the use case itself
    "AI Use Case", "Use Case Description",
    # Classification verdict and its justification
    "Risk Classification", "Reason",
    # Follow-up information request
    "Requires Additional Information", "What additional Information",
]

def generate_tag_variants(tag, separators=("", "-", " ")):
    """Generate every spelling of ``tag`` obtained by varying its word separators.

    The tag is split on single spaces and each gap between two words is filled
    with every entry of ``separators`` in turn, so a tag with ``k`` gaps yields
    up to ``len(separators) ** k`` variants.

    Args:
        tag: Canonical tag, words separated by single spaces.
        separators: Strings to try in each gap. The default (no separator,
            hyphen, space) reproduces the original behaviour; pass e.g.
            ``("", "-", " ", "_")`` to also cover underscore spellings.

    Returns:
        A set of variant strings. With the default separators it always
        contains ``tag`` itself; a single-word tag yields ``{tag}``.
    """
    first, *rest = tag.split(" ")
    # product(..., repeat=0) yields one empty combination, so a single-word
    # tag still produces exactly one variant: the tag itself.
    return {
        first + "".join(sep + word for sep, word in zip(combo, rest))
        for combo in itertools.product(separators, repeat=len(rest))
    }

# Lookup table: canonical tag -> set of its separator variants
tag_variants = dict(zip(target_tags, map(generate_tag_variants, target_tags)))

In [None]:
import json
import re

# Load the companies/use-cases catalogue. Read it explicitly as UTF-8 so the
# result does not depend on the platform's default encoding (the results file
# in this notebook is also written as UTF-8).
with open('use_cases.json', 'r', encoding='utf-8') as file:
    use_cases_data = json.load(file)

# Models queried for every use case in the parsing loop.
model_list = ['gemma3:27b', 'qwen2.5:32b']

# --- Loop-invariant regexes, built once instead of once per use case ---

# One pattern per canonical tag that matches ANY spelling of that tag
# (canonical spelling included) at the start of a line, allowing leading
# indentation and whitespace before the colon. Including the canonical
# spelling matters: otherwise "  AI Use Case:" or "AI Use Case :" would be
# accepted by split_pattern but rejected by the strict "^AI Use Case:" check
# below, and the whole model answer would be silently dropped.
header_patterns = {}
for correct, variants in tag_variants.items():
    # Sort for a deterministic alternation (sets are unordered).
    spellings = sorted(set(variants) | {correct}, key=lambda s: (-len(s), s))
    alternation = "|".join(re.escape(spelling) for spelling in spellings)
    header_patterns[correct] = re.compile(
        rf"^[ \t]*(?:{alternation})\s*:", flags=re.MULTILINE
    )

# Splits a block at every canonical "<tag>:" header; the capture group keeps
# the tag names in the split result.
split_pattern = re.compile(
    rf"^[ \t]*({'|'.join(re.escape(tag) for tag in target_tags)})\s*:",
    flags=re.MULTILINE
)

for index, company in enumerate(use_cases_data['companies']):
    for use_case in company["use_cases"]:
        single_use_case = f"""AI Use Case: {use_case["use_case_name"]}\nUse Case Description: {use_case["use_case_description"]}"""

        # === Step 1: Run the use case through all models ===
        # Answers are concatenated, each followed by a sentinel line so they
        # can be separated again below. To debug the parsing without calling
        # any model, assign a saved transcript to classification_string here.
        classification_string = ""
        for model in model_list:
            classification = ollama_generate_classification(model, single_use_case)
            classification_string += f"{classification}\n\n########END OF USE CASE########\n\n"
            print(classification)

        # === Step 2: Normalize headers to their canonical "<tag>:" form ===
        for correct, pattern in header_patterns.items():
            classification_string = pattern.sub(f"{correct}:", classification_string)

        # === Step 3: Split into one block per model answer ===
        use_case_blocks = re.split(r"########END OF USE CASE########", classification_string.strip())

        # === Step 4: Extract relevant fields from each block ===
        results = []

        for block in use_case_blocks:
            block = block.strip()

            if not re.search(r"^AI Use Case:", block, flags=re.MULTILINE):
                continue  # Skip blocks without the mandatory header

            # With one capture group the split result alternates:
            # [preamble, tag, value, tag, value, ...]
            chunks = split_pattern.split(block)

            # Name and description come from the input data; the rest
            # defaults to empty / "No" until the model answer overrides it.
            data = {
                "AI Use Case": use_case["use_case_name"],
                "Use Case Description": use_case["use_case_description"],
                "Risk Classification": "",
                "Reason": "",
                "Requires Additional Information": "No",
                "What additional Information": ""
            }

            for i in range(1, len(chunks), 2):
                tag = chunks[i].strip()
                value = chunks[i + 1].strip()
                if tag in data:
                    data[tag] = value

            # Drop filler text when no additional information is requested.
            if data["Requires Additional Information"].lower() == "no":
                data["What additional Information"] = ""

            results.append(data)

        # === Step 5: Save the parsed results ===
        # NOTE: the file is rewritten for every use case processed.
        with open("test.json", "w", encoding="utf-8") as f:
            json.dump(results, f, indent=4, ensure_ascii=False)

        break  # Test run: only the first use case of the company

    print("\n\n")
    if index == 0:
        break  # Test run: only the first company

In [None]:
# Author's note: time taken = 10 mins (presumably for one pass over all models below; confirm).
model_list = [
    'gemma3:27b',
    'qwq:32b',
    'deepseek-r1:32b',
    'qwen2.5:32b',
    'command-r:35b',
    'mistral-small:24b',
    'yi:34b',
    'exaone-deep:32b',
]


<!-- Tested Individually -->
gemma3:27b = 35s
qwq:32b = 1m40s
deepseek-r1:32b = 1m6s
qwen2.5:32b = 1m10s
command-r:35b = 1m10s
mistral-small:24b = 53.5s
yi:34b = 1m12s
exaone-deep:32b = 3m40s