<a href="https://colab.research.google.com/github/ffeldhaus/conversational-agents-intent-improver-agent/blob/main/Improve_Conversational_Agents_(Dialogflow_CX)_Intents_via_Reinforcement_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet google-cloud-dialogflow-cx google-genai tqdm langcodes

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import io
import re
import uuid

from google.colab import auth
from google.colab import files

from google.api_core import retry
from google.api_core.client_options import ClientOptions

from google.cloud import dialogflowcx_v3 as dialogflow

from google import genai
from google.genai import types

from pydantic import BaseModel, Field

import pandas as pd

from tqdm.auto import tqdm

from langcodes import Language

In [None]:
# @title Configuration { display-mode: "form" }

# Notebook-wide settings, editable through the Colab form fields below.
# - AGENT_NAME: full agent resource name
#   ("projects/<project>/locations/<location>/agents/<agent-id>").
# - FLOW_ID / PAGE_ID: flow and page used as the current page when matching intents.
# - NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS: classification threshold written to
#   the flow's NLU settings; 0 leaves the flow unchanged.
# - TEST_TRAIN_SPLIT_RATE: passed as `test_size` to `train_test_split`, i.e. the
#   share of sentences held out for evaluation.
# - REINFORCEMENT_ITERATIONS: NOTE(review): not referenced by any cell visible
#   here — presumably the number of improve/retest rounds; confirm.
# - GEMINI_MODEL: model name used for token counting and content generation.
# - LANGUAGE_CODE: leave empty to fall back to the agent's default language code.
# - MAX_TOKEN_COUNT: prompt token budget; optional prompt sections that would
#   exceed it are left out.
AGENT_NAME = "projects/ucds-testsystem/locations/europe-west3/agents/e444b62c-6b83-489b-b41c-373c62093972" # @param {type:"string"}
FLOW_ID = "135bf7a3-7481-4e69-923b-f9dcfc4bec6e" # @param {type:"string"}
PAGE_ID = "b9d110b1-d061-471a-a568-68f6198efcb3" # @param {type:"string"}
NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS = 0.0001 # @param {type:"number"}
TEST_TRAIN_SPLIT_RATE = 0.95 # @param {type:"number"}
REINFORCEMENT_ITERATIONS = 3 # @param {type:"number"}
GEMINI_MODEL = "gemini-2.5-pro" # @param {type:"string"}
LANGUAGE_CODE = "" # @param {type:"string"}
MAX_TOKEN_COUNT = 1048575 # @param {type:"integer"}

# Extract the project ID from the agent name (second path segment)
project_id = AGENT_NAME.split('/')[1]
# Extract the location from the agent name (fourth path segment)
location = AGENT_NAME.split('/')[3]

In [None]:
# @title Authenticate

# Authenticate the Colab user; `project_id` was derived from AGENT_NAME in the
# configuration cell.
auth.authenticate_user(project_id=project_id)

In [None]:
# @title Initialize

# Gemini client on the Vertex AI backend.
# NOTE(review): the region is pinned to "europe-west4" instead of the agent's
# `location` — presumably because the model is not served in every region; confirm.
genai_client = genai.Client(vertexai=True, project=project_id, location="europe-west4")

# Regional agents are reached through a location-prefixed endpoint, the global
# location through the unprefixed one.
api_endpoint = (
    "dialogflow.googleapis.com"
    if location == "global"
    else f"{location}-dialogflow.googleapis.com"
)
client_options = ClientOptions(api_endpoint=api_endpoint)

# One Dialogflow CX client per resource type, all sharing the same endpoint.
agents_client = dialogflow.AgentsClient(client_options=client_options)
flows_client = dialogflow.FlowsClient(client_options=client_options)
pages_client = dialogflow.PagesClient(client_options=client_options)
intents_client = dialogflow.IntentsClient(client_options=client_options)
entity_types_client = dialogflow.EntityTypesClient(client_options=client_options)
sessions_client = dialogflow.SessionsClient(client_options=client_options)

In [None]:
# @title Get Dialogflow CX resources

# Agent: also the source of the default language when none was configured.
agent = agents_client.get_agent(name=AGENT_NAME)
print(f"Agent read successfully: {agent.display_name}")

# An empty LANGUAGE_CODE means "use the agent's default language".
LANGUAGE_CODE = LANGUAGE_CODE or agent.default_language_code

# English name of the language, used later inside the Gemini prompt.
language = Language.get(LANGUAGE_CODE).language_name("en")
print(f"Using Language: {language} ({LANGUAGE_CODE})")

# Flow that owns the page under test.
flow = flows_client.get_flow(
    name=f"{AGENT_NAME}/flows/{FLOW_ID}"
)
print(f"Flow read successfully: {flow.display_name}")

# Page used as the starting point for intent matching.
page = pages_client.get_page(
    name=f"{AGENT_NAME}/flows/{FLOW_ID}/pages/{PAGE_ID}"
)
print(f"Page read successfully: {page.display_name}")

# All intents of the agent, materialized from the paged listing.
intents = [intent for intent in intents_client.list_intents(parent=AGENT_NAME)]
print(f"Intents read successfully: {len(intents)}")

# All entity types of the agent, materialized from the paged listing.
entity_types = [entity_type for entity_type in entity_types_client.list_entity_types(parent=AGENT_NAME)]
print(f"Entity types read successfully: {len(entity_types)}")

Agent read successfully: HVB-DFCX-EWU-3
Using Language: German de
Flow read successfully: Get Customer Request
Page read successfully: Testautomation Customer Request
Intents read successfully: 704
Entity types read successfully: 234


In [None]:
# @title Check and update NLU threshold for the flow

# The API stores `classification_threshold` as a single-precision float, so the
# value read back from the flow is only approximately equal to the configured
# Python float (0.0001 comes back as ~9.9999997e-05). An exact `!=` would
# therefore rewrite the flow on every run; compare with a small tolerance instead.
NLU_THRESHOLD_TOLERANCE = 1e-6

threshold_differs = abs(
    flow.nlu_settings.classification_threshold - NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS
) > NLU_THRESHOLD_TOLERANCE

# A configured value of 0 means "leave the flow's threshold untouched".
if NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS != 0 and threshold_differs:
    flow.nlu_settings.classification_threshold = NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS
    flows_client.update_flow(request={"flow": flow})
    print(f"NLU threshold updated for flow {flow.display_name} to {NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS}")
else:
    print(f"NLU threshold for flow {flow.display_name} is already {flow.nlu_settings.classification_threshold} or NLU_THRESHOLD_ALTERNATIVE_MATCHING_INTENTS is 0. No update needed.")

NLU threshold updated for flow Get Customer Request to 0.0001


In [None]:
# @title Upload XLSX or CSV file(s) with test sentences and matching intents

# Opens the Colab upload dialog; returns {file name: raw bytes}.
uploaded_files = files.upload()

# Maps intent UUID -> list of test sentences expected to match that intent.
test_sentences = {}

# Create a dictionary to map intent display names to UUIDs
intent_display_name_to_uuid = {intent.display_name: intent.name.split('/')[-1] for intent in intents}

for file_name, file_content in uploaded_files.items():
    print(f"Processing file: {file_name}")
    try:
        # Match the extension case-insensitively so "DATA.XLSX" is accepted too.
        lower_file_name = file_name.lower()
        if lower_file_name.endswith('.csv'):
            df = pd.read_csv(io.BytesIO(file_content), on_bad_lines='warn')
        elif lower_file_name.endswith('.xlsx'):
            df = pd.read_excel(io.BytesIO(file_content))
        else:
            print(f"Skipping unsupported file type: {file_name}")
            continue

        # The first row is the header. Only the first two columns are used
        # (intent, test sentence); additional columns are ignored instead of
        # making the rename below fail.
        df = df.iloc[:, :2]
        df.columns = ['Intent', 'Test']

        for index, row in df.iterrows():
            # Empty cells are read as NaN; str(NaN) would turn them into the
            # literal text "nan", so they have to be filtered before converting.
            if pd.isna(row['Intent']) or pd.isna(row['Test']):
                continue

            intent_from_file = str(row['Intent']).strip()
            sentence = str(row['Test']).strip()

            if not sentence: # Skip if sentence is empty
                continue

            # Check if intent is a UUID
            if re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', intent_from_file):
                intent_uuid = intent_from_file
            # Check if intent is a display name and exists in the agent's intents
            elif intent_from_file in intent_display_name_to_uuid:
                intent_uuid = intent_display_name_to_uuid[intent_from_file]
            else:
                print(f"Ignoring row {index} in {file_name}: Invalid intent '{intent_from_file}'. Not a valid UUID or a known display name.")
                continue

            test_sentences.setdefault(intent_uuid, []).append(sentence)

    except Exception as e:
        # Best effort: report the broken file and continue with the next one.
        print(f"Error processing file {file_name}: {e}")

print(f"\nProcessed {len(test_sentences)} intents with a total of {sum(len(sentences) for sentences in test_sentences.values())} test sentences.")

Saving Neue_Testsätze_250416 (3).xlsx to Neue_Testsätze_250416 (3).xlsx
Processing file: Neue_Testsätze_250416 (3).xlsx

Processed 3 intents with a total of 45 test sentences.


In [None]:
# @title Function to match intent for a given text

@retry.Retry()
def match_intent(project_id, location, agent_id, text, session_id, flow_id, page_id, timeout=10, language_code=None):
    """Match `text` against the intents reachable from a given page.

    Using the same `session_id` between requests allows continuation of the conversation.

    Args:
        project_id: Google Cloud project that owns the agent.
        location: Location of the agent (e.g. "europe-west3" or "global").
        agent_id: ID (last path segment) of the agent.
        text: User utterance to match.
        session_id: ID of the session the request is sent in.
        flow_id: ID of the flow containing the page.
        page_id: ID of the page used as the current page for matching.
        timeout: Per-request timeout in seconds.
        language_code: Language of `text`. Defaults to the notebook-level
            LANGUAGE_CODE (falling back to "en-US" if that is empty), so
            utterances are no longer always sent as English.

    Returns:
        The response of the sessions client's `match_intent` call.
    """
    agent = f"projects/{project_id}/locations/{location}/agents/{agent_id}"
    session_path = f"{agent}/sessions/{session_id}"
    current_page_path = f"{agent}/flows/{flow_id}/pages/{page_id}"

    query_input = dialogflow.QueryInput(
        text=dialogflow.TextInput(text=text),
        # The query language must be the language of the test sentences,
        # otherwise the wrong NLU model is consulted.
        language_code=language_code or LANGUAGE_CODE or "en-US",
    )
    # The starting page is selected through `current_page`.
    query_parameters = dialogflow.QueryParameters(current_page=current_page_path)

    # No try/except here: failures propagate to the retry decorator and, once it
    # gives up, to the caller.
    response = sessions_client.match_intent(
        request={
            "session": session_path,
            "query_input": query_input,
            "query_params": query_parameters
        },
        timeout=timeout
    )
    return response

In [None]:
# Deliberately halt "Run all" at this point: the cells below are experimental and
# are meant to be executed manually. An explicit exception replaces the former
# plain-text line, which only stopped execution by being a syntax error.
raise RuntimeError("Stop automatic execution here as the following cells are very experimental.")

# Task
Implement reinforcement learning using Gemini to analyze an intent, its description, its training phrases, and the entities mentioned in the intent, and have Gemini suggest improvements that increase the intent matching rate on the test data. Gemini should understand that intent matching is done by a BERT NLU model trained specifically on the intent training phrases and entities. Ensure that only a split of the test data is used and that the split is always the same (e.g. for a split of 0.3, use the first 3 sentences for verification only and the remaining 7 for reinforcement learning and verification). The recommendations should be applied and retested. Gemini should then analyze the results and improvements and make further suggestions, for up to REINFORCEMENT_ITERATIONS iterations. Finally, a report should be generated describing which improvements were achieved, with guidance on what else could or should be changed for further improvements. To call Gemini, use only the model name without a project or path (e.g. "gemini-2.5-pro"); the project and location were already specified during client initialization.

## Split test data

### Subtask:
Split the `test_sentences` data into training and testing sets based on the `TEST_TRAIN_SPLIT_RATE`. The training set will be used for reinforcement learning with Gemini, and the testing set will be used for final evaluation.


**Reasoning**:
Import the necessary function and split the data into training and testing sets, then convert them back to the required dictionary format.



In [None]:
from sklearn.model_selection import train_test_split

# Flatten {intent_uuid: [sentence, ...]} into (sentence, intent_uuid) pairs so the
# sentences can be split as one list.
test_sentences_list = [
    (sentence, intent_uuid)
    for intent_uuid, sentences in test_sentences.items()
    for sentence in sentences
]

# Fixed random_state keeps the split identical between runs.
train_list, test_list = train_test_split(
    test_sentences_list,
    test_size=TEST_TRAIN_SPLIT_RATE,
    random_state=42,
)

# Regroup both halves by intent UUID.
train_sentences = {}
for sentence, intent_uuid in train_list:
    train_sentences.setdefault(intent_uuid, []).append(sentence)

test_sentences_eval = {}
for sentence, intent_uuid in test_list:
    test_sentences_eval.setdefault(intent_uuid, []).append(sentence)

print(f"Total sentences: {len(test_sentences_list)}")
print(f"Training sentences: {len(train_list)}")
print(f"Testing sentences for evaluation: {len(test_list)}")

Total sentences: 45
Training sentences: 2
Testing sentences for evaluation: 43


## Initial evaluation

Evaluate all test sentences to establish the initial baseline.

In [None]:
# Maps each test sentence to its expected intent, the best match and all
# alternative matches returned by Dialogflow CX.
evaluation_results = {}

# Dialogflow CX documents a maximum session ID length of 36 characters. A UUID
# string is exactly 36 characters long, so it is used without any prefix
# (the former "baseline-" prefix pushed the ID to 45 characters).
session_id = str(uuid.uuid4())


def _describe_match(match):
    """Return (intent_id, display_name, confidence) for a single match.

    Matches that carry no intent are reported as "N/A".
    """
    if match.intent:
        return match.intent.name.split('/')[-1], match.intent.display_name, match.confidence
    return "N/A", "N/A", match.confidence


# Use ThreadPoolExecutor for parallel execution during evaluation
with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed
    future_to_sentence_eval = {}
    for intent_uuid, sentences in test_sentences.items():
        for sentence in sentences:
            future = executor.submit(
                match_intent,
                project_id,
                location,
                agent.name.split('/')[-1],
                sentence,
                session_id,
                FLOW_ID,
                PAGE_ID
            )
            future_to_sentence_eval[future] = (sentence, intent_uuid)

    # Wrap as_completed with tqdm for a progress bar
    for future in tqdm(as_completed(future_to_sentence_eval), total=len(future_to_sentence_eval), desc="Evaluating Sentences"):
        sentence, expected_intent_uuid = future_to_sentence_eval[future]
        try:
            response = future.result()

            if response and response.matches:
                # The first match is the best one; the rest are alternatives.
                matched_intent_id, matched_intent_display_name, matched_intent_confidence = _describe_match(response.matches[0])

                alternative_matches = []
                for match in response.matches[1:]:
                    alternative_intent_id, alternative_intent_display_name, alternative_intent_confidence = _describe_match(match)
                    alternative_matches.append({
                        "intent_id": alternative_intent_id,
                        "display_name": alternative_intent_display_name,
                        "confidence": alternative_intent_confidence
                    })

                evaluation_results[sentence] = {
                    "expected_intent_uuid": expected_intent_uuid,
                    "matched_intent_uuid": matched_intent_id,
                    "matched_intent_display_name": matched_intent_display_name,
                    "matched_intent_confidence": matched_intent_confidence,
                    "alternative_matches": alternative_matches
                }
            else:
                evaluation_results[sentence] = {
                    "expected_intent_uuid": expected_intent_uuid,
                    "matched_intent_uuid": "No match",
                    "matched_intent_display_name": "No match",
                    "matched_intent_confidence": 0.0,
                    "alternative_matches": []
                }
        except Exception as exc:
            # Keep the failure next to the sentence so the remaining sentences
            # are still evaluated and the error stays visible in the results.
            evaluation_results[sentence] = {
                "expected_intent_uuid": expected_intent_uuid,
                "matched_intent_uuid": f"Error: {exc}",
                "matched_intent_display_name": f"Error: {exc}",
                "matched_intent_confidence": 0.0,
                "alternative_matches": []
            }
            print(f"Evaluation sentence '{sentence}' generated an exception: {exc}")


Evaluating Sentences:   0%|          | 0/45 [00:00<?, ?it/s]

## Reinforcement Learning Loop

### Subtask: Select Training Data

Choose a subset of the training data from `train_sentences` for the current reinforcement learning iteration.

**Reasoning**:
Select a subset of the training data for the first reinforcement learning iteration. For simplicity in this first iteration, we will use all the training data.

In [None]:
# The first iteration works on the complete training split.
current_train_sentences = train_sentences

print(f"Selected {sum(map(len, current_train_sentences.values()))} training sentences for the current iteration.")

Selected 2 training sentences for the current iteration.


### Subtask: Gemini Analysis

Use Gemini to analyze the selected training data, the corresponding intent definition (including training phrases and entities), and the NLU model's behavior. Gemini should identify areas for improvement in the intent definition to increase matching rates.

**Reasoning**:
Iterate through the training sentences, retrieve the corresponding intent details (display name, training phrases, and entities), and use Gemini to analyze this information along with the evaluation results to generate suggestions for improvement.

In [None]:
# Pydantic schema passed to Gemini as `response_schema`, i.e. the JSON structure
# the model has to answer with.
# Documented with `#` comments on purpose: a class docstring would become part of
# the generated schema.
class IntentSuggestions(BaseModel):
    # Free-text analysis of the intent.
    analysis: str = Field(description="Detailed analysis of the intent and suggestions for improvement.")
    # Indices into the numbered training phrase list shown in the prompt.
    phrases_to_remove: list[int] = Field(description="List of training phrase indices to remove.")
    # New phrases, annotated as "[text](parameter_id)" like the phrases in the prompt.
    phrases_to_add: list[str] = Field(description="List of new training phrases to add, including parameter annotations in the form [parameter text](parameter_id) with 'parameter text' included in the list of entities or their synonym of the entity type corresponding to the parameter and parameter_id matching one of the existing parameters of the intent.")


# Function to get intent details
def get_intent_details(intent_uuid, intents, entity_types):
    """Collect the prompt-relevant details of one intent.

    Args:
        intent_uuid: ID (last segment of the resource name) of the intent.
        intents: Iterable of intent objects to search.
        entity_types: Iterable of entity type objects used to resolve the
            entity types referenced by the intent's parameters.

    Returns:
        A tuple ``(display_name, description, training_phrases, parameters,
        entity_type_details)``; all five elements are ``None`` when no intent
        with the given ID exists.
        ``training_phrases`` are strings with annotated parts rendered as
        ``[text](parameter_id)``; ``parameters`` are dicts with ``id`` and
        ``entity_type``; ``entity_type_details`` are dicts with
        ``parameter_id``, ``display_name`` and ``entities`` (one
        ``"value: synonym, ..."`` string per entity) for every parameter whose
        entity type is present in ``entity_types``.
    """
    def _last_segment(resource_name):
        return resource_name.split('/')[-1]

    def _render_part(part):
        # Annotated parts carry a parameter id; plain parts are copied as-is.
        if part.parameter_id:
            return f"[{part.text}]({part.parameter_id})"
        return part.text

    found = next(
        (candidate for candidate in intents if _last_segment(candidate.name) == intent_uuid),
        None,
    )
    if not found:
        return None, None, None, None, None

    phrases = [
        "".join(_render_part(part) for part in phrase.parts)
        for phrase in found.training_phrases
    ]

    parameter_summaries = [
        {"id": param.id, "entity_type": _last_segment(param.entity_type)}
        for param in found.parameters
    ]

    # Resolve each parameter's entity type; types missing from `entity_types`
    # are skipped.
    entity_type_details = []
    for param in found.parameters:
        wanted_id = _last_segment(param.entity_type)
        resolved = next(
            (candidate for candidate in entity_types if _last_segment(candidate.name) == wanted_id),
            None,
        )
        if not resolved:
            continue
        entity_type_details.append({
            "parameter_id": param.id,
            "display_name": resolved.display_name,
            "entities": [
                f"{entity.value}: {', '.join(entity.synonyms)}"
                for entity in resolved.entities
            ],
        })

    return found.display_name, found.description, phrases, parameter_summaries, entity_type_details

# Maps intent UUID -> raw Gemini answer (JSON text following IntentSuggestions)
# or an error message when the request failed.
gemini_suggestions = {}

# Analyze every intent that has sentences in the current training selection.
for intent_uuid, sentences in tqdm(current_train_sentences.items(), desc="Analyzing Intents with Gemini"):
    display_name, description, training_phrases, parameters, entities_details = get_intent_details(intent_uuid, intents, entity_types)

    if not display_name:
        print(f"Could not find intent with UUID: {intent_uuid}. Skipping.")
        continue

    # Get relevant evaluation results for this intent's training sentences
    relevant_eval_results = {sentence: result for sentence, result in evaluation_results.items() if result['expected_intent_uuid'] == intent_uuid and sentence in sentences}

    # Number the phrases: Gemini refers to phrases to remove by these indices.
    training_phrases_string = "".join(f"{i}: {phrase}\n" for i, phrase in enumerate(training_phrases))

    # Entity details of the target intent first; overlapping intents are appended below.
    all_entities_details = entities_details[:]
    # Prepare the prompt for Gemini
    prompt = f"""You are an expert NLU analyst tasked with optimizing an intent within a BERT-based Natural Language Understanding (NLU) system.

Model Context: Because this is a BERT-based model, it relies on deep contextual understanding, sentence structure, and semantic relationships (embeddings), rather than simple keyword matching. Your goal is to refine the training data to sharpen the semantic boundaries of the target intent, improving its precision and recall (F1 score), and reducing confusion with other intents.

Some training phrases may contain intentional spelling or grammar errors to capture ASR (Automatic Speech Recognition) transcription errors.

Analyze the following intent and related data and provide concrete recommendations.

## Analysis Objectives

1.  **Identify Weaknesses & Coverage Gaps:** Determine why the target intent is not matching correctly. Is the training data too narrow (poor recall), too broad (poor precision), lacking variety, or failing to cover the scope defined in the description?
2.  **Analyze Overlaps (Confusion Analysis):** For each Alternative Matched Intent, explain the *root cause* of the confusion. Focus on semantic similarities, shared vocabulary, or ambiguous training phrases in the Target Intent that cause the BERT model to struggle with differentiation.
3.  **Entity Analysis:** Analyze if the existing entity types are being used effectively. You may suggest changes to the entity type definitions in the analysis, but recommendations (ADD/REMOVE) must use the entity types as they currently exist.

## Recommendation Guidelines

Your recommendations (ADD/REMOVE) must adhere strictly to these rules:

1.  **Scope Management:** Do **not** expand the scope of the Target Intent beyond its description. Recommendations should only sharpen the existing scope and reduce ambiguity.
2.  **Entity Constraints:** You must **not** add new entities or entity types in the training phrases. All new training phrases must only use the provided Mentioned Entity Types.
3.  **Language Requirements:** **Crucial:** All analysis, explanations, and suggested training phrases must be in **{language}**.
4.  **Natural Language and Stop Words (Crucial for BERT):**
    *   Prioritize natural, idiomatic, conversational **{language}**.
    *   **Include common {language} stop words** (articles, possessive pronouns, prepositions) where they are essential for a natural-sounding, grammatically correct utterance. BERT requires these for contextual understanding.
    *   Avoid *unnecessary* filler words (e.g., "umm," "please"), but do not strip necessary stop words.
5.  **Handling Existing Errors:** Training phrases in the existing dataset that contain only one word, incomplete words, transcription errors (ASR), or spelling/grammar mistakes must **not** be marked for removal (they provide robustness), unless they are the direct cause of severe, unresolvable cross-intent confusion. You may ADD the corrected or more complete versions of these phrases.
6.  **Quantity Guideline:** If the intent already has a large number of phrases (e.g., >100), be highly selective with ADD recommendations, focusing only on phrases that resolve specific overlaps or critical gaps.

## Recommendation Types

*   **ADD:**
    *   Suggest new phrases that specifically help disambiguate the Target Intent from the Alternative Matched Intents.
    *   Increase variety in sentence structure (questions, commands, statements) and coverage of real-world user expressions *within the scope*.
    *   Explain the rationale for the addition.
*   **REMOVE:**
    *   Identify phrases (by index) that are highly ambiguous, out of scope, or directly cause unresolvable confusion with Alternative Matched Intents.
    *   Explain the rationale for removal.
    *   *Note: To update a phrase, you must REMOVE the old index and ADD the new version.*

# Intent to Analyze

Intent Display Name: {display_name}
Description: {description}
Parameters: {parameters}
Training Phrases (with parameter annotations):
{training_phrases_string}

# Test Sentences and Matching Results (from evaluation):
"""
    if relevant_eval_results:
        for sentence, result in relevant_eval_results.items():
            prompt += f"""
Sentence: {sentence}
Expected Intent: {display_name}
Matched Intent: {result['matched_intent_display_name']}
Matched Intent Confidence: {result['matched_intent_confidence']}
"""
            if result['alternative_matches']:
                for i, alt_match in enumerate(result['alternative_matches']):
                    prompt += f"Alternative Match #{i+1}: {alt_match['display_name']} (Confidence: {alt_match['confidence']})\n"

    else:
        prompt += "No relevant evaluation results found for this intent in the training data.\n"

    # Token count of the mandatory part; optional sections are only appended
    # while the total stays within MAX_TOKEN_COUNT.
    current_tokens = genai_client.models.count_tokens(model=GEMINI_MODEL, contents=prompt).total_tokens

    # Collect and deduplicate alternative intents
    alternative_intents_info = {}
    if relevant_eval_results:
        for result in relevant_eval_results.values():
            for alt_match in result.get('alternative_matches', []):
                alt_intent_uuid = alt_match.get('intent_id')
                if alt_intent_uuid and alt_intent_uuid != intent_uuid and alt_intent_uuid not in alternative_intents_info:
                    alt_display_name, alt_description, alt_training_phrases, alt_parameters, alt_entities_details = get_intent_details(alt_intent_uuid, intents, entity_types)
                    if alt_display_name:
                        all_entities_details.extend(alt_entities_details)
                        alt_training_phrases_string = "".join(f"- {phrase}\n" for phrase in alt_training_phrases)
                        alternative_intents_info[alt_intent_uuid] = {
                            "display_name": alt_display_name,
                            "description": alt_description,
                            "parameters": alt_parameters,
                            "training_phrases": alt_training_phrases_string
                        }

    if alternative_intents_info:
        prompt += "\n# Overlapping Intents\n"
        for alt_intent_uuid, alt_info in alternative_intents_info.items():
            alternative_intent_prompt = f"\nAlternative Matched Intent Display Name: ({alt_info['display_name']})\n"
            alternative_intent_prompt += f"Alternative Matched Description: {alt_info['description']}\n"
            alternative_intent_prompt += f"Alternative Matched Parameters: {alt_info['parameters']}\n"
            alternative_intent_prompt += f"Alternative Matched Training Phrases:\n{alt_info['training_phrases']}"
            alternative_intent_token_count = genai_client.models.count_tokens(model=GEMINI_MODEL, contents=alternative_intent_prompt).total_tokens
            # Stop adding overlapping intents once the token budget would be exceeded.
            if current_tokens + alternative_intent_token_count > MAX_TOKEN_COUNT:
                break
            current_tokens += alternative_intent_token_count
            prompt += alternative_intent_prompt

    # Deduplicate entity types by display name, keeping the FIRST occurrence.
    # The target intent's entries come first in `all_entities_details`, so the
    # "Parameter ID" shown for a shared entity type is the target intent's own
    # parameter and not the one of an overlapping intent (a dict comprehension
    # would keep the last occurrence instead).
    unique_entities = {}
    for entity_detail in all_entities_details:
        unique_entities.setdefault(entity_detail['display_name'], entity_detail)

    if unique_entities:
        prompt += "\n# Entity Types and Entities used in training phrases:\n"
        for entity_detail in unique_entities.values():
            entities_string = f"\nParameter ID: {entity_detail['parameter_id']}\nEntity Type: {entity_detail['display_name']}\nEntities:\n"
            for entity in entity_detail['entities']:
                entities_string += f"- {entity}\n"
            entities_token_count = genai_client.models.count_tokens(model=GEMINI_MODEL, contents=entities_string).total_tokens
            # Stop adding entity types once the token budget would be exceeded.
            if current_tokens + entities_token_count > MAX_TOKEN_COUNT:
                break
            prompt += entities_string
            current_tokens += entities_token_count

    print(prompt)
    print(f"Prompt tokens: {current_tokens}")

    try:
        # Call Gemini API; the answer is constrained to the IntentSuggestions schema.
        response = genai_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type='application/json',
                response_schema=IntentSuggestions,
            )
        )
        gemini_suggestions[intent_uuid] = response.text
    except Exception as e:
        # Record the failure for this intent and continue with the next one.
        gemini_suggestions[intent_uuid] = f"Error generating suggestions: {e}"
        print(f"Error processing intent {display_name} ({intent_uuid}): {e}")
    # The former debugging `break` (marked "TODO: Remove") was dropped so that
    # every intent of the training selection is analyzed, not only the first.


# You can now review the gemini_suggestions dictionary

Analyzing Intents with Gemini:   0%|          | 0/1 [00:00<?, ?it/s]

You are an expert NLU analyst tasked with optimizing an intent within a BERT-based Natural Language Understanding (NLU) system.

Model Context: Because this is a BERT-based model, it relies on deep contextual understanding, sentence structure, and semantic relationships (embeddings), rather than simple keyword matching. Your goal is to refine the training data to sharpen the semantic boundaries of the target intent, improving its precision and recall (F1 score), and reducing confusion with other intents.

Some training phrases may contain intentional spelling or grammar errors to capture ASR (Automatic Speech Recognition) transcription errors.

Analyze the following intent and related data and provide concrete recommendations.

## Analysis Objectives

1.  **Identify Weaknesses & Coverage Gaps:** Determine why the target intent is not matching correctly. Is the training data too narrow (poor recall), too broad (poor precision), lacking variety, or failing to cover the scope defined in t

In [None]:
# Show the stored answer (JSON text or error message) for every analyzed intent
# instead of looking up one hard-coded intent UUID, which raises a KeyError as
# soon as a different agent or test file is used.
for suggested_intent_uuid, suggestion in gemini_suggestions.items():
    print(f"Intent {suggested_intent_uuid}:")
    print(suggestion)

{
  "analysis": "Der Intent 'AGB-Zustimmung#InfoboxN' ist darauf ausgelegt, die explizite Zustimmung des Nutzers zu den AGB zu erfassen, insbesondere im Kontext einer drohenden Kontokündigung. Die Analyse der Trainingsdaten und der Konfusions-Intents zeigt jedoch einige Schwachstellen:\n\n1.  **Mangelnde Trennschärfe**: Der Intent enthält zahlreiche Trainingsphrasen, die lediglich allgemeine Fragen oder Informationsanfragen zu den AGB darstellen (z. B. 27: 'Frage wegen AGB', 24: 'Nachfrage zur AGB', 26: 'Unklarheiten bezüglich AGBs'). Diese Phrasen gehören eindeutig in den Scope des Konfusions-Intents 'Allgemeine_Geschäftsbedingungen_SH#Infobox', welcher für 'Infos/Fragen zu AGB' konzipiert ist. Diese Überschneidung ist die Hauptursache für die Konfusion.\n\n2.  **Übermäßig generische Phrasen**: Phrasen, die nur aus Keywords wie 'AGB' (61), 'Geschäftsbedingungen' (82) oder 'AGB Änderung' (22) bestehen, sind zu mehrdeutig. Sie signalisieren nicht klar die Absicht der *Zustimmung* und kö