In [2]:
import pandas as pd
import yaml

# Read the conversation export, keeping only rows that have both a user turn
# and an AI reply.
file_path = '../conversation_dataset_with_uuid.csv'
df = (
    pd.read_csv(file_path)
    .dropna(subset=['User', 'AI Response'])
)

# One group of rows per conversation.
grouped = df.groupby('Conversation ID')

# Empty containers for the NLU, domain, and stories files; they are filled
# while walking the conversations and serialised afterwards.
nlu_data = dict(version='3.1', nlu=[])
domain_data = dict(version='3.1', intents=[], responses={})
stories_data = dict(version='3.1', stories=[])

# Step 2: Map data based on the specifications
#
# For every conversation this produces:
#   * one NLU entry whose intent is "<Scene with spaces as underscores><n>",
#     n being the running number of conversations seen for that scene, with
#     every user turn as an example line;
#   * one "utter_<strategy>" response variation in the domain per AI turn;
#   * one story: the intent followed by one utter action per AI turn.
scene_counter = {}  # Next intent number to hand out, keyed by scene
known_intents = set(domain_data['intents'])  # Constant-time duplicate check for the intent list
for conversation_id, group in grouped:
    # Map intent based on scene with consecutive numbering.
    # Only 'User' and 'AI Response' were null-filtered, so Scene may be missing
    # or non-string; normalise it before calling str.replace().
    scene = group['Scene'].iloc[0]
    scene = str(scene) if pd.notnull(scene) else "unknown"
    intent_number = scene_counter.get(scene, 1)
    scene_counter[scene] = intent_number + 1
    intent = f"{scene.replace(' ', '_')}{intent_number}"

    # Add to NLU data: one "- <utterance>" line per user turn. Line breaks
    # inside an utterance are flattened to spaces so that every example stays
    # on its own "- " line.
    user_lines = (" ".join(str(text).splitlines()) for text in group['User'])
    examples = "\n".join(f"- {line}" for line in user_lines)
    nlu_data['nlu'].append({'intent': intent, 'examples': examples})

    # Add to Domain data
    if intent not in known_intents:
        known_intents.add(intent)
        domain_data['intents'].append(intent)

    # Walk the AI turns once, registering each reply under its response key
    # and collecting the story steps. The story is assembled locally instead
    # of being searched for in stories_data on every row.
    steps = [{'intent': intent}]
    for ai_response, ai_strategy in zip(group['AI Response'], group['AI Strategy']):
        if pd.isnull(ai_strategy):
            ai_strategy = "initial"  # Turns without a strategy share one response key
        action_name = str(ai_strategy).lower().replace(" ", "_")
        response_key = f"utter_{action_name}"

        # Append the AI response as a new variation of this response
        domain_data['responses'].setdefault(response_key, []).append({'text': ai_response})
        steps.append({'action': response_key})

    # Add stories
    stories_data['stories'].append({
        'story': f"Story for {conversation_id}",
        'steps': steps
    })

# Step 3: Save to YAML files
nlu_output_path = 'nlu.yml'
domain_output_path = 'domain.yml'
stories_output_path = 'stories.yml'

# Each structure goes to its own file, written in block style with keys kept
# in insertion order.
outputs = (
    (nlu_output_path, nlu_data),
    (domain_output_path, domain_data),
    (stories_output_path, stories_data),
)
for output_path, payload in outputs:
    with open(output_path, 'w') as handle:
        yaml.dump(payload, handle, default_flow_style=False, sort_keys=False)

# Report where the files were written
print(
    "Files generated successfully:",
    f"NLU file: {nlu_output_path}",
    f"Domain file: {domain_output_path}",
    f"Stories file: {stories_output_path}",
    sep="\n",
)

Files generated successfully:
NLU file: nlu.yml
Domain file: domain.yml
Stories file: stories.yml


In [4]:
# Run Rasa training from the notebook's working directory.
# NOTE(review): the generation cell writes nlu.yml and stories.yml into the working
# directory itself; `rasa train` presumably looks for training data under ./data by
# default — confirm the generated files are actually picked up, or pass --data/--domain.
# NOTE(review): the traceback below passes through rasa/core/domain.py; confirm the
# installed Rasa version can read files declared as version '3.1'.
!rasa train

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\MIS\anaconda3\Scripts\rasa.exe\__main__.py", line 4, in <module>
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\__init__.py", line 8, in <module>
    from rasa.run import run
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\run.py", line 7, in <module>
    from rasa.core.lock_store import LockStore
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\core\__init__.py", line 5, in <module>
    from rasa.core.train import train
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\core\train.py", line 11, in <module>
    from rasa.core.domain import Domain
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\core\domain.py", line 11, in <module>
    from rasa.utils.common import (
  File "C:\Users\MIS\anaconda3\Lib\site-packages\rasa\utils\common.py", line 8, in <module>
    import rasa.core.utils
  File "C:\Users\MIS\a