# Data Generation

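This notebook builds several small synthetic sets of intent-labelled questions and combines them into a single dataset, which is saved as a CSV file.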

In [2]:
import pandas as pd

In [3]:
# Data for the sample dataset: hand-written seed questions, grouped by the
# integer label of the intent they illustrate.
seed_questions = {
    # Intent 1: Why is action A not used in the plan?
    1: [
        "Why is action A not included in the project roadmap?",
        "What are the reasons for excluding action A from the plan?",
        "Why was action A omitted from the strategy?",
        "Why didn't we consider action A for the project plan?",
        "Why was action A left out of the final plan?",
    ],
    # Intent 2: Why is action A used in the plan?
    2: [
        "Why was action A chosen for the implementation?",
        "What made action A a suitable choice for the plan?",
        "Why did we decide to include action A in the strategy?",
        "What are the benefits of using action A in the plan?",
        "Why was action A prioritized in the project?",
    ],
    # Intent 3: Why is action A used in state S, rather than action B?
    3: [
        "Why is action A applied in state S instead of action B?",
        "What are the reasons for using action A over action B in state S?",
        "Why does the plan favor action A in state S over action B?",
        "What made action A a better choice than action B in state S?",
        "Why was action A preferred in state S instead of action B?",
    ],
}

# Flatten the groups into two parallel columns. Both comprehensions walk the
# groups in the same order, so every question lines up with its label.
data = {
    'text': [
        question
        for group in seed_questions.values()
        for question in group
    ],
    'label': [
        label
        for label, group in seed_questions.items()
        for _ in group
    ],
}

# Create a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df.head()

Unnamed: 0,text,label
0,Why is action A not included in the project ro...,1
1,What are the reasons for excluding action A fr...,1
2,Why was action A omitted from the strategy?,1
3,Why didn't we consider action A for the projec...,1
4,Why was action A left out of the final plan?,1


In [4]:
# Define intents: each intent description maps to its integer class label.
intents = {
    "1. Why is action A not used in the plan?": 1,
    "2. Why is action A used in the plan?": 2,
    "3. Why is action A used rather than action B?": 3
}

# Define example questions for each intent (keys must match `intents`).
questions = {
    "1. Why is action A not used in the plan?": [
        "Why was action A excluded from the plan?",
        "What were the reasons for omitting action A from the plan?",
        "Can you explain why action A was not considered in the plan?",
        "Why didn't the plan include action A?",
        "What is the rationale for not using action A in the plan?"
    ],
    "2. Why is action A used in the plan?": [
        "Why was action A included in the plan?",
        "What are the reasons for using action A in the plan?",
        "Can you explain why action A is part of the plan?",
        "Why did the plan incorporate action A?",
        "What is the rationale for including action A in the plan?"
    ],
    "3. Why is action A used rather than action B?": [
        "Why was action A chosen over action B?",
        "What made action A more suitable than action B?",
        "Can you explain why action A is preferred to action B?",
        "Why did the plan opt for action A instead of action B?",
        "What are the reasons for selecting action A rather than action B?"
    ]
}

# Generate dataset: one [question, label] row per example, intent by intent.
data = [
    [question, label]
    for intent, label in intents.items()
    for question in questions[intent]
]

# Convert to DataFrame
df_2 = pd.DataFrame(data, columns=["text", "label"])

# Shuffle the DataFrame to ensure the data is mixed. The seed is fixed so that
# re-running the notebook reproduces the same row order in the saved dataset.
df_2 = df_2.sample(frac=1, random_state=42).reset_index(drop=True)

df_2.head()

Unnamed: 0,text,label
0,Can you explain why action A is preferred to a...,3
1,Can you explain why action A is part of the plan?,2
2,What is the rationale for not using action A i...,1
3,What are the reasons for using action A in the...,2
4,Why was action A included in the plan?,2


In [5]:
# Generate new dataset for each domain
def generate_domain_data(domain_questions, intent_label):
    """Flatten a domain's per-intent questions into ``[question, label]`` rows.

    Args:
        domain_questions: Mapping from an intent key to the questions that
            express that intent.
        intent_label: Lookup, indexed by the same intent keys, giving the
            label stored with each question.

    Returns:
        A list of two-element lists ``[question, label]``, ordered by the
        mapping's iteration order and, within an intent, by question order.

    Note:
        The label is looked up once per question, so an intent with no
        questions is never looked up in ``intent_label``.
    """
    return [
        [text, intent_label[name]]
        for name, examples in domain_questions.items()
        for text in examples
    ]

In [6]:
# Define new grounded example questions for each intent in the Sokoban, Rubik's Cube, N-Puzzle, and FreeCell domains.
# All four domains use the same five phrasings per intent and differ only in
# the concrete action names, so the phrasings are kept once as templates:
# {a} is the action being asked about, {b} the alternative action.
_QUESTION_TEMPLATES = {
    "1. Why is action A not used in the plan?": [
        "Why was '{a}' excluded from the plan?",
        "What were the reasons for omitting '{a}' from the plan?",
        "Can you explain why '{a}' was not considered in the plan?",
        "Why didn't the plan include '{a}'?",
        "What is the rationale for not using '{a}' in the plan?",
    ],
    "2. Why is action A used in the plan?": [
        "Why was '{a}' included in the plan?",
        "What are the reasons for using '{a}' in the plan?",
        "Can you explain why '{a}' is part of the plan?",
        "Why did the plan incorporate '{a}'?",
        "What is the rationale for including '{a}' in the plan?",
    ],
    "3. Why is action A used rather than action B?": [
        "Why was '{a}' chosen over '{b}'?",
        "What made '{a}' more suitable than '{b}'?",
        "Can you explain why '{a}' is preferred to '{b}'?",
        "Why did the plan opt for '{a}' instead of '{b}'?",
        "What are the reasons for selecting '{a}' rather than '{b}'?",
    ],
}


def _grounded_questions(action_a, action_b):
    """Fill the shared question templates with one domain's action names.

    Args:
        action_a: Text substituted for ``{a}`` in every template.
        action_b: Text substituted for ``{b}`` (only the intent-3 templates
            contain it).

    Returns:
        A new dict with the same intent keys as the template table, each
        mapped to a new list of formatted question strings.
    """
    return {
        intent: [template.format(a=action_a, b=action_b) for template in templates]
        for intent, templates in _QUESTION_TEMPLATES.items()
    }


sokoban_questions = _grounded_questions("push box to the left", "move to the right")

# Sokoban additionally has free-form questions; they follow the templated ones.
sokoban_questions["1. Why is action A not used in the plan?"].extend([
    "The player doesn't push any boxes. Shouldn't pushing a box (action A) be part of the plan to reach the goals?",
    "There are empty storage spaces, but the plan doesn't use action A (push box) to utilize them. Is there a reason for this?",
    "I noticed that action A is not included in the plan. Can you explain why?",
    "The plan seems to be missing action A. Is there a reason for this?",
    "Is there a specific reason why we're not considering action A?",
    "The data suggests action A might be helpful. Why isn't it part of the plan?",
])
sokoban_questions["2. Why is action A used in the plan?"].extend([
    "The plan involves the player pushing a box (action A) even though there are no empty storage spaces nearby. Can you explain the purpose?",
    "The solution seems to have the player moving a box diagonally (action A). Isn't this inefficient? Why is it used?",
    "I'm curious why action A is included in the plan. Can you elaborate?",
    "It looks like action A is being used in the plan. Can you explain the reasoning behind this?",
    "Can you explain the benefits of using action A in the plan?",
])
sokoban_questions["3. Why is action A used rather than action B?"].extend([
    "The plan has the player pushing a box to the left (action A). Wouldn't pushing it upwards (action B) be a shorter path?",
    "The solution uses action A (pull box) to get to the goal. But wouldn't pushing the box (action B) be a more strategic move to avoid getting stuck?",
    "The plan uses action A, but wouldn't action B be a better choice? Why?",
    "I see that action A is chosen over action B in the plan. Can you explain the decision?",
    "What factors led to choosing action A over action B?",
    "Is there a specific drawback to using action B instead of action A?",
])

rubiks_cube_questions = _grounded_questions(
    "rotate the top face clockwise",
    "rotate the front face counterclockwise",
)

n_puzzle_questions = _grounded_questions(
    "move the blank space up",
    "move the blank space left",
)

freecell_questions = _grounded_questions(
    "move card to a free cell",
    "move card to a foundation",
)

In [7]:
# Define the intent labels: intents are numbered 1, 2, 3 in the order listed,
# matching the number that prefixes each intent key.
intent_names = (
    "1. Why is action A not used in the plan?",
    "2. Why is action A used in the plan?",
    "3. Why is action A used rather than action B?",
)
intent_label = {name: number for number, name in enumerate(intent_names, start=1)}

In [8]:
# Generate data for each domain: every domain's question dict is flattened
# into [question, label] rows using the shared intent -> label mapping.
sokoban_data, rubiks_cube_data, n_puzzle_data, freecell_data = (
    generate_domain_data(domain_questions, intent_label)
    for domain_questions in (
        sokoban_questions,
        rubiks_cube_questions,
        n_puzzle_questions,
        freecell_questions,
    )
)

# Combine all data, keeping the domain order Sokoban, Rubik's Cube, N-Puzzle, FreeCell
combined_data = [
    *sokoban_data,
    *rubiks_cube_data,
    *n_puzzle_data,
    *freecell_data,
]

# Convert to DataFrame
combined_df = pd.DataFrame(combined_data, columns=["text", "label"])
combined_df

Unnamed: 0,text,label
0,Why was 'push box to the left' excluded from t...,1
1,What were the reasons for omitting 'push box t...,1
2,Can you explain why 'push box to the left' was...,1
3,Why didn't the plan include 'push box to the l...,1
4,What is the rationale for not using 'push box ...,1
...,...,...
72,Why was 'move card to a free cell' chosen over...,3
73,What made 'move card to a free cell' more suit...,3
74,Can you explain why 'move card to a free cell'...,3
75,Why did the plan opt for 'move card to a free ...,3


In [10]:
# Stack the three partial datasets (seed examples, generic templates, grounded
# domain questions) into one frame with a fresh 0..n-1 index.
frames = [df, df_2, combined_df]
df_final = pd.concat(frames, ignore_index=True)

# Save to CSV (the index is omitted, so the file has only the frame's columns)
file_path = "./intent_classification_dataset.csv"
df_final.to_csv(file_path, index=False)