In [1]:
import ast
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split



In [2]:
# Directory that holds the input CSV files (a relative path).
DATA_PATH = Path("../data/iglu/")
# Input files located inside DATA_PATH.
dataset_path = DATA_PATH.joinpath("clarifying_questions_train.csv")
question_bank_path = DATA_PATH.joinpath("question_bank.csv")

In [3]:
# Read the clarifying-questions table from disk and preview its first rows.
df = pd.read_csv(filepath_or_buffer=dataset_path)
df.head(n=5)

Unnamed: 0,GameId,ClarifyingQuestion,InitializedWorldPath,InputInstruction,IsInstructionClear,Partition,qrel,qbank
0,CQ-game-1,,initial_world_states/builder-data/34-c135/step-20,Destroy the two blocks over the row that is co...,Yes,train,,
1,CQ-game-10,,initial_world_states/builder-data/12-c139/step-22,Place one yellow block on top of each purple b...,Yes,train,,
2,CQ-game-10,,initial_world_states/builder-data/12-c139/step-22,Place one yellow block on top of each purple b...,Yes,train,,
3,CQ-game-1000,Which color blocks?,initial_world_states/builder-data/23-c135/step-2,Place four blocks to the east of the highest b...,No,train,q_149,"'q_696', 'q_203', 'q_516', 'q_677', 'q_769', '..."
4,CQ-game-1001,,initial_world_states/builder-data/4-c96/step-18,Locate the purple structure and the block on i...,Yes,train,,


In [4]:
# Read the question bank (qrel id -> clarifying question text) and preview it.
q_df = pd.read_csv(filepath_or_buffer=question_bank_path)
q_df.head(n=5)

Unnamed: 0,qrel,ClarifyingQuestion
0,q_149,Which color blocks?
1,q_436,After you remove the one green block there are...
2,q_111,in any square west of the red blocks?
3,q_653,Should I destory east or west puyrple?
4,q_170,Where exactly am I placing the blue blocks?


In [5]:
def unwrap_bank(row, question_bank=None):
    """Resolve a row's serialized candidate ids against the question bank.

    Args:
        row: Mapping or ``pandas.Series`` whose ``"qbank"`` entry is a string
            containing a Python literal (parsed with ``ast.literal_eval``)
            that lists ``qrel`` ids.
        question_bank: DataFrame with ``"qrel"`` and ``"ClarifyingQuestion"``
            columns. Defaults to the notebook-level ``q_df``.

    Returns:
        A ``(ids, questions)`` tuple of two equally long lists: the matching
        ``qrel`` ids and their ``ClarifyingQuestion`` texts.

        The lists follow the row order of the question bank, NOT the order
        in which the ids appear in ``row["qbank"]``; ids that are absent from
        the bank are silently dropped.

    Raises:
        Whatever ``ast.literal_eval`` raises when ``row["qbank"]`` is not a
        valid Python literal (for example a missing/NaN cell).
    """
    bank = q_df if question_bank is None else question_bank

    candidate_ids = ast.literal_eval(row["qbank"])
    # A cell holding one quoted id parses to a bare str, which Series.isin
    # rejects with TypeError; treat it as a one-element collection instead.
    if isinstance(candidate_ids, str):
        candidate_ids = [candidate_ids]

    # Boolean-mask filtering keeps the bank's own row order.
    matches = bank[bank["qrel"].isin(candidate_ids)]
    return matches["qrel"].tolist(), matches["ClarifyingQuestion"].tolist()

# Keep only the rows whose instruction was flagged as not clear; copy so the
# column assignments below do not write into a view of `df`.
unclear_mask = df["IsInstructionClear"] == "No"
ranking_df = df.loc[unclear_mask].copy()

# unwrap_bank yields an (ids, questions) pair per row; transpose the pairs
# into two parallel sequences, one per output column.
unwrapped_pairs = ranking_df.apply(unwrap_bank, axis=1)
bank_ids, bank_questions = zip(*unwrapped_pairs)
ranking_df["qbank"] = bank_ids
ranking_df["candidates"] = bank_questions

# Drop the columns that are not carried into the ranking table.
unused_columns = ["GameId", "InitializedWorldPath", "IsInstructionClear", "Partition"]
ranking_df = ranking_df.drop(columns=unused_columns)

In [7]:
# Display the assembled ranking table (pandas abbreviates long frames with a "..." row).
ranking_df

Unnamed: 0,ClarifyingQuestion,InputInstruction,qrel,qbank,candidates
3,Which color blocks?,Place four blocks to the east of the highest b...,q_149,"[q_149, q_479, q_467, q_925, q_960, q_737, q_7...","[Which color blocks?, What purple blocks? Ther..."
5,After you remove the one green block there are...,facing north destroy a green block located on ...,q_436,"[q_436, q_321, q_362, q_573, q_514, q_204, q_7...",[After you remove the one green block there ar...
16,in any square west of the red blocks?,Stack seven green blocks immediately to the we...,q_111,"[q_111, q_170, q_925, q_288, q_673, q_573, q_7...","[in any square west of the red blocks?, Where ..."
25,Should I destory east or west puyrple?,Facing north place one purple block to the lef...,q_653,"[q_653, q_170, q_479, q_293, q_467, q_925, q_4...","[Should I destory east or west puyrple?, Where..."
55,Where exactly am I placing the blue blocks?,facing northdelete top 2 purple blocks on Righ...,q_170,"[q_149, q_436, q_170, q_712, q_737, q_576, q_7...","[Which color blocks?, After you remove the one..."
...,...,...,...,...,...
6723,Where should I build the blue blocks after des...,I destroy a all red blocks and then build anot...,q_405,"[q_467, q_280, q_580, q_775, q_569, q_598, q_6...",[There are no yellow or blue blocks. Do you wa...
6736,Would the row of 2 blocks be under the blue st...,Destroy all red blocksfacing north replace the...,q_738,"[q_653, q_170, q_673, q_954, q_580, q_775, q_7...","[Should I destory east or west puyrple?, Where..."
6750,There are only orange blocks not yellow.,Place a yellow block to the west of the bottom...,q_105,"[q_293, q_925, q_280, q_572, q_598, q_604, q_7...",[Should the two purple blocks I'm setting be p...
6766,Left and right as I face which direction?,Facing north place a blue block to the left of...,q_310,"[q_111, q_362, q_960, q_773, q_911, q_603, q_7...","[in any square west of the red blocks?, Where ..."


In [8]:
# Hold out 15% of the ranking rows as a dev set; the fixed random_state makes
# the shuffle, and therefore the split, repeatable across runs.
ranking_train, ranking_dev = train_test_split(
    ranking_df,
    random_state=42,
    test_size=0.15,
)

In [9]:
# Write both splits next to the input data, omitting the DataFrame index.
# NOTE(review): the qbank/candidates cells hold Python lists, which CSV stores
# as text; readers presumably need to parse them back -- confirm downstream.
for split_filename, split_frame in (
    ("ranking_train.csv", ranking_train),
    ("ranking_dev.csv", ranking_dev),
):
    split_frame.to_csv(DATA_PATH / split_filename, index=False)