In [None]:
from pathlib import Path

import pandas as pd

# Benchmark splits to fetch, keyed by a short dataset name. Each entry is a
# small dict whose "url" field is the `hf://` location of one parquet shard.
datasets = {
    name: {"url": url}
    for name, url in (
        (
            "squad_v2",
            "hf://datasets/rajpurkar/squad_v2/squad_v2/validation-00000-of-00001.parquet",
        ),
        (
            "ai2_arc",
            "hf://datasets/allenai/ai2_arc/ARC-Challenge/test-00000-of-00001.parquet",
        ),
        (
            "boolq",
            "hf://datasets/google/boolq/data/validation-00000-of-00001.parquet",
        ),
    )
}

def clean_data(dataset_name: str, df):
    match dataset_name:
        case "squad_v2":
            df["answers"] = df["answers"].apply(
                lambda x: x["text"][0] if len(x.get("text")) > 0 else ""
            )
            df.rename(
                columns={
                    "answers": "answer",
                },
                inplace=True,
            )
            df.drop(columns="id", inplace=True)
        case "ai2_arc":
            df["choices"] = df["choices"].apply(
                lambda x: " ".join(
                    [f"{label}. {text}" for label, text in zip(x["label"], x["text"])]
                )
            )
            df["question"] = df["question"] + " " + df["choices"]
            df.rename(
                columns={
                    "id": "title",
                    "answerKey": "answer",
                },
                inplace=True,
            )
            df.drop(columns="choices", inplace=True)
        case "boolq":
            df.rename(
                columns={
                    "passage": "context",
                },
                inplace=True,
            )

    df.to_parquet(f"../Datasets/test-{dataset_name}.parquet")

In [17]:
# Read every configured parquet file from its URL and pass it to clean_data,
# which writes the processed copy to disk.
for dataset_name, source in datasets.items():
    df = pd.read_parquet(source["url"])
    clean_data(dataset_name, df)

     title                                            context  \
0  Normans  The Normans (Norman: Nourmands; French: Norman...   
1  Normans  The Normans (Norman: Nourmands; French: Norman...   
2  Normans  The Normans (Norman: Nourmands; French: Norman...   
3  Normans  The Normans (Norman: Nourmands; French: Norman...   
4  Normans  The Normans (Norman: Nourmands; French: Norman...   

                                            question  \
0               In what country is Normandy located?   
1                 When were the Normans in Normandy?   
2      From which countries did the Norse originate?   
3                          Who was the Norse leader?   
4  What century did the Normans first gain their ...   

                        answer  
0                       France  
1      10th and 11th centuries  
2  Denmark, Iceland and Norway  
3                        Rollo  
4                 10th century  
               title                                           question an