In [82]:
from datasets import load_dataset
import pandas as pd
import os
import json

In [80]:
repository_directory = os.path.abspath('') + "/repository"
models_jsons = os.listdir(repository_directory)

In [84]:
dataset_model_dict = {}

for model_file in models_jsons:
    with open(repository_directory + "/" + model_file) as model_json:
        data = json.load(model_json)
        for dataset in data['dataset']:
            if dataset not in dataset_model_dict:
                dataset_model_dict[dataset] = []
            
            dataset_model_dict[dataset].append(data['model_name'])

In [85]:
dataset_model_dict

{'squad_v2': ['mrm8488/longformer-base-4096-finetuned-squadv2',
  'allenai/unifiedqa-t5-base',
  'ixa-ehu/SciBERT-SQuAD-QuAC'],
 'hotpot_qa': ['AdapterHub/roberta-base-pf-hotpotqa'],
 'cuad': ['Rakib/roberta-base-on-cuad', 'akdeniz27/deberta-v2-xlarge-cuad'],
 'trivia_qa': ['allenai/longformer-large-4096-finetuned-triviaqa'],
 'squad': ['ozcangundes/T5-base-for-BioQA',
  'MaRiOrOsSi/t5-base-finetuned-question-answering',
  'vanadhi/roberta-base-fiqa-flm-sq-flit'],
 'BeIR/bioasq-generated-queries': ['ozcangundes/T5-base-for-BioQA'],
 'duorc': ['MaRiOrOsSi/t5-base-finetuned-question-answering',
  'MaRiOrOsSi/t5-base-finetuned-question-answering'],
 'pubmed_qa': ['razent/SciFive-base-Pubmed_PMC', 'microsoft/biogpt'],
 'zhengyun21/PMC-Patients': ['razent/SciFive-base-Pubmed_PMC'],
 'boolq': ['allenai/unifiedqa-t5-base'],
 'race': ['allenai/unifiedqa-t5-base'],
 'quoref': ['allenai/unifiedqa-t5-base'],
 'ropes': ['allenai/unifiedqa-t5-base'],
 'drop': ['allenai/unifiedqa-t5-base'],
 'sagnik

In [57]:
def sample_rows_from_dataset(dataset: str,
                             column_names: tuple,
                             *args,
                             num_samples: int = 250,
                             seed: int = 42,
                             **kwargs) -> pd.DataFrame:    
    if not isinstance(column_names, tuple):
        raise Exception("Column names need to be a list of column names as strings.")
    try:
        dataset = load_dataset(dataset, *args, split="test")
    except Exception as e:
        print("Could NOT load dataset for {0}".format(dataset))
        raise Exception("Error while loading dataset {}".format(e))
    shuffled_dataset = dataset.shuffle(seed=seed)
    df = pd.DataFrame(shuffled_dataset[:num_samples])
    try:
        return df[list(column_names)]
    except KeyError as e:
        raise e
    
    
def sample_rows_from_dataset(dataset: str,
                             column_names: tuple,
                             *args,
                             num_samples: int = 3000,
                             seed: int = 42,
                             **kwargs) -> pd.DataFrame:
    if not isinstance(column_names, tuple):
        raise Exception("Column names need to be a list of column names as strings.")
    try:
        dataset = load_dataset(dataset, *args, **kwargs)
    except Exception as e:
        print("Could NOT load dataset for {0}".format(dataset))
        raise Exception("Error while loading dataset {}".format(e))
    shuffled_dataset = dataset.shuffle(seed=seed)
    df = pd.DataFrame(shuffled_dataset[:num_samples])
    try:
        return df[list(column_names)]
    except KeyError as e:
        raise e

### Squad Dataset

In [26]:
dataset_name = "squad"
configs = None
column_tuple = ("question", "context", "answers")

squad_qa_dataset = sample_rows_from_dataset(dataset_name, column_tuple, split="validation")

In [27]:
answers = []

for i in range(len(squad_qa_dataset)):
    curr_ans_list = squad_qa_dataset['answers'][i]['text']
    curr_ans = max(curr_ans_list, key = len)
    answers.append(curr_ans)
    
squad_qa_dataset['answers'] = answers

### Pubmed Biology Dataset

In [32]:
dataset_name = "pubmed_qa"
config = "pqa_labeled"
column_tuple = ("question", "context", "long_answer")

pubmed_qa_dataset = sample_rows_from_dataset(dataset_name, column_tuple, config, split="train")

contexts_strings = []

for i in range(len(pubmed_qa_dataset)):
    contexts_strings.append(' '.join(pubmed_qa_dataset["context"][i]['contexts']))
    
pubmed_qa_dataset['context'] = contexts_strings
pubmed_qa_dataset = pubmed_qa_dataset.rename(columns={"long_answer": "answers"})

### BioASQ dataset

In [30]:
dataset_name = "BeIR/bioasq-generated-queries"
column_tuple = ("text", "query")

bioasq_qa_dataset = sample_rows_from_dataset(dataset_name, column_tuple, split="train")
bioasq_qa_dataset = bioasq_qa_dataset.rename(columns={"text": "context", "query": "question"})
bioasq_qa_dataset = bioasq_qa_dataset[["question", "context"]]

Downloading readme: 100%|██████████| 14.0k/14.0k [00:00<00:00, 40.1MB/s]
Downloading data: 100%|██████████| 7.12G/7.12G [09:22<00:00, 12.7MB/s]
Downloading data files: 100%|██████████| 1/1 [09:22<00:00, 562.45s/it]
Extracting data files: 100%|██████████| 1/1 [01:16<00:00, 76.97s/it]
Generating train split: 14100000 examples [01:29, 158196.47 examples/s]


### cuad (legal) dataset

In [58]:
dataset_name = "cuad"
column_tuple = ("question", "context", "answers")

cuad_qa_dataset = sample_rows_from_dataset(dataset_name, column_tuple, split="train")

In [59]:
answers = []

for i in range(len(cuad_qa_dataset)):
    curr_ans_list = cuad_qa_dataset['answers'][i]['text']
    if len(curr_ans_list)!=0:
        curr_ans = max(curr_ans_list, key = len)
    else:
        curr_ans = ""
    answers.append(curr_ans)
    
cuad_qa_dataset['answers'] = answers

In [62]:
cuad_qa_dataset = cuad_qa_dataset[cuad_qa_dataset["answers"]!=""][:250]

### Combining datasets

In [65]:
cuad_qa_dataset["domain"] = "legal"
#bioasq_qa_dataset["domain"] = "bio"
pubmed_qa_dataset["domain"] = "bio"
squad_qa_dataset["domain"] = "None"

In [95]:
dataset_model_dict['pubmed_qa']

['razent/SciFive-base-Pubmed_PMC', 'microsoft/biogpt']

In [105]:
cuad_qa_dataset['models'] = cuad_qa_dataset['models'].apply(lambda x: dataset_model_dict['cuad'])
pubmed_qa_dataset['models'] = pubmed_qa_dataset['models'].apply(lambda x: dataset_model_dict['pubmed_qa'])
squad_qa_dataset['models'] = squad_qa_dataset['models'].apply(lambda x: dataset_model_dict['squad'])


In [106]:
eval_dataset = pd.concat([cuad_qa_dataset, pubmed_qa_dataset, squad_qa_dataset], ignore_index=True)

In [108]:
eval_dataset.tail(10)

Unnamed: 0,question,context,answers,domain,models
740,What increases student's motivation to learn?,Teachers that exhibit enthusiasm can lead to s...,teacher enthusiasm,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
741,In 1984 Thomas Murphy contacted Leonard Golden...,"In December 1984, Thomas S. Murphy, chief exec...",Capital Cities Communications,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
742,How many chloroplasts are in stomatal guard ce...,"In some plants such as cacti, chloroplasts are...",8–15 per cell,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
743,What physical quantities do not have direction?,Forces act in a particular direction and have ...,denoted scalar quantities,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
744,Who appoints elders?,"Elders are called by God, affirmed by the chur...",the local church,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
745,What organization is the IPCC a part of?,The Intergovernmental Panel on Climate Change ...,the United Nations,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
746,What church in Virginia is maintained by Hugue...,Paul Revere was descended from Huguenot refuge...,Manakin Episcopal Church,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
747,Where is the oldest pharmacy stated to be loca...,In Europe there are old pharmacies still opera...,"Church of Santa Maria Novella in Florence, Italy",,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
748,What are the three primary expressions used to...,"The best, worst and average case complexity re...","best, worst and average case complexity",,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."
749,What are those with lower incomes often unable...,"Firstly, certain costs are difficult to avoid ...",their finances,,"[ozcangundes/T5-base-for-BioQA, MaRiOrOsSi/t5-..."


In [109]:
eval_dataset.to_csv("eval_dataset.csv")