In [1]:
from pathlib import Path

import datasets
import pandas as pd

# Human annotations: the "combined_main_aspects" configuration, "full" split,
# converted to a pandas DataFrame.
human_data = (
    datasets
    .load_dataset("boda/review_evaluation_human_annotation", name="combined_main_aspects", split="full")
    .to_pandas()
)

# Automatically labelled data: the "all" configuration, loaded once per split
# (train first, then test) and converted to pandas DataFrames.
synthetic_data_train, synthetic_data_test = (
    datasets.load_dataset("boda/review_evaluation_automatic_labels", name="all", split=split_name).to_pandas()
    for split_name in ("train", "test")
)



In [7]:
# Strip the '__index_level_0__' column from every loaded frame that has it.
# errors='ignore' turns the drop into a no-op for frames without that column,
# and inplace=True keeps mutating the existing DataFrame objects.
for frame in (human_data, synthetic_data_train, synthetic_data_test):
    frame.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)

# Aspect names; the code below uses them to build and look up column names.
aspects = ['actionability', 'grounding_specificity', 'verifiability', 'helpfulness']


# Rename the agreement tiers stored in each "<aspect>_label_type" column and
# blank out the matching "<aspect>_label" wherever the tier is "LowAgreement".
label_type_mapping = {
    "gold": "FullAgreement",
    "silver": "MajorityAgreement",
    "hard": "LowAgreement"
}

for aspect in aspects:
    label_type_column = f"{aspect}_label_type"
    label_column = f"{aspect}_label"

    # Check that the column exists *before* reading it: an aspect without a
    # label-type column is skipped instead of raising KeyError.
    if label_type_column not in human_data.columns:
        continue

    # Values that are not keys of the mapping are left as they are.
    human_data[label_type_column] = human_data[label_type_column].replace(label_type_mapping)

    if label_column in human_data.columns:
        # Column-level mask instead of a row-by-row apply: rows whose tier is
        # "LowAgreement" lose their label, every other row keeps it.
        is_low_agreement = human_data[label_type_column] == "LowAgreement"
        human_data[label_column] = human_data[label_column].mask(is_low_agreement, None)


def _anonymize_annotators(entry):
    """Return ``entry`` with its annotator names replaced by X, Y and Z.

    Only dicts that contain an 'annotators' key are rewritten. The result
    keeps just two keys: 'annotators' (the fixed placeholder list) and
    'labels' (the original value, or an empty list when the key is absent).
    Any other value is returned unchanged.
    """
    if not (isinstance(entry, dict) and 'annotators' in entry):
        return entry
    # NOTE(review): the placeholder list always has three names, whatever the
    # length of the original 'annotators' value -- confirm every row really
    # has exactly three annotators.
    return {'annotators': ['X', 'Y', 'Z'], 'labels': entry.get('labels', [])}


# Anonymise the annotator names in each per-aspect column of human_data;
# aspects that have no column of their own are skipped.
for aspect in aspects:
    if aspect not in human_data.columns:
        continue
    human_data[aspect] = human_data[aspect].apply(_anonymize_annotators)


# Export the processed frames as Excel workbooks under ../data/final_data.
output_dir = Path("../data/final_data")
# Create the target folder (and any missing parents) up front; without it the
# export would fail when the folder does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)

# Human annotations: a single workbook, written without the index column.
human_data.to_excel(output_dir / "human_data.xlsx", index=False)

# Synthetic data: one workbook with a separate sheet for each split.
with pd.ExcelWriter(output_dir / "synthetic_data.xlsx") as writer:
    synthetic_data_train.to_excel(writer, sheet_name="Train", index=False)
    synthetic_data_test.to_excel(writer, sheet_name="Test", index=False)

In [8]:
# Show the processed human annotations as this cell's output.
human_data

Unnamed: 0,review_point,paper_id,venue,focused_review,batch,actionability,actionability_label,actionability_label_type,id,grounding_specificity,grounding_specificity_label,grounding_specificity_label_type,verifiability,verifiability_label,verifiability_label_type,helpfulness,helpfulness_label,helpfulness_label_type
0,"- Also, since the dataset is artificially crea...",ARR_2022_236_review,ARR_2022,"- My main criticism is that the ""mismatched"" i...",2,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['3'...",3,MajorityAgreement,26,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['4'...",4,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['4'...",,LowAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['3'...",3,MajorityAgreement
1,1) The paper does not dig into the theory prof...,ACL_2017_554_review,ACL_2017,1) The paper does not dig into the theory prof...,2,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['2'...",2,MajorityAgreement,27,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['2'...",,LowAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['2'...",2,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['2'...",2,MajorityAgreement
2,- 261&272: any reason you did not consider the...,ACL_2017_516_review,ACL_2017,Missing related work on anchor words Evaluatio...,2,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,28,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",,LowAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement
3,3) The description of HIERENC is unclear. From...,ACL_2017_588_review,ACL_2017,and the evaluation leaves some questions unans...,2,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",,LowAgreement,29,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",4,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",,LowAgreement
4,- You mention that you only select 10 answers ...,ARR_2022_23_review,ARR_2022,The technical novelty is rather lacking. Altho...,2,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,30,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",X,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1425,* Originality The most closely related work of...,NIPS_2017_382,NIPS_2017,weakness that there is much tuning and other s...,10,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['4'...",5,MajorityAgreement,1530,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['3'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement
1426,* Including a comparison to one of the methods...,bWXIut4pNM,EMNLP_2023,There are 3 potential changes that would impro...,10,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,1531,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement
1427,1. The time complexity of the learning algorit...,NIPS_2016_95,NIPS_2016,1. The time complexity of the learning algorit...,10,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,1532,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['3'...",3,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,FullAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement
1428,"4) for the third point of definition one, is t...",NIPS_2016_537,NIPS_2016,weakness of the paper is the lack of clarity i...,10,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,1533,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['X'...",5,MajorityAgreement,"{'annotators': ['X', 'Y', 'Z'], 'labels': ['5'...",5,MajorityAgreement
