In [33]:
# Load the survey template; the file is parsed as JSON into `survey`.
import json

# JSON text is UTF-8. Passing the encoding explicitly keeps the load from
# depending on the platform's default locale encoding.
with open("start_template.qsf", "r", encoding="utf-8") as f:
    survey = json.load(f)

In [34]:
import json

# Load the question data into `qasper_data` (a dict, iterated by key later on).
# The encoding is given explicitly so non-ASCII text in the file is decoded the
# same way on every platform.
with open("./qasper_data.json", "r", encoding="utf-8") as f:
    qasper_data = json.load(f)

In [35]:
import re

_UNANSWERABLE_HTML = "<br />The question does not have an answer based on the given document."


def _highlight_answer(passage, answer):
    """Wrap occurrences of ``answer`` inside ``passage`` in ``<b>`` tags.

    An exact (case-sensitive) match is preferred; only when there is none is a
    case-insensitive search performed. In both cases the passage keeps its
    original casing.

    Returns the highlighted passage, or ``None`` when ``answer`` is empty or
    does not occur in ``passage``.
    """
    if not answer:
        # An empty answer "occurs" everywhere and would be wrapped between
        # every character, so treat it as no match.
        return None
    if answer in passage:
        return passage.replace(answer, "<b>" + answer + "</b>")
    # re.escape: the answer is literal text, not a pattern.
    matcher = re.compile(re.escape(answer), re.IGNORECASE)
    highlighted, hits = matcher.subn(lambda m: "<b>" + m.group(0) + "</b>", passage)
    return highlighted if hits else None


def _evidence_html(answer, passages):
    """Build the HTML evidence snippet shown next to one generated answer.

    Every passage containing the answer is appended with the answer in bold.
    For an "Unanswerable" answer a fixed explanatory sentence is used instead.
    When nothing matched, the first passage (if any) is shown unhighlighted.
    """
    html = ""
    for passage in passages:
        highlighted = _highlight_answer(passage, answer)
        if highlighted is not None:
            html += f"<br /><br />{highlighted}"
        elif answer == "Unanswerable":
            html = _UNANSWERABLE_HTML
    if html == "":
        if answer == "Unanswerable":
            # Reached when there are no passages at all for this answer.
            html = _UNANSWERABLE_HTML
        elif passages:
            html += f"<br /><br />{passages[0]}"
    return html


# Attach one evidence snippet per (answer, passages) pair to every question.
for k, v in qasper_data.items():
    qasper_data[k]["evidence_HTMLs"] = [
        _evidence_html(answer, passages)
        for answer, passages in zip(v["answers"], v["evidence"])
    ]

In [37]:
def task1_Q(element, q_key):
    """Fill ``element`` in place as the Task 1 question for ``q_key``.

    Reads the document titles and URLs for ``q_key`` from the module-level
    ``qasper_data`` and writes the question text, export tag, one text-entry
    choice per (title, URL) pair, the choice order, the description and the
    forced-response validation into ``element``. Returns ``None``.
    """
    entry = qasper_data[q_key]
    doc_titles = entry["titles"]
    doc_urls = entry["URLs"]

    element["SecondaryAttribute"] = q_key
    payload = element["Payload"]

    payload["QuestionText"] = f"""
    <u>Task 1: Answer the question given the Document.</u><br>
    You are given a set of 4 documents and a question that needs to be answered for each of the documents. 
    Your task is to answer the question based on the document's content. 
    If the answer can not be found in the document then answer with "Unanswerable".
    <br><br>
    <u>Question:</u> {q_key}
    """
    payload["DataExportTag"] = "Task1-" + q_key

    # Choices are numbered from 1; each one is a link to the document plus a
    # text-entry box.
    choices = {}
    for position, (title, url) in enumerate(zip(doc_titles, doc_urls), start=1):
        choices[position] = {
            "Display": f'<a href="{url}" target="_blank" rel="noopener noreferrer">{title}</a>',
            "InputHeight": 29,
            "InputWidth": 470,
            "TextEntry": "on",
        }
    payload["Choices"] = choices

    payload["ChoiceOrder"] = [number for number in range(1, len(doc_titles) + 1)]
    payload["QuestionDescription"] = q_key

    validation_settings = {
        "ForceResponse": "ON",
        "ForceResponseType": "ON",
        "Type": "None",
    }
    payload["Validation"] = {"Settings": validation_settings}


def task2_Q(element, q_key, task1ID):
    """Fill ``element`` in place as the Task 2 question for ``q_key``.

    Reads titles, URLs, generated answers and evidence snippets for ``q_key``
    from the module-level ``qasper_data``. Each choice shows the document link,
    the generated answer and its evidence; the answer box is pre-filled with
    the generated answer. ``task1ID`` is written into the first display-logic
    condition of the element. Returns ``None``.
    """
    entry = qasper_data[q_key]
    doc_titles = entry["titles"]
    doc_urls = entry["URLs"]
    model_answers = entry["answers"]
    evidence_snippets = entry["evidence_HTMLs"]

    element["SecondaryAttribute"] = q_key
    payload = element["Payload"]

    payload["QuestionText"] = f"""
    <u>Task 2: Answer the question given the Document, a generated Answer and the corresponding Evidence.</u><br>
    Same as in Task 1, you are given a set of 4 documents and a question that needs to be answered for each of the documents.
    Now, you are also given an answer generated by a language model and evidence passages, meaning passages from the document in which the answer is found.<br>  
    Your task is, again, to answer the question, based on the document's content now with the help of the generated answer and the evidence passages.<br>
    Only open the document if the answer is incorrect and cannot be found in the supporting evidence.<br>
    If the generated answer is correct then leave it as is in the text box. If the generated answer is incorrect then write the correct answer in the text box.<br>
    If the answer can not be found in the document, answer "Unanswerable".
    <br><br>
    <u>Question:</u> {q_key}
    """
    payload["DataExportTag"] = "Task2-" + q_key

    # One choice per document, numbered from 1. zip() stops at the shortest of
    # the four lists.
    choices = {}
    rows = zip(doc_titles, doc_urls, model_answers, evidence_snippets)
    for position, (title, url, gen_answer, evidence_HTML) in enumerate(rows, start=1):
        choices[position] = {
            "Display": f'<a href="{url}" target="_blank" rel="noopener noreferrer">{title}</a><br><b>Answer: </b>{gen_answer}<br><strong>Evidence:</strong>{evidence_HTML}',
            "InputHeight": 29,
            "InputWidth": 470,
        }
    payload["Choices"] = choices

    # Pre-fill every answer box with the generated answer (keyed by choice
    # number, then by the single answer column "1").
    prefilled = {}
    for position, gen_answer in enumerate(model_answers, start=1):
        prefilled[position] = {"1": {"Value": gen_answer}}
    payload["DefaultChoices"] = prefilled

    payload["ChoiceOrder"] = [number for number in range(1, len(doc_titles) + 1)]
    payload["QuestionDescription"] = q_key

    # Only the QuestionID of the first display-logic condition is rewritten.
    # NOTE(review): if the template's condition holds other references to the
    # Task 1 question, they are left untouched - confirm against the template.
    first_condition = payload["DisplayLogic"]["0"]["0"]
    first_condition["QuestionID"] = task1ID

    validation_settings = {
        "ForceResponse": "ON",
        "ForceResponseType": "ON",
        "Type": "None",
    }
    payload["Validation"] = {"Settings": validation_settings}
    payload["Configuration"] = {"ChoiceColumnWidthPixels": 400}
    payload["Answers"] = {"1": {"Display": " "}}

In [30]:
survey_elements = survey["SurveyElements"]

# IDs of the four questions that already exist in the template, two per task.
task1_q1_ID = "QID1"
task2_q1_ID = "QID3"
task1_q2_ID = "QID22"
task2_q2_ID = "QID23"

baseline_q_key = "what was the baseline?"
dataset_q_key = "what dataset was used?"

default_task1_element = None
default_task2_element = None

# Overwrite the values of the four base questions, two for each task.
for element in survey_elements:
    question_id = element["PrimaryAttribute"]

    if question_id == task1_q1_ID:
        # Snapshot the untouched template BEFORE filling it in. A shallow
        # element.copy() would share the nested "Payload" dict with `element`,
        # so the snapshot would be modified by the task1_Q call below.
        default_task1_element = json.loads(json.dumps(element))  # deep copy
        task1_Q(element, baseline_q_key)

    elif question_id == task2_q1_ID:
        default_task2_element = json.loads(json.dumps(element))  # deep copy
        task2_Q(element, baseline_q_key, task1_q1_ID)

    elif question_id == task1_q2_ID:
        task1_Q(element, dataset_q_key)

    elif question_id == task2_q2_ID:
        task2_Q(element, dataset_q_key, task1_q2_ID)

# The snapshots are the templates for every question generated afterwards;
# fail here with a clear message rather than later with an undefined name.
if default_task1_element is None or default_task2_element is None:
    raise ValueError(
        f"Template questions {task1_q1_ID} and/or {task2_q1_ID} were not found "
        "in survey['SurveyElements']"
    )

In [31]:
# In survey_elements, element where element["Element"] == "BL" is the survey blocks
# then element["Payload"] is the list of blocks where element["Payload"]["0"]
# is the block for task1 and element["Payload"]["2"] is the block for task2
# in a block, for the list block["BlockElements"] append
# {
#     "Type": "Question",
#     "QuestionID": "the question's id",
# }
# for each added question.
# Then also for the list block["Options"]["Randomization"]["Advanced"]["RandomSubSet"]
# append the question's id

survey_elements = survey["SurveyElements"]
survey["SurveyEntry"]["SurveyName"] = "Qasper data"

# The "QC" element's SecondaryAttribute is used as the number of the last
# existing question; new question IDs continue counting from it.
question_count_element = [e for e in survey_elements if e["Element"] == "QC"][0]
stored_question_count = question_count_element["SecondaryAttribute"]
question_counter = int(stored_question_count)

# The "BL" element holds the survey blocks: "0" is the Task 1 block and "2" the
# Task 2 block. Looked up once, since it does not change while questions are added.
blocks = [e for e in survey_elements if e["Element"] == "BL"][0]
task1_block = blocks["Payload"]["0"]
task2_block = blocks["Payload"]["2"]

# Create one Task 1 / Task 2 question pair per entry in qasper_data.
# NOTE(review): every key gets a new pair, including keys already used for the
# questions that exist in the template - confirm duplicates are intended.
# NOTE(review): re-running this cell appends the same questions again; reload
# the template first.
for q_key in qasper_data.keys():
    # Add task1 question
    question_counter += 1
    element1_id = f"QID{question_counter}"

    element1 = json.loads(json.dumps(default_task1_element))  # deep copy
    task1_Q(element1, q_key)
    element1["PrimaryAttribute"] = element1_id
    element1["Payload"]["QuestionID"] = element1_id
    survey_elements.append(element1)

    # Add task2 question; it refers to the Task 1 question just created.
    question_counter += 1
    element2_id = f"QID{question_counter}"

    element2 = json.loads(json.dumps(default_task2_element))  # deep copy
    task2_Q(element2, q_key, element1_id)
    element2["PrimaryAttribute"] = element2_id
    element2["Payload"]["QuestionID"] = element2_id
    survey_elements.append(element2)

    # Update blocks: register each question in its block and in that block's
    # randomization subset.
    task1_block["BlockElements"].append({
        "Type": "Question",
        "QuestionID": element1_id,
    })
    task2_block["BlockElements"].append({
        "Type": "Question",
        "QuestionID": element2_id,
    })
    task1_block["Options"]["Randomization"]["Advanced"]["RandomSubSet"].append(element1_id)
    task2_block["Options"]["Randomization"]["Advanced"]["RandomSubSet"].append(element2_id)

# Write the advanced counter back (same type as it was stored in) so the saved
# file's question count agrees with the question IDs added above.
# NOTE(review): assumes the survey tool reads this counter when allocating new
# question IDs - confirm.
question_count_element["SecondaryAttribute"] = type(stored_question_count)(question_counter)

In [32]:
# Serialize the assembled survey and write it out as the final .qsf file.
with open("productivity_experiment_qasper_data.qsf", mode="w") as f:
    f.write(json.dumps(survey, indent=4))