# Survey Evaluation

## Loading packages

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Set-up

In [35]:
# Number of trailing rows of the survey export that are kept as participants.
N_SUBJ = 37

# Location of the survey export (CSV).
DATA_LOC = "survey_data.csv"

# Let displayed frames show long text cells without truncating them.
pd.set_option("display.max_colwidth", 10000)

## Dictionaries with the right answers

In [36]:
# Yes-no questions: simple check
# Problem-solving: manual
# Cloze test: automatic + manual

def construct_yes_no_correct():
    """Read the yes/no answer keys for both models from disk.

    Returns:
        dict: keys "VB" and "FE"; each value is the list of lines of the
        corresponding answer file with surrounding whitespace stripped.
    """
    sources = (
        ("VB", "answers/VB_yesno.txt"),
        ("FE", "answers/FE_yesno.txt"),
    )

    key_by_model = {}
    for model, path in sources:
        with open(path) as handle:
            key_by_model[model] = [line.strip() for line in handle]

    return key_by_model

def construct_cloze_correct():
    """Read the accepted cloze-test answers for both models from disk.

    Every line of an answer file is split on commas; the stripped pieces
    form the list of accepted alternatives for that line.

    Returns:
        dict: keys "VB" and "FE"; each value is a list (one entry per file
        line) of lists of accepted alternatives.
    """
    sources = (
        ("VB", "answers/VB_cloze.txt"),
        ("FE", "answers/FE_cloze.txt"),
    )

    accepted = {"VB": [], "FE": []}
    for model, path in sources:
        with open(path) as handle:
            for line in handle:
                alternatives = [piece.strip() for piece in line.split(",")]
                accepted[model].append(alternatives)

    return accepted

## Loading file

In [37]:
def load_data():
    """Load the survey export and keep only the participant rows.

    Reads the CSV at DATA_LOC and returns its last N_SUBJ rows; the rows
    keep the index labels they had in the full file.
    """
    full_export = pd.read_csv(DATA_LOC)
    first_kept = -N_SUBJ
    return full_export.iloc[first_kept:]

## Extracting columns

In [38]:
def extract_answers(data):
    """Split one participant's record into the three answer groups.

    Args:
        data: label-indexed record of a single participant (the caller
            passes one row of the survey frame).

    Returns:
        tuple: (yes_no, problem_solving, cloze), each a dict with keys
        "VB" and "FE". The first two hold label slices of ``data``; the
        cloze dict holds the single entries "Q86" (VB) and "Q88" (FE).
    """
    yes_no = {
        "VB": data["VB_C1":"VB_C12"],
        "FE": data["FAC_1":"FAC_12"],
    }
    problem_solving = {
        "VB": data["VB_P1":"VB_P5"],
        "FE": data["FE_P1":"FE_P5"],
    }
    cloze = {
        "VB": data["Q86"],
        "FE": data["Q88"],
    }
    return yes_no, problem_solving, cloze

## Extracting cloze test answers

In [39]:
def extract_cloze_answers(cloze_answers):
    """Split the raw cloze-test strings into per-gap answers.

    Args:
        cloze_answers: dict with keys "VB" and "FE" holding the raw cloze
            entry of one participant (a ";"-separated string, or a
            non-string value such as NaN when nothing was recorded).

    Returns:
        dict: keys "VB" and "FE"; each value is the list of fields of the
        raw string without its last field, or an empty list (after a
        printed warning) when the entry is not a string.

    Raises:
        KeyError: if "VB" or "FE" is missing from ``cloze_answers``.
    """
    answers = {}

    for model in ("VB", "FE"):
        raw = cloze_answers[model]

        # Check the type explicitly instead of catching everything: a bare
        # except would also hide KeyError/KeyboardInterrupt behind the
        # "non-string entry" warning.
        if isinstance(raw, str):
            fields = raw.split(";")
        else:
            fields = []
            print("[WARNING] NaN or other non-string entry detected")

        # The final field is always discarded; presumably the export ends
        # every string with a trailing ";" -- TODO confirm.
        answers[model] = fields[:-1]

    return answers

## Checking answers

In [40]:
def check_yes_no(correct, answers):
    """Score the yes/no answers of one participant against the answer key.

    Args:
        correct: dict with keys "VB" and "FE" mapping to lists of key
            entries (strings).
        answers: dict with keys "VB" and "FE" mapping to the participant's
            answers in the same order as the key (list or pandas Series).

    Returns:
        dict: keys "VB" and "FE"; each value is a list with one 0/1 score
        per key entry. A key entry of "u" always scores 1, whatever was
        answered; otherwise the comparison is case-insensitive.
    """
    scores = {"VB": [], "FE": []}

    for model in ("VB", "FE"):
        # list() reads the answers in positional order for lists and for
        # label-indexed Series alike; integer indexing on a string-labelled
        # Series relies on a deprecated positional fallback.
        given = list(answers[model])

        for i, expected in enumerate(correct[model]):
            if expected == "u":
                scores[model].append(1)
            # str() keeps a missing (NaN) answer from crashing the run; it
            # simply fails to match, the same coercion check_cloze uses.
            elif str(given[i]).lower() == expected.lower():
                scores[model].append(1)
            else:
                scores[model].append(0)

    return scores

def check_cloze(correct, answers):
    """Pre-score the cloze answers of one participant.

    Args:
        correct: dict with keys "VB" and "FE"; each value is a list with,
            per gap, the list of accepted alternatives.
        answers: dict with keys "VB" and "FE"; each value is the list of
            the participant's answers, one per gap.

    Returns:
        dict: keys "VB" and "FE". Per gap the entry is 1 when the answer
        matches an accepted alternative, 0 when the answer is empty, and
        the raw answer itself otherwise (left for manual checking).
        If either model has no answers at all, both lists are ``[nan]``.
    """
    if len(answers["VB"]) == 0 or len(answers["FE"]) == 0:
        return {"VB": [np.nan], "FE": [np.nan]}

    scores = {"VB": [], "FE": []}

    for model in ("VB", "FE"):
        for i, accepted in enumerate(correct[model]):
            given = answers[model][i]

            # Normalise both sides the same way: comparing a lower-cased
            # answer with an un-normalised key means an alternative that
            # contains a capital letter could never match.
            normalised = str(given).strip().lower()
            alternatives = [str(alt).strip().lower() for alt in accepted]

            if normalised in alternatives:
                scores[model].append(1)
            elif normalised == '':
                scores[model].append(0)
            else:
                # Unrecognised answer: keep it verbatim for manual grading.
                scores[model].append(given)

    return scores

## Determine condition
This function determines whether a participant is control or treatment, and in what order they received the models.

- FL_72 -> control; FL_81 -> treatment;
- FL_51|FL_66 -> VB|FE (control);
- FL_59|FL_75 -> VB|FE (treatment);

In [41]:
def determine_condition(ptc):
    """Derive the experimental condition and model order of a participant.

    Args:
        ptc: record of one participant, readable by the keys "FL_87_DO",
            "FL_72_DO" and "FL_81_DO".

    Returns:
        tuple: (condition, order). ``condition`` is "control", "treatment"
        or "" when "FL_87_DO" matches neither block. ``order`` is
        ["VB", "FE"], ["FE", "VB"], or [] when the order entry is not one
        of the two known sequences.
    """
    # (block id, condition label, order column, VB-first value, FE-first value)
    layouts = (
        ("FL_72", "control", "FL_72_DO", "FL_51|FL_66", "FL_66|FL_51"),
        ("FL_81", "treatment", "FL_81_DO", "FL_59|FL_75", "FL_75|FL_59"),
    )

    for block_id, label, order_col, vb_first, fe_first in layouts:
        if ptc["FL_87_DO"] != block_id:
            continue

        # Only the order column belonging to the matched block is read.
        if ptc[order_col] == vb_first:
            return label, ["VB", "FE"]
        if ptc[order_col] == fe_first:
            return label, ["FE", "VB"]
        return label, []

    return "", []

## Evaluation

In [42]:
# Loading the data
data = load_data()

# Constructing the correct answers
yes_no_correct = construct_yes_no_correct()
cloze_correct = construct_cloze_correct()

# Dataframe in which results will be stored (one row per participant, labelled 1..N_SUBJ)
results = pd.DataFrame(columns = ["condition", "order", "VB-yes-no", "VB-problem-solving", "VB-cloze", "FE-yes-no", "FE-problem-solving", "FE-cloze"],
                        index = range(1, N_SUBJ+1))

# `part` is the 0-based position of the participant within the loaded rows and
# `i` the row's index label in the full export. Taking the position from
# enumerate() avoids hard-coding the label of the first loaded row, which
# changes whenever the export or N_SUBJ changes.
for part, i in enumerate(data.index):
    print("[INFO] Processing participant {}, index {}...".format(part, i))

    # Determine condition of participant
    cond, order = determine_condition(data.loc[i])

    # Extract answers from participant
    yes_no_answers, problem_solving_answers, cloze_answers = extract_answers(data.loc[i])
    cloze_answers = extract_cloze_answers(cloze_answers)

    # Compute scores from extracted answers
    score_yn = check_yes_no(yes_no_correct, yes_no_answers)
    score_cloze = check_cloze(cloze_correct, cloze_answers)

    # Insert scores in dataframe (positional row, so `part` 0 fills the row labelled 1)
    results.iloc[part] = [cond, order, score_yn["VB"], problem_solving_answers["VB"], score_cloze["VB"], score_yn["FE"], problem_solving_answers["FE"], score_cloze["FE"]]

# Write results to CSV
results.to_csv("results.csv")

data

[INFO] Processing participant 0, index 23...
[INFO] Processing participant 1, index 24...
[INFO] Processing participant 2, index 25...
[INFO] Processing participant 3, index 26...
[INFO] Processing participant 4, index 27...
[INFO] Processing participant 5, index 28...
[INFO] Processing participant 6, index 29...
[INFO] Processing participant 7, index 30...
[INFO] Processing participant 8, index 31...
[INFO] Processing participant 9, index 32...
[INFO] Processing participant 10, index 33...
[INFO] Processing participant 11, index 34...
[INFO] Processing participant 12, index 35...
[INFO] Processing participant 13, index 36...
[INFO] Processing participant 14, index 37...
[INFO] Processing participant 15, index 38...
[INFO] Processing participant 16, index 39...
[INFO] Processing participant 17, index 40...
[INFO] Processing participant 18, index 41...
[INFO] Processing participant 19, index 42...
[INFO] Processing participant 20, index 43...
[INFO] Processing participant 21, index 44..

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,Use2R,Load1,ENG1,ENG2,ENG3,VB_P2 - Parent Topics,VB_P2 - Topics,FL_87_DO,FL_72_DO,FL_81_DO
23,2021-09-22 00:48:23,2021-09-22 04:01:52,IP Address,137.56.58.245,100,11609,True,2021-09-22 04:01:52,R_2EmayCZt6OCt46k,,...,Agree,Strongly disagree,,,,,,FL_72,FL_51|FL_66,
24,2021-09-22 06:19:10,2021-09-22 08:27:51,IP Address,137.56.58.245,100,7721,True,2021-09-22 08:27:51,R_3KUTAsxEzeuetAQ,,...,Somewhat agree,Disagree,,,,,,FL_81,,FL_59|FL_75
25,2021-09-24 02:03:08,2021-09-24 04:07:38,IP Address,137.56.58.245,100,7469,True,2021-09-24 04:07:38,R_3EEm8DzR9sDSbAt,,...,Disagree,Somewhat disagree,,,,,,FL_72,FL_66|FL_51,
26,2021-09-24 06:04:10,2021-09-24 08:06:17,IP Address,137.56.58.245,100,7327,True,2021-09-24 08:06:18,R_1hQDyuqZZklp9YG,,...,Disagree,Neither agree nor disagree,,,,,,FL_81,,FL_75|FL_59
27,2021-09-29 02:17:32,2021-09-29 04:11:30,IP Address,137.56.58.245,100,6837,True,2021-09-29 04:11:30,R_3MR1rvBA5dOWJ1f,,...,Agree,Strongly agree,Neither agree nor disagree,Somewhat agree,Agree,,,FL_81,,FL_75|FL_59
28,2021-09-29 06:21:09,2021-09-29 08:05:58,IP Address,137.56.58.245,100,6288,True,2021-09-29 08:05:58,R_2YlCxizzahmaNMx,,...,Disagree,Somewhat agree,Agree,Agree,Strongly agree,,,FL_72,FL_51|FL_66,
29,2021-10-01 02:09:15,2021-10-01 04:15:54,IP Address,137.56.57.1,100,7598,True,2021-10-01 04:15:54,R_2qDyygPZXyeS5cU,,...,Agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Strongly agree,,,FL_81,,FL_59|FL_75
30,2021-10-01 06:11:08,2021-10-01 07:48:22,IP Address,137.56.57.1,100,5833,True,2021-10-01 07:48:23,R_1252ZcNymsScjk7,,...,Disagree,Disagree,Strongly agree,Strongly agree,Strongly agree,,,FL_72,FL_51|FL_66,
31,2021-10-06 02:11:00,2021-10-06 03:59:51,IP Address,137.56.57.1,100,6530,True,2021-10-06 03:59:51,R_23TwN233GOZK8N4,,...,Neither agree nor disagree,Disagree,Agree,Agree,Agree,,,FL_72,FL_51|FL_66,
32,2021-10-06 06:07:56,2021-10-06 07:48:31,IP Address,137.56.57.1,100,6035,True,2021-10-06 07:48:32,R_3gLt44F2fd14hjQ,,...,Neither agree nor disagree,Somewhat disagree,Neither agree nor disagree,Somewhat agree,Agree,,,FL_81,,FL_59|FL_75
