# User Rep Scores
- Goal: output a csv for each user in the datahunt, with each row corresponding to a single answer
- only looking at the parent questions for now
- so far this only works for the Language task, because the schemas are not up to date and the "parent questions" have not yet been identified for the other tasks
- the ordinal answers are not stored in numeric form, so they are converted to integers manually (e.g. T1.Q12)




In [13]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

In [14]:
# Each "parent" question of the Language task paired with its question type.
_language_parent_pairs = [
    ("T1.Q1", "select_all"),
    ("T1.Q12", "ordinal"),
]

# Parallel lists derived from the pairs above:
# language_parents[n] has the type language_parents_types[n].
language_parents = [label for label, _ in _language_parent_pairs]
language_parents_types = [question_type for _, question_type in _language_parent_pairs]

- T1.Q1: Select problems with the language #select_all
- T1.Q12: Does the bolded passage contain a particularly neutral, circumspect, or measured account of a frequently heated issue? If so, please offer them kudos. #ordinal

In [15]:
#read csv into df, keeping only the columns needed for scoring
df = pd.read_csv("BETA_Language-2020-03-01T0224-DataHunt.csv")
df = df[["contributor_uuid", "quiz_task_uuid", "question_label", 
         "answer_label", "answer_text", "quiz_taskrun_uuid", "finish_time"]]

#parent_df holds only the rows that answer a parent question.
#.copy() makes it an independent frame, so adding columns to parent_df
#later neither touches df nor operates on a filtered selection of it.
parent_df = df.loc[df['question_label'].isin(language_parents)].copy()

In [16]:
#adds a column with the question_type of the question_label
# Map each label to its type through a dict rather than scanning
# language_parents with list.index() once per row.
question_type_by_label = dict(zip(language_parents, language_parents_types))
question_types = parent_df['question_label'].map(question_type_by_label).tolist()

# Drop any "question_type" column left over from a previous run of this cell
# before inserting, so re-running never produces two columns with that name
# (a duplicated column would make row['question_type'] return a Series).
parent_df = parent_df.drop(columns="question_type", errors="ignore")
parent_df.insert(2, "question_type", question_types)
parent_df

Unnamed: 0,contributor_uuid,quiz_task_uuid,question_type,question_label,answer_label,answer_text,quiz_taskrun_uuid,finish_time
0,f9143626-bfe0-4e69-b652-6d1525ab4eb0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,select_all,T1.Q1,T1.Q1.A2,Exaggeration,834a1f2e-ec75-48b3-976c-f8afc38601fb,2019-09-04 22:50:32.477936
1,f9143626-bfe0-4e69-b652-6d1525ab4eb0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,select_all,T1.Q1,T1.Q1.A2,Exaggeration,834a1f2e-ec75-48b3-976c-f8afc38601fb,2019-09-04 22:50:32.477936
2,f9143626-bfe0-4e69-b652-6d1525ab4eb0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,select_all,T1.Q1,T1.Q1.A5,Shock or surprise,834a1f2e-ec75-48b3-976c-f8afc38601fb,2019-09-04 22:50:32.477936
3,f9143626-bfe0-4e69-b652-6d1525ab4eb0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,select_all,T1.Q1,T1.Q1.A6,Slang,834a1f2e-ec75-48b3-976c-f8afc38601fb,2019-09-04 22:50:32.477936
4,f9143626-bfe0-4e69-b652-6d1525ab4eb0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,select_all,T1.Q1,T1.Q1.A12,Other problem with language,834a1f2e-ec75-48b3-976c-f8afc38601fb,2019-09-04 22:50:32.477936
...,...,...,...,...,...,...,...,...
3281,3c97d749-8996-4a96-97af-73ecdc5a4716,c251992c-ad2b-4586-bc70-f98f62f8b402,ordinal,T1.Q12,T1.Q12.A2,Good job!,a87ae1c1-7019-4ef8-b33a-d2bd22d689d9,2019-09-21 00:52:12.951237
3284,a31c365c-986b-4dbe-b752-bbb5986cc7f2,c251992c-ad2b-4586-bc70-f98f62f8b402,select_all,T1.Q1,T1.Q1.A7,Typos,fbc4942e-379d-4726-b6be-629f28208278,2019-09-28 08:03:12.014527
3286,a31c365c-986b-4dbe-b752-bbb5986cc7f2,c251992c-ad2b-4586-bc70-f98f62f8b402,ordinal,T1.Q12,T1.Q12.A1,Nope,fbc4942e-379d-4726-b6be-629f28208278,2019-09-28 08:03:12.014527
3289,0e4b8a3e-2714-4b5f-ba10-bb576aaa5cc6,c251992c-ad2b-4586-bc70-f98f62f8b402,select_all,T1.Q1,T1.Q1.A7,Typos,98149bd8-6a20-4fb2-adf5-bbd59af3f7f3,2019-10-02 23:08:42.876182


"Select All That Apply" Questions

In [17]:
#selected is one answer_label, correct is array of correct answer_labels
def selectAllQ(selected, correct):
    """Score one selected answer of a "select all that apply" question.

    selected: a single answer_label.
    correct:  collection of answer_labels counted as correct.

    Returns 1 when `selected` is a member of `correct`, otherwise 0.
    """
    return int(selected in correct)
    

"Select One" Questions

In [18]:
def selectOneQ(selected, correct):
    """Score a "select one" question.

    Returns 1 when `selected` equals `correct`, otherwise 0.
    """
    return 1 if selected == correct else 0

In [19]:
# Quick check: a selection that differs from the correct answer scores 0.
selectOneQ(1, 2)

0

Ordinal Data Questions

In [20]:
def q12_to_int(selection):
    """Convert a T1.Q12 answer_label to its integer rank.

    "T1.Q12.A1" -> 1, "T1.Q12.A2" -> 2, "T1.Q12.A3" -> 3.
    Every other value falls through to 4.
    """
    ranked_labels = ("T1.Q12.A1", "T1.Q12.A2", "T1.Q12.A3")
    for rank, label in enumerate(ranked_labels, start=1):
        if selection == label:
            return rank
    return 4

# Element-wise version of q12_to_int for arrays of answer_labels.
v_q12_to_int = np.vectorize(q12_to_int)

In [9]:
#selected is an answer_label, question is a question_label, both strings
def ordinalDataQ(selected, question, max_z=3.4):
    """Score an ordinal answer by how close it is to the mean answer.

    All answers to `question` in the module-level `parent_df` are converted
    to integers with `q12_to_int`; the selected answer's z-score against that
    distribution (population standard deviation) is turned into a score.

    selected: answer_label of the answer being scored.
    question: question_label whose answers form the reference distribution.
    max_z:    |z-score| that maps to a score of 0.

    Returns a float in [0, 1]: 1 at the mean, decreasing linearly with
    |z-score| and clamped at 0 once |z-score| reaches `max_z`.

    Raises ValueError when `parent_df` has no answers for `question`.
    """
    # Exact comparison: a regex prefix match such as str.match("T1.Q1")
    # would also pick up "T1.Q12" rows, and "." would match any character.
    is_question = parent_df['question_label'] == question
    answers = parent_df.loc[is_question, 'answer_label'].values
    if len(answers) == 0:
        raise ValueError("no answers found for question %r" % (question,))

    #array of 1,2,3,4s
    data_int = v_q12_to_int(answers)
    selected_int = q12_to_int(selected)

    mean = data_int.mean()
    spread = data_int.std()  # population std (ddof=0)
    if spread == 0:
        # Every answer is identical, so a z-score is undefined (0/0).
        # Full score for agreeing with that answer, none otherwise.
        return 1.0 if selected_int == mean else 0.0

    zscore = (selected_int - mean) / spread
    # Clamp at 0 so a far outlier (|z| > max_z) cannot get a negative score.
    return max(0.0, float(1 - abs(zscore) / max_z))

In [10]:
# Column order of every per-user score table.
score_columns = ['quiz_task_uuid', 'quiz_taskrun_uuid', 'question_label', 'question_type', 'answer_label', 'answer_score', 'time_stamp']

# Answer labels passed to the scorers as "correct" (hard-coded for T1.Q1).
select_all_correct = ['T1.Q1.A2', 'T1.Q1.A5', 'T1.Q1.A6']
select_one_correct = 'T1.Q1.A2'

test_dict = {}  # not used in this cell; left defined in case other cells read it

# Collect each contributor's scored answers as plain dicts and build the
# DataFrames once at the end. DataFrame.append was removed in pandas 2.0,
# and growing a frame one row at a time copies it on every iteration.
score_rows_by_user = {}
for index, row in parent_df.iterrows():
    if row['question_type'] == "select_all":
        score = selectAllQ(row['answer_label'], select_all_correct)
    elif row['question_type'] == "select_one":
        score = selectOneQ(row['answer_label'], select_one_correct)
    else:
        # any other question_type is scored as ordinal
        score = ordinalDataQ(row['answer_label'], row['question_label'])
    score_rows_by_user.setdefault(row['contributor_uuid'], []).append({
        'quiz_task_uuid': row['quiz_task_uuid'],
        'quiz_taskrun_uuid': row['quiz_taskrun_uuid'],
        'question_label': row['question_label'],
        'question_type': row['question_type'],
        'answer_label': row['answer_label'],
        'answer_score': score,
        'time_stamp': row['finish_time'],
    })

# contributor_uuid -> DataFrame with one row per scored answer
user_score_dict = {
    contributor_uuid: pd.DataFrame(score_rows, columns=score_columns)
    for contributor_uuid, score_rows in score_rows_by_user.items()
}
        

        
    
    

In [21]:
#example output csv for a given contributor_uuid
user_score_dict['f9143626-bfe0-4e69-b652-6d1525ab4eb0']

Unnamed: 0,quiz_task_uuid,quiz_taskrun_uuid,question_label,question_type,answer_label,answer_score,time_stamp
0,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,834a1f2e-ec75-48b3-976c-f8afc38601fb,T1.Q1,select_all,T1.Q1.A2,1,2019-09-04 22:50:32.477936
1,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,834a1f2e-ec75-48b3-976c-f8afc38601fb,T1.Q1,select_all,T1.Q1.A2,1,2019-09-04 22:50:32.477936
2,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,834a1f2e-ec75-48b3-976c-f8afc38601fb,T1.Q1,select_all,T1.Q1.A5,1,2019-09-04 22:50:32.477936
3,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,834a1f2e-ec75-48b3-976c-f8afc38601fb,T1.Q1,select_all,T1.Q1.A6,1,2019-09-04 22:50:32.477936
4,a25ed7d6-4f1b-40a4-9f72-54e13e5658eb,834a1f2e-ec75-48b3-976c-f8afc38601fb,T1.Q1,select_all,T1.Q1.A12,0,2019-09-04 22:50:32.477936
...,...,...,...,...,...,...,...
58,156e8426-0563-41af-95b1-5e32f270aa1d,408085cb-7454-474f-ad50-111b9d124484,T1.Q1,select_all,T1.Q1.A3,0,2019-09-14 01:00:04.461087
59,156e8426-0563-41af-95b1-5e32f270aa1d,408085cb-7454-474f-ad50-111b9d124484,T1.Q1,select_all,T1.Q1.A4,0,2019-09-14 01:00:04.461087
60,156e8426-0563-41af-95b1-5e32f270aa1d,408085cb-7454-474f-ad50-111b9d124484,T1.Q1,select_all,T1.Q1.A4,0,2019-09-14 01:00:04.461087
61,156e8426-0563-41af-95b1-5e32f270aa1d,408085cb-7454-474f-ad50-111b9d124484,T1.Q1,select_all,T1.Q1.A6,1,2019-09-14 01:00:04.461087
