# User Rep Score

In [115]:
import numpy as np
import pandas as pd
import re

## Loading Data

In [116]:
# Define helper functions
def set_sum(series):
    """Return the union of every collection in *series* as one new set.

    Each element of *series* must itself be iterable (typically a set);
    an empty *series* yields an empty set.
    """
    return {member for collection in series for member in collection}

def into_set(series):
    """Collect the elements of *series* into a new set, dropping duplicates."""
    return set(series)


#Load in the data, process two consensus answers
def _parse_consensus(column):
    """Turn a column of stringified lists into a column of sets of strings.

    Quote and bracket characters are stripped and the remainder is split on
    ", ", so "['a', 'b']" becomes {'a', 'b'}.

    NOTE(review): a cell that is exactly "[]" becomes {''} (length 1), not an
    empty set, so the later `len(...) > 0` consensus checks do not filter it
    out -- confirm whether that is intended.
    """
    cleaned = column
    for char in ("'", "[", "]"):
        # regex=False: these are literal characters, and "[" on its own is not
        # a valid regular expression; being explicit also avoids depending on
        # the pandas-version-specific default of `regex`.
        cleaned = cleaned.str.replace(char, '', regex=False)
    return cleaned.str.split(", ").apply(set)


data = pd.read_csv("IAA-consensus/evidence_datahunt_with_consensus.csv")
for consensus_column in ('adj_consensus', 'iaa_consensus'):
    data[consensus_column] = _parse_consensus(data[consensus_column])


#Reviewers' current reputation score
# Stored scores, indexed by contributor so membership tests and lookups are by uuid.
score_table = pd.read_csv("./User Rep Score/score.csv").set_index('contributor_uuid')

# Every contributor present in the data gets a starting score: the stored one
# when available, otherwise the neutral default of 0.5.
old_rep_scores = {}
for user in data["contributor_uuid"].unique():
    if user in score_table.index:
        old_rep_scores[user] = score_table.loc[user, "score"]
    else:
        old_rep_scores[user] = 0.5

# Placeholder for the updated scores, keyed by the same contributors.
rep_scores = dict.fromkeys(old_rep_scores, 0)

#Produce pivot tables we need for calculating consensus
# One row per article, one column per question; each cell is the union of the
# consensus sets recorded for that article/question pair.
adj_table, iaa_table = (
    pd.pivot_table(data,
                   values=consensus_column,
                   index='article_number',
                   columns='question_label',
                   aggfunc=set_sum)
    for consensus_column in ('adj_consensus', 'iaa_consensus')
)

## Update User Rep Score With Adjacent Consensus

In [101]:
# One row per contributor, one column per question; each cell is the set of
# answer labels that contributor gave for the question. pandas leaves NaN
# (not a set) where a contributor never answered a question.
df = pd.pivot_table(data,
                    values='answer_label',
                    index='contributor_uuid',
                    columns='question_label',
                    aggfunc=into_set)

# The consensus answers do not depend on the user, so resolve them once
# instead of once per user.
# NOTE(review): only the first row of adj_table (indexed by article_number)
# is consulted, exactly as before -- confirm whether a per-article comparison
# was intended.
adj_consensus_by_question = {}
for question_label in df.columns:
    if question_label not in adj_table.columns:
        continue
    adj_ans = adj_table[question_label].iloc[0]
    # A missing article/question combination is NaN rather than a set;
    # treat it like an empty consensus instead of failing on len().
    if isinstance(adj_ans, set) and adj_ans:
        adj_consensus_by_question[question_label] = adj_ans

# Per user: fraction of consensus questions where at least one of the user's
# answers is among the consensus answers. Left empty when no question has a
# consensus, which avoids dividing by zero.
adj_accuracy = {}
if adj_consensus_by_question:
    for user in df.index:
        n_correct = 0
        for question_label, adj_ans in adj_consensus_by_question.items():
            user_ans = df.at[user, question_label]
            if isinstance(user_ans, set) and not user_ans.isdisjoint(adj_ans):
                n_correct += 1
        adj_accuracy[user] = n_correct / len(adj_consensus_by_question)

# Blend the new accuracy with the previous score in equal parts. Users for
# whom no accuracy could be computed keep their previous score unchanged.
for user, old_score in old_rep_scores.items():
    if user in adj_accuracy:
        rep_scores[user] = 0.5 * adj_accuracy[user] + 0.5 * old_score
    else:
        rep_scores[user] = old_score

## Update User Rep Score With IAA Consensus

In [102]:
# The scores produced so far become the baseline for this update.
old_rep_scores = rep_scores

# One row per contributor, one column per question; each cell is the set of
# answer labels that contributor gave for the question. pandas leaves NaN
# (not a set) where a contributor never answered a question.
df = pd.pivot_table(data,
                    values='answer_label',
                    index='contributor_uuid',
                    columns='question_label',
                    aggfunc=into_set)

# The consensus answers do not depend on the user, so resolve them once
# instead of once per user.
# NOTE(review): only the first row of iaa_table (indexed by article_number)
# is consulted, exactly as before -- confirm whether a per-article comparison
# was intended.
iaa_consensus_by_question = {}
for question_label in df.columns:
    if question_label not in iaa_table.columns:
        continue
    iaa_ans = iaa_table[question_label].iloc[0]
    # A missing article/question combination is NaN rather than a set;
    # treat it like an empty consensus instead of failing on len().
    if isinstance(iaa_ans, set) and iaa_ans:
        iaa_consensus_by_question[question_label] = iaa_ans

# Per user: fraction of consensus questions where at least one of the user's
# answers is among the consensus answers. Left empty when no question has a
# consensus, which avoids dividing by zero.
iaa_accuracy = {}
if iaa_consensus_by_question:
    for user in df.index:
        n_correct = 0
        for question_label, iaa_ans in iaa_consensus_by_question.items():
            user_ans = df.at[user, question_label]
            if isinstance(user_ans, set) and not user_ans.isdisjoint(iaa_ans):
                n_correct += 1
        iaa_accuracy[user] = n_correct / len(iaa_consensus_by_question)

# Weight the IAA accuracy at 0.3 against 0.7 for the baseline score. Users
# for whom no accuracy could be computed keep their baseline score.
rep_scores = {
    user: (0.3 * iaa_accuracy[user] + 0.7 * old_score
           if user in iaa_accuracy else old_score)
    for user, old_score in old_rep_scores.items()
}

In [112]:
#Update the repscore csv file
csv = pd.read_csv("./User Rep Score/score.csv").set_index('contributor_uuid')

# Contributors that are not in the stored file yet; decided before any update
# so the existing rows and the new rows are handled separately.
new_users = [user for user in rep_scores if user not in csv.index]

# Overwrite the score of contributors that already have a row.
for user, score in rep_scores.items():
    if user in csv.index:
        csv.loc[user, 'score'] = score

# Add all new contributors in one concat. DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the whole frame each time.
if new_users:
    new_rows = pd.DataFrame(
        {'score': [rep_scores[user] for user in new_users]},
        # Keep the index name so reset_index() below yields a
        # 'contributor_uuid' column rather than a generic 'index' column.
        index=pd.Index(new_users, name=csv.index.name),
    )
    csv = pd.concat([csv, new_rows])

csv = csv.reset_index()
# Writing the result back to disk is left disabled, as in the original cell.
# csv.columns = ['contributor_uuid', 'score']
# csv.to_csv("./User Rep Score/score.csv", index=False)