In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set()
from collections import Counter

### Read data

In [2]:
raw = pd.read_csv('./binary_helpfulness_mturk_results.csv')

### Flatten data

In [3]:
data = []
for i,row in raw.iterrows():
    for j in range(1, 13):
        query = row['Input.q%d' % j]
        response = row['Input.r%d' % j]
        gender = row['Input.g%d' % j]
        
        if row['Answer.answer%d' % j] is not np.nan:
            answer = row['Answer.answer%d' % j]
        else:
            answer = row['Answer.answer%d.label' % j]
        
        data.append((row['HITId'], j, row['WorkerId'], gender, query, response, answer))

In [4]:
melted = pd.DataFrame(data)
melted.columns = ['HITId', 'q_idx', 'WorkerId', 'gender', 'query', 'response', 'answer']

In [5]:
# include spammers:
include = True
if not include:
    spammers = ['A3A0RP6IUR41PP']
    melted = melted[~melted.WorkerId.isin(spammers)].copy()

In [6]:
data = []
for i, group in melted.groupby(['HITId', 'q_idx']):
    row = []
    row.append(group.iloc[0]['HITId'])
    row.append(group.iloc[0]['gender'])
    row.append(group.iloc[0]['query'])
    row.append(group.iloc[0]['response'])
    for j in range(0,5):
        if j < len(group):
            row.append(group.iloc[j]['WorkerId'])
            row.append(group.iloc[j]['answer'])
    data.append(row)

In [7]:
flat = pd.DataFrame(data)
flat.columns = ['HITId', 'gender', 'query', 'response'] + \
    sum([ ['WorkerId%d' % i, 'answer%d' % i] for i in range(0,5) ], [])

In [8]:
flat.head(1)

Unnamed: 0,HITId,gender,query,response,WorkerId0,answer0,WorkerId1,answer1,WorkerId2,answer2,WorkerId3,answer3,WorkerId4,answer4
0,31YWE12TE0DRJ74BZD6OFEL5Q7O7X4,W,What if you dont have friends or family to tur...,"Well, I guess you could get a psychiatrist or ...",A12JKMVUW5HNKO,Helpful,A1ET2J1PIP0RGO,Helpful,A2ARIPM4X6WZAK,Helpful,AYWWE1UD0XJFB,Helpful,AFIK3VBMMX6G6,Unhelpful


### Collapse labels

In [9]:
flat.answer0.unique()

array(['Helpful', 'Unhelpful'], dtype=object)

In [10]:
def i_no_i(x):
    if 'Response satisfactorily' in x:
        return 'i'
    elif 'provides some' in x:
        return 'i'
    elif 'does not make sense' in x:
        return 'ni'
    elif 'provides no' in x:
        return 'ni'

### Interannotator agreement

In [15]:
order=['Helpful', 'Unhelpful']
ls = []

for i, row in flat.iterrows():
    labels = [ row['answer%d' %i ] for i in range(0, 5) ]
    #labels = list(map(i_no_i, labels))
    unique, counts = np.unique(labels,return_counts=True)
    unique = unique.tolist()
    
    l = []
    for i in order:
        if i in unique:   
            idx = unique.index(i)
            count = counts[idx]
        else:
            count = 0
        l.append(str(count))
        ls.append(l)
    print('\t'.join(l))

4	1
2	3
4	1
5	0
4	1
5	0
3	2
3	2
4	1
3	2
3	2
3	2
4	1
2	3
3	2
3	2
5	0
2	3
1	4
5	0
4	1
3	2
5	0
1	4
1	4
3	2
3	2
4	1
4	1
5	0
4	1
4	1
3	2
4	1
3	2
4	1
2	3
3	2
3	2
2	3
2	3
5	0
5	0
2	3
3	2
3	2
1	4
3	2
4	1
2	3
2	3
2	3
1	4
4	1
2	3
1	4
2	3
5	0
3	2
2	3
5	0
3	2
3	2
2	3
3	2
4	1
2	3
3	2
3	2
5	0
5	0
2	3
3	2
2	3
4	1
4	1
3	2
2	3
2	3
5	0
4	1
3	2
4	1
3	2
3	2
4	1
1	4
1	4
3	2
2	3
4	1
3	2
3	2
1	4
3	2
1	4
4	1
2	3
3	2
5	0
2	3
3	2
3	2
3	2
5	0
3	2
5	0
2	3
3	2
2	3
4	1
2	3
2	3
2	3
4	1
0	5
3	2
2	3
5	0
2	3


In [14]:
# number of near agreements
np.mean([ '3' in i for i in ls ])

0.5666666666666667

collapsed:
54.67%
Kappa: 0.09

### Majority labels

In [32]:
def majority(x, collapse=i_no_i, n=5):
    labels = [ x['answer%d' % i] for i in range(0, n)]
    labels = [ i for i in labels if i ]
    labels = list(map(collapse, labels))
    
    c = Counter(labels)
    commons = c.most_common(2)
    
    if len(commons) > 1 and commons[0][1] == commons[1][1]:
        return 'tie'
    else:
        return commons[0][0]

def near_majority(x, collapse=lambda x: x, n=5):
    labels = [ x['answer%d' % i] for i in range(0, n)]
    labels = [ i for i in labels if i ]
    labels = list(map(collapse, labels))
    
    c = Counter(labels)
    commons = c.most_common(2)
    
    return commons[0][1] == 3

In [33]:
flat['binary_majority'] = flat.apply(majority, axis=1)
flat['near_majority'] = flat.apply(near_majority, axis=1)
flat['majority'] = flat.apply(lambda x: majority(x, collapse=lambda x:x), axis=1)

### Analysis

In [34]:
for i, group in flat.groupby('gender'):
    print(i)
    print(group.majority.value_counts(normalize=True))

M
Helpful      0.666667
Unhelpful    0.333333
Name: majority, dtype: float64
W
Helpful      0.666667
Unhelpful    0.333333
Name: majority, dtype: float64


In [35]:
for i, group in flat.groupby('gender'):
    print(i)
    print(group.binary_majority.value_counts(normalize=False))

M
Series([], Name: binary_majority, dtype: int64)
W
Series([], Name: binary_majority, dtype: int64)


### Identifying spammers

In [36]:
for i, group in melted.groupby('WorkerId'):
    print('WorkerId: %s' % i)
    print(group.answer.value_counts())
    print()

WorkerId: A12HLCUXMU9JYT
Helpful      9
Unhelpful    3
Name: answer, dtype: int64

WorkerId: A12JKMVUW5HNKO
Helpful      75
Unhelpful    45
Name: answer, dtype: int64

WorkerId: A1ET2J1PIP0RGO
Helpful      110
Unhelpful     10
Name: answer, dtype: int64

WorkerId: A1SYZTNU9WJAB7
Helpful      14
Unhelpful    10
Name: answer, dtype: int64

WorkerId: A2ARIPM4X6WZAK
Helpful      47
Unhelpful    37
Name: answer, dtype: int64

WorkerId: A2VYFHIU1GYQCD
Unhelpful    8
Helpful      4
Name: answer, dtype: int64

WorkerId: A33D6XFZHIRXMM
Unhelpful    9
Helpful      3
Name: answer, dtype: int64

WorkerId: A3E06RQAB3QSZS
Helpful      15
Unhelpful     9
Name: answer, dtype: int64

WorkerId: A9XX3BUUML3ST
Helpful      6
Unhelpful    6
Name: answer, dtype: int64

WorkerId: AA5ZDXAA2DLY1
Unhelpful    25
Helpful      11
Name: answer, dtype: int64

WorkerId: AFIK3VBMMX6G6
Unhelpful    66
Helpful      54
Name: answer, dtype: int64

WorkerId: AM5DZDG51U3XO
Helpful      11
Unhelpful     1
Name: answer, dtyp

In [37]:
spammers = ['A3A0RP6IUR41PP']

### Qualitative analysis

In [39]:
flat.head(1)

Unnamed: 0,HITId,gender,query,response,WorkerId0,answer0,WorkerId1,answer1,WorkerId2,answer2,WorkerId3,answer3,WorkerId4,answer4,binary_majority,near_majority,majority,permute
0,31YWE12TE0DRJ74BZD6OFEL5Q7O7X4,W,"What if you dont have friends or family to turn too, and counselling isnt covered by your medical?","Well, I guess you could get a psychiatrist or psychologist.",A12JKMVUW5HNKO,Helpful,A1ET2J1PIP0RGO,Helpful,A2ARIPM4X6WZAK,Helpful,AYWWE1UD0XJFB,Helpful,AFIK3VBMMX6G6,Unhelpful,,False,Helpful,W


In [41]:
flat[['gender', 'near_majority','majority', 'query', 'response']][flat.majority == 'Helpful'].sample(20)

Unnamed: 0,gender,near_majority,majority,query,response
41,W,False,Helpful,How is Imgur blocked but not Reddit?,It's a private sub
94,M,True,Helpful,Whats so special about the time between the formation lap and the start?,You can see the lap time in the video description.
99,W,False,Helpful,What do you use to doodle?,I use a program called sketchbook pro
57,W,False,Helpful,What kind of thing do you read?,"I read a lot of stuff, mostly fiction and a bit of fantasy. Mostly I read the occasional science..."
90,W,False,Helpful,What province do you live in?,"Alberta, Canada"
91,W,True,Helpful,How does that NOT HAPPEN to everyone else?,I'm not sure but I think it's a bug.
102,M,True,Helpful,"What the fuck did you just fucking say about me, you little bitch?",I was hoping it'd be a little more subtle...
35,M,False,Helpful,When have you ever made an interesting strategic choice about runes?,Not in a long time.
84,M,True,Helpful,What is your source for these stats?,Just googled it
72,W,True,Helpful,Whats to say that patriarchal societies arent built on social conditioning?,I don't think that's a good thing.


In [45]:
flat[['gender', 'near_majority', 'majority', 'query', 'response']][flat.majority == 'Unhelpful'].sample(20)

Unnamed: 0,gender,near_majority,majority,query,response
36,M,True,Unhelpful,When was the last Supreme Court case about burning jerseys?,I remember the first time I saw a jersey burn.
93,W,False,Unhelpful,Whos to say you wont regret that choice?,I don't know. I'm not a fan of the way things are going for me.
39,M,True,Unhelpful,Who deserves blame in the Detroit case?,No one. The DA was a good guy.
23,W,False,Unhelpful,"When I was young, probably about middle-school age (12-13?",That was a pretty good joke
117,M,True,Unhelpful,Whats the big difference between a candidate who has a 3.8 vs a candidate with a 3.9?,"I'm not sure, but I'm guessing you're talking about the difference between a 3.9 and a 3.8"
54,M,True,Unhelpful,What 14 year-old is going to question his coach?,"You know, I'm just going to guess you've never heard of it."
51,M,True,Unhelpful,Who the fuck was happy with Obamas spying?,The same people who were happy with Obama spying.
18,W,False,Unhelpful,How could you think a mans wife being pregnant wouldnt matter?,It's just a theory.
46,W,False,Unhelpful,How about you tell me what you mean by minority?,I don't think there was a single person in the crowd who didn't know what they were doing.
1,M,True,Unhelpful,"What if you had scrolls like this, right?","I don't think so, I have a lot of them"


In [48]:
pd.set_option('max_colwidth', 100)
flat[['gender', 'near_majority', 'majority', 'query', 'response']][(flat.gender == 'M') & (flat.majority != 'Helpful')]

Unnamed: 0,gender,near_majority,majority,query,response
1,M,True,Unhelpful,"What if you had scrolls like this, right?","I don't think so, I have a lot of them"
36,M,True,Unhelpful,When was the last Supreme Court case about burning jerseys?,I remember the first time I saw a jersey burn.
39,M,True,Unhelpful,Who deserves blame in the Detroit case?,No one. The DA was a good guy.
43,M,True,Unhelpful,How many scientists work at LOreal?,I am one of them.
49,M,True,Unhelpful,What are you not wrong about?,"It's a reference to a movie, and it's just a reference to a song."
50,M,True,Unhelpful,What demands that the criteria be the same despite this?,The criteria is the same.
51,M,True,Unhelpful,Who the fuck was happy with Obamas spying?,The same people who were happy with Obama spying.
54,M,True,Unhelpful,What 14 year-old is going to question his coach?,"You know, I'm just going to guess you've never heard of it."
56,M,True,Unhelpful,What games that you play are bottlenecked by the CPU?,I play a lot of games and I have a GTX 750Ti.
63,M,True,Unhelpful,How is Philly even close to Atlantas level of futility?,"Well, it is a city with a population of about 30k."


In [27]:
flat[['gender', 'binary_majority', 'query', 'response']][(flat.gender == 'W') & (flat.binary_majority != 'i')]

Unnamed: 0,gender,binary_majority,query,response
0,W,,"What if you dont have friends or family to turn too, and counselling isnt covered by your medical?","Well, I guess you could get a psychiatrist or psychologist."
2,W,,"However, what is so horrible about *actually* being friends?",I think that's the point. The only thing that's horrible is that the person who made it probably...
3,W,,"How did you get into tea, if its only been about two years?","It's been about three years since I started, and it's been a great experience!"
4,W,,How did you get into it?,"I was a student, got a job at a local business and moved to the US."
5,W,,What state are you guys in/the breeder in?,I am a Breeder in California.
...,...,...,...,...
108,W,,What kind of thing do you read?,The good ones.
109,W,,What does your husbands family usually do for holidays?,I'm not sure if I've ever had a holiday where the family wasn't involved.
110,W,,Whats the point in raging in ARAM?,So you can get that last gold in the game.
111,W,,how do you put a dollar amount on sleep deprivation?,By not sleeping.


In [28]:
flat[['gender', 'near_majority', 'binary_majority', 'query', 'response']][(flat.near_majority)]

Unnamed: 0,gender,near_majority,binary_majority,query,response


### Statistical analysis

In [30]:
trials = []
for i in range(0, 1000): 
    shuffle = flat['gender'].copy()
    np.random.shuffle(shuffle)
    
    flat['permute'] = shuffle
    pt = list(flat.groupby('permute'))
    
    g1, g2 = pt[0][1], pt[1][1]
    
    t = np.sum(g1.binary_majority.value_counts()['Helpful']) - np.sum(g2.binary_majority.value_counts()['Helpful'])
    trials.append(t)

sns.distplot(trials)

KeyError: 'Helpful'

In [166]:
trials = np.array(trials)
np.mean(trials >= 21)

0.214

In [22]:
flat[['query', 'response']].to_csv('query_response.tsv', sep='\t', header=None, index=False)