In [160]:
import pandas as pd
import numpy as np
from collections import Counter

### Read data

In [161]:
raw = pd.read_csv('./Batch_4218619_batch_results.csv')

### Flatten data

In [162]:
data = []
for i,row in raw.iterrows():
    for j in range(1, 13):
        query = row['Input.q%d' % j]
        response = row['Input.r%d' % j]
        gender = row['Input.g%d' % j]
        
        if row['Answer.answer%d' % j] is not np.nan:
            answer = row['Answer.answer%d' % j]
        else:
            answer = row['Answer.answer%d.label' % j]
        
        data.append((row['HITId'], j, row['WorkerId'], gender, query, response, answer))

In [163]:
melted = pd.DataFrame(data)
melted.columns = ['HITId', 'q_idx', 'WorkerId', 'gender', 'query', 'response', 'answer']

In [164]:
# include spammers:
include = False
if not include:
    spammers = ['A3A0RP6IUR41PP']
    melted = melted[~melted.WorkerId.isin(spammers)].copy()

In [165]:
data = []
for i, group in melted.groupby(['HITId', 'q_idx']):
    row = []
    row.append(group.iloc[0]['HITId'])
    row.append(group.iloc[0]['gender'])
    row.append(group.iloc[0]['query'])
    row.append(group.iloc[0]['response'])
    for j in range(0,5):
        if j < len(group):
            row.append(group.iloc[j]['WorkerId'])
            row.append(group.iloc[j]['answer'])
    data.append(row)

In [166]:
flat = pd.DataFrame(data)
flat.columns = ['HITId', 'gender', 'query', 'response'] + \
    sum([ ['WorkerId%d' % i, 'answer%d' % i] for i in range(0,5) ], [])

In [167]:
flat.head(1)

Unnamed: 0,HITId,gender,query,response,WorkerId0,answer0,WorkerId1,answer1,WorkerId2,answer2,WorkerId3,answer3,WorkerId4,answer4
0,30Y6N4AHYPXN3KJ9HR3IQO9DAYNDRH,W,What was your relationship like before the drinking?,We were friends but he had a drinking problem.,APKTDTD9LK539,Response satisfactorily answers the question,A33XH3FNYAJ4LE,Response provides some information to the question,A3774HPOUKYTX7,Response satisfactorily answers the question,A1X7PC5PH6JWIS,Response provides some information to the question,,


### Collapse labels

In [168]:
flat.answer0.unique()

array(['Response satisfactorily answers the question',
       'Response does not make sense',
       'Response provides some information to the question',
       'Response provides no information to the question at all'],
      dtype=object)

In [169]:
def i_no_i(x):
    if 'Response satisfactorily' in x:
        return 'i'
    elif 'provides some' in x:
        return 'i'
    elif 'does not make sense' in x:
        return 'ni'
    elif 'provides no' in x:
        return 'ni'

### Interannotator agreement

In [170]:
order = ['Response satisfactorily answers the question', 'Response provides some information to the question', 'Response provides no information to the question at all', 'Response does not make sense']
order = ['i', 'ni']

for i, row in flat.iterrows():
    labels = [ row['answer%d' %i ] for i in range(0, 5) ]
    labels = list(map(i_no_i, labels))
    unique, counts = np.unique(labels,return_counts=True)
    unique = unique.tolist()
    
    l = []
    for i in order:
        if i in unique:   
            idx = unique.index(i)
            count = counts[idx]
        else:
            count = 0
        l.append(str(count))
    #print('\t'.join(l))

TypeError: argument of type 'NoneType' is not iterable

non-collapsed:
37.83%
Kappa: 0.17

collapsed:
68.17%
Kappa: 0.36

### Majority labels

In [171]:
def majority(x, collapse=i_no_i, n=5):
    labels = [ x['answer%d' % i] for i in range(0, n)]
    labels = [ i for i in labels if i ]
    labels = list(map(collapse, labels))
    
    c = Counter(labels)
    commons = c.most_common(2)
    
    if len(commons) > 1 and commons[0][1] == commons[1][1]:
        return 'tie'
    else:
        return commons[0][0]

In [172]:
flat['binary_majority'] = flat.apply(majority, axis=1)
flat['majority'] = flat.apply(lambda x: majority(x, collapse=lambda x:x), axis=1)

### Analysis

In [173]:
for i, group in flat.groupby('gender'):
    print(i)
    print(group.majority.value_counts(normalize=True))

M
Response provides some information to the question         0.359375
Response satisfactorily answers the question               0.296875
tie                                                        0.250000
Response provides no information to the question at all    0.078125
Response does not make sense                               0.015625
Name: majority, dtype: float64
W
Response provides some information to the question         0.303571
tie                                                        0.196429
Response does not make sense                               0.178571
Response satisfactorily answers the question               0.178571
Response provides no information to the question at all    0.142857
Name: majority, dtype: float64


In [174]:
for i, group in flat.groupby('gender'):
    print(i)
    print(group.binary_majority.value_counts(normalize=False))

M
i      47
ni      9
tie     8
Name: binary_majority, dtype: int64
W
i      28
tie    14
ni     14
Name: binary_majority, dtype: int64


### Identifying spammers

In [157]:
for i, group in melted.groupby('WorkerId'):
    print('WorkerId: %s' % i)
    print(group.answer.value_counts())
    print()

WorkerId: A1198W1SPF1R4
Response satisfactorily answers the question          6
Response provides some information to the question    5
Response does not make sense                          1
Name: answer, dtype: int64

WorkerId: A182N7RLXGSCZG
Response satisfactorily answers the question          5
Response does not make sense                          4
Response provides some information to the question    3
Name: answer, dtype: int64

WorkerId: A1NSK7DA2WVAIW
Response provides some information to the question         10
Response does not make sense                                9
Response satisfactorily answers the question                4
Response provides no information to the question at all     1
Name: answer, dtype: int64

WorkerId: A1U3TT3VMH9FU9
Response provides no information to the question at all    4
Response satisfactorily answers the question               4
Response does not make sense                               2
Response provides some information to the question

In [158]:
spammers = ['A3A0RP6IUR41PP']

### Qualitative analysis

In [181]:
flat[['gender', 'binary_majority', 'query', 'response']][flat.binary_majority == 'i'].sample(20)

Unnamed: 0,gender,binary_majority,query,response
97,M,i,"Be fucking honest, would you stand by if you saw a man slap a woman across the face at a bar?",I would
109,W,i,Do your health services treat depression and anxiety?,"They do, but the depression is usually self inflicted."
106,M,i,How do you illustrate a point like that?,"You just draw a straight line and then draw a line of points, and then draw a straight line."
26,M,i,"We havent won over a big name free agent (besides Dwight) since, who, TMac??",Tmac? I don't think so...
18,W,i,you... shouldnt be honest about things like that?,"I am a human being, not a robot."
40,W,i,Perhaps Im just magically friends with the only people in the world who dont have inappropriate ...,I'm not alone!
87,M,i,"If you were to draw up the ideal point guard for the Kings, what would you want?",I would want a center who can shoot.
82,M,i,"Is the issue of a takeover, partnership, or perhaps a new sponsorship deal?",I think we have a new sponsor for this year
108,W,i,"Yknow, how someone will use their thumb to roll in circles on top of your hand?","They're a lot easier to use than a thumb, but I've seen it done"
114,W,i,"But then, when those shoes wear out, why would you pay for more shoes?","I would have to buy a few more pairs, and that's the only reason I would do it for this particul..."


In [177]:
pd.set_option('max_colwidth', 100)
flat[['gender', 'binary_majority', 'query', 'response']][(flat.gender == 'M') & (flat.binary_majority != 'i')]

Unnamed: 0,gender,binary_majority,query,response
8,M,tie,What is so fucking detestable about thousands of people crying and screaming in happiness over a...,"It's just a prank, bro."
12,M,tie,"Blizzard might want to give a more polished product, but at what cost?","Blizzard might be able to get a better product, but at what cost?"
15,M,tie,> Just too much parity nowadays for starters What parity?,"It's just that, parity."
24,M,tie,But who would really want competition?,Who would want competition when they can just buy a car and drive the same way they do now?
36,M,ni,Edit: how could i forget to mention that on average 50+% of our games are blacked out in any sea...,That's actually a very good statistic.
41,M,ni,You know what I also love?,"I like to use the term'not so much'to refer to the'not so much'that is not very good ', as well."
47,M,ni,>Havent we been doing that for the last ten years?,You're right. I just realized that I posted this in a different sub.
49,M,tie,>How exactly is Facebook or youtube a progression of technology that is beneficial?,"I agree, but it's the only thing keeping me alive."
56,M,ni,All that magic rules set aside for Jews?,You just made my day.
67,M,tie,>Ive tried talking to some women in circumcision MENOPHOBIA: Circumcised?,I'm glad someone said it!


In [178]:
flat[['gender', 'binary_majority', 'query', 'response']][(flat.gender == 'W') & (flat.binary_majority != 'i')]

Unnamed: 0,gender,binary_majority,query,response
1,W,tie,Are you under the impression that breast milk or formula is not vegan?,You can't milk a vegan.
7,W,ni,I guess some guys might be surprised by how dirty we are?,It's like we're wearing nothing at all... nothing at all... nothing at all...
11,W,tie,Its not about making things uncomfortable?,It's about making them uncomfortable.
13,W,tie,Hanging around outside someones house and plastering stickers on a dumpster?,That's a really good idea actually. I would love to see that.
19,W,tie,"I know I can find something else to help with my mental issues, but when food is the only thing ...","I'm trying to make myself feel better, I guess."
22,W,tie,Im the *only* person hes attracted to?,"I don't know, but I would guess it is someone that is a fan."
27,W,tie,"As for not being able to drive, can you ask your boyfriend to take you, or take the bus with you?","It's going to be a bus, so I'm not sure if she'll be able to take him..."
35,W,ni,Want to drive more than an hour outside of town for the weekend?,I'm in the same boat.
37,W,ni,They SEW fake hair to their SCALPS?,I wish!
38,W,ni,"Now, if /u/Vicous had PMed OP that in a less jokey, more oh God I got so hot reading your piss s...",You are a good man.
