In [340]:
import pandas as pd
import numpy as np
# Load the HIT input sheet; it is reshaped to long form and merged with the
# aggregated annotations further down.
batch1 = pd.read_csv("batch_1_50rows_HIT.csv")

def str_replace(df):
    """Remove the literal substrings 'Input.' and 'Answer.' from df's column names.

    The column index of ``df`` is replaced in place; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings.
    """
    for marker in ('Input.', 'Answer.'):
        # regex=False makes the '.' a literal dot. Under the regex
        # interpretation it matches any character (so e.g. 'Inputs' would be
        # stripped too), and relying on the pandas default emits a warning.
        df.columns = df.columns.str.replace(marker, '', regex=False)

# Columns kept from the result sheets, in this order: the four input fields
# for each of the five item slots, then each slot's gold/revision answers,
# then the assignment-level metadata columns.
cols = (
    [f'{field}_{slot}'
     for slot in range(5)
     for field in ('idiom', 'instance', 'label', 'meaning')]
    + [f'{slot}_{answer}'
       for slot in range(5)
       for answer in ('gold', 'revision')]
    + ['TimeMe', 'AssignmentId', 'WorkerId']
)

# One list of column names per item slot (0-4): the four input fields followed
# by that slot's gold and revision answer columns.
column_groups = [
    [f'idiom_{slot}', f'instance_{slot}', f'label_{slot}', f'meaning_{slot}',
     f'{slot}_gold', f'{slot}_revision']
    for slot in range(5)
]

# common columns for all rows
common_cols = ['TimeMe', 'AssignmentId', 'WorkerId']

# Read both annotation sheets and strip the 'Input.' / 'Answer.' markers from
# their column names so they match the names listed in `cols`.
batch1_result = pd.read_csv("batch_1_result.csv")
batch1_correction = pd.read_csv("batch_1_correction.csv")
str_replace(batch1_result)
str_replace(batch1_correction)

# Keep only the columns of interest and fill missing cells with the same '{}'
# placeholder in both frames. The correction frame previously used '{}å'; the
# trailing character was a stray keystroke that made the two frames disagree.
batch1_result = batch1_result[cols].fillna('{}')
batch1_correction = batch1_correction[cols].fillna('{}')

  df.columns = df.columns.str.replace('Input.', '')
  df.columns = df.columns.str.replace('Answer.', '')
  df.columns = df.columns.str.replace('Input.', '')
  df.columns = df.columns.str.replace('Answer.', '')


In [341]:
def anyna(df):
    """Return the sub-frame of ``df`` restricted to rows and columns that contain a missing value.

    Rows are kept when at least one of their cells is NA; columns are kept
    when at least one cell anywhere in the column is NA.
    """
    missing = df.isna()
    rows_with_gap = missing.any(axis=1)
    cols_with_gap = missing.any()
    return df[rows_with_gap].loc[:, cols_with_gap]

In [343]:
def melt_it(df):
    """Stack the per-slot column groups of a wide frame into one long frame.

    For every entry of the module-level ``column_groups``, the matching
    columns plus ``common_cols`` are selected from ``df`` and renamed to the
    shared names idiom/instance/label/meaning/gold/revision. The pieces are
    then concatenated top to bottom with a fresh 0..n-1 index. ``df`` itself
    is not modified.
    """
    unified_names = ['idiom', 'instance', 'label', 'meaning', 'gold', 'revision'] + common_cols

    pieces = []
    for group_cols in column_groups:
        piece = df[group_cols + common_cols].copy()  # copy so renaming never touches df
        piece.columns = unified_names
        pieces.append(piece)

    return pd.concat(pieces, ignore_index=True)

# Rebind both names to their long-form frames. Re-running this cell without
# reloading would pass already-melted frames (which lack the per-slot columns)
# back into melt_it.
batch1_result, batch1_correction = melt_it(batch1_result), melt_it(batch1_correction)

In [344]:
# Collapse each frame to one row per (idiom, instance, label, meaning); every
# remaining column becomes a Python list of the values found in that group.
df1, df2 = (
    frame.groupby(['idiom', 'instance', 'label', 'meaning']).agg(list).reset_index()
    for frame in (batch1_result, batch1_correction)
)

In [345]:
# Stack df1 on top of df2, then keep a single row per
# ('idiom', 'instance', 'label', 'meaning') key. keep='last' means that when a
# key appears in both frames the df2 row wins.
df = (
    pd.concat([df1, df2], ignore_index=True)
    .drop_duplicates(subset=['idiom', 'instance', 'label', 'meaning'], keep='last')
)

In [346]:
# Remove every underscore from the column names so the per-slot columns take
# the stub+suffix form that wide_to_long matches when sep=''.
# (presumably the CSV columns look like 'idiom_0' -> 'idiom0'; verify against the file)
batch1.columns = batch1.columns.str.replace('_', '')

# create an id column numbering the original rows 0..len-1; it is used as part
# of the row identifier `i` in the reshape below
batch1['id'] = np.arange(len(batch1))

# reshape with wide_to_long: one output row per (original row, slot suffix).
# NOTE: 'index', 'id' and 'variable' all become index levels of the result and
# are discarded by the trailing reset_index(drop=True), so the melted frame
# does not retain which original row or slot a record came from.
batch1_melted = pd.wide_to_long(batch1.reset_index(), 
                             stubnames=['idiom', 'instance', 'label', 'meaning'], 
                             i=['index', 'id'], 
                             j='variable', 
                             sep='').reset_index(drop=True)

In [347]:
# Inner join: keep only the melted input rows that have a matching aggregated
# annotation row on all four key columns.
merged_df = pd.merge(
    batch1_melted,
    df,
    how='inner',
    on=['idiom', 'instance', 'label', 'meaning'],
)

In [373]:
from collections import Counter

def most_frequent(array):
    """Return the single most common element of ``array``, or ``'tie'``.

    Parameters
    ----------
    array : iterable of hashable values
        The votes to count.

    Returns
    -------
    The element with the strictly highest count, or the string ``'tie'`` when
    two or more elements share the highest count.

    Raises
    ------
    ValueError
        If ``array`` is empty.
    """
    # The two highest counts are enough to decide between a winner and a tie.
    top_two = Counter(array).most_common(2)

    if not top_two:
        raise ValueError("most_frequent() requires at least one element")

    if len(top_two) == 2 and top_two[0][1] == top_two[1][1]:
        return 'tie'  # no unique winner

    return top_two[0][0]

In [374]:
# Number of annotator assignments kept per item, and the label names addressed
# by their integer code (position in the list).
NUM_ASSIGNMENTS = 4
labels = ['figurative', 'literal', 'ambiguous', 'discard']

# Record how many gold votes each row carries; f() uses it to decide whether
# the per-row lists need truncating.
merged_df['len'] = merged_df['gold'].map(len)
def f(row):
    """Expand one merged row into per-assignment columns plus a majority vote.

    Reads the list-valued 'gold' and 'revision' cells, the integer 'label'
    code and the 'len' cell. Adds ``gold_<i>`` (label name) and
    ``revision_<i>`` for each of the first NUM_ASSIGNMENTS entries, replaces
    'label' by its name, trims the list-valued cells to NUM_ASSIGNMENTS
    entries when the row has more, and stores the majority label (or 'tie')
    in 'maxVote'.

    The row is modified and returned. Indexing raises IndexError when 'gold'
    or 'revision' holds fewer than NUM_ASSIGNMENTS entries.
    """
    for i in range(NUM_ASSIGNMENTS):
        row[f'gold_{i}'] = labels[row['gold'][i]]
        row[f'revision_{i}'] = row['revision'][i]
    row['label'] = labels[row['label']]

    if row['len'] > NUM_ASSIGNMENTS:
        # Trim with the same constant that drives the loop above; the earlier
        # hard-coded 4 silently diverged whenever NUM_ASSIGNMENTS was changed.
        for list_col in ('gold', 'revision', 'TimeMe', 'AssignmentId', 'WorkerId'):
            row[list_col] = row[list_col][:NUM_ASSIGNMENTS]

    row['maxVote'] = most_frequent(
        [row[f'gold_{i}'] for i in range(NUM_ASSIGNMENTS)])
    return row
# Apply f to every row, then discard the helper 'len' column.
final = merged_df.apply(f, axis=1).drop(columns='len')

In [380]:
# Independent copy of just the per-assignment gold label columns.
golds = final.loc[:, [f'gold_{slot}' for slot in range(NUM_ASSIGNMENTS)]].copy()
def g(row):
    """Replace each ``gold_<i>`` label name in ``row`` by its position in ``labels``.

    Covers i in 0..NUM_ASSIGNMENTS-1; the row is modified and returned.
    ``list.index`` raises ValueError for a name that is not in ``labels``.
    """
    for slot in range(NUM_ASSIGNMENTS):
        key = f'gold_{slot}'
        row[key] = labels.index(row[key])
    return row
# Convert every row's label names back to integer codes.
golds = golds.apply(g, axis='columns')

In [377]:
# Write the aggregated frame to disk; index=False omits the row index column.
final.to_csv("batch_1_agg.csv", index=False)

In [381]:
from statsmodels.stats.inter_rater import aggregate_raters
from statsmodels.stats.inter_rater import fleiss_kappa
# Keep only the first element of what aggregate_raters returns
# (per the statsmodels docs this is the subjects x categories count table).
# NOTE: this rebinds `golds`, so re-running the cell feeds the aggregated
# table back into aggregate_raters instead of the per-rater codes.
golds = aggregate_raters(golds)[0]
# Inter-rater agreement computed on that table.
kappa = fleiss_kappa(golds, method='fleiss')

In [382]:
kappa

0.6258000395155159