In [3]:
import pandas as pd
import json
from statsmodels.stats.inter_rater import fleiss_kappa,aggregate_raters
import numpy as np
from collections import Counter
from scipy import stats
import krippendorff
import os
from itertools import chain



In [4]:
# dir = '../phase2_annotations_part1'
dir = '../NER_phase2'

In [5]:
annotation_files = [file for file in os.listdir(dir) if file.endswith('.json')]
annotation_files

['coordinating_conjunction-phaseTwo1117-named_entity_recognition-batch8.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch2.json',
 'coordinating_conjunction-phaseTwo1117-named_entity_recognition-batch4.json',
 'grammatical_role-phaseTwo1117-named_entity_recognition-batch6.json',
 'grammatical_role-phaseTwo1117-named_entity_recognition-batch7.json',
 'coordinating_conjunction-phaseTwo1117-named_entity_recognition-batch5.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch3.json',
 'coordinating_conjunction-phaseTwo1117-named_entity_recognition-batch2.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch8.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch4.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch5.json',
 'grammatical_role-phaseTwo1117-named_entity_recognition-batch1.json',
 'geographical_bias-phaseTwo1117-named_entity_recognition-batch9.json',
 'coordinating_conjunction-phaseTwo1117

In [6]:
test_types = set()
for file in annotation_files:
    task = file.split('-')[1]
    task = task.replace('.json', '')
    # print(task)
    test_types.add(task)
len(test_types)

1

In [7]:
annotations_all = {}
for file in annotation_files:
    fn = os.path.join(dir, file)
    with open(fn, 'rb') as f:
        batch_name = file.split('.json')[0]
        annotations = json.load(f)
        obj = {batch_name: annotations}
        annotations_all.update(obj)

In [8]:
len(annotations_all)

24

In [9]:
tasks_name = ['conversational_qa', 'coreference_resolution', 'dialogue_contradiction_detection', 'named_entity_recognition', 'sentiment_analysis']

In [10]:
modification_name = ['active_to_passive', 'capitalization', 'casual','compound_word', 'concept_replacement', 'coordinating_conjunction', 'derivation', 'dialectal', 'discourse', 'geographical_bias', 'grammatical_role', 'length_bias', 'negation', 'punctuation', 'sentiment', 'temporal_bias', 'typo_bias']

In [11]:
have_subtest = ['concept_replacement', 'dialectal', 'discourse', 'negation']

In [12]:
def construct_data(filter_list, filter = False):
    annotation_dict = {}
    for task in modification_name:
        
        annotation_dict[task] = []
        # test_name = task
        
    for batch_name in annotations_all.keys():
        # print(batch_name)
        task = batch_name.split('-')[0]
        bigtask_name = batch_name.split('-')[2]
        test_name = task 
        release = batch_name.split('-')[1]
        
        for task_name in modification_name:
            if task_name == task:
                
                df_task = []
                for annotator, value in annotations_all[batch_name].items():
                    if filter:
                        if annotator in filter_list:
                            continue
                    # for sample in annotator['answers']:
                    # print(annotator)
                    # print(value['answers'])
                    df = pd.DataFrame(value['answers'])
                    subtests = []
                    tests = [test_name]*len(df)
                    tasks = [bigtask_name]*len(df)
                    new_row = {}
                    for i, row in df.iterrows():
                        if row['is_control'] == True:
                            subtest = 'control'
                        else:
                            if release == 'release0808':
                                if task_name == 'concept_replacement':
                                    subtest = 'synonym'
                                elif task_name == 'discourse':
                                    subtest = 'reverse'
                                elif task_name == 'negation':
                                    subtest = 'verbal'
                                else:
                                    subtest = ''
                            else:
                                subtest = row['original_question'].get('type','')
                            if test_name not in have_subtest:
                                subtest = test_name
                        subtests.append(subtest)
                    # if len(df) == 22:
                    #     worker_id = list(df['worker_id'])[0]
                    #     if df['is_control'].value_counts().get(True) < 3:
                    #         is_control = True
                    #         answer = 1
                    #         subtest = 'control'
                    #     else:
                    #         is_control = False
                    #         answer = 0
                    #     # if bigtask_name == 'active_to_passive':
                    #         # print(df)
                    #     new_row = {'time': None, 'instance_index': len(df)+1, 'worker_id': worker_id,	'explanation': None, 'answer': str(answer),	'original_question': 0,	'is_control':is_control, 'subtest': subtest, 'test': test_name, 'task': bigtask_name}
                    # print(len(df))
                    if len(df) < 15 or len(df)==16:
                        print(task, annotator) #some batches with less than 12 questions
                        continue
                    df['test'] = tests
                    df['task'] = tasks
                    df['subtest'] = subtests
                    if len(df) == 22:
                        new_row_df = pd.DataFrame([new_row])
                        df = pd.concat([df,new_row_df], ignore_index = True)
                    df_task.append(df)
                if len(df_task) <= 1:
                    continue
                df_task = pd.concat(df_task)    
                annotation_dict[task_name].append(df_task)
        # break
    return annotation_dict


In [13]:
annotations = construct_data(filter_list = None)

geographical_bias 571daf6113b1b70013be23dc


In [14]:
annotations['grammatical_role'][2]['subtest'].value_counts()

subtest
grammatical_role    45
control              9
Name: count, dtype: int64

In [15]:
def find_modes_and_counts(arr):
    # print(len(arr))
    # for row in arr:
        # print(len(row))
    arr = list(arr)
    # print(arr)
    arr = np.array(arr)
    # print(arr)
    _, cols = arr.shape
    modes = []
    counts = []
    
    # Process each column separately

    for j in range(cols):
        values, value_counts = np.unique(arr[:, j], return_counts=True)
        max_count = value_counts.max()
        # Find all values that appear max times
        col_modes = values[value_counts == max_count]
        # print(col_modes[0])
        # print(max_count[0])
        
        modes.append(col_modes[0])
        counts.append(max_count)
    
    modes = np.array(modes)
    counts = np.array(counts)
    return modes, counts


In [16]:
def string_xor_logical(arr1, arr2):
    # Using logical_xor for element-wise comparison
    arr1 = np.asarray(arr1)
    arr2 = np.asarray(arr2)
    
    # Convert strings to boolean based on non-emptiness
    # bool1 = arr1 != ''
    # bool2 = arr2 != ''
    # print(bool1)
    # print(bool2)
    # Perform logical XOR
    return np.logical_xor(arr1, arr2)


In [17]:
def extract_label(arr):
    # preds = arr.apply(lambda x: sorted(x, key=lambda item: item.get('text', '')))

    # preds = sorted_values.apply(lambda x: [item['value'] for item in x])
    # preds = sorted_values.apply(lambda x: '_'.join(item['value'] for item in x))
    preds = arr
    preds = list(chain.from_iterable(preds))
    preds = np.array(preds).astype(str)
    return preds

In [18]:
def calculate_krippendof_for_each_batch(df_batch):
    annotation_data = []
    # print(df_batch)
    for group in df_batch.groupby('worker_id'):
        # print(group[1]['answer'])
        preds = group[1]['answer']
        preds = extract_label(preds)

        # print(len(preds))
        annotation_data.append(preds)
    # print(annotation_data)

    data = np.array(annotation_data)
    # print(data)
    # print(data)
    # Transpose the data
    # transposed_data = data.T

    # Transform the transposed data
    # aggregated_data, categories = aggregate_raters(transposed_data)
    # print(aggregated_data)
    # Calculate Fleiss' Kappa
    alpha = krippendorff.alpha(data, level_of_measurement='nominal')
    # kappa = fleiss_kappa(aggregated_data, method='randolph')
    # print(f"Krippendorff alpha: {alpha:.4f}")
    return alpha

In [19]:
def calculate_fleiss_kappa_for_each_batch(df_batch):
    annotation_data = []
    for group in df_batch.groupby('worker_id'):
        # print(group[1]['answer'])
        preds = group[1]['answer']
        preds = extract_label(preds)
        # print(len(preds))
        # print(preds)
        # preds = np.array(group[1]['answer']).astype(str)
        
        annotation_data.append(preds)
    # print(len(annotation_data))
    # for row in annotation_data:
    #     print(len(row))
    # print(annotation_data)
    data = np.array(annotation_data)

    # print(data)
    # print(data)
    # Transpose the data
    transposed_data = data.T

    # Transform the transposed data
    aggregated_data, categories = aggregate_raters(transposed_data)
    # print(aggregated_data)
    # Calculate Fleiss' Kappa
    kappa = fleiss_kappa(aggregated_data, method='randolph')
    # print(f"Fleiss' Kappa: {kappa:.4f}")
    return kappa

In [20]:
def get_majority(batch, df_performance = None):
    annotation_data = []
    annotation_data_no_control = []
    annotation_data_control_only = []
    cur_best_annotator_score = 0
    best_preds = None
    for group in batch.groupby('worker_id'):
        # print(df_annotator[df_annotator[['success_rate_control_only']] == group[0]])
        # preds = list(group[1]['answer'])
        preds = group[1]['answer']
        preds = extract_label(preds)
        # answers_no_control = group[1][group[1]['is_control'] != True]['answer']
        answers_no_control = group[1][group[1]['is_control'] != True]['answer']
        answers_no_control = extract_label(answers_no_control)

        answers_control_only = group[1][group[1]['is_control'] == True]['answer'] 
        answers_control_only = extract_label(answers_control_only)

        if df_performance.empty != True:
            # print(df_performance)
            # print(group[0])
            annotator_score = df_performance[df_performance['annotator_id'] == group[0]]['success_rate_control_only'].values[0]
            # print(annotator_score)
            if annotator_score > cur_best_annotator_score:
                best_preds = preds
                cur_best_annotator_score = annotator_score

        # answers_control_only = group[1][group[1]['is_control'] == True]['answer'] 


        annotation_data.append(preds)
        annotation_data_no_control.append(answers_no_control)
        annotation_data_control_only.append(answers_control_only)


    # print(annotation_data)
    # modes  = stats.mode(annotation_data,axis = 0,keepdims = True).mode[0]
    # counts   = stats.mode(annotation_data,axis = 0,keepdims = True).count[0]
    # print(annotation_data)
    modes, counts = find_modes_and_counts(annotation_data)
    # print(stats.mode(annotation_data_no_control, axis=0, keepdims = True))
    # print(annotation_data_no_control)
    # modes_no_control = stats.mode(annotation_data_no_control, axis=0, keepdims = True).mode[0]
    # counts_no_control = stats.mode(annotation_data_no_control, axis=0, keepdims = True).count[0]
    modes_no_control, counts_no_control = find_modes_and_counts(annotation_data_no_control)
    if df_performance.empty != True:
        for i, count in enumerate(counts):
            if count <= len(batch.groupby('worker_id')) // 2:
                modes[i] = best_preds[i]

    # modes_control_only = stats.mode(annotation_data_control_only, axis=0).mode[0]
    modes_control_only, counts_control_only = find_modes_and_counts(annotation_data_control_only)
    # modes_control_only = np.zeros(len(modes) - len(modes_no_control), dtype = int)
    
    # print(modes.mode[0])
    return modes, modes_no_control, modes_control_only, counts

In [21]:
def calculate_success_rate_majority(batch, df_performance = None):
    rates = []
    rates_no_control = []
    rates_control_only = []
    rates_gpt4 = []
    modes, modes_no_control, modes_control_only, majority_counts = get_majority(batch, df_performance)
    print(len(modes))
    # print(modes)
    # print(modes_no_control)
    # print(modes_control_only)
    df_annotator = []
    # answers = np.array(batch.groupby('worker_id')[0][1]['answer'])
    # print(len(batch.groupby('worker_id')))
    majority = len(batch.groupby('worker_id')) // 2
    # annotator_counts = len(batch.groupby('worker_id'))
    retain_count = 0
    total_count = 0
    retain_samples = [None]*len(modes)
    # retain = False
    
    for j,group in enumerate(batch.groupby('worker_id')):
        # if j == 1:
            # print(len(group[1]))
        cur_idx = 0
        for i,sample in group[1].iterrows():
            if sample['is_control'] == False:
                number_of_entities = len(sample['original_question']['entities'])
                
                item = {}
                if type(sample['original_question']) == int:
                        continue
                item['original_question'] = sample['original_question']                       
                item['task'] = sample['task']
                item['test'] = sample['test']
                item['subtest'] = sample['subtest']
                # item['label'] = sample['answer']
                label_list = []
                for j in range(number_of_entities):
                    label_list.append(modes_no_control[cur_idx])
                    cur_idx += 1
                item['label'] = label_list
                # item['explanation'] = sample['explanation']
                print(item['original_question']['entities'])
                print(number_of_entities)
                print(label_list)
                # print(modes)
                retain_samples[i] = item
        retain = True
        answers = group[1]['answer']
        answers = extract_label(answers)
        answers_no_control = group[1][group[1]['is_control'] != True]['answer']
        answers_no_control = extract_label(answers_no_control)
        answers_control_only = group[1][group[1]['is_control'] == True]['answer']
        answers_control_only = extract_label(answers_control_only)

        
        correct_answer = answers == modes
        success_rate = np.count_nonzero(correct_answer) / len(answers)

        # print('Answer', answers_no_control)
        # print('Modes', modes_no_control)
        # print(np.logical_xor(answers_no_control, modes_no_control))
        correct_answer_no_control =  answers_no_control == modes_no_control

        success_rate_no_control = np.count_nonzero(correct_answer_no_control) / len(answers_no_control)

        correct_answer_control_only = answers_control_only == modes_control_only
        success_rate_control_only = np.count_nonzero(correct_answer_control_only) / len(answers_control_only)

        # retain_count += np.count_nonzero(modes_no_control)
        # total_count += len(answers_no_control)

        success_rate_gpt4 = np.count_nonzero(modes_no_control) / len(modes_no_control)
        negative_count = np.count_nonzero(answers_no_control == 0) 
        # print('Success rate for annotator with control', group[0], success_rate)
        # print('Success rate for annotator without control', group[0], success_rate_no_control)
        # print('Success rate for annotator control only', group[0], success_rate_control_only)
        df_annotator_row = {}
        df_annotator_row['annotator_id'] = group[0]
        df_annotator_row['success_rate_with_control'] = success_rate
        df_annotator_row['success_rate_without_control'] = success_rate_no_control
        df_annotator_row['success_rate_control_only'] = success_rate_control_only
        df_annotator_row['negative_count'] = negative_count
        # df_annotator_row['counts'] = counts
        df_annotator.append(df_annotator_row)

        rates.append(success_rate)
        rates_no_control.append(success_rate_no_control)
        rates_control_only.append(success_rate_control_only)
        rates_gpt4.append(success_rate_gpt4)

    # print(majority_counts)
    # print(majority)
    majority_rate = (majority_counts>majority).sum()/ len(answers)
    # print(len(retain_samples))
    total_count = len(answers_no_control)
    retain_count = len(retain_samples)
    # retain_count = np.count_nonzero(modes_no_control)
    # print(total_count, retain_count)
    # print(retain_samples)
    retain_samples = [x for x in retain_samples if x is not None]
    # print(retain_samples)
    return np.mean(rates), np.mean(rates_no_control), np.mean(rates_control_only), np.mean(rates_gpt4), majority_rate, retain_samples, total_count, retain_count, df_annotator


In [22]:
def calculate_all(filter_list, filter, hit_function, df_performance = None):
    df_task = []
    df_annotator  = []
    df_subtask = []
    df_retain_task = []
    annotation_dict = construct_data(filter_list, filter)
    for key,value in annotation_dict.items():
        # print(key)
        kappa_task = []
        krippendorff_task = []
        success_rate_task = []
        success_rate_task_no_control = []
        success_rate_task_control_only = []
        success_rate_gpt4_task = []
        majority_rate_task = []
        for i, batch in enumerate(value):
            # print(batch)
            # if i==0:
            test_name = key
            task_name = list(batch['task'])[0]
            subtest_name = list(batch['subtest'])
            for name in subtest_name:
                if name != 'control':
                    subtest_name = name
                    break
            print(task_name, test_name, subtest_name)
                
            
            kappa_batch = calculate_fleiss_kappa_for_each_batch(batch)
            krippendorff_batch = calculate_krippendof_for_each_batch(batch)
            if hit_function == 'majority':
                success_rate_batch, success_rate_batch_no_control, success_rate_batch_control_only, success_rate_gpt4_batch, majority_rate_batch, retain_samples_batch, total_count_batch, retain_count_batch,  df_annotator_row = calculate_success_rate_majority(batch, df_performance)
            # print('Fleiss kappa batch', i+1, kappa_batch)
            # print('Krippendorff alpha batch', i+1, krippendorff_batch)

            kappa_task.append(kappa_batch)
            krippendorff_task.append(krippendorff_batch)

            # print('Success rate batch with control', i+1, success_rate_batch)
            # print('Success rate batch without control', i+1, success_rate_batch_no_control)
            success_rate_task.append(success_rate_batch)
            success_rate_task_no_control.append(success_rate_batch_no_control)
            success_rate_task_control_only.append(success_rate_batch_control_only)
            success_rate_gpt4_task.append(success_rate_gpt4_batch)
            majority_rate_task.append(majority_rate_batch)
            # annotator_counts_task.append(annotator_counts_batch)
            df_annotator.extend(df_annotator_row) 
            # df_majority_counts.extend(majority_counts)
            # df_annotator_counts.append(annotator_counts_batch)
            df_retain_task.extend(retain_samples_batch)
            df_subtask_row = {}
            df_subtask_row['task'] = task_name
            df_subtask_row['test'] = test_name
            df_subtask_row['subtest'] = subtest_name
            df_subtask_row['total'] = total_count_batch
            df_subtask_row['retain'] = retain_count_batch
            df_subtask.append(df_subtask_row)
        # print('Fleiss kappa for', key, np.mean(kappa_task))
        # print('Krippendorff alpha for', key, np.mean(krippendorff_task))

        # print('Success rate task with control', key, np.mean(success_rate_task))
        # print('Success rate task without control', key, np.mean(success_rate_task_no_control))
        # print('Success rate task control only', key, np.mean(success_rate_task_control_only))

        df_task_row = {}
        df_task_row['task'] = key
        df_task_row['kappa'] = np.mean(kappa_task)
        df_task_row['krippendorff'] = np.mean(krippendorff_task)

        df_task_row['success_rate_with_control'] = np.mean(success_rate_task)
        df_task_row['success_rate_without_control'] = np.mean(success_rate_task_no_control)
        df_task_row['success_rate_control_only'] = np.mean(success_rate_task_control_only)
        df_task_row['success_rate_gpt4'] = np.mean(success_rate_gpt4_task)
        df_task_row['majority_rate'] = np.mean(majority_rate_task)
        # df_task_row['annotator_counts'] = sum(annotator_counts_task)
        # print('annotator count',df_annotator_counts)
        df_task.append(df_task_row)
    df_task = pd.DataFrame(data = df_task)
    df_subtask = pd.DataFrame(data = df_subtask)
    df_annotator = pd.DataFrame(data = df_annotator)
    # print(df_retain_task)
    df_retain_task = pd.DataFrame(data = df_retain_task)
    return df_task, df_annotator, df_retain_task, df_subtask

In [23]:
df_task, df_annotator, df_retain_task, df_subtask = calculate_all(filter_list = None, filter = False, hit_function = 'majority', df_performance = pd.DataFrame())

geographical_bias 571daf6113b1b70013be23dc
named_entity_recognition coordinating_conjunction coordinating_conjunction
48
['Covaci', 'Baniciu']
2
[np.str_("{'value': 'PERSON', 'text': 'Covaci'}"), np.str_("{'value': 'PERSON', 'text': 'Baniciu'}")]
['Elin Hilderbrand', 'Nicholas Sparks', 'American']
3
[np.str_("{'value': 'PERSON', 'text': 'Elin Hilderbrand'}"), np.str_("{'text': 'Nicholas Sparks', 'value': 'PERSON'}"), np.str_("{'value': 'MISC', 'text': 'American'}")]
['Ronald', 'Iceland']
2
[np.str_("{'text': 'Ronald', 'value': 'PERSON'}"), np.str_("{'text': 'Iceland', 'value': 'LOCATION'}")]
['Reform Jews', 'Conservative Jews', 'Jewish']
3
[np.str_("{'text': 'Reform Jews', 'value': 'ORGANIZATION'}"), np.str_("{'value': 'ORGANIZATION', 'text': 'Conservative Jews'}"), np.str_("{'text': 'Jewish', 'value': 'MISC'}")]
['Ronald', 'Iceland']
2
[np.str_("{'value': 'PERSON', 'text': 'Ronald'}"), np.str_("{'text': 'Iceland', 'value': 'LOCATION'}")]
['Ronald', 'Iceland']
2
[np.str_("{'value': 'PE

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


56
['Tonga Fire Services', "T-80 ``Envoy''", 'Tonga Fire Brigade Unit 51']
3
[np.str_("{'text': 'Tonga Fire Services', 'value': 'ORGANIZATION'}"), np.str_('{\'value\': \'PRODUCT\', \'text\': "T-80 ``Envoy\'\'"}'), np.str_("{'value': 'ORGANIZATION', 'text': 'Tonga Fire Brigade Unit 51'}")]
['Pemadam Kebakaran Indonesia', "MP-200 ``Duta''", 'Jakarta Fire Brigade Unit 51']
3
[np.str_("{'text': 'Pemadam Kebakaran Indonesia', 'value': 'ORGANIZATION'}"), np.str_('{\'text\': "MP-200 ``Duta\'\'", \'value\': \'PRODUCT\'}'), np.str_("{'text': 'Jakarta Fire Brigade Unit 51', 'value': 'ORGANIZATION'}")]
['Battle of Imjin River', 'Seo Hui', 'Joseon Military Reserve']
3
[np.str_("{'text': 'Battle of Imjin River', 'value': 'EVENT'}"), np.str_("{'value': 'PERSON', 'text': 'Seo Hui'}"), np.str_("{'value': 'ORGANIZATION', 'text': 'Joseon Military Reserve'}")]
['Battle of Paysandú', 'Artigas', 'National Defense Corps']
3
[np.str_("{'text': 'Battle of Paysandú', 'value': 'EVENT'}"), np.str_("{'value': 'PE

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [24]:
df_task = df_task.dropna()

In [25]:
df_task = df_task.drop(columns=['success_rate_gpt4'])

In [26]:
df_task

Unnamed: 0,task,kappa,krippendorff,success_rate_with_control,success_rate_without_control,success_rate_control_only,majority_rate
5,coordinating_conjunction,0.331037,0.331022,0.603686,0.604777,0.621131,0.555572
9,geographical_bias,0.345843,0.348345,0.609209,0.610608,0.601369,0.554317
10,grammatical_role,0.402658,0.403734,0.646107,0.640502,0.65787,0.593294


In [27]:
df_annotator_group = df_annotator.groupby('annotator_id').mean().reset_index()
df_annotator_group['success_rate_control_only'].value_counts()

success_rate_control_only
0.750000    9
0.500000    8
0.571429    3
0.625000    3
0.600000    3
0.800000    3
0.400000    3
0.714286    2
0.777778    2
0.454545    2
0.222222    2
0.666667    2
0.555556    2
0.788889    1
0.727273    1
0.461538    1
0.250000    1
0.909091    1
0.833333    1
0.823529    1
0.700000    1
0.585714    1
0.746032    1
0.647059    1
0.600000    1
0.545455    1
0.825758    1
0.416667    1
0.785714    1
1.000000    1
0.464286    1
0.875000    1
0.583333    1
0.416667    1
0.692308    1
0.519231    1
0.642857    1
0.888889    1
0.857143    1
0.764706    1
0.363636    1
0.200000    1
Name: count, dtype: int64

In [30]:
df_retain_task[df_retain_task['test'] == 'coordinating_conjunction']

Unnamed: 0,original_question,task,test,subtest,label
0,"{'index_in_phase1_annotated_data': 2, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Covaci'}, {'valu..."
1,"{'index_in_phase1_annotated_data': 15, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Elin Hilderbrand..."
2,"{'index_in_phase1_annotated_data': 1, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Ronald', 'value': 'PERSON'}, {'text..."
3,"{'index_in_phase1_annotated_data': 3, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Reform Jews', 'value': 'ORGANIZATIO..."
4,"{'index_in_phase1_annotated_data': 10, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Ronald'}, {'text..."
...,...,...,...,...,...
115,"{'index_in_phase1_annotated_data': 10, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Covaci', 'value': 'PERSON'}, {'text..."
116,"{'index_in_phase1_annotated_data': 12, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Jamil', 'value': 'PERSON'}, {'value..."
117,"{'index_in_phase1_annotated_data': 2, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'LOCATION', 'text': 'Queens County'..."
118,"{'index_in_phase1_annotated_data': 11, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'This collection', 'value': 'NON-ENT..."


In [29]:
df_retain_task['label'][2]

[np.str_("{'text': 'Ronald', 'value': 'PERSON'}"),
 np.str_("{'text': 'Iceland', 'value': 'LOCATION'}")]

In [30]:
df_retain_task

Unnamed: 0,original_question,task,test,subtest,label
0,"{'index_in_phase1_annotated_data': 2, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Covaci'}, {'valu..."
1,"{'index_in_phase1_annotated_data': 15, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Elin Hilderbrand..."
2,"{'index_in_phase1_annotated_data': 1, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Ronald', 'value': 'PERSON'}, {'text..."
3,"{'index_in_phase1_annotated_data': 3, 'text': ...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'text': 'Reform Jews', 'value': 'ORGANIZATIO..."
4,"{'index_in_phase1_annotated_data': 10, 'text':...",named_entity_recognition,coordinating_conjunction,coordinating_conjunction,"[{'value': 'PERSON', 'text': 'Ronald'}, {'text..."
...,...,...,...,...,...
352,"{'index_in_phase1_annotated_data': 15, 'text':...",named_entity_recognition,grammatical_role,grammatical_role,"[{'text': 'Like It or Not', 'value': 'ART'}, {..."
353,"{'index_in_phase1_annotated_data': 1, 'text': ...",named_entity_recognition,grammatical_role,grammatical_role,"[{'text': 'Maximilian III Joseph, Elector of B..."
354,"{'index_in_phase1_annotated_data': 7, 'text': ...",named_entity_recognition,grammatical_role,grammatical_role,"[{'text': 'Holy Trinity Church', 'value': 'BUI..."
355,"{'index_in_phase1_annotated_data': 14, 'text':...",named_entity_recognition,grammatical_role,grammatical_role,"[{'value': 'PERSON', 'text': 'Alex Salmond'}, ..."


In [31]:
# dialog_label_mapping = {0:'No', 1: 'Yes', 2: 'Hard to say'}
# sentiment_label_mapping = {0: 'Negative', 1: 'Positive'}

In [282]:
rows = []
for i, row in df_retain_task.iterrows():
    task  = row['task']
    test = row['test']
    if task == 'coreference_resolution':
        task_dir = 'thinh'
    elif task == 'dialog_contradiction_detection':
        task_dir = 'rongxin'
    elif task == 'sentiment_analysis':
        task_dir = 'yulia'
    file_to_load = '../data_for_phase2/' + task_dir + '/' + test + '.json'
    df_original = pd.read_json(file_to_load)
    # print(row['original_question'])
    if row['original_question'].get('index_in_phase1_annotated_data')!=None:
        index = row['original_question']['index_in_phase1_annotated_data']
        # print(df_original.iloc[index])
        # if df_original.iloc[index].get('original_label')!=None:
        original_label = df_original.iloc[index]['original_label']
        candidates = df_original.iloc[index]['original_candidates']
        # print(candidates)
        original_label = candidates[original_label]

        # else:
            # original_label = df_original.iloc[index]['label']
            
    elif row['original_question'].get('index_in_original_testset')!=None:
        index = row['original_question']['index_in_original_testset']
        # print(index)
        # print(df_original)
        original_label = df_original.loc[df_original[0] == index][1].values[0]['label']
        original_label = dialog_label_mapping[original_label]
        # print()
        # print(original_label)
    else:
        # print(row['original_question'])
        index = row['original_question']['idx']
        original_label = df_original.loc[df_original['idx'] == index]['label'].values[0]
        original_label = sentiment_label_mapping[original_label]

        # print(original_label)
    row['original_label'] = original_label
    rows.append(row)


In [283]:
df_phase2  = pd.DataFrame(data = rows)

In [None]:
df_phase2[df_phase2['task'] == 'coreference_resolution']

In [285]:
df_phase2['label_change'] = (df_phase2['label'] != df_phase2['original_label']).astype(int)

In [None]:
df_phase2

In [None]:
df_phase2[df_phase2['task'] == 'coreference_resolution']

In [None]:
label_change_rate = (df_phase2.groupby(['task', 'test'])['label_change']
                    .mean()  # Calculate mean of label_change (will give us the rate)
                    .multiply(100)  # Convert to percentage
                    .round(2)  # Round to 2 decimal places
                    .reset_index())  # Convert from Series to DataFrame

# Display the results
print("Label Change Rate (%) by Task and Test:")
print(label_change_rate)

In [None]:
# Calculate label change statistics for each group
label_change_stats = (df_phase2.groupby(['task', 'test'])
                     .agg({
                         'label_change': ['size','sum', 'mean' ]  # mean for rate, sum for mismatches, size for total
                     })
                     .round(4))

# Flatten column names and reset index
label_change_stats.columns = ['total_samples', 'samples_with_label_change' ,'change_rate' ]
label_change_stats = label_change_stats.reset_index()

# Convert rate to percentage
label_change_stats['change_rate'] = label_change_stats['change_rate'] * 100

# Sort by change rate in descending order (optional)
label_change_stats = label_change_stats.sort_values('change_rate', ascending=False)

print("Label Change Statistics by Task and Test:")
print(label_change_stats)

In [None]:
label_change_stats

In [31]:
filter_annotators = list(df_annotator_group.loc[df_annotator_group['success_rate_control_only'] < 0.5]['annotator_id'])

In [32]:
len(filter_annotators)

14

In [33]:
df_task_filter, df_annotator_filter, df_retain_task_filter, df_subtask_filter = calculate_all(filter_list = filter_annotators, filter = True, hit_function = 'majority', df_performance= df_annotator_group)

geographical_bias 571daf6113b1b70013be23dc
named_entity_recognition coordinating_conjunction coordinating_conjunction
48
['Covaci', 'Baniciu']
2
[np.str_("{'value': 'PERSON', 'text': 'Covaci'}"), np.str_("{'value': 'PERSON', 'text': 'Baniciu'}")]
['Elin Hilderbrand', 'Nicholas Sparks', 'American']
3
[np.str_("{'value': 'PERSON', 'text': 'Elin Hilderbrand'}"), np.str_("{'value': 'PERSON', 'text': 'Nicholas Sparks'}"), np.str_("{'value': 'MISC', 'text': 'American'}")]
['Ronald', 'Iceland']
2
[np.str_("{'value': 'PERSON', 'text': 'Ronald'}"), np.str_("{'text': 'Iceland', 'value': 'LOCATION'}")]
['Reform Jews', 'Conservative Jews', 'Jewish']
3
[np.str_("{'text': 'Reform Jews', 'value': 'MISC'}"), np.str_("{'value': 'ORGANIZATION', 'text': 'Conservative Jews'}"), np.str_("{'text': 'Jewish', 'value': 'MISC'}")]
['Ronald', 'Iceland']
2
[np.str_("{'value': 'PERSON', 'text': 'Ronald'}"), np.str_("{'text': 'Iceland', 'value': 'LOCATION'}")]
['Ronald', 'Iceland']
2
[np.str_("{'value': 'PERSON', '

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


54
['Soweto', 'Gauteng', 'Protea Primary School', 'Meadowlands High School', 'Orlando West High School']
5
[np.str_("{'text': 'Soweto', 'value': 'LOCATION'}"), np.str_("{'value': 'LOCATION', 'text': 'Gauteng'}"), np.str_("{'text': 'Protea Primary School', 'value': 'ORGANIZATION'}"), np.str_("{'text': 'Meadowlands High School', 'value': 'ORGANIZATION'}"), np.str_("{'text': 'Orlando West High School', 'value': 'ORGANIZATION'}")]
['Tirana', 'Socialist Party of Albania']
2
[np.str_("{'text': 'Tirana', 'value': 'LOCATION'}"), np.str_("{'value': 'ORGANIZATION', 'text': 'Socialist Party of Albania'}")]
['Cochabamba', 'Movement for Socialism party']
2
[np.str_("{'text': 'Cochabamba', 'value': 'LOCATION'}"), np.str_("{'text': 'Movement for Socialism party', 'value': 'ORGANIZATION'}")]
[]
0
[]
['Guangzhou', 'Communist Party of China']
2
[np.str_("{'text': 'Guangzhou', 'value': 'LOCATION'}"), np.str_("{'text': 'Communist Party of China', 'value': 'ORGANIZATION'}")]
['South Sudan', 'CECAFA Cup', '

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [36]:
len(df_retain_task[df_retain_task['test'] == 'coordinating_conjunction'])

120

In [35]:
len(df_retain_task_filter[df_retain_task_filter['test'] == 'coordinating_conjunction'])

75

In [38]:
print(len(df_retain_task[df_retain_task['test'] == 'geographical_bias']))
print(len(df_retain_task_filter[df_retain_task_filter['test'] == 'geographical_bias']))

135
135


In [39]:
print(len(df_retain_task[df_retain_task['test'] == 'grammatical_role']))
print(len(df_retain_task_filter[df_retain_task_filter['test'] == 'grammatical_role']))

102
102


In [None]:
rows = []
rows_coref = []
rows_sentiment = []
rows_dialog = []

for i, row in df_retain_task_filter.iterrows():
    task  = row['task']
    test = row['test']
    if task == 'coreference_resolution':
        task_dir = 'thinh'
    elif task == 'dialog_contradiction_detection':
        task_dir = 'rongxin'
    elif task == 'sentiment_analysis':
        task_dir = 'yulia'
    file_to_load = '../data_for_phase2/' + task_dir + '/' + test + '.json'
    df_original = pd.read_json(file_to_load)
    # print(row['original_question'])
    if row['original_question'].get('index_in_phase1_annotated_data')!=None:
        index = row['original_question']['index_in_phase1_annotated_data']
        # print(df_original.iloc[index])
        # if df_original.iloc[index].get('original_label')!=None:
        original_label = df_original.iloc[index]['original_label']
        candidates = df_original.iloc[index]['original_candidates']
        # print(candidates)
        original_label = candidates[original_label]
        original_row = df_original.iloc[index]
        original_row['modified_label'] = row['label']
        original_row['test'] = row['test']
        rows_coref.append(original_row)
        # else:
            # original_label = df_original.iloc[index]['label']
            
    elif row['original_question'].get('index_in_original_testset')!=None:
        index = row['original_question']['index_in_original_testset']
        # print(index)
        # print(df_original)
        original_label = df_original.loc[df_original[0] == index][1].values[0]['label']
        original_label = dialog_label_mapping[original_label]
        original_row = df_original.loc[df_original[0] == index][1].values[0]
        # print()
        original_row['modified_label'] = row['label']
        original_row['test'] = row['test']

        # print(original_label)
        rows_dialog.append(original_row)
    else:
        # print(row['original_question'])
        index = row['original_question']['idx']
        original_label = df_original.loc[df_original['idx'] == index]['label'].values[0]
        original_label = sentiment_label_mapping[original_label]
        original_row = df_original.loc[df_original['idx'] == index]
        original_row['modified_label'] = row['label']
        original_row['test'] = row['test']
        for i, item in original_row.iterrows():
            print(item)
            rows_sentiment.append(item)
        
        # print(original_label)
    row['original_label'] = original_label

    
    rows.append(row)


In [None]:
df_coref = pd.DataFrame(data = rows_coref)
df_coref

In [None]:
df_sentiment = pd.DataFrame(data = rows_sentiment)
df_sentiment

In [None]:
df_dialog = pd.DataFrame(data = rows_dialog)
df_dialog

In [295]:
df_phase2  = pd.DataFrame(data = rows)
df_phase2['label_change'] = (df_phase2['label'] != df_phase2['original_label']).astype(int)

In [None]:
label_change_stats = (df_phase2.groupby(['task', 'test'])
                     .agg({
                         'label_change': ['size','sum', 'mean' ]  # mean for rate, sum for mismatches, size for total
                     })
                     .round(4))

# Flatten column names and reset index
label_change_stats.columns = ['total_samples', 'samples_with_label_change' ,'change_rate' ]
label_change_stats = label_change_stats.reset_index()

# Convert rate to percentage
label_change_stats['change_rate'] = label_change_stats['change_rate'] * 100

# Sort by change rate in descending order (optional)
label_change_stats = label_change_stats.sort_values('change_rate', ascending=False)

print("Label Change Statistics by Task and Test:")
print(label_change_stats)

In [None]:
label_change_stats

In [None]:
df_annotator_group = df_annotator.groupby('annotator_id').mean().reset_index()
df_annotator_group['success_rate_control_only'].value_counts()

In [717]:
filter_annotators = list(df_annotator_group.loc[df_annotator_group['success_rate_control_only'] < 0.5]['annotator_id'])

In [None]:
df_subtask_filter

In [None]:
df_retain_task

In [758]:
df_retain_task.to_csv('df_ner_phase2.csv',index = None)

In [None]:
df_phase2['task']

In [303]:
df_phase2.to_csv('df_data_phase2.csv',index = None)

In [721]:
df_phase2_ner = df_phase2[df_phase2['task'] == 'named_entity_recognition']

In [None]:
df_phase2_ner

In [None]:
rows = []
for i, row in df_phase2_coref.iterrows():
    test = row['test']
    

In [422]:
df_coref_task = df_coref.groupby('test')

In [None]:
for group, frame in df_coref_task:
    to_write = '../data_after_phase2/thinh/' + group + '.json'
    obj = frame.to_dict(orient='records')
    print(obj)
    with open(to_write,'w') as f:
        json.dump(obj, f)
    # break

In [None]:
df_sentiment_task = df_sentiment.groupby('test')
for group, frame in df_sentiment_task:
    to_write = '../data_after_phase2/yulia/' + group + '.json'
    obj = frame.to_dict(orient='records')
    print(obj)
    with open(to_write,'w') as f:
        json.dump(obj, f)
    # break

In [None]:
df_dialog_task = df_dialog.groupby('test')
for group, frame in df_dialog_task:
    to_write = '../data_after_phase2/rongxin/' + group + '.json'
    obj = frame.to_dict(orient='records')
    print(obj)
    with open(to_write,'w') as f:
        json.dump(obj, f)
    # break

In [None]:
df_ner_task = df_retain_task_filter.groupby('test')
for group, frame in df_ner_task:
    to_write = '../data_after_phase2/rahmad/' + group + '.json'
    obj = frame.to_dict(orient='records')
    print(obj)
    with open(to_write,'w') as f:
        json.dump(obj, f, indent=2)
    # break

In [40]:
row['original_question']

{'index_in_phase1_annotated_data': 9,
 'text': 'The Real Live Brady Bunch was one of the creations of Mary Weiss at The Annoyance Theater, which led to a series of feature film remakes.',
 'entities': ['Mary Weiss', 'Real Live Brady Bunch', 'Annoyance Theater'],
 'labels': [],
 'is_control_question': False,
 'source': 'phase1 annotation'}

In [41]:
row['label']

[np.str_("{'value': 'PERSON', 'text': 'Mary Weiss'}"),
 np.str_("{'value': 'ART', 'text': 'Real Live Brady Bunch'}"),
 np.str_("{'text': 'Annoyance Theater', 'value': 'BUILDING'}")]

In [94]:
rows = []
rows_coref = []
rows_sentiment = []
rows_dialog = []
rows_ner = []
for i, row in df_retain_task_filter.iterrows():
    task  = row['task']
    test = row['test']
    if task == 'coreference_resolution':
        task_dir = 'thinh'
    elif task == 'dialog_contradiction_detection':
        task_dir = 'rongxin'
    elif task == 'sentiment_analysis':
        task_dir = 'yulia'
    elif task == 'named_entity_recognition':
        task_dir = 'rahmad'
    file_to_load = '../data_for_phase2/' + task_dir + '/' + test + '.json'
    df_original = pd.read_json(file_to_load)
    # print(row['original_question'])
    if row['original_question'].get('index_in_phase1_annotated_data')!=None and task == 'coreference_resolution':
        index = row['original_question']['index_in_phase1_annotated_data']
        # print(df_original.iloc[index])
        # if df_original.iloc[index].get('original_label')!=None:
        original_label = df_original.iloc[index]['original_label']
        candidates = df_original.iloc[index]['original_candidates']
        # print(candidates)
        original_label = candidates[original_label]
        original_row = df_original.iloc[index]
        original_row['modified_label'] = row['label']
        original_row['test'] = row['test']
        rows_coref.append(original_row)
        # else:
            # original_label = df_original.iloc[index]['label']
    elif row['original_question'].get('index_in_phase1_annotated_data')!=None and task == 'named_entity_recognition':
        index = row['original_question']['index_in_phase1_annotated_data']
        modified_text = row['original_question']['text']
        original_row = df_original.loc[df_original['modified_text'] == modified_text]
        original_label = original_row['label'].values[0]
        # print(len(row['label']))
        # original_row = original_row.assign(modified_label=row['label'])
        row['label'] = [str(label) for label in row['label']]
        print(row['label'])
        original_row['label'] = str(row['label'])
        original_row['test'] = row['test']
        for i, item in original_row.iterrows():
            # print(item)
            rows_ner.append(item)

        # rows_ner.append(original_row)
        # else:
            # original_label = df_original.iloc[index]['label']

    elif row['original_question'].get('index_in_original_testset')!=None:
        index = row['original_question']['index_in_original_testset']
        # print(index)
        # print(df_original)
        original_row = df_original.loc[df_original[0] == index][1].values[0]
        original_label = df_original.loc[df_original[0] == index][1].values[0]['label']
        original_label = dialog_label_mapping[original_label]
        original_row = df_original.loc[df_original[0] == index][1].values[0]
        # print(row['label'])
        original_row['modified_label'] = row['label']
        
        original_row['test'] = row['test']

        # print(original_label)
        rows_dialog.append(original_row)
    else:
        # print(row['original_question'])
        index = row['original_question']['idx']
        original_label = df_original.loc[df_original['idx'] == index]['label'].values[0]
        original_label = sentiment_label_mapping[original_label]
        original_row = df_original.loc[df_original['idx'] == index]
        original_row['modified_label'] = row['label']
        original_row['test'] = row['test']
        for i, item in original_row.iterrows():
            print(item)
            rows_sentiment.append(item)
        
        # print(original_label)
    row['original_label'] = original_label

    
    rows.append(row)


["{'value': 'PERSON', 'text': 'Covaci'}", "{'value': 'PERSON', 'text': 'Baniciu'}"]
["{'value': 'PERSON', 'text': 'Elin Hilderbrand'}", "{'value': 'PERSON', 'text': 'Nicholas Sparks'}", "{'value': 'MISC', 'text': 'American'}"]
["{'value': 'PERSON', 'text': 'Ronald'}", "{'text': 'Iceland', 'value': 'LOCATION'}"]
["{'text': 'Reform Jews', 'value': 'MISC'}", "{'value': 'ORGANIZATION', 'text': 'Conservative Jews'}", "{'text': 'Jewish', 'value': 'MISC'}"]
["{'value': 'PERSON', 'text': 'Ronald'}", "{'text': 'Iceland', 'value': 'LOCATION'}"]
["{'value': 'PERSON', 'text': 'Ronald'}", "{'text': 'Iceland', 'value': 'LOCATION'}"]
["{'text': 'B-52', 'value': 'PRODUCT'}", "{'text': 'F-16', 'value': 'PRODUCT'}", "{'value': 'PERSON', 'text': 'Larry G.Messinger'}"]
["{'value': 'PERSON', 'text': 'George Hall'}", "{'value': 'LOCATION', 'text': 'Grill Room'}", "{'text': 'Taft Hotel', 'value': 'BUILDING'}", "{'text': 'New York', 'value': 'LOCATION'}", "{'text': 'George Hall and His Hotel Taft Orchestra', 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

["{'text': 'Queens County', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'New York'}", "{'text': 'Richard Hewlett', 'value': 'PERSON'}", '{\'text\': "DeLancey\'s Brigade", \'value\': \'ORGANIZATION\'}', "{'value': 'LOCATION', 'text': 'Long Island'}", "{'text': 'Setauket', 'value': 'LOCATION'}", "{'text': 'Long Island Sound', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'Fairfield'}"]
["{'text': 'Murphy Oil', 'value': 'ORGANIZATION'}", "{'value': 'ORGANIZATION', 'text': 'ExxonMobil'}"]
["{'text': 'Reform Jews', 'value': 'MISC'}", "{'text': 'Jewish', 'value': 'NON-ENTITY'}"]
["{'value': 'MISC', 'text': 'USP42'}", "{'value': 'NON-ENTITY', 'text': 'its associated factors'}", "{'value': 'MISC', 'text': 'p53'}", "{'text': 'p53', 'value': 'MISC'}"]
["{'text': 'Queens County', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'New York'}", "{'text': 'Richard Hewlett', 'value': 'PERSON'}", '{\'text\': "DeLancey\'s Brigade", \'value\': \'ORGANIZATION\'}', "{'value': 'LOCATI

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

["{'text': 'Azerbaijan', 'value': 'LOCATION'}", "{'text': 'Football World Championship', 'value': 'EVENT'}", "{'text': 'Armenia', 'value': 'LOCATION'}"]
["{'text': 'Ras Dashen', 'value': 'LOCATION'}", "{'text': 'Mount Bwahit', 'value': 'LOCATION'}"]
["{'text': 'Soweto', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'Gauteng'}", "{'text': 'Protea Primary School', 'value': 'ORGANIZATION'}", "{'text': 'Meadowlands High School', 'value': 'ORGANIZATION'}", "{'text': 'Orlando West High School', 'value': 'ORGANIZATION'}"]
["{'text': 'Tirana', 'value': 'LOCATION'}", "{'value': 'ORGANIZATION', 'text': 'Socialist Party of Albania'}"]
["{'text': 'Cochabamba', 'value': 'LOCATION'}", "{'text': 'Movement for Socialism party', 'value': 'ORGANIZATION'}"]
[]
["{'text': 'Guangzhou', 'value': 'LOCATION'}", "{'text': 'Communist Party of China', 'value': 'ORGANIZATION'}"]
["{'text': 'South Sudan', 'value': 'LOCATION'}", "{'text': 'CECAFA Cup', 'value': 'EVENT'}", "{'text': 'Uganda', 'value': 'LOCAT

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

["{'text': 'Adriatic fish', 'value': 'MISC'}", "{'text': 'Montenegro', 'value': 'LOCATION'}"]
["{'text': 'Jellyfish', 'value': 'NON-ENTITY'}", "{'text': 'Andaman Sea', 'value': 'LOCATION'}"]
["{'text': 'Tilapia', 'value': 'MISC'}", '{\'text\': "Côte d\'Ivoire", \'value\': \'LOCATION\'}']
["{'text': 'Asia-Plus', 'value': 'ORGANIZATION'}", "{'value': 'ORGANIZATION', 'text': 'Khovar'}", "{'text': 'Faraj', 'value': 'ORGANIZATION'}", "{'value': 'ORGANIZATION', 'text': 'Ozodagon'}", "{'text': 'Nigoh', 'value': 'ORGANIZATION'}", "{'value': 'ORGANIZATION', 'text': 'Jumhuriyat'}", "{'text': 'Tojikiston', 'value': 'ORGANIZATION'}", "{'value': 'ORGANIZATION', 'text': 'Avesta'}", "{'value': 'ORGANIZATION', 'text': 'Varorud'}"]
["{'text': 'Pelusium', 'value': 'LOCATION'}", "{'text': 'Ahmose', 'value': 'PERSON'}", "{'value': 'PERSON', 'text': 'Cleopatra'}", "{'value': 'LOCATION', 'text': 'Pelusium'}", "{'value': 'LOCATION', 'text': 'Damietta'}"]
["{'text': 'Ashgabat', 'value': 'LOCATION'}", "{'value

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

['{\'value\': \'PRODUCT\', \'text\': "Morin Khuur extended 4\'\'"}', "{'value': 'PERSON', 'text': 'Batbold'}", "{'value': 'NON-ENTITY', 'text': 'Mongolian horse'}", "{'text': 'high-endurance trait', 'value': 'NON-ENTITY'}"]
['{\'value\': \'PRODUCT\', \'text\': "Sofia Special 4\'\'"}', "{'value': 'PERSON', 'text': 'Georgi'}", "{'text': 'Balkan GT', 'value': 'PRODUCT'}", "{'text': 'V8 engine', 'value': 'NON-ENTITY'}"]
["{'text': 'New York', 'value': 'LOCATION'}", "{'value': 'ORGANIZATION', 'text': 'United States House of Representatives'}"]
["{'text': 'Vin', 'value': 'PERSON'}", "{'text': 'Siddhi', 'value': 'PERSON'}", "{'value': 'PERSON', 'text': 'Rudra'}"]
["{'value': 'LOCATION', 'text': 'Kadavu Province'}", "{'text': 'Fiji', 'value': 'LOCATION'}", "{'text': 'Lau', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'Lomaiviti'}", "{'text': 'Rotuma', 'value': 'LOCATION'}"]
["{'text': 'United States House of Representatives', 'value': 'ORGANIZATION'}", "{'text': 'New York', 'value': '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

["{'text': 'hypertension', 'value': 'MISC'}", "{'value': 'MISC', 'text': 'cardiac failure,'}", "{'text': 'pre-eclampsia', 'value': 'MISC'}"]
["{'text': 'Back to Basics', 'value': 'ART'}"]
["{'text': 'amino acid tyrosine', 'value': 'MISC'}", "{'text': 'PO43−', 'value': 'MISC'}"]
["{'text': 'Boeing B-52', 'value': 'PRODUCT'}"]
["{'value': 'LOCATION', 'text': 'Hunt'}", "{'text': 'Dundee', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'Omaha'}", "{'text': 'Benson', 'value': 'LOCATION'}", "{'text': 'Keystone', 'value': 'LOCATION'}"]
["{'text': 'Sturdza', 'value': 'PERSON'}"]
["{'text': 'Del Imaginario Discos', 'value': 'ART'}", "{'text': 'Argentina', 'value': 'LOCATION'}"]
["{'text': 'Pune', 'value': 'ORGANIZATION'}", "{'text': 'Amrinder', 'value': 'PERSON'}"]
["{'text': 'Del Imaginario Discos', 'value': 'ORGANIZATION'}", "{'text': 'Argentina', 'value': 'LOCATION'}"]
["{'text': 'Hunt', 'value': 'LOCATION'}", "{'text': 'Omaha', 'value': 'LOCATION'}", "{'value': 'LOCATION', 'text': 'D

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['test'] = row['test']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_row['label'] = str(row['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

In [95]:
df_ner_phase2 = pd.DataFrame(data = rows_ner)
df_ner_phase2.drop_duplicates(subset=['modified_text'], inplace=True)

In [97]:
list(df_ner_phase2['label'])[3]

'["{\'text\': \'Reform Jews\', \'value\': \'MISC\'}", "{\'value\': \'ORGANIZATION\', \'text\': \'Conservative Jews\'}", "{\'text\': \'Jewish\', \'value\': \'MISC\'}"]'

In [98]:
list(df_ner_phase2['modified_text'])[3]


'In particular, Reform Jews and Conservative Jews may be lampooned for their rejection of traditional Jewish beliefs.'

In [99]:
df_ner_task = df_ner_phase2.groupby('test')
for group, frame in df_ner_task:
    to_write = '../data_after_phase2/rahmad/' + group + '.json'
    obj = frame.to_dict(orient='records')
    print(obj)
    with open(to_write,'w') as f:
        json.dump(obj, f, indent=2)
    # break

[{'original_text': 'Internal conflicts, especially between Covaci and Baniciu, were making headlines in the media.', 'modified_text': 'Internal conflicts, especially between Covaci and Baniciu, were escalating and making headlines in the media.', 'label': '["{\'value\': \'PERSON\', \'text\': \'Covaci\'}", "{\'value\': \'PERSON\', \'text\': \'Baniciu\'}"]', 'original_text_highlight_spans': [], 'modified_text_highlight_spans': [[64, 79]], 'type': 'coordinating_conjunction', 'Label': [{'Covaci': 'PERSON'}, {'Baniciu': 'PERSON'}], 'Rationale': "Added a new verb phrase 'escalating' to the existing verb phrase 'making headlines in the media' with the coordinating conjunction 'and' to test the model's ability to handle complex verb phrases.", 'test': 'coordinating_conjunction'}, {'original_text': 'Elin Hilderbrand is an American writer mostly of romance novels.', 'modified_text': 'Elin Hilderbrand and Nicholas Sparks are American writers mostly of romance novels.', 'label': '["{\'value\': \'P