In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

In [None]:
from google.colab import files

# Opens Colab's file-upload dialog; the CSVs read later in this notebook
# ('bert_labels_full.csv' and 'unhealthy_full.csv') have to be provided here.
# upload() returns a mapping of file name -> raw file bytes. Binding it to a
# name keeps the cell from echoing the full file contents as its output.
uploaded = files.upload()

In [None]:
# Load the two input tables from the working directory:
#   bert_labels    <- 'bert_labels_full.csv'
#   unhealthy_full <- 'unhealthy_full.csv'
bert_labels, unhealthy_full = (
    pd.read_csv(csv_name)
    for csv_name in ('bert_labels_full.csv', 'unhealthy_full.csv')
)

In [None]:
# Column names of the per-comment attributes that are evaluated below.
attributes = [
    'antagonise',
    'condescending',
    'dismissive',
    'generalisation',
    'generalisation_unfair',
    'healthy',
    'hostile',
    'sarcastic',
]

In [None]:
# Keep only the annotation rows whose `_unit_id` also appears in `bert_labels`.
label_ids = bert_labels['_unit_id']
# .copy() makes the filtered frame independent of `unhealthy_full`, so adding a
# column below is a plain assignment rather than a write into a slice
# (which pandas reports as SettingWithCopyWarning).
unhealthy_test = unhealthy_full[unhealthy_full['_unit_id'].isin(label_ids)].copy()

# Key that identifies one (unit, annotator) pair. Built column-wise instead of
# with a row-wise apply. The '_' separator keeps distinct pairs from producing
# the same key (without it, (1, 23) and (12, 3) would both become '123').
unhealthy_test['concat_id'] = (
    unhealthy_test['_unit_id'].astype(str)
    + '_'
    + unhealthy_test['_worker_id'].astype(str)
)

In [None]:
# Put both frames in ascending `_unit_id` order with a fresh 0..n-1 index.
# Reassigning (instead of inplace=True) avoids mutating a frame that was derived
# from a filter of another frame and keeps the cell safe to re-run.
unhealthy_test = unhealthy_test.sort_values(by='_unit_id', ignore_index=True)
bert_labels = bert_labels.sort_values(by='_unit_id', ignore_index=True)

In [None]:
# Per-attribute accumulators: each attribute maps to its own (initially empty)
# list of AUC values, one list for the human baseline and one for BERT.
human_auc_dict = dict((name, []) for name in attributes)
bert_auc_dict = dict((name, []) for name in attributes)

In [None]:
def generate_aucs(unhealthy_test, bert_labels, human_auc_dict, bert_auc_dict, random_state=None):
  """Run one resampling round and append per-attribute human and BERT AUCs.

  For every `_unit_id` one annotation row is drawn at random. The rows of all
  remaining annotators are averaged per unit and thresholded at 0.5 to form a
  binary reference label. The drawn annotator's score and the BERT prediction
  (column `pred_<attribute>`) are each scored against that reference with
  `roc_auc_score`, and the two results are appended to the accumulator dicts.

  Args:
    unhealthy_test: DataFrame with one row per annotation. Must contain
      `_unit_id`, `concat_id`, `comment` and one column per entry of the
      module-level `attributes` list.
    bert_labels: DataFrame with one row per unit, containing `_unit_id` and a
      `pred_<attribute>` column per attribute, ordered by ascending `_unit_id`
      (checked below).
    human_auc_dict: dict mapping attribute -> list; the human AUC of this round
      is appended in place.
    bert_auc_dict: dict mapping attribute -> list; the BERT AUC of this round
      is appended in place.
    random_state: optional seed / RandomState / Generator forwarded to
      `DataFrameGroupBy.sample`. The default (None) draws from numpy's global
      random state, as before. Pass a shared RandomState or Generator object
      (not the same int) to get different but reproducible draws across calls.

  Returns:
    None. Results are delivered through the two accumulator dicts.

  Raises:
    AssertionError: if the unit ids of the reference labels, the drawn
      annotators and `bert_labels` do not line up row by row.
  """
  # One random annotation row per unit. groupby().sample replaces a per-group
  # Python apply (slow, and recent pandas deprecates apply operating on the
  # grouping column). Sorting + ignore_index gives a 0..n-1 index in unit order.
  random_worker = (
      unhealthy_test
      .groupby('_unit_id')
      .sample(n=1, random_state=random_state)
      .sort_values('_unit_id', ignore_index=True)
  )
  # Everything not belonging to a drawn (unit, annotator) pair.
  other_workers = unhealthy_test[~unhealthy_test['concat_id'].isin(random_worker['concat_id'])]

  agg_dict = {attribute: 'mean' for attribute in attributes}
  agg_dict['comment'] = 'first'
  other_workers_agg = other_workers.groupby('_unit_id', as_index=False).agg(agg_dict)

  # roc_auc_score pairs rows by position, so all three tables must list the
  # same units in the same order. Comparing arrays also turns a length
  # mismatch (e.g. a unit with a single annotator) into a clear assertion
  # failure.
  reference_ids = other_workers_agg['_unit_id'].to_numpy()
  assert np.array_equal(reference_ids, random_worker['_unit_id'].to_numpy()), (
      'units of the held-out annotators and of the remaining annotators do not line up')
  assert np.array_equal(reference_ids, bert_labels['_unit_id'].to_numpy()), (
      'bert_labels is not aligned with the aggregated annotations on _unit_id')

  for attribute in attributes:
    # Reference label: mean score of the remaining annotators above 0.5.
    reference_label = other_workers_agg[attribute] > 0.5
    human_auc = roc_auc_score(reference_label, random_worker[attribute])
    human_auc_dict[attribute].append(human_auc)
    bert_auc = roc_auc_score(reference_label, bert_labels['pred_' + attribute])
    bert_auc_dict[attribute].append(bert_auc)

In [None]:
N_RESAMPLES = 5  # number of random held-out-annotator rounds to average over
SEED = 0

# The annotator draw inside generate_aucs uses numpy's global random state;
# seeding it here makes a Restart & Run All reproduce the same AUC values.
np.random.seed(SEED)
for i in range(N_RESAMPLES):
  print(i)
  generate_aucs(unhealthy_test, bert_labels, human_auc_dict, bert_auc_dict)

In [None]:
# Show the raw human AUC lists (attribute -> one value per generate_aucs call).
human_auc_dict

In [None]:
# Report, for each attribute, the mean of the collected AUC values:
# first the human baseline, then BERT.
for attribute in attributes:
  print(attribute)

  human_runs = human_auc_dict[attribute]
  avg_human_auc = np.mean(human_runs)
  print(f'Average Human AUC: {avg_human_auc}')

  bert_runs = bert_auc_dict[attribute]
  avg_bert_auc = np.mean(bert_runs)
  print(f'Average BERT AUC: {avg_bert_auc}')