# Data Helper

In [0]:
# Mount Google Drive at /content/drive (needs the google.colab package).
from google.colab import drive
drive.mount('/content/drive')
# Prefix that the experiment cells prepend to every dataset file name. Paths
# are built by plain string concatenation, so keep the trailing slash.
# NOTE(review): the default is a relative path, not a path under the Drive
# mount point -- adjust it to where the TSV files actually live.
folder = "data/"  # set your path of data

In [0]:
import pandas as pd
import numpy as np

def loaddata(labelfilename,gtfilename):
  """Load crowd-worker labels and ground-truth answers from two TSV files.

  Args:
    labelfilename: path of a tab-separated, UTF-8 file with the columns
      'worker', 'sentence' and 'workeranswer' (one row per worker answer).
    gtfilename: path of a tab-separated, UTF-8 file with the columns
      'sentence' and 'trueanswer'.

  Returns:
    A tuple (workers, sentences, swlabels, truelabels):
      workers: worker values in order of first appearance in the label file.
      sentences: the 'sentence' column of the ground-truth file, in file order.
      swlabels: one (sentenceid, workerid, label) tuple per label row, where
        the ids are positions in `sentences` / `workers` and `label` is the
        'workeranswer' value with surrounding whitespace stripped.
      truelabels: dict mapping sentence -> 'trueanswer'.

  Raises:
    ValueError: if a labelled sentence does not occur in the ground-truth file.
  """
  gtdf = pd.read_csv(gtfilename,sep='\t',encoding='utf-8')

  sentences = gtdf['sentence'].values.tolist()
  # When a sentence is repeated the later row wins, exactly as with
  # row-by-row dictionary assignment.
  truelabels = dict(zip(sentences, gtdf['trueanswer'].tolist()))

  # Position of the first occurrence of every sentence: O(1) lookups that
  # still agree with list.index() when a sentence is repeated.
  sentenceids = {}
  for position, sentence in enumerate(sentences):
    sentenceids.setdefault(sentence, position)

  labeldf = pd.read_csv(labelfilename,sep='\t',encoding='utf-8')

  workers = []
  workerids = {}
  swlabels = []

  rows = zip(labeldf['worker'], labeldf['sentence'], labeldf['workeranswer'])
  for worker, sentence, answer in rows:
    if worker not in workerids:
      # Ids are handed out in order of first appearance.
      workerids[worker] = len(workers)
      workers.append(worker)
    if sentence not in sentenceids:
      raise ValueError("%r is not in the ground-truth sentences" % (sentence,))
    swlabels.append((sentenceids[sentence], workerids[worker], answer.strip()))

  return (workers,sentences,swlabels,truelabels)

def labelformatconversion(workers,sentences,swlabels):
  """Re-index a flat list of (sentenceid, workerid, label) triples.

  Every triple gets a label id equal to its position in `swlabels`.

  Args:
    workers: list of workers; `workerid` values index into it.
    sentences: list of sentences; `sentenceid` values index into it.
    swlabels: iterable of (sentenceid, workerid, label) triples.

  Returns:
    A tuple (labellist, slabelidlists, sworkeridlists, wlabelidlists,
    wsentenceidlists):
      labellist: the labels, in `swlabels` order.
      slabelidlists: sentence -> label ids given to that sentence.
      sworkeridlists: sentence -> worker ids, parallel to slabelidlists.
      wlabelidlists: worker -> label ids produced by that worker.
      wsentenceidlists: worker -> sentence ids, parallel to wlabelidlists.
  """
  # Every worker and every sentence starts with empty lists, so entries
  # without any label are still present in the result.
  wlabelidlists = {name: [] for name in workers}
  wsentenceidlists = {name: [] for name in workers}
  slabelidlists = {text: [] for text in sentences}
  sworkeridlists = {text: [] for text in sentences}

  labellist = []
  for labelid, (sid, wid, answer) in enumerate(swlabels):
    labellist.append(answer)

    text = sentences[sid]
    slabelidlists[text].append(labelid)
    sworkeridlists[text].append(wid)

    name = workers[wid]
    wlabelidlists[name].append(labelid)
    wsentenceidlists[name].append(sid)

  return (labellist,slabelidlists,sworkeridlists,wlabelidlists,wsentenceidlists)

def truelabelformatonversion(sentences,truelabels):
  """Return the true label of every sentence, in `sentences` order.

  Args:
    sentences: list of sentences.
    truelabels: mapping sentence -> true label.

  Returns:
    A list whose i-th element is truelabels[sentences[i]].
  """
  return [truelabels[sentences[position]] for position in range(len(sentences))]

# Universal Sentence Encoder

In [0]:
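# Install TensorFlow (any version >= 1.7).
# NOTE(review): run() below uses tf.Session, tf.logging and hub.Module; confirm
# that the version this resolves to still provides them, or pin the version.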
!pip3 install --quiet "tensorflow>=1.7"
# Install TF-Hub.
!pip3 install --quiet tensorflow-hub
!pip3 install --quiet seaborn

In [0]:
import tensorflow as tf
import tensorflow_hub as hub
import os
import re

In [0]:
# TF-Hub handle of the sentence encoder that run() passes to hub.Module; the
# trailing #@param annotation lists the two handles offered as choices.
module_url = "https://tfhub.dev/google/universal-sentence-encoder/2" #@param ["https://tfhub.dev/google/universal-sentence-encoder/2", "https://tfhub.dev/google/universal-sentence-encoder-large/3"]

# Evaluation Method

In [0]:
DEFAULT_SIM_TYPE = 'COSINE'
def similarity(embed1, embed2, simtype = DEFAULT_SIM_TYPE):
  """Similarity between two embedding vectors.

  Args:
    embed1: NumPy vector.
    embed2: NumPy vector.
    simtype: name of the measure; only 'COSINE' is implemented.

  Returns:
    The inner product of the two vectors divided by the product of their
    Euclidean norms. No special handling is applied to zero-norm vectors.

  Raises:
    ValueError: if `simtype` is not a supported measure.
  """
  # Fail loudly instead of reaching the return with an unbound result.
  if simtype != 'COSINE':
    raise ValueError("unsupported simtype %r (only 'COSINE' is implemented)" % (simtype,))

  l1 = np.sqrt(np.sum(embed1**2))
  l2 = np.sqrt(np.sum(embed2**2))
  return np.inner(embed1,embed2) / (l1*l2)

## Embedding

In [0]:
def evaluationbyEmbedding(sentences, elabels, label_embeddings, truelabels, truelabel_embeddings):
  """Mean similarity between each selected label and its true label.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `truelabel_embeddings`.
    elabels: dict mapping sentence -> index of the selected label in
      `label_embeddings`.
    label_embeddings: indexable collection of label embedding vectors.
    truelabels: not used by this function; kept so existing callers still work.
    truelabel_embeddings: indexable collection of true-label embedding vectors.

  Returns:
    The sum of similarity(selected, true) over the entries of `elabels`,
    divided by len(elabels).

  Raises:
    ValueError: if a key of `elabels` does not occur in `sentences`.
    ZeroDivisionError: if `elabels` is empty.
  """
  # Position of the first occurrence of every sentence, built once so each
  # lookup is O(1) while matching what list.index() would return.
  first_position = {}
  for position, sentence in enumerate(sentences):
    first_position.setdefault(sentence, position)

  totalsim = 0
  for sentence in elabels:
    elabel_embedding = label_embeddings[elabels[sentence]]
    if sentence not in first_position:
      raise ValueError("%r is not in sentences" % (sentence,))
    truelabel_embedding = truelabel_embeddings[first_position[sentence]]
    totalsim += similarity(elabel_embedding,truelabel_embedding)

  return totalsim/len(elabels)
    

## GLEU

In [0]:
#https://github.com/gcunhase/NLPMetrics
#https://colab.research.google.com/github/gcunhase/NLPMetrics/blob/master/notebooks/gleu.ipynb

import nltk
import nltk.translate.gleu_score as gleu

# Make sure the NLTK "punkt" data is available, downloading it only when the
# local lookup fails.
# NOTE(review): the GLEU helpers visible in this notebook tokenize with
# str.split(), so punkt may not be needed here -- confirm before removing.
try:
  nltk.data.find('tokenizers/punkt')
except LookupError:
  nltk.download('punkt')

def evaluationbyGLEU(sentences, labels, elabelidxs, truelabels):
  """Mean sentence-level GLEU of the selected labels against the true labels.

  Args:
    sentences: list of sentences to score.
    labels: indexable collection of label strings.
    elabelidxs: mapping sentence -> index of the selected label in `labels`.
    truelabels: mapping sentence -> true label string.

  Returns:
    The sum of the per-sentence GLEU scores divided by len(sentences). Both
    strings are tokenized with str.split().
  """
  score_sum = 0
  for current in sentences:
    chosen = labels[elabelidxs[current]]
    reference = truelabels[current]
    score_sum += gleu.sentence_gleu([reference.split()], chosen.split())
  return score_sum/len(sentences)

# Methods

## SMV

In [0]:
def SMVSelectionAndEvaluationbyEmbedding(sentences, slabelidlists, label_embeddings, truelabel_embeddings):
  """Average, over sentences, of the mean similarity of all its labels to the true label.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `truelabel_embeddings`.
    slabelidlists: mapping sentence -> list of label ids.
    label_embeddings: label embeddings; must accept a list of ids as index
      (e.g. a NumPy array).
    truelabel_embeddings: indexable collection of true-label embeddings.

  Returns:
    The per-sentence mean similarity, averaged over all sentences.

  Raises:
    ZeroDivisionError: if `sentences` is empty or a sentence has no labels.
  """
  # First position of every sentence, built once; this replaces a linear
  # sentences.index() scan per sentence and yields the same position.
  first_position = {}
  for position, sentence in enumerate(sentences):
    first_position.setdefault(sentence, position)

  total_sim = 0
  for sentence in sentences:
    truelabel_embedding = truelabel_embeddings[first_position[sentence]]
    slabel_embeddings = label_embeddings[slabelidlists[sentence]]
    onetotal_sim = 0
    for label_embedding in slabel_embeddings:
      onetotal_sim += similarity(label_embedding,truelabel_embedding)
    total_sim += onetotal_sim/len(slabel_embeddings)

  return total_sim/len(sentences)

In [0]:
def SMVSelectionAndEvaluationbyGLEU(sentences, slabelidlists, labels, truelabels):
  """Average, over sentences, of the mean GLEU of all its labels against the true label.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `truelabels`.
    slabelidlists: mapping sentence -> list of label ids.
    labels: label strings; must accept a list of ids as index (e.g. a NumPy
      array of strings).
    truelabels: true label strings indexed by sentence position.

  Returns:
    The per-sentence mean GLEU score, averaged over all sentences. Strings
    are tokenized with str.split().

  Raises:
    ZeroDivisionError: if `sentences` is empty or a sentence has no labels.
  """
  # First position of every sentence, built once; this replaces a linear
  # sentences.index() scan per sentence and yields the same position.
  first_position = {}
  for position, sentence in enumerate(sentences):
    first_position.setdefault(sentence, position)

  total_sim = 0
  for sentence in sentences:
    truelabel = truelabels[first_position[sentence]]
    slabels = labels[slabelidlists[sentence]]
    onetotal_sim = 0
    for label in slabels:
      onetotal_sim += gleu.sentence_gleu([truelabel.split()], label.split())
    total_sim += onetotal_sim/len(slabels)

  return total_sim/len(sentences)

## SMS

In [0]:
def SMSSelectionOne(labelidlist, slabel_embeddings):
  """Pick the label whose embedding is most similar to all labels of one sentence.

  Args:
    labelidlist: label ids, parallel to `slabel_embeddings`.
    slabel_embeddings: embeddings of the labels given to one sentence.

  Returns:
    The entry of `labelidlist` whose row in the pairwise similarity matrix
    (self-similarity included) has the largest sum; np.argmax decides ties.
  """
  count = len(slabel_embeddings)
  pairwise = np.zeros((count,count))
  for row, left in enumerate(slabel_embeddings):
    for col, right in enumerate(slabel_embeddings):
      pairwise[row][col] = similarity(left,right)

  best = np.argmax(np.sum(pairwise,axis=1))
  return labelidlist[best]

def SMSSelection(sentences, slabelidlists, label_embeddings):
  """Run SMSSelectionOne for every sentence.

  Args:
    sentences: list of sentences.
    slabelidlists: mapping sentence -> list of label ids.
    label_embeddings: label embeddings; must accept a list of ids as index.

  Returns:
    A dict mapping sentence -> selected label id.
  """
  chosen = {}
  for current in sentences:
    ids = slabelidlists[current]
    chosen[current] = SMSSelectionOne(ids, label_embeddings[ids])
  return chosen

## RASA

In [0]:
from scipy.stats import chi2

def RASAInference(workers, sentences, slabelidlists, sworkeridlists, wlabelidlists, wsentenceidlists, label_embeddings):
  """Alternately estimate worker reliabilities and per-sentence embeddings.

  Each pass first sets, for every worker, reliability =
  chi2.isf(q=0.025, df=<number of labels by the worker>) divided by the summed
  squared difference between the worker's label embeddings and the current
  estimates of the sentences they labelled (reliability is 1 when that error
  is exactly zero). It then replaces every sentence estimate with the
  reliability-weighted mean of its label embeddings.

  Args:
    workers: list of workers; worker ids index into it.
    sentences: list of sentences; sentence ids index into it.
    slabelidlists: mapping sentence -> list of label ids.
    sworkeridlists: mapping sentence -> list of worker ids, parallel to
      slabelidlists[sentence].
    wlabelidlists: mapping worker -> list of label ids.
    wsentenceidlists: mapping worker -> list of sentence ids, parallel to
      wlabelidlists[worker].
    label_embeddings: NumPy array of label embeddings, one row per label id.

  Returns:
    A tuple (estimated_embeddings, reliability): one estimated embedding per
    sentence and one reliability value per worker.
  """
  max_ite = 1000
  tolerance = 0.0000001

  # Initialize the estimated embeddings with SMV, i.e. the unweighted mean of
  # each sentence's label embeddings.
  estimated_embeddings = []
  for sentence in sentences:
    slabel_embeddings = label_embeddings[slabelidlists[sentence]]
    estimated_embeddings.append(np.mean(slabel_embeddings, axis=0))
  estimated_embeddings = np.asarray(estimated_embeddings)

  w_num = len(workers)
  reliability = np.zeros(w_num)
  s_num = len(sentences)

  ite = 0
  eb_change = 1
  # The `<=` bound is kept as it was, so up to max_ite + 1 passes can run.
  while (ite <= max_ite) and (eb_change > tolerance):
    old_estimated_embeddings = np.copy(estimated_embeddings)

    # Compute the reliability of every worker.
    for j in range(w_num):
      worker = workers[j]
      westimated_embeddings = estimated_embeddings[wsentenceidlists[worker]]
      nw = len(wsentenceidlists[worker])
      chiw = chi2.isf(q=0.025, df=nw)
      wlabel_embeddings = label_embeddings[wlabelidlists[worker]]
      diff_embedding = (westimated_embeddings - wlabel_embeddings)
      squared_error = np.sum(diff_embedding*diff_embedding)
      # Guard on the divisor itself: a signed sum can be zero because
      # positive and negative differences cancel while the error is not.
      if (squared_error == 0):
        reliability[j] = 1
      else:
        reliability[j] = chiw / squared_error

    # Compute the estimated embedding of every sentence.
    for i in range(s_num):
      sentence = sentences[i]
      # Indexing with a list of ids yields a copy, so the in-place scaling
      # below does not modify label_embeddings.
      slabel_embeddings = label_embeddings[slabelidlists[sentence]]
      sworkerreliability = reliability[sworkeridlists[sentence]]
      for j in range(len(sworkerreliability)):
        slabel_embeddings[j] = slabel_embeddings[j] * sworkerreliability[j]
      estimated_embeddings[i] = np.sum(slabel_embeddings, axis=0) / np.sum(sworkerreliability)

    # Stop once the total squared change of the estimates is small enough.
    eb_diff = (estimated_embeddings - old_estimated_embeddings)
    eb_change = np.sum(eb_diff * eb_diff)
    ite += 1

  return (estimated_embeddings,reliability)

def RASASelectionOne(estimated_embedding, labelidlist, label_embeddings):
  """Pick the label of one sentence that is most similar to its estimated embedding.

  Args:
    estimated_embedding: estimated embedding of the sentence.
    labelidlist: ids of the labels given to the sentence.
    label_embeddings: label embeddings; must accept a list of ids as index.

  Returns:
    The entry of `labelidlist` with the highest similarity to
    `estimated_embedding`; np.argmax decides ties.
  """
  candidates = label_embeddings[labelidlist]
  scores = np.zeros(len(labelidlist))
  for position in range(len(labelidlist)):
    scores[position] = similarity(estimated_embedding,candidates[position])
  return labelidlist[np.argmax(scores)]

def RASASelection(sentences, slabelidlists, label_embeddings, estimated_embeddings):
  """Run RASASelectionOne for every sentence.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `estimated_embeddings`.
    slabelidlists: mapping sentence -> list of label ids.
    label_embeddings: label embeddings; must accept a list of ids as index.
    estimated_embeddings: one estimated embedding per sentence.

  Returns:
    A dict mapping sentence -> selected label id.
  """
  chosen = {}
  for position, current in enumerate(sentences):
    chosen[current] = RASASelectionOne(
        estimated_embeddings[position], slabelidlists[current], label_embeddings)
  return chosen

## Optimal

In [0]:
def OptimalSelectionAndEvaluationbyEmbedding(sentences, slabelidlists, label_embeddings, truelabel_embeddings):
  """Upper bound: average similarity of each sentence's best label to the true label.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `truelabel_embeddings`.
    slabelidlists: mapping sentence -> list of label ids.
    label_embeddings: label embeddings; must accept a list of ids as index
      (e.g. a NumPy array).
    truelabel_embeddings: indexable collection of true-label embeddings.

  Returns:
    The maximum label-to-truth similarity of every sentence, averaged over
    all sentences.

  Raises:
    ValueError: if a sentence has no labels (np.max of an empty array).
    ZeroDivisionError: if `sentences` is empty.
  """
  # First position of every sentence, built once; this replaces a linear
  # sentences.index() scan per sentence and yields the same position.
  first_position = {}
  for position, sentence in enumerate(sentences):
    first_position.setdefault(sentence, position)

  total_sim = 0
  for sentence in sentences:
    truelabel_embedding = truelabel_embeddings[first_position[sentence]]
    slabel_embeddings = label_embeddings[slabelidlists[sentence]]
    sims = np.zeros(len(slabel_embeddings))
    for k, label_embedding in enumerate(slabel_embeddings):
      sims[k] = similarity(label_embedding,truelabel_embedding)
    total_sim += np.max(sims)

  return total_sim/len(sentences)

In [0]:
def OptimalSelectionAndEvaluationbyGLEU(sentences, slabelidlists, labels, truelabels):
  """Upper bound: average GLEU of each sentence's best label against the true label.

  Args:
    sentences: list of sentences; position i corresponds to entry i of
      `truelabels`.
    slabelidlists: mapping sentence -> list of label ids.
    labels: label strings; must accept a list of ids as index (e.g. a NumPy
      array of strings).
    truelabels: true label strings indexed by sentence position.

  Returns:
    The maximum label-to-truth GLEU score of every sentence, averaged over
    all sentences. Strings are tokenized with str.split().

  Raises:
    ValueError: if a sentence has no labels (np.max of an empty array).
    ZeroDivisionError: if `sentences` is empty.
  """
  # First position of every sentence, built once; this replaces a linear
  # sentences.index() scan per sentence and yields the same position.
  first_position = {}
  for position, sentence in enumerate(sentences):
    first_position.setdefault(sentence, position)

  total_sim = 0
  for sentence in sentences:
    truelabel = truelabels[first_position[sentence]]
    slabels = labels[slabelidlists[sentence]]
    sims = np.zeros(len(slabels))
    for k, label in enumerate(slabels):
      sims[k] = gleu.sentence_gleu([truelabel.split()], label.split())
    total_sim += np.max(sims)

  return total_sim/len(sentences)

# Experiments

In [0]:
def run(labelfilename,gtfilename):
  """Run every aggregation method on one dataset and print a comparison table.

  Loads the labels and ground truth, embeds all worker labels and all true
  labels with the TF-Hub module at `module_url`, evaluates SMV, SMS, RASA and
  the Optimal upper bound with both the embedding similarity and GLEU, and
  prints the scores as CSV-style lines.

  Uses the graph/session style TensorFlow API (hub.Module, tf.Session,
  tf.logging).

  Args:
    labelfilename: path of the worker-label TSV file (see loaddata).
    gtfilename: path of the ground-truth TSV file (see loaddata).

  Returns:
    None; the results are printed.
  """
  # labellist: all labels for all sentences by all workers
  # slabelidlists: label id list for each sentence
  # sworkeridlists: worker id list for each sentence
  # wlabelidlists: label id list for each worker
  # wsentenceidlists: sentence id list for each worker
  (workers,sentences,swlabels,truelabels) = loaddata(labelfilename,gtfilename)
  (labellist,slabelidlists,sworkeridlists,wlabelidlists,wsentenceidlists) = labelformatconversion(workers,sentences,swlabels)
  truelabellist = truelabelformatonversion(sentences,truelabels)

  # Array views of the label strings, built once: the GLEU helpers index them
  # with lists of label ids.
  label_array = np.asarray(labellist)
  truelabel_array = np.asarray(truelabellist)

  # Import the Universal Sentence Encoder's TF Hub module
  embed = hub.Module(module_url)

  # Reduce logging output.
  tf.logging.set_verbosity(tf.logging.ERROR)

  # label_embeddings: embeddings of all labels of workers
  # truelabel_embeddings: embeddings of all true labels
  # One session (and one initializer run) serves both embedding passes.
  with tf.Session() as session:
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    label_embeddings = session.run(embed(labellist))
    truelabel_embeddings = session.run(embed(truelabellist))

  # Optimal
  perf_opt_embedding = OptimalSelectionAndEvaluationbyEmbedding(sentences, slabelidlists, label_embeddings, truelabel_embeddings)
  perf_opt_gleu = OptimalSelectionAndEvaluationbyGLEU(sentences, slabelidlists, label_array, truelabel_array)

  # SMV
  perf_smv_embedding = SMVSelectionAndEvaluationbyEmbedding(sentences, slabelidlists, label_embeddings, truelabel_embeddings)
  perf_smv_gleu = SMVSelectionAndEvaluationbyGLEU(sentences, slabelidlists, label_array, truelabel_array)

  # SMS
  sms_elabels = SMSSelection(sentences, slabelidlists, label_embeddings)
  perf_sms_embedding = evaluationbyEmbedding(sentences, sms_elabels, label_embeddings, truelabels, truelabel_embeddings)
  perf_sms_gleu = evaluationbyGLEU(sentences, label_array, sms_elabels, truelabels)

  # RASA
  (estimated_embeddings, reliability) = RASAInference(workers, sentences, slabelidlists, sworkeridlists, wlabelidlists, wsentenceidlists, label_embeddings)
  rasa_elabels = RASASelection(sentences, slabelidlists, label_embeddings, estimated_embeddings)
  perf_rasa_embedding = evaluationbyEmbedding(sentences, rasa_elabels, label_embeddings, truelabels, truelabel_embeddings)
  perf_rasa_gleu = evaluationbyGLEU(sentences, label_array, rasa_elabels, truelabels)

  print("Evaluation,SMV,SMS,RASA,Optimal")
  print("Embedding,%.4f,%.4f,%.4f,%.4f" % (perf_smv_embedding,perf_sms_embedding,perf_rasa_embedding,perf_opt_embedding))
  print("GLEU,%.4f,%.4f,%.4f,%.4f" % (perf_smv_gleu,perf_sms_gleu,perf_rasa_gleu,perf_opt_gleu))

In [0]:
# J1: build the label and ground-truth file paths under `folder`, then run the
# full comparison; run() prints the result table.
labelfilename = folder + 'CrowdWSA2019_J1_label_anonymous.tsv'
gtfilename = folder + 'CrowdWSA2019_J1_gt.tsv'

print("Data:J1")
run(labelfilename,gtfilename)

Data:J1
Evaluation,SMV,SMS,RASA,Optimal
Embedding,0.7354,0.7969,0.7914,0.8853
GLEU,0.1930,0.2627,0.2519,0.4990


In [0]:
# T1: build the label and ground-truth file paths under `folder`, then run the
# full comparison; run() prints the result table.
labelfilename = folder + 'CrowdWSA2019_T1_label_anonymous.tsv'
gtfilename = folder + 'CrowdWSA2019_T1_gt.tsv'

print("Data:T1")
run(labelfilename,gtfilename)

Data:T1
Evaluation,SMV,SMS,RASA,Optimal
Embedding,0.7851,0.8377,0.8451,0.9047
GLEU,0.1740,0.2194,0.2296,0.3698


In [0]:
# T2: build the label and ground-truth file paths under `folder`, then run the
# full comparison; run() prints the result table.
labelfilename = folder + 'CrowdWSA2019_T2_label_anonymous.tsv'
gtfilename = folder + 'CrowdWSA2019_T2_gt.tsv'

print("Data:T2")
run(labelfilename,gtfilename)

Data:T2
Evaluation,SMV,SMS,RASA,Optimal
Embedding,0.7696,0.8288,0.8339,0.8986
GLEU,0.1616,0.2170,0.2345,0.3637
