# Hate Speech and Offensive Language Detection
This notebook runs our experiments on the Hate Speech and Offensive Language Detection tweet dataset.
WARNING: dataset contains offensive language.

##  AAE detection model
As described in our paper, we use the model from https://github.com/slanglab/twitteraae to detect the dialect of each tweet. Only the model files from that repo are needed; for convenience, we have copied them into the `twitteraae_models` folder next to this notebook.

In [None]:
from __future__ import division
import numpy as np
import sys,os
import numpy as np
import torch
from torchtext import data
from torchtext import datasets
import time
import random
vocabfile = "twitteraae_models/model_vocab.txt" # vocabulary file read by load_model(); change path if needed, path inside twitteraae repo is twitteraae/model/model_vocab.txt
modelfile = "twitteraae_models/model_count_table.txt" # count table read by load_model(); change path if needed (presumably twitteraae/model/model_count_table.txt inside the twitteraae repo -- TODO confirm)

# the following functions are copied from twitteraae for convenience
# Module-level state filled in by load_model():
#   K         -- number of columns of the count table
#   wordprobs -- smoothed count table, indexed [word row, column]
#   w2num     -- vocabulary word -> row index
K=0; wordprobs=None; w2num=None

def load_model():
    """Load the twitteraae count table and vocabulary into module globals.

    Idempotent: once ``wordprobs`` has been populated, later calls return
    immediately. The globals are only assigned after both files have been
    read and checked, so a failed load (missing file, size mismatch) leaves
    ``wordprobs`` as None and the next call retries instead of silently
    returning with a half-initialised model.

    Reads the module-level paths ``modelfile`` (count table, one row per
    vocabulary word) and ``vocabfile`` (one word per line, the word being the
    last tab-separated field).
    """
    global vocab,w2num,N_wk,N_k,wordprobs,N_w,K, modelfile,vocabfile
    if wordprobs is not None:
        # assume already loaded
        return

    count_table = np.loadtxt(modelfile)

    # `with` guarantees the vocabulary file handle is closed.
    with open(vocabfile, encoding="utf8") as vocab_handle:
        vocab_words = [line.split("\t")[-1].strip() for line in vocab_handle]
    assert len(vocab_words) == count_table.shape[0]

    # Everything loaded and validated: publish the module state.
    N_wk = count_table
    N_w = N_wk.sum(1)
    N_k = N_wk.sum(0)
    K = len(N_k)
    vocab = vocab_words
    w2num = {w:i for i,w in enumerate(vocab)}
    # Assigned last because it doubles as the "already loaded" flag.
    wordprobs = (N_wk + 1) / N_k

def infer_cvb0(invocab_tokens, alpha, numpasses):
    """Infer the per-document mixture over the K model columns.

    invocab_tokens: tokens that are all keys of ``w2num``
    alpha: value added to the running document totals in every update
    numpasses: the refinement sweep runs ``numpasses - 1`` times

    Returns a length-K array normalised to sum to 1.
    """
    global K,wordprobs,w2num
    n_tokens = len(invocab_tokens)

    # Start every token from its per-token normalised likelihood row.
    Qs = np.zeros((n_tokens, K))
    for pos, token in enumerate(invocab_tokens):
        row = wordprobs[w2num[token], :]
        Qs[pos, :] = row / row.sum()
    lik = Qs.copy()  # kept fixed; only proportions matter for the updates

    Q_k = Qs.sum(0)
    for _ in range(numpasses - 1):
        for pos in range(n_tokens):
            # Remove this token's contribution, re-estimate it against the
            # rest of the document, then add the new estimate back in.
            Q_k -= Qs[pos, :]
            updated = lik[pos, :] * (Q_k + alpha)
            Qs[pos, :] = updated / updated.sum()
            Q_k += Qs[pos, :]

    Q_k /= Q_k.sum()
    return Q_k

def predict_lang(tokens, alpha=1, numpasses=5, thresh1=1, thresh2=0.2):
    """Return the dialect posterior for a tokenised tweet, or None.

    tokens: list of token strings (matched against the vocabulary lower-cased)
    alpha, numpasses: forwarded to infer_cvb0
    thresh1: minimum number of in-vocabulary tokens required
    thresh2: minimum fraction of tokens that must be in the vocabulary

    Returns None when there is nothing to infer from (no tokens, no
    in-vocabulary tokens) or when either threshold is not met; otherwise the
    posterior array produced by infer_cvb0.
    """
    # Guard first: an empty tweet would otherwise divide by zero below when
    # thresh1 <= 0.
    if len(tokens) == 0:
        return None

    lowered = [w.lower() for w in tokens]
    invocab_tokens = [w for w in lowered if w in w2num]

    # With no in-vocabulary token infer_cvb0 would normalise a zero vector
    # and return NaNs, whatever the thresholds are.
    if not invocab_tokens:
        return None
    # check that at least thresh1 tokens are in vocabulary
    if len(invocab_tokens) < thresh1:
        return None
    # check that at least thresh2 (a fraction) of tokens are in vocabulary
    if len(invocab_tokens) / len(tokens) < thresh2:
        return None
    return infer_cvb0(invocab_tokens, alpha=alpha, numpasses=numpasses)


In [None]:
# this loads the twitteraae model for detection:
# fills the module-level K / wordprobs / w2num that predict_lang relies on
# (a no-op if the model is already loaded)
load_model()


We load the dataset `labeled_data.csv`, available at https://github.com/t-davidson/hate-speech-and-offensive-language. For convenience, a copy is included in this repo.

In [None]:
# for cnn

labeled_data_path = "data/labeled_data.csv" # change path if needed

# Tweet text: spaCy-tokenised, batches come out as [batch size, sent len].
TEXT = data.Field(tokenize = 'spacy', batch_first = True)
# All remaining fields are plain integer columns (use_vocab=False: no vocabulary lookup).
LABEL = data.LabelField(dtype = torch.long, sequential=False, use_vocab=False)
EXPERT = data.LabelField(dtype = torch.long,  sequential=False, use_vocab=False)
GROUP = data.LabelField(dtype = torch.long, sequential=False, use_vocab=False)
EXPERTLABEL = data.LabelField(dtype = torch.long, sequential=False, use_vocab=False)

# One (name, field) entry per CSV column, in column order; (None, None) skips a column.
# NOTE(review): 'expertlabel', 'group' and 'expert' are bound to existing CSV columns here only
# as placeholders -- the augmentation cell below overwrites all three on every example.
fields = [(None, None),(None, None),('expertlabel', EXPERTLABEL),('group', GROUP),('expert', EXPERT),
          ('label', LABEL), ('text', TEXT)]

# Only `train` is passed, so the result is a tuple whose element 0 is the full dataset.
train_data_orig = data.TabularDataset.splits(
                                        path = '',
                                        train = labeled_data_path,
                                        format = 'csv',
                                        fields = fields,
                                        skip_header = True)


Augment data with expert predictions and demographic data

In [None]:
# build expert data
all_data = train_data_orig[0]

p = 0.75 # expert probability of being correct for an AA tweet
q = 0.9 # expert probability of being correct for a non-AA tweet

# tracker variables for statistics
expert_correct_total = 0  # number of examples where the synthetic expert is flagged correct
total = 0                 # number of examples processed
aa_frac = 0               # number of examples detected as AA (divide by total for the fraction)
for example in all_data:
    attrs = vars(example)
    lang = predict_lang(attrs['text'])
    aa = 0
    if lang is None:
        # predict_lang returns None when too few tokens are in the dialect
        # vocabulary; such tweets are kept in the non-AA group.
        print("error processing tweet: "+str(attrs['text']))
    elif lang[0] >= 0.5:
        aa = 1
    label = attrs['label']

    # The expert is right with probability p on AA tweets and q on the rest.
    expert_accuracy = p if aa == 1 else q
    if np.random.binomial(1, expert_accuracy):
        exp = 1 # 1: expert is right
        exp_label = int(label)  # np.long was only an alias of int and is gone from current NumPy
    else:
        exp = 0 # 0: expert is wrong
        # NOTE(review): the "wrong" label is drawn uniformly from all 3
        # classes, so it equals the true label a third of the time while
        # `expert` is still 0 -- confirm this is intended.
        exp_label = int(np.argmax(np.random.multinomial(1,[1/3]*3 , size=1)))

    # label encoding: 2: neither, 1: offensive, 0: hate speech
    attrs['expertlabel'] = exp_label
    attrs['group'] = str(aa)
    attrs['expert'] = exp
    aa_frac += aa
    total += 1
    expert_correct_total += exp
#print(expert_correct_total/total)
#print(aa_frac/total)


Build the data for PyTorch and vectorize it. This requires the `glove.6B.100d` embeddings, which will be downloaded automatically (862 MB).

In [None]:

# NOTE(review): these four fields were created with use_vocab=False, so their
# vocabularies are presumably never consulted -- confirm these calls are needed.
LABEL.build_vocab(all_data)
EXPERT.build_vocab(all_data)
GROUP.build_vocab(all_data)
EXPERTLABEL.build_vocab(all_data)
MAX_VOCAB_SIZE = 25_000

# Text vocabulary capped at MAX_VOCAB_SIZE words, with GloVe vectors attached;
# words without a pretrained vector are initialised from a normal distribution.
TEXT.build_vocab(all_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)



Split the data for train, test and validation.

In [None]:

# NOTE(review): torchtext documents a list split_ratio as the relative sizes of
# (train, test, valid) but returns the datasets in (train, valid, test) order.
# If that holds for the installed version, `test_data` below receives the 0.3
# share and `valid_data` the 0.1 share -- confirm this 60/10/30
# train/valid/test split is the intended one.
# No random_state is passed, so the split is presumably different on every run.
train_data, test_data, valid_data  = all_data.split(split_ratio=[0.6,0.1,0.3])

BATCH_SIZE = 64

# Use the first GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Iterators are returned in the same order as the datasets passed in.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    sort = False,
    batch_size = BATCH_SIZE, 
    device = device)


The following code has two parts:
1) the first part runs our method and the baselines to get results;
2) the second part combines all models to compute standard deviations and confidence intervals. It depends on the first part, which must be run beforehand.

# Build model
Model definitions for sentiment analysis adapted from https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/3%20-%20Faster%20Sentiment%20Analysis.ipynb

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):
    """Text CNN with exactly three convolution widths, returning raw logits.

    NOTE(review): a second class named CNN is defined right after this one in
    the same cell and replaces it, so this variant is effectively unused.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)

        # conv_0, conv_1, conv_2: one convolution per entry of filter_sizes[0..2],
        # each spanning the full embedding dimension.
        for idx in range(3):
            setattr(self, 'conv_%d' % idx,
                    nn.Conv2d(in_channels = 1,
                              out_channels = n_filters,
                              kernel_size = (filter_sizes[idx], embedding_dim)))

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text = [batch size, sent len]
        # embedded = [batch size, 1, sent len, emb dim]
        embedded = self.embedding(text).unsqueeze(1)

        pooled = []
        for conv in (self.conv_0, self.conv_1, self.conv_2):
            # activated = [batch size, n_filters, sent len - filter size + 1]
            activated = F.relu(conv(embedded).squeeze(3))
            # max over time -> [batch size, n_filters]
            pooled.append(F.max_pool1d(activated, activated.shape[2]).squeeze(2))

        # cat = [batch size, n_filters * 3]
        cat = self.dropout(torch.cat(pooled, dim = 1))
        return self.fc(cat)

class CNN(nn.Module):
    """Text CNN with one convolution per filter size, returning class probabilities.

    vocab_size, embedding_dim: size of the embedding table
    n_filters: output channels of each convolution
    filter_sizes: iterable of convolution heights (in tokens)
    output_dim: number of classes
    dropout: dropout probability applied to the pooled features
    pad_idx: index of the padding token in the vocabulary

    forward(text) takes [batch size, sent len] token ids and returns
    [batch size, output_dim] probabilities (softmax over the class dimension).
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
                
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        # Explicit class dimension: the implicit-dim form is deprecated and
        # emits a warning on every forward pass.
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        out = self.softmax(out)
        return out

class CNN_rej(nn.Module):
    """Classifier + rejector text CNN for learning to defer.

    Two parallel text CNNs read the same token ids:
      * the classifier branch (embedding / convs / fc) produces ``output_dim``
        class scores;
      * the rejector branch (embedding_rej / convs_rej / fc_rej) produces one
        extra "defer to the expert" score.
    The scores are concatenated (deferral last) and passed through a softmax,
    so forward(text) returns [batch size, output_dim + 1] probabilities.

    The rejector branch uses its own embedding and convolutions. Previously
    its forward pass re-used the classifier's intermediate tensors, so
    embedding_rej and convs_rej never influenced the output.

    vocab_size, embedding_dim: size of each embedding table
    n_filters: output channels of each convolution
    filter_sizes: iterable of convolution heights (in tokens)
    output_dim: number of real classes (the deferral output is added on top)
    dropout: dropout probability applied to each branch's pooled features
    pad_idx: index of the padding token in the vocabulary
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
                
        # --- classifier branch ---
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        # --- rejector branch ---
        self.embedding_rej = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs_rej = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc_rej = nn.Linear(len(filter_sizes) * n_filters, 1)
        
        self.dropout_rej = nn.Dropout(dropout)
        
        # Explicit class dimension (the implicit-dim form is deprecated).
        self.softmax = nn.Softmax(dim = 1)

    @staticmethod
    def _pooled_features(text, embedding, convs):
        """Embed, convolve and max-pool over time: returns [batch size, n_filters * len(convs)]."""
        #text = [batch size, sent len]
        embedded = embedding(text).unsqueeze(1)
        #embedded = [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in convs]
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        #pooled_n = [batch size, n_filters]
        return torch.cat(pooled, dim = 1)

    def forward(self, text):
        # Classifier dropout is applied before the rejector's, as before.
        cat = self.dropout(self._pooled_features(text, self.embedding, self.convs))
        cat_rej = self.dropout_rej(
            self._pooled_features(text, self.embedding_rej, self.convs_rej))

        out_rej = self.fc_rej(cat_rej)   # [batch size, 1]
        out = self.fc(cat)               # [batch size, output_dim]

        # Deferral score goes last so its index equals output_dim.
        out =  torch.cat((out, out_rej), 1)

        out = self.softmax(out)
        return out


In [None]:
# build model
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100 # fixed
N_FILTERS = 300 # hyperparameter
FILTER_SIZES = [3,4,5]
# NOTE(review): OUTPUT_DIM is only used by the commented-out CNN line below;
# CNN_rej is given 3 classes and appends the deferral output itself (4 outputs in total).
OUTPUT_DIM = 4
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

#model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
model = CNN_rej(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 3, DROPOUT, PAD_IDX)

pretrained_embeddings = TEXT.vocab.vectors

# Only the classifier embedding gets the pretrained vectors; embedding_rej keeps its random init.
model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Zero the rows of the unknown and padding tokens.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

## Train the Model

In [None]:
import torch.optim as optim

# Adam with default hyper-parameters over every parameter of the deferral model.
optimizer = optim.Adam(model.parameters())
# Move the model to the device chosen earlier (GPU when available).
model = model.to(device)


In [None]:

def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8

    preds: [batch size, n outputs] scores or probabilities
    y: [batch size] integer targets
    Returns a 1-element float tensor (callers read it with .item()).
    """
    max_preds = preds.argmax(dim = 1) # index of the highest score per example
    correct = max_preds.eq(y)
    # Build the denominator on the tensors' own device rather than on the
    # module-level `device`, so the function also works when the two differ.
    batch_size = torch.tensor([y.shape[0]], dtype = torch.float, device = correct.device)
    return correct.sum() / batch_size
def reject_CrossEntropyLoss(outputs, m, labels, m2, n_classes, eps=1e-12):
    '''
    The L_{CE} loss implementation for hatespeech, identical to CIFAR implementation
    ----
    outputs: network outputs, [batch size, n_classes + 1] probabilities with the
             deferral output in column n_classes
    m: weight of the deferral term, the expert-agreement indicator I_{m = y}
    labels: target class indices, [batch size]
    m2: weight of the classifier term; train_reject passes alpha where the
        expert is right and 1 where the expert is wrong
    n_classes: number of classes (also the column index of the deferral output)
    eps: lower bound applied to probabilities before the log, so that an
         exact 0 with a 0 weight contributes 0 instead of NaN

    Returns the mean over the batch of
        -m * log2(outputs[:, n_classes]) - m2 * log2(outputs[i, labels[i]])
    '''
    batch_size = outputs.size()[0]
    rows = torch.arange(batch_size, device = outputs.device)
    defer_prob = torch.clamp(outputs[:, n_classes], min = eps)
    label_prob = torch.clamp(outputs[rows, labels], min = eps)  # probability given to the true label
    per_example = -m * torch.log2(defer_prob) - m2 * torch.log2(label_prob)
    return torch.sum(per_example) / batch_size

def train_reject(model, iterator, optimizer,alpha):
    """Train the deferral model for one epoch with the L_{CE} loss.

    model: network whose last output column is the deferral score
    iterator: yields batches exposing .text, .label and .expert (1 where the
              expert agrees with the label, 0 otherwise)
    optimizer: optimizer over the model's parameters
    alpha: weight of the classifier term on examples the expert gets right
           (examples the expert gets wrong always have weight 1)

    Returns (mean loss per batch, mean accuracy per batch).
    """
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        optimizer.zero_grad()

        predictions = model(batch.text)
        labels = batch.label.to(predictions.device)

        # expert agreement with label: I_{m=y}
        m = batch.expert.to(predictions.device).float()
        # Classifier-term weight: alpha where the expert is right, 1 elsewhere.
        # Computed in floating point -- writing alpha into an integer tensor
        # would truncate every fractional alpha to 0.
        m2 = alpha * m + (1.0 - m)

        # The deferral output is the last column, so the number of real
        # classes is one less than the output width.
        n_classes = predictions.size()[1] - 1
        loss = reject_CrossEntropyLoss(predictions, m, labels, m2, n_classes)

        acc = categorical_accuracy(predictions, labels)

        loss.backward()

        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate_reject(model, iterator):
    """Evaluate the deferral model without updating it.

    Uses the same loss as training, but with a classifier-term weight of 1 on
    every example. Returns (mean loss per batch, mean accuracy per batch).
    """
    model.eval()
    loss_total = 0
    acc_total = 0
    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text)
            n_examples = predictions.size()[0]
            # unit weight for the classifier term of every example
            unit_weights = torch.tensor([1 for _ in range(n_examples)]).to(device)
            loss = reject_CrossEntropyLoss(predictions, batch.expert, batch.label, unit_weights, 3)
            acc = categorical_accuracy(predictions, batch.label)
            loss_total += loss.item()
            acc_total += acc.item()

    n_batches = len(iterator)
    return loss_total / n_batches, acc_total / n_batches

In [None]:
def metrics_print(net, loader):
    """Print and return the deferral-system metrics of `net` on `loader`.

    net: model whose last output column is the "defer to expert" score
    loader: yields batches exposing .text, .label and .expert (1 where the
            expert is right)

    An example is deferred when the arg-max output is the last column;
    otherwise the classifier's prediction is used.

    Prints a dict with coverage, system / expert / classifier accuracy and
    "alone classifier": the accuracy the classifier would reach on every
    example if it never deferred (arg-max over the real classes only).

    Returns [coverage %, system accuracy %, expert accuracy % on deferred
    examples, classifier accuracy % on non-deferred examples].
    """
    net.eval()
    correct = 0        # classifier correct, among non-deferred examples
    correct_sys = 0    # system (classifier or expert) correct
    exp = 0            # expert correct, among deferred examples
    exp_total = 0      # number of deferred examples
    total = 0          # number of non-deferred examples
    real_total = 0     # number of examples
    alone_correct = 0  # classifier correct if it had to answer every example
    with torch.no_grad():
        for data in loader:
            outputs = net(data.text)
            defer_idx = outputs.size()[1] - 1  # deferral score is the last column
            _, predicted = torch.max(outputs.data, 1)
            _, alone_predicted = torch.max(outputs.data[:, :defer_idx], 1)
            batch_size = outputs.size()[0]
            for i in range(0,batch_size):
                alone_correct += (alone_predicted[i] == data.label[i]).item()
                if predicted[i].item() == defer_idx:
                    exp +=  data.expert[i].item()
                    correct_sys += data.expert[i].item()
                    exp_total+=1
                else:
                    total += 1
                    correct += (predicted[i] == data.label[i]).item()
                    correct_sys += (predicted[i] == data.label[i]).item()
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    # The small constants keep the ratios defined when nothing (or everything) is deferred.
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]

def epoch_time(start_time, end_time):
    """Split the elapsed time (in seconds) into whole minutes and leftover whole seconds."""
    elapsed = end_time - start_time
    whole_minutes = int(elapsed / 60)
    leftover_seconds = int(elapsed - whole_minutes * 60)
    return whole_minutes, leftover_seconds
def metrics_print_fairness(net, loader):
    """Print and return per-group false-flag rates of the deferral system.

    net: model whose last output column is the "defer to expert" score
    loader: yields batches exposing .text, .label, .group and .expertlabel

    Only examples whose true label is 2 are counted. The system's prediction
    is the classifier's arg-max, or the expert's label when the arg-max is the
    deferral column; such an example is "flagged" when that prediction is 0 or 1.

    Prints a dict with both group rates and their signed difference
    (group 0 minus group 1), like the other metrics_print* helpers.

    Returns [group-0 rate, group-1 rate, absolute difference].
    """
    net.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0

    with torch.no_grad():
        for data in loader:
            outputs = net(data.text)
            defer_idx = outputs.size()[1] - 1  # deferral score is the last column
            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]
            for i in range(0,batch_size):
                if predicted[i].item() == defer_idx:
                    prediction = data.expertlabel[i].item()
                else:
                    prediction = predicted[i].item()

                if data.label[i].item() != 2:
                    continue
                flagged = int(prediction == 1 or prediction == 0)
                if data.group[i].item() == 0:
                    group_0_counts += 1
                    group_0 += flagged
                else:
                    group_1_counts += 1
                    group_1 += flagged

    # The small constant keeps the rates defined when a group has no label-2 example.
    rate_0 = group_0/(group_0_counts+0.0001)
    rate_1 = group_1/(group_1_counts+0.0001)
    to_print={"group0":rate_0, "group1": rate_1, "discrimination":rate_0- rate_1 }
    print(to_print)
    return [rate_0, rate_1, abs(rate_0- rate_1)]


Train the model by validation over alpha in [0,1] with steps of 0.1

In [None]:
import copy, time 

N_EPOCHS = 5
pretrained_embeddings = TEXT.vocab.vectors
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Best model over the whole sweep, selected by validation system accuracy.
# Previously best_model was reset for every alpha, so the test cells below
# always evaluated the alpha = 1.0 model, whatever its validation score.
best_model = None
best_alpha = None
best_valid_acc = -1.0

for i in range(0,11):
    alpha = i/10

    # Fresh model and optimizer for every alpha.
    model = CNN_rej(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 3, DROPOUT, PAD_IDX)
    model.embedding.weight.data.copy_(pretrained_embeddings)
    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    optimizer = optim.Adam(model.parameters())
    model = model.to(device)

    # Keep the epoch with the highest validation system accuracy for this alpha
    # (element 1 of the list returned by metrics_print).
    alpha_best_acc = 0
    alpha_best_model = None
    for epoch in range(N_EPOCHS):
        train_loss, train_acc = train_reject(model, train_iterator, optimizer, alpha)

        valid_acc = metrics_print(model,valid_iterator)[1]

        if valid_acc >= alpha_best_acc:
            alpha_best_acc = valid_acc
            alpha_best_model = copy.deepcopy(model)

    print(metrics_print(alpha_best_model, valid_iterator))

    # Strict comparison: on ties the smaller alpha is kept.
    if alpha_best_acc > best_valid_acc:
        best_valid_acc = alpha_best_acc
        best_alpha = alpha
        best_model = alpha_best_model

print("selected alpha:", best_alpha)

In [None]:
# Fairness metrics on the test split for the model left in `best_model` by the training cell above.
metrics_print_fairness(best_model, test_iterator)

In [None]:
# Coverage / accuracy metrics on the test split for the model left in `best_model` by the training cell above.
metrics_print(best_model, test_iterator)

# Baseline: Confidence 

In [None]:
class CNN_(nn.Module):
    """Text CNN with one convolution per filter size, returning raw logits.

    Used for the baseline classifier and expert-correctness models; unlike
    CNN / CNN_rej it applies no softmax.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)

        # one convolution per filter height, each spanning the full embedding dimension
        conv_layers = []
        for fs in filter_sizes:
            conv_layers.append(nn.Conv2d(in_channels = 1,
                                         out_channels = n_filters,
                                         kernel_size = (fs, embedding_dim)))
        self.convs = nn.ModuleList(conv_layers)

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text = [batch size, sent len]
        # token_vectors = [batch size, 1, sent len, emb dim]
        token_vectors = self.embedding(text).unsqueeze(1)

        pooled = []
        for conv in self.convs:
            # feature_map = [batch size, n_filters, sent len - filter size + 1]
            feature_map = F.relu(conv(token_vectors)).squeeze(3)
            # max over time -> [batch size, n_filters]
            pooled.append(F.max_pool1d(feature_map, feature_map.shape[2]).squeeze(2))

        # cat = [batch size, n_filters * len(filter_sizes)]
        cat = self.dropout(torch.cat(pooled, dim = 1))
        return self.fc(cat)

## expert model

In [None]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 300
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 2 # NOTE(review): unused here -- the literal 2 is passed to CNN_ below
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Two outputs: the model is trained on batch.expert (0: expert wrong, 1: expert right).
model_expert = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 2, DROPOUT, PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors

# Start from the pretrained vectors attached to the text vocabulary.
model_expert.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Zero the rows of the unknown and padding tokens.
model_expert.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model_expert.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [None]:
import torch.optim as optim

# Adam with default hyper-parameters and a standard cross-entropy loss for the expert model.
# Note: this rebinds the notebook-level `optimizer` and `criterion`.
optimizer = optim.Adam(model_expert.parameters())
criterion = nn.CrossEntropyLoss()

# Move model and loss to the device chosen earlier.
model_expert = model_expert.to(device)
criterion = criterion.to(device)

In [None]:
def train_expert(model_exp, iterator, optimizer, criterion):
    """Train `model_exp` for one epoch to predict batch.expert from batch.text.

    Returns (mean loss per batch, mean accuracy per batch).
    """
    model_exp.train()
    running_loss, running_acc = 0, 0

    for batch in iterator:
        optimizer.zero_grad()

        logits = model_exp(batch.text)
        targets = batch.expert  # 1 where the expert is right, 0 otherwise

        loss = criterion(logits, targets)
        acc = categorical_accuracy(logits, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_acc += acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

def evaluate_expert(model_exp, iterator, criterion):
    """Evaluate `model_exp` at predicting batch.expert, without updating it.

    Returns (mean loss per batch, mean accuracy per batch).
    """
    model_exp.eval()
    running_loss, running_acc = 0, 0

    with torch.no_grad():
        for batch in iterator:
            logits = model_exp(batch.text)
            targets = batch.expert
            running_loss += criterion(logits, targets).item()
            running_acc += categorical_accuracy(logits, targets).item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [None]:
N_EPOCHS = 5

best_valid_loss = float('inf')
saved_checkpoint = False  # True once a best-so-far checkpoint has been written

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train_expert(model_expert, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate_expert(model_expert, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Checkpoint the weights whenever the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model_expert.state_dict(), 'tut3-model.pt')
        saved_checkpoint = True
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

# Restore the best-validation weights: without this the checkpoint is never
# used and model_expert keeps its last-epoch weights. It has to happen here
# because the classifier training loop later writes to the same file name.
if saved_checkpoint:
    model_expert.load_state_dict(torch.load('tut3-model.pt', map_location = device))

## classifier model

In [None]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 300
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 3 # one output per tweet class
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Plain classifier for the confidence baseline (no deferral output).
model_class = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors

# Start from the pretrained vectors attached to the text vocabulary.
model_class.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# Zero the rows of the unknown and padding tokens.
model_class.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model_class.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
import torch.optim as optim

# Note: this rebinds the notebook-level `optimizer` and `criterion`, which
# until now belonged to the expert model.
optimizer = optim.Adam(model_class.parameters())
criterion = nn.CrossEntropyLoss()

# Move model and loss to the device chosen earlier.
model_class = model_class.to(device)
criterion = criterion.to(device)
def train(model_class, iterator, optimizer, criterion):
    """Train `model_class` for one epoch to predict batch.label from batch.text.

    Returns (mean loss per batch, mean accuracy per batch).
    """
    model_class.train()
    running_loss, running_acc = 0, 0

    for batch in iterator:
        optimizer.zero_grad()

        logits = model_class(batch.text)
        targets = batch.label

        loss = criterion(logits, targets)
        acc = categorical_accuracy(logits, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_acc += acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches
def evaluate(model_class, iterator, criterion):
    """Evaluate `model_class` at predicting batch.label, without updating it.

    Returns (mean loss per batch, mean accuracy per batch).
    """
    model_class.eval()
    running_loss, running_acc = 0, 0

    with torch.no_grad():
        for batch in iterator:
            logits = model_class(batch.text)
            targets = batch.label
            running_loss += criterion(logits, targets).item()
            running_acc += categorical_accuracy(logits, targets).item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [None]:
N_EPOCHS = 5

best_valid_loss = float('inf')
saved_checkpoint = False  # True once a best-so-far checkpoint has been written

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model_class, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model_class, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Checkpoint the weights whenever the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model_class.state_dict(), 'tut3-model.pt')
        saved_checkpoint = True
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

# Restore the best-validation weights: without this the checkpoint is never
# used and model_class keeps its last-epoch weights.
if saved_checkpoint:
    model_class.load_state_dict(torch.load('tut3-model.pt', map_location = device))

In [None]:
from scipy.special import softmax


def metrics_print_confid(net_class, net_exp, loader):
    """Print and return the metrics of the confidence baseline.

    net_class: classifier returning class logits
    net_exp: model returning 2 logits, index 0 = "expert is wrong"
    loader: yields batches exposing .text, .label and .expert (1 where the
            expert is right)

    An example is deferred when the classifier's estimated error,
    1 - max softmax probability, is at least the estimated probability that
    the expert is wrong.

    Prints a dict with coverage, system / expert / classifier accuracy and
    "alone classifier": the classifier's accuracy over every example,
    deferred or not.

    Returns [coverage %, system accuracy %, expert accuracy % on deferred
    examples, classifier accuracy % on non-deferred examples].
    """
    net_class.eval()
    net_exp.eval()
    correct = 0        # classifier correct, among non-deferred examples
    correct_sys = 0    # system (classifier or expert) correct
    exp = 0            # expert correct, among deferred examples
    exp_total = 0      # number of deferred examples
    total = 0          # number of non-deferred examples
    real_total = 0     # number of examples
    alone_correct = 0  # classifier correct over all examples
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            outputs_exp = net_exp(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]
            for i in range(0,batch_size):
                classifier_right = (predicted[i] == data.label[i]).item()
                alone_correct += classifier_right

                class_probs = softmax(outputs_class.data[i].tolist())
                expert_probs = softmax(outputs_exp.data[i].tolist())
                # classifier's estimated error minus the expert's estimated error
                r_score = (1 - np.max(class_probs)) - expert_probs[0]

                if r_score >= 0:
                    exp +=  data.expert[i].item()
                    correct_sys +=  data.expert[i].item()
                    exp_total+=1
                else:
                    total += 1
                    correct += classifier_right
                    correct_sys += classifier_right
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    # The small constants keep the ratios defined when nothing (or everything) is deferred.
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]


def metrics_print_confid_fairness(net_class, net_exp, loader):
    """Print and return per-group false-flag rates of the confidence baseline.

    Deferral rule: defer when 1 - max classifier softmax probability is at
    least the softmax probability net_exp gives to its output 0. The system's
    prediction is the classifier's arg-max, or batch.expertlabel when
    deferring. Only examples with true label 2 are counted; one is "flagged"
    when the system predicts 0 or 1.

    Prints the number of counted examples in group 1, then in group 0, then a
    dict with both rates and their signed difference.
    Returns [group-0 rate, group-1 rate, absolute difference].
    """
    net_class.eval()
    net_exp.eval()
    flagged = {0: 0, 1: 0}   # label-2 examples predicted as 0 or 1, per group
    counted = {0: 0, 1: 0}   # label-2 examples, per group
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            outputs_exp = net_exp(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            for i in range(outputs_class.size()[0]):
                class_probs = softmax([outputs_class.data[i][k].item() for k in range(3)])
                expert_probs = softmax([outputs_exp.data[i][k].item() for k in range(2)])
                r_score = (1 - np.max(class_probs)) - expert_probs[0]

                if r_score >= 0:
                    prediction = data.expertlabel[i].item()
                else:
                    prediction = predicted[i].item()

                if data.label[i].item() != 2:
                    continue
                key = 0 if data.group[i].item() == 0 else 1
                counted[key] += 1
                if prediction == 1 or prediction == 0:
                    flagged[key] += 1
    print(counted[1])
    print(counted[0])

    rate_0 = flagged[0]/(counted[0]+0.0001)
    rate_1 = flagged[1]/(counted[1]+0.0001)
    to_print={"group0":rate_0, "group1": rate_1, "discrimination":rate_0- rate_1 }
    print(to_print)
    return [rate_0, rate_1, abs(rate_0- rate_1)]


In [None]:
# Coverage / accuracy of the confidence baseline (classifier + expert-correctness model) on the test split.
metrics_print_confid(model_class, model_expert,test_iterator)

In [None]:
# Fairness metrics of the confidence baseline on the test split
# (the function prints its own summary; this also prints the returned list).
print(metrics_print_confid_fairness(model_class, model_expert,test_iterator))

# Oracle Baseline

In [None]:
def metrics_print_oracle(net_class, loader, threshold_group0=0.90, threshold_group1=0.75):
    """Print and return classification metrics for the Oracle deferral baseline.

    Each example is handled by the classifier unless its softmax confidence
    (largest class probability) is at or below a group-specific threshold, in
    which case it is deferred to the expert.

    Args:
        net_class: classifier ``torch.nn.Module``; ``net_class(batch.text)`` must
            return one row of three class scores per example.
        loader: iterable of batches exposing ``text``, ``label``, ``group`` and
            ``expert`` (1 where the expert is right, 0 where the expert is wrong).
        threshold_group0: deferral threshold for examples with ``group == 0``.
        threshold_group1: deferral threshold for every other example.

    Returns:
        ``[coverage, system accuracy, expert accuracy, classifier accuracy]``,
        all in percent. Coverage is the share of examples the classifier keeps;
        expert/classifier accuracy are measured on the deferred/kept examples
        only (with a small constant in the denominator to avoid dividing by 0).

    Raises:
        ZeroDivisionError: if ``loader`` yields no examples.
    """
    # Inference mode, as metrics_print_oracle_fairness already does.
    net_class.eval()
    correct = 0        # classifier hits among the examples it keeps
    correct_sys = 0    # hits of the combined classifier + expert system
    exp = 0            # expert hits among the deferred examples
    exp_total = 0      # number of deferred examples
    total = 0          # number of examples kept by the classifier
    real_total = 0     # number of examples seen
    alone_correct = 0  # classifier hits over ALL examples (no deferral)
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]
            for i in range(batch_size):
                classifier_hit = (predicted[i] == data.label[i]).item()
                # Previously never updated, so "alone classifier" always printed 0.
                alone_correct += classifier_hit
                confidence = np.max(softmax([outputs_class.data[i][k].item() for k in range(3)]))
                if data.group[i].item() == 0:
                    threshold = threshold_group0
                else:
                    threshold = threshold_group1
                if confidence <= threshold:
                    # Defer to the expert.
                    expert_hit = data.expert[i].item()
                    exp += expert_hit
                    correct_sys += expert_hit
                    exp_total += 1
                else:
                    total += 1
                    correct += classifier_hit
                    correct_sys += classifier_hit
                real_total += 1
    cov = str(total) + " out of " + str(real_total)
    to_print = {
        "coverage": cov,
        "system accuracy": 100*correct_sys/real_total,
        "expert accuracy": 100*exp/(exp_total+0.0002),
        "classifier accuracy": 100*correct/(total+0.0001),
        "alone classifier": 100*alone_correct/real_total,
    }
    print(to_print)
    return [100*total/real_total, 100*correct_sys/real_total, 100*exp/(exp_total+0.0002), 100*correct/(total+0.0001)]

def metrics_print_oracle_fairness(net_class, loader):
    """Return fairness metrics for the Oracle deferral baseline.

    An example is deferred to the expert when the classifier's largest softmax
    probability is at most 0.90 (``group == 0``) or 0.75 (any other group);
    otherwise the classifier's own prediction is used. Among examples whose
    true label is 2, the function measures how often the final prediction is
    0 or 1, separately for each group.

    Returns:
        ``[rate for group 0, rate for the other group, absolute difference]``.
        Each rate has 0.0001 added to its denominator.
    """
    net_class.eval()
    # Index 0 -> group == 0, index 1 -> every other group value.
    flagged = [0, 0]
    negatives = [0, 0]
    with torch.no_grad():
        for batch in loader:
            logits = net_class(batch.text)
            predicted = torch.max(logits.data, 1)[1]
            for row in range(logits.size()[0]):
                probs = softmax([logits.data[row][0].item(), logits.data[row][1].item(), logits.data[row][2].item()])
                in_group0 = batch.group[row].item() == 0
                cutoff = 0.90 if in_group0 else 0.75
                if np.max(probs) <= cutoff:
                    # Low confidence: take the expert's label instead.
                    prediction = batch.expertlabel[row].item()
                else:
                    prediction = predicted[row]

                if batch.label[row].item() != 2:
                    continue
                slot = 0 if in_group0 else 1
                negatives[slot] += 1
                if prediction == 1 or prediction == 0:
                    flagged[slot] += 1

    rate_group0 = flagged[0] / (negatives[0] + 0.0001)
    rate_group1 = flagged[1] / (negatives[1] + 0.0001)
    return [rate_group0, rate_group1, abs(rate_group0 - rate_group1)]


In [None]:
def metrics_print_classifier(net_class, loader):
    """Print and return classification metrics of the classifier alone.

    The classifier predicts every example (no deferral), so coverage is always
    100% and the expert entries are reported as 0.

    Args:
        net_class: classifier ``torch.nn.Module``; ``net_class(batch.text)`` must
            return one row of class scores per example.
        loader: iterable of batches exposing ``text`` and ``label``.

    Returns:
        ``[coverage, system accuracy, expert accuracy, classifier accuracy]``
        in percent, in the same layout as the other ``metrics_print_*`` helpers.

    Raises:
        ZeroDivisionError: if ``loader`` yields no examples.
    """
    # Inference mode, as metrics_print_classifier_fairness already does.
    net_class.eval()
    correct = 0
    real_total = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            # reshape(-1) keeps the comparison element-wise even if labels
            # arrive as a (batch, 1) column.
            correct += (predicted == data.label.reshape(-1)).sum().item()
            real_total += outputs_class.size()[0]

    total = real_total       # nothing is deferred
    correct_sys = correct    # the system is the classifier
    # Previously never updated, so "alone classifier" always printed 0.
    alone_correct = correct
    exp = 0
    exp_total = 0
    cov = str(total) + " out of " + str(real_total)
    to_print = {
        "coverage": cov,
        "system accuracy": 100*correct_sys/real_total,
        "expert accuracy": 100*exp/(exp_total+0.0002),
        "classifier accuracy": 100*correct/(total+0.0001),
        "alone classifier": 100*alone_correct/real_total,
    }
    print(to_print)
    return [100*total/real_total, 100*correct_sys/real_total, 100*exp/(exp_total+0.0002), 100*correct/(total+0.0001)]


def metrics_print_classifier_fairness(net_class, loader):
    """Print and return fairness metrics of the classifier alone.

    Over all examples whose true label is 2, measures how often the classifier
    predicts 0 or 1, separately for ``group == 0`` and for every other group.

    Returns:
        ``[rate for group 0, rate for the other group, absolute difference]``.
        The printed dict carries the signed difference under "discrimination".
    """
    net_class.eval()
    # group key -> [predicted 0/1 among label-2 examples, label-2 examples]
    tallies = {0: [0, 0], 1: [0, 0]}
    with torch.no_grad():
        for batch in loader:
            scores = net_class(batch.text)
            predicted = torch.max(scores.data, 1)[1]
            n_rows = scores.size()[0]
            for row in range(n_rows):
                prediction = predicted[row]
                key = 0 if batch.group[row].item() == 0 else 1
                if batch.label[row].item() == 2:
                    tallies[key][1] += 1
                    if prediction == 1 or prediction == 0:
                        tallies[key][0] += 1

    rate_group0 = tallies[0][0] / (tallies[0][1] + 0.0001)
    rate_group1 = tallies[1][0] / (tallies[1][1] + 0.0001)
    to_print = {"group0": rate_group0, "group1": rate_group1, "discrimination": rate_group0 - rate_group1}
    print(to_print)
    return [rate_group0, rate_group1, abs(rate_group0 - rate_group1)]


In [None]:

def metrics_print_expert_fairness(loader):
    """Print and return fairness metrics of the expert alone.

    Over all examples whose true label is 2, measures how often the expert's
    label (``expertlabel``) is 0 or 1, separately for ``group == 0`` and for
    every other group.

    Returns:
        ``[rate for group 0, rate for the other group, absolute difference]``.
        The printed dict carries the signed difference under "discrimination".
    """
    flagged = [0, 0]  # index 0 -> group == 0, index 1 -> any other group
    seen = [0, 0]
    with torch.no_grad():
        for batch in loader:
            n_rows = len(batch)
            for row in range(n_rows):
                prediction = batch.expertlabel[row].item()
                which = 0 if batch.group[row].item() == 0 else 1
                if batch.label[row].item() != 2:
                    continue
                seen[which] += 1
                if prediction == 1 or prediction == 0:
                    flagged[which] += 1

    rate_group0 = flagged[0] / (seen[0] + 0.0001)
    rate_group1 = flagged[1] / (seen[1] + 0.0001)
    to_print = {"group0": rate_group0, "group1": rate_group1, "discrimination": rate_group0 - rate_group1}
    print(to_print)
    return [rate_group0, rate_group1, abs(rate_group0 - rate_group1)]


In [None]:
# Fairness metrics of the expert's own labels on the test iterator.
metrics_print_expert_fairness( test_iterator)

In [None]:
# Fairness metrics of the classifier alone (no deferral) on the test iterator.
metrics_print_classifier_fairness(model_class, test_iterator)

In [None]:
# Oracle baseline: coverage and system / expert / classifier accuracy on the test iterator.
metrics_print_oracle(model_class,test_iterator)

In [None]:
# Oracle baseline: per-group fairness metrics on the test iterator.
metrics_print_oracle_fairness(model_class,test_iterator)

# Experiment: repeat data building for each expert type
The following cells repeat the code above many times to obtain error bars.

In [None]:
import numpy as np
# Build synthetic expert data: every example gets a simulated expert label
# ('expertlabel'), a dialect group ('group', "1" = detected as AA, else "0")
# and a flag telling whether the expert was drawn as correct ('expert').
all_data = train_data_orig[0]
p = 0.75 # expert probability of being correct for an AA tweet
q = 0.9 # expert probability of being correct for a non-AA tweet
# NOTE: `sum` shadows the builtin of the same name; the name is kept because
# other cells may read these notebook-level counters.
sum = 0      # number of examples on which the expert was drawn as correct
total = 0    # number of examples processed
aa_frac = 0  # number of examples detected as AA (a count, despite the name)
for example in all_data:
    fields = vars(example)
    lang = predict_lang(fields['text'])
    aa = 0
    try:
        if lang[0] >= 0.5:
            aa = 1
    except Exception:
        # predict_lang returned something that cannot be indexed/compared;
        # show the tweet and keep it in the non-AA group (best effort).
        print(fields['text'])
    label = fields['label'] # 2: neither, 1: offensive, 0: hate speech

    # The expert is right with probability p on AA tweets and q otherwise.
    prob_correct = p if aa == 1 else q
    if np.random.binomial(1, prob_correct):
        exp = 1 # 1: expert is right
        # np.long was removed from NumPy (it was an alias of int); use int directly.
        exp_label = int(label)
    else:
        exp = 0 # 0: expert is wrong
        # NOTE(review): the label is drawn uniformly over all 3 classes, so it
        # can coincide with the true label even though exp stays 0 - confirm
        # this is intended.
        exp_label = int(np.argmax(np.random.multinomial(1, [1/3]*3, size=1)))

    fields['expertlabel'] = exp_label
    fields['group'] = str(aa)
    fields['expert'] = exp
    aa_frac += aa
    total += 1
    sum += exp



In [None]:
# Batch size and device shared by the three iterators built below.
BATCH_SIZE = 64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): torchtext documents split_ratio as [train, test, valid] but
# returns the splits as (train, valid, test), so the 0.2 / 0.1 shares may land
# on the opposite names from what this unpacking suggests - confirm.
train_data, test_data, valid_data = all_data.split(
    split_ratio=[0.7, 0.2, 0.1],
)

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    sort=False,
    device=device,
)

In [None]:
# needs to be changed for each expert model
def metrics_print_oracle(net_class, loader):
    """Return classification metrics for the Oracle deferral baseline.

    Same rule as the earlier Oracle cell, but the deferral thresholds are the
    notebook-level expert accuracies, read when the function is called: ``q``
    for ``group == 0`` and ``p`` for every other group. An example is deferred
    to the expert when the classifier's largest softmax probability is at or
    below its group's threshold. Nothing is printed.

    Returns:
        ``[coverage, system accuracy, expert accuracy, classifier accuracy]``
        in percent.
    """
    n_seen = 0         # all examples
    n_kept = 0         # examples answered by the classifier
    kept_hits = 0      # ... of which the classifier is right
    n_deferred = 0     # examples handed to the expert
    deferred_hits = 0  # ... of which the expert is right
    with torch.no_grad():
        for batch in loader:
            logits = net_class(batch.text)
            predicted = torch.max(logits.data, 1)[1]
            for row in range(logits.size()[0]):
                probs = softmax([logits.data[row][0].item(), logits.data[row][1].item(), logits.data[row][2].item()])
                limit = q if batch.group[row].item() == 0 else p
                if np.max(probs) <= limit:
                    deferred_hits += batch.expert[row].item()
                    n_deferred += 1
                else:
                    n_kept += 1
                    kept_hits += (predicted[row] == batch.label[row]).item()
                n_seen += 1

    system_hits = kept_hits + deferred_hits
    coverage = 100*n_kept/n_seen
    system_acc = 100*system_hits/n_seen
    expert_acc = 100*deferred_hits/(n_deferred+0.0002)
    classifier_acc = 100*kept_hits/(n_kept+0.0001)
    return [coverage, system_acc, expert_acc, classifier_acc]

def metrics_print_oracle_fairness(net_class, loader):
    """Return fairness metrics for the Oracle deferral baseline.

    The deferral thresholds are the notebook-level expert accuracies, read at
    call time: ``q`` for ``group == 0`` and ``p`` for every other group. When
    the classifier's largest softmax probability is at or below the threshold
    the expert's label is used, otherwise the classifier's prediction. Among
    examples whose true label is 2, the function measures how often the final
    prediction is 0 or 1 in each group. Nothing is printed.

    Returns:
        ``[rate for group 0, rate for the other group, absolute difference]``.
    """
    net_class.eval()
    g0_flagged = 0
    g0_seen = 0
    g1_flagged = 0
    g1_seen = 0
    with torch.no_grad():
        for batch in loader:
            logits = net_class(batch.text)
            _, predicted = torch.max(logits.data, 1)
            n_rows = logits.size()[0]
            for row in range(n_rows):
                probs = softmax([logits.data[row][0].item(), logits.data[row][1].item(), logits.data[row][2].item()])
                in_group0 = batch.group[row].item() == 0
                limit = q if in_group0 else p
                defer = np.max(probs) <= limit
                prediction = batch.expertlabel[row].item() if defer else predicted[row]

                if batch.label[row].item() != 2:
                    continue
                is_flagged = bool(prediction == 1 or prediction == 0)
                if in_group0:
                    g0_seen += 1
                    g0_flagged += 1 if is_flagged else 0
                else:
                    g1_seen += 1
                    g1_flagged += 1 if is_flagged else 0

    rate_group0 = g0_flagged / (g0_seen + 0.0001)
    rate_group1 = g1_flagged / (g1_seen + 0.0001)
    return [rate_group0, rate_group1, abs(rate_group0 - rate_group1)]


In [None]:
# Per-trial result accumulators. Each trial appends one list per method:
#   exp_conf / exp_conf_fairness : confidence baseline (classifier + expert model)
#   exp_rej  / exp_rej_fairness  : our method (single model trained with train_reject)
#   exp_ora  / exp_ora_fairness  : Oracle baseline
exp_conf = []
exp_rej = []
exp_ora = []
exp_conf_fairness = []
exp_rej_fairness = []
exp_ora_fairness = []
# Number of independent trials; the summary cells below index results by this value.
max_trials = 1
# NOTE: the loop variable `exp` reuses the notebook-level name `exp` set by the
# expert-data cell.
for exp in range(0,max_trials):
    # Fresh random split for every trial.
    # NOTE(review): torchtext documents split_ratio as [train, test, valid] but
    # returns (train, valid, test); the 0.1 / 0.3 shares may therefore land on
    # the opposite names from what this unpacking suggests - confirm.
    train_data, test_data, valid_data  = all_data.split(split_ratio=[0.6,0.1,0.3])

    BATCH_SIZE = 64

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, valid_data, test_data), 
        sort = False,
        batch_size = BATCH_SIZE, 
        device = device)
    ##################################################################################################
    ##################################################################################################
    # baseline confidence
    ##################################################################################################
    # Expert model: a 2-output CNN_ trained with train_expert
    # (presumably predicts whether the expert is correct - TODO confirm in train_expert).
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 300
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 2  # not passed below; the constructor receives the literal 2
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model_expert = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 2, DROPOUT, PAD_IDX)
    pretrained_embeddings = TEXT.vocab.vectors

    # Start from the vocabulary's pretrained vectors; zero the <unk> and <pad> rows.
    model_expert.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model_expert.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model_expert.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    optimizer = optim.Adam(model_expert.parameters())
    criterion = nn.CrossEntropyLoss()

    model_expert = model_expert.to(device)
    criterion = criterion.to(device)
    N_EPOCHS = 5
    for epoch in range(N_EPOCHS):

        start_time = time.time()

        # train_loss / train_acc and the timings below are computed but not reported.
        train_loss, train_acc = train_expert(model_expert, train_iterator, optimizer, criterion)
        #valid_loss, valid_acc = evaluate_expert(model_expert, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # classifier
    # 3-output CNN_ trained with train; reused by the confidence and Oracle baselines.
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 300
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 3
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model_class = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    pretrained_embeddings = TEXT.vocab.vectors

    model_class.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model_class.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model_class.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    optimizer = optim.Adam(model_class.parameters())
    criterion = nn.CrossEntropyLoss()

    model_class = model_class.to(device)
    criterion = criterion.to(device)
    N_EPOCHS = 5
    for epoch in range(N_EPOCHS):

        start_time = time.time()

        train_loss, train_acc = train(model_class, train_iterator, optimizer, criterion)
        #valid_loss, valid_acc = evaluate(model_class, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    ####################################
    print("Baseline")
    
    # Evaluate the confidence baseline on the test iterator; `conf` is reused
    # for the classification list and then for the fairness list.
    conf = metrics_print_confid(model_class, model_expert,test_iterator)
    exp_conf.append(conf)
    conf = metrics_print_confid_fairness(model_class, model_expert,test_iterator)
    exp_conf_fairness.append(conf)
    ##################################################################################################
    # my method 
    ##################################################################################################
    # Single CNN with 4 outputs trained with train_reject
    # (presumably the 3 classes plus a defer output - TODO confirm in CNN / train_reject).
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 1000
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 4  # not passed below; the constructor receives the literal 4
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 4, DROPOUT, PAD_IDX)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    optimizer = optim.Adam(model.parameters())
    # This criterion is created but not passed to train_reject below.
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)

    N_EPOCHS = 15

    # Model selection: keep the epoch with the highest validation score. Despite
    # the "loss" names, the score is element [1] of metrics_print's result, which
    # the summary cells label "system accuracy"; hence higher is better.
    best_valid_loss = 0
    best_model = None
    for epoch in range(N_EPOCHS):
        start_time = time.time()
        # NOTE(review): the meaning of the trailing argument 1 is defined by
        # train_reject, which is outside this cell - confirm.
        train_loss, train_acc = train_reject(model, train_iterator, optimizer, 1)
        #train_loss, train_acc = train_reject_bla(model, train_iterator, optimizer)

        #valid_loss, valid_acc = evaluate_reject(model, valid_iterator)
        valid_loss = metrics_print(model,valid_iterator)[1]

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # ">=" means that on ties the later epoch replaces the earlier one.
        # `copy` must already be imported by an earlier cell.
        if valid_loss >= best_valid_loss:
            best_valid_loss = valid_loss
            best_model = copy.deepcopy(model)
    
    print("Our method")
    # Evaluate the selected model on the test iterator; `rej` is reused for the
    # classification list and then for the fairness list.
    rej = metrics_print(best_model, test_iterator)
    exp_rej.append(rej)
    print(rej)
    rej = metrics_print_fairness(best_model, test_iterator)
    exp_rej_fairness.append(rej)
    ##############################################################################################
    # ORACLE
    # Reuses the baseline classifier model_class trained above; only the deferral rule differs.
    ora = metrics_print_oracle(model_class, test_iterator)
    print(ora)
    exp_ora.append(ora)
    ora = metrics_print_oracle_fairness(model_class, test_iterator)
    exp_ora_fairness.append(ora)


# Confidence metrics

In [None]:
import numpy as np, scipy.stats as st

In [None]:
# Display names for the per-trial result lists, in positional order:
# metrics_class labels the 4 entries of the classification results,
# metrics_fairness labels the 3 entries of the fairness results.
metrics_class = ["coverage", "system accuracy", "expert accuracy", "classifier accuracy"]
metrics_fairness = ["FPR for group 0", "FPR for group 1", "discrimination"]

In [None]:
print("Results for Confidence Baseline")
# Classification metrics: mean, standard deviation and 95% t-interval over trials.
for metric_idx in range(4):
    print("----")
    print("For " + metrics_class[metric_idx])
    trial_values = [exp_conf[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")
print("#############################")

# Fairness metrics, summarised the same way.
for metric_idx in range(3):
    print("-----")
    print("For " + metrics_fairness[metric_idx])
    trial_values = [exp_conf_fairness[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")


# Oracle baseline

In [None]:
print("Results for Oracle Baseline")
# Classification metrics: mean, standard deviation and 95% t-interval over trials.
for metric_idx in range(4):
    print("----")
    print("For " + metrics_class[metric_idx])
    trial_values = [exp_ora[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")
print("#############################")

# Fairness metrics, summarised the same way.
for metric_idx in range(3):
    print("-----")
    print("For " + metrics_fairness[metric_idx])
    trial_values = [exp_ora_fairness[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")


# Our method

In [None]:
print("Results for our method L_{CE}")

# Classification metrics: mean, standard deviation and 95% t-interval over trials.
for metric_idx in range(4):
    print("----")
    print("For " + metrics_class[metric_idx])
    trial_values = [exp_rej[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")
print("#############################")

# Fairness metrics, summarised the same way.
for metric_idx in range(3):
    print("-----")
    print("For " + metrics_fairness[metric_idx])
    trial_values = [exp_rej_fairness[trial][metric_idx] for trial in range(max_trials)]
    print(f"average: {np.average(trial_values)!s}")
    print(f"std: {np.std(trial_values)!s}")
    interval = st.t.interval(
        0.95,
        len(trial_values) - 1,
        loc=np.mean(trial_values),
        scale=st.sem(trial_values),
    )
    print(f"95 confidence interval: {interval!s}")
