In [96]:
import pandas as pd
import glob
import numpy as np

In [97]:
def clean_text(text):
    """Replace known double-encoding artefacts with the intended characters.

    Each garbled two-character sequence in the table below is substituted,
    in table order, by the accented letter it stands for.

    Args:
        text: String to repair.

    Returns:
        The string with every listed sequence replaced; everything else is
        returned unchanged.
    """
    # (garbled sequence, intended character) pairs, applied top to bottom.
    mojibake_pairs = (
        ('Ã¡', 'á'), ('Ã©', 'é'), ('Ã\xad', 'í'), ('Ã³', 'ó'), ('Ãº', 'ú'),
        ('Ã£', 'ã'), ('Ãµ', 'õ'), ('Ã¢', 'â'), ('Ãª', 'ê'), ('Ã´', 'ô'),
        ('Ã§', 'ç'), ('Ã ', 'à'), ('Ãš', 'Ú'), ('Ã\x81', 'Á'), ('Ã‰', 'É'),
        ('Ã\x8d', 'Í'), ('Ã“', 'Ó'),
    )
    cleaned = text
    for garbled, intended in mojibake_pairs:
        cleaned = cleaned.replace(garbled, intended)
    return cleaned

def read_files(file, model, method):
    """Load one explanation file and append its ranked tokens to ``pred``.

    Every non-blank line is re-decoded as UTF-8, passed through
    ``clean_text`` and evaluated into a sequence of tuples. The tuples are
    ordered by their second element, highest first; the first elements of
    those whose second element is positive are appended, with spaces
    removed, as one list to the module-level ``pred[model][method]``.

    Args:
        file: Path of the file to read.
        model: First-level key into ``pred``.
        method: Second-level key into ``pred``.
    """
    with open(file, 'r', encoding="latin1") as handle:
        for raw_line in handle.readlines():
            # Blank or whitespace-only lines carry no data.
            if not raw_line.strip():
                continue

            # latin1 maps each byte to one character, so encoding back to
            # latin1 recovers the raw bytes, which are then decoded as UTF-8
            # (undecodable bytes become U+FFFD).
            repaired = clean_text(
                raw_line.encode('latin1').decode('utf-8', errors='replace')
            )

            # NOTE(review): eval executes whatever expression the line
            # contains; only use this on result files you trust.
            scored_tokens = sorted(
                eval(repaired), key=lambda pair: pair[1], reverse=True
            )

            pred[model][method].append(
                [pair[0].replace(' ', '') for pair in scored_tokens if pair[1] > 0]
            )

In [98]:
# pred[model][method] collects one token list per non-blank line of every
# matching result file (filled in by read_files).
pred = {
    model: {method: [] for method in ['lime', 'shap']}
    for model in ['bertimbau', 'distilbertimbau', 'ptt5', 'mbert']
}

for model, per_method in pred.items():
    for method in per_method:
        paths = glob.glob('../results/'+model+'_'+method+'*')

        # NOTE(review): sorted() orders the paths as strings; if the file
        # names carry unpadded numbers this is not numeric order -- confirm.
        for filename in sorted(paths):
            read_files(filename, model, method)

In [99]:
def _split_rationale_cell(cell):
    """Turn a cell such as "['a b', 'c']" into ['a b', 'c'].

    Brackets are stripped from both ends, every single quote is removed and
    the remainder is split on ", ". A cell holding "[]" therefore becomes
    [''] (one empty string), not an empty list.
    """
    return cell.strip('[]').replace("'", "").split(", ")


# Both rationale columns are parsed with the same converter.
df = pd.read_csv(
    '../data/hatebr_and_rationales.csv',
    index_col=0,
    converters={
        "rationales_offensive_1_normalized": _split_rationale_cell,
        "rationales_offensive_2_normalized": _split_rationale_cell,
    },
)

In [100]:
## Split into train / test / validation sets
from sklearn.model_selection import train_test_split

TRAIN_SIZE = 0.8  # not referenced by the calls below
TEST_SIZE = 0.1
VAL_SIZE = 0.1

# First split: hold out TEST_SIZE + VAL_SIZE of the rows.
x_train, x_test_val, y_train, y_test_val = train_test_split(
    df['normalized_text'],
    df['label final'],
    test_size=TEST_SIZE + VAL_SIZE,
    random_state=0,
)

# Second split: divide the held-out rows between test and validation.
x_test, x_val, y_test, y_val = train_test_split(
    x_test_val,
    y_test_val,
    test_size=VAL_SIZE/(TEST_SIZE + VAL_SIZE),
    random_state=0,
)

In [101]:
# Positions (within y_train) of the first 353 rows whose label equals 1.
label_is_one = (y_train == 1)
instances = np.where(label_is_one)[0][:353]

# Enumeration positions that iou_f1_score and f1_score_token_level skip.
to_remove_index = [245, 276, 328]

In [102]:
def partial_match_score(a, b, threshold=0.5):
    """Tell whether two token collections overlap enough to count as a match.

    The overlap measure is intersection-over-union (IoU) computed on the
    distinct items of ``a`` and ``b``.

    Args:
        a: Iterable of hashable items.
        b: Iterable of hashable items.
        threshold: Minimum IoU (inclusive) required for a match.

    Returns:
        True when the IoU is at least ``threshold``, otherwise False. When
        both inputs are empty the IoU is taken as 0.0 instead of raising
        ZeroDivisionError.
    """
    items_a, items_b = set(a), set(b)
    union_size = len(items_a | items_b)
    if union_size == 0:
        # Both inputs are empty: the ratio is undefined, so score it as
        # "no overlap" rather than dividing by zero.
        iou = 0.0
    else:
        iou = len(items_a & items_b) / union_size
    return iou >= threshold
    
    
def iou_f1_score(annot1, annot2, pred, model, method, total=350):
    """Score predictions by partial (IoU) match against two annotations.

    The three sequences are walked in parallel; positions listed in the
    module-level ``to_remove_index`` are skipped. Each annotation is
    flattened into single tokens by joining its entries with spaces and
    splitting on spaces. A position counts as a hit when
    ``partial_match_score`` accepts the prediction against either
    annotation.

    Args:
        annot1: Sequence of rationale lists from the first annotator.
        annot2: Sequence of rationale lists from the second annotator.
        pred: Nested mapping; ``pred[model][method]`` is the sequence of
            predicted token lists.
        model: First-level key into ``pred``.
        method: Second-level key into ``pred``.
        total: Denominator used for precision and recall. Defaults to 350,
            the value that was previously hard-coded.

    Returns:
        The F1 value derived from precision = recall = hits / total, or
        0.0 when there are no hits.

    Raises:
        ValueError: If ``total`` is not positive.
    """
    if total <= 0:
        raise ValueError("total must be a positive number")

    threshold_tps = 0
    count = 0
    count2 = 0
    for position, (r1, r2, p) in enumerate(zip(annot1, annot2, pred[model][method])):

        if position in to_remove_index:
            continue

        a = ' '.join(r1).split(' ')
        b = ' '.join(r2).split(' ')

        # Evaluate each comparison once and reuse the result.
        hit_first = partial_match_score(a, p)
        hit_second = partial_match_score(b, p)

        if hit_first:
            count += 1
        if hit_second:
            count2 += 1
        if hit_first or hit_second:
            threshold_tps += 1

    precision = threshold_tps/total
    recall = threshold_tps/total
    if recall + precision > 0:
        f1_score = (2*((recall*precision)/(recall+precision)))
    else:
        # No hit at all: the harmonic mean is undefined, report 0.0 instead
        # of raising ZeroDivisionError.
        f1_score = 0.0

    # Per-annotation hit counts (first annotation, then second).
    print('partial match isa:', count)
    print('partial match fran:', count2)
    return f1_score


def compute_f1(a, b):
    """Return (precision, recall, F1) of ``a`` measured against ``b``.

    The numerator is the number of distinct items shared by ``a`` and
    ``b``; the denominators are ``len(a)`` and ``len(b)``, so repeated
    items in either argument count towards its length.

    Args:
        a: Sized collection of hashable items (precision denominator).
        b: Sized collection of hashable items (recall denominator).

    Returns:
        Tuple ``(precision, recall, f1)``. A component is 0 when its
        denominator would be zero.
    """
    shared = len(list(set(a) & set(b)))

    if len(a) > 0:
        precision = shared/len(a)
    else:
        precision = 0

    if len(b) > 0:
        recall = shared/len(b)
    else:
        recall = 0

    if recall+precision > 0:
        f1 = 2*((recall*precision)/(recall+precision))
    else:
        f1 = 0

    return precision, recall, f1

def f1_score_token_level(annot1, annot2, pred, model, method):
    """Average token-level F1, precision and recall over the instances.

    The three sequences are walked in parallel; positions listed in the
    module-level ``to_remove_index`` are skipped. Each annotation is
    flattened into single tokens by joining its entries with spaces and
    splitting on spaces. The prediction is scored against both annotations
    with ``compute_f1`` and the scores of the annotation with the strictly
    higher F1 are kept (ties keep the first annotation).

    Args:
        annot1: Sequence of rationale lists from the first annotator.
        annot2: Sequence of rationale lists from the second annotator.
        pred: Nested mapping; ``pred[model][method]`` is the sequence of
            predicted token lists.
        model: First-level key into ``pred``.
        method: Second-level key into ``pred``.

    Returns:
        Tuple ``(mean F1, mean precision, mean recall)`` computed with
        ``np.mean`` over the retained positions.
    """
    kept_f1, kept_precision, kept_recall = [], [], []

    for position, (r1, r2, p) in enumerate(zip(annot1, annot2, pred[model][method])):
        if position in to_remove_index:
            continue

        tokens_first = ' '.join(r1).split(' ')
        tokens_second = ' '.join(r2).split(' ')

        scores_first = compute_f1(p, tokens_first)
        scores_second = compute_f1(p, tokens_second)

        # Index 2 of each tuple is the F1 value.
        if scores_second[2] > scores_first[2]:
            best_precision, best_recall, best_f1 = scores_second
        else:
            best_precision, best_recall, best_f1 = scores_first

        kept_f1.append(best_f1)
        kept_recall.append(best_recall)
        kept_precision.append(best_precision)

    return np.mean(kept_f1), np.mean(kept_precision), np.mean(kept_recall)

In [103]:
# Attach every column of df to the selected training texts, matching rows
# on the text itself.
# NOTE(review): if 'normalized_text' is not unique in df, the join yields
# more than one row per selected text -- confirm uniqueness.
selected_texts = x_train.iloc[instances].to_frame()
rows_by_text = df.set_index('normalized_text')
df_instances = selected_texts.join(rows_by_text, on='normalized_text')

In [104]:
# One list per output column; each (model, method) pair appends one value
# to every list.
results = {
    'Method': [],
    'IOU_F1': [],
    'Token-level Precision': [],
    'Token-level Recall': [],
    'Token-level F1': [],
}

rationales_first = df_instances['rationales_offensive_1_normalized']
rationales_second = df_instances['rationales_offensive_2_normalized']

for model in ['mbert', 'bertimbau', 'distilbertimbau', 'ptt5']:
    for method in ['lime', 'shap']:
        results['Method'].append(model+'_'+method)

        # iou_f1_score also prints the two per-annotation hit counts.
        iou_f1 = iou_f1_score(rationales_first, rationales_second, pred, model, method)
        results['IOU_F1'].append(iou_f1)

        f1, prec, rec = f1_score_token_level(
            rationales_first, rationales_second, pred, model, method
        )
        results['Token-level Precision'].append(prec)
        results['Token-level Recall'].append(rec)
        results['Token-level F1'].append(f1)

partial match isa: 175
partial match fran: 155
partial match isa: 214
partial match fran: 167
partial match isa: 189
partial match fran: 165
partial match isa: 210
partial match fran: 171
partial match isa: 203
partial match fran: 172
partial match isa: 197
partial match fran: 164
partial match isa: 189
partial match fran: 162
partial match isa: 246
partial match fran: 184


In [105]:
# Display the collected scores as a table, one row per model/method pair.
pd.DataFrame(results)

Unnamed: 0,Method,IOU_F1,Token-level Precision,Token-level Recall,Token-level F1
0,mbert_lime,0.582857,0.745764,0.693633,0.670131
1,mbert_shap,0.662857,0.714349,0.751974,0.689699
2,bertimbau_lime,0.585714,0.755755,0.684833,0.669851
3,bertimbau_shap,0.66,0.748937,0.709888,0.683152
4,distilbertimbau_lime,0.645714,0.761399,0.727633,0.700331
5,distilbertimbau_shap,0.62,0.754315,0.686219,0.671966
6,ptt5_lime,0.605714,0.748744,0.697806,0.677612
7,ptt5_shap,0.74,0.71775,0.837838,0.736218


In [61]:
# NOTE(review): repeated display of `results`. This cell's execution count
# (61) is lower than the cells above it and its saved output has only six
# rows, so the output presumably predates the current `results` -- re-run
# or remove.
pd.DataFrame(results)

Unnamed: 0,Method,IOU_F1,Token-level Precision,Token-level Recall,Token-level F1
0,bertimbau_lime,0.585714,0.755398,0.684847,0.669823
1,bertimbau_shap,0.657143,0.748548,0.709884,0.682974
2,ptt5_lime,0.605714,0.748172,0.697772,0.677533
3,ptt5_shap,0.74,0.71742,0.837881,0.736063
4,distilbertimbau_lime,0.645714,0.761399,0.727633,0.700331
5,distilbertimbau_shap,0.617143,0.753886,0.686166,0.671761


In [83]:
# Inspect the token lists collected under pred['bertimbau']['shap'].
pred['bertimbau']['shap']

[['esquerda',
  'de',
  'nem',
  'sato',
  'para',
  'uma',
  'uma',
  'não',
  'não',
  'qualquer',
  'dar',
  'uma',
  'para',
  'que',
  'tem'],
 ['feia', 'coisa', 'casa', 'vai'],
 ['porcaria', 'acabar', 'vão'],
 ['satanás', 'vai', 'ele'],
 ['meter',
  'nojo',
  'pirralha',
  'calcinha',
  'nem',
  'lavar',
  'ficar',
  'deles',
  'do',
  'esse',
  'uma',
  'o',
  'pra',
  'sabe',
  'o',
  'fundo',
  'usam',
  'eles',
  'tem',
  'en',
  'tanta',
  'pra',
  'dá',
  'pra',
  'querendo',
  'ainda',
  'não',
  'mandar',
  'mundo',
  'se',
  'se'],
 ['globolixo',
  'mentirosas',
  'globolixo',
  'enganados',
  'nem',
  'aposentar',
  'pra',
  'deixar',
  'verdadeiras',
  'se',
  'já',
  'pra',
  'da',
  'está',
  'na',
  'que',
  'anos',
  'deu',
  'jornal',
  'da',
  'jornalismo'],
 ['quatro', 'de', 'é', 'sua', 'posição'],
 ['bilreiro', 'pés', 'macro', 'de', 'não', 'se', 'chega', 'toda'],
 ['trabalha', 'não', 'não', 'esse'],
 ['vagabundo', 'daquele'],
 ['não', 'nada', 'vale', 'aí', 'ess

In [18]:
# Inspect the token lists collected under pred['bertimbau']['lime'].
pred['bertimbau']['lime']

[['leda', 'parabéns', 'um', 'entrevista', 'sato'],
 ['coisa', 'feia', 'sua', 'cuidar', 'casa'],
 ['com', 'país', 'no', 'entrega', 'porcaria', 'e'],
 ['ele', 'aos', 'entregar', 'de'],
 ['brasil', 'que', 'esse', 'do', 'ficar'],
 ['querem', 'deixar', 'globolixo'],
 ['politica', 'posição', 'é', 'de'],
 ['toda', 'de', 'juntar', 'pés', 'a'],
 ['não'],
 ['um'],
 ['esse', 'vale', 'aí', 'nada'],
 ['vão', 'resultados', 'sim', 'respeitar', 'para'],
 ['uma'],
 ['vergonha', 'para', 'profunda', 'dele'],
 ['lodo', 'é', 'porque', 'mesmo'],
 ['política', 'porque', 'ele', 'roubou'],
 ['nojento', 'suja', 'boca'],
 ['ambulante', 'é', 'merda'],
 ['já', 'feda', 'safados'],
 ['ficar', 'calada'],
 ['corrupção', 'trabalham', 'função', 'em', 'vergonha', 'e'],
 [],
 ['em', 'profissão', 'inútil', 'de'],
 ['está', 'bolsonaro', 'demônio', 'esse'],
 ['tempo', 'não', 'saco', 'a', 'mesmo', 'tudo'],
 ['e', 'uma'],
 ['ofender', 'ofensa', 'esquerda'],
 ['do', 'veja', 'corrupção', 'sobre', '14'],
 ['estão', 'do', 'loucos'

In [94]:
# For every model/method pair, print the total number of collected tokens
# divided by 350.
for model in ['mbert', 'bertimbau', 'distilbertimbau', 'ptt5']:
    for method in ['lime', 'shap']:
        avg_tokens = sum(len(r) for r in pred[model][method])
        print(model, method, avg_tokens/350)

mbert lime 5.311428571428571
mbert shap 8.957142857142857
bertimbau lime 5.317142857142857
bertimbau shap 7.64
distilbertimbau lime 5.5685714285714285
distilbertimbau shap 7.28
ptt5 lime 5.408571428571428
ptt5 shap 10.042857142857143
