## Task B

In [2]:
from hate.settings import corpora
# Spanish training split: materialise inputs and gold labels as lists.
train = corpora['train_es']
X_train = list(train.X())
y_train = list(train.y())

# Spanish development split, handled the same way.
dev = corpora['dev_es']
X_dev = list(dev.X())
y_dev = list(dev.y())

In [9]:
# Display the first raw dev entry to see which fields each record carries.
dev.entries[0]

OrderedDict([('id', '20005'),
             ('text', 'Me estoy comiendo la picada árabe más rica de mi vida'),
             ('HS', '0'),
             ('TR', '0'),
             ('AG', '0')])

## Official evaluation

In [23]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np
import pandas as pd
def evaluate_b(corpus, preds,
               gold='/users/jsanchez/francolq/HATEVAL/A/public_development_es/dev_es.tsv'):
    """Score Task B predictions against a gold-standard TSV file.

    Parameters
    ----------
    corpus : object with an ``entries`` attribute
        ``entries`` is iterated and each element is indexed with ``'id'``;
        the ids are paired positionally with ``preds``.
    preds : iterable of indexable items
        For each item ``p``, ``p[0]``, ``p[1]`` and ``p[2]`` are converted with
        ``int`` and used as the HS, TargetRange and Aggressiveness predictions
        (e.g. the string ``'101'``).
    gold : str, optional
        Path to the tab-separated gold file. Its first row is skipped and the
        five columns are read as ID, Tweet-text, HS, TargetRange and
        Aggressiveness. Defaults to the development-set path that was
        previously hard-coded in the body.

    Returns
    -------
    tuple
        ``(macro_f1, emr, acc_levels, p_levels, r_levels, f1_levels)`` where
        ``macro_f1`` is the mean of the three per-level macro F1 scores,
        ``emr`` is the fraction of rows on which all three levels match, and
        the remaining four items are dicts keyed by level name.

    Raises
    ------
    SystemExit
        If the number of predictions differs from the number of gold rows, if
        a predicted value never occurs in the matching gold column, or if the
        ID join drops or duplicates rows.
    """
    # Imported locally so the error paths work even when no earlier cell
    # imported ``sys``; previously they failed with NameError.
    import sys

    levels = ["HS", "TargetRange", "Aggressiveness"]

    ground_truth = pd.read_csv(gold, sep="\t", names=["ID", "Tweet-text"] + levels,
                               skiprows=1,
                               converters={0: str, 1: str, 2: int, 3: int, 4: int}, header=None)

    # Build the prediction table in memory instead of reading a prediction file.
    predicted = pd.DataFrame({
        'ID': [e['id'] for e in corpus.entries],
        'HS': [int(p[0]) for p in preds],
        'TargetRange': [int(p[1]) for p in preds],
        'Aggressiveness': [int(p[2]) for p in preds],
    })

    # Check that both tables have the same number of rows.
    if len(ground_truth) != len(predicted):
        sys.exit('Prediction and gold data have different number of lines.')

    # Check predicted classes: every predicted value must occur in the gold column.
    for c in levels:
        gt_class = list(ground_truth[c].value_counts().keys())
        if not predicted[c].isin(gt_class).all():
            sys.exit("Wrong value in " + c + " prediction column.")

    # After the merge, gold columns carry the "_x" suffix and predictions "_y".
    data = pd.merge(ground_truth, predicted, on="ID")

    if len(ground_truth) != len(data):
        sys.exit('Invalid tweet IDs in prediction.')

    # Compute performance measures per level.
    acc_levels = dict.fromkeys(levels)
    p_levels = dict.fromkeys(levels)
    r_levels = dict.fromkeys(levels)
    f1_levels = dict.fromkeys(levels)
    for l in levels:
        y_true, y_hat = data[l + "_x"], data[l + "_y"]
        acc_levels[l] = accuracy_score(y_true, y_hat)
        p_levels[l], r_levels[l], f1_levels[l], _ = precision_recall_fscore_support(
            y_true, y_hat, average="macro")
    macro_f1 = np.mean(list(f1_levels.values()))

    # Compute Exact Match Ratio: a row counts only if all three levels agree.
    check_emr = np.ones(len(data), dtype=bool)
    for l in levels:
        check_label = data[l + "_x"] == data[l + "_y"]
        check_emr = check_emr & check_label
    emr = sum(check_emr) / len(data)

    return macro_f1, emr, acc_levels, p_levels, r_levels, f1_levels


## Basic Statistics

In [11]:
import pandas as pd
# Presumably meant to stop pandas from truncating long tweet texts when
# frames are displayed — TODO confirm for the installed pandas version.
pd.set_option('display.max_colwidth', 0)

# One row per training entry.
df = pd.DataFrame(train.entries)

Sanity check: HS=0 implies TR=0 and AG=0 (verified on the training set below).

In [12]:
es = train.entries
# No training entry with HS == '0' may have TR == '1' ...
assert not any(e['HS'] == '0' and e['TR'] == '1' for e in es)
# ... nor AG == '1'.
assert not any(e['HS'] == '0' and e['AG'] == '1' for e in es)

Counts of each HS/TR/AG label combination in the train and dev sets:

In [13]:
from collections import Counter
# Count each concatenated HS+TR+AG label string, first on train, then on dev.
print(Counter(e['HS'] + e['TR'] + e['AG'] for e in train.entries))
print(Counter(e['HS'] + e['TR'] + e['AG'] for e in dev.entries))

Counter({'000': 2643, '111': 1053, '101': 449, '100': 279, '110': 76})
Counter({'000': 278, '111': 127, '101': 49, '100': 36, '110': 10})


## 1st Approach

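Joint multiclass classification: the HS, TR and AG labels are concatenated into a single class (e.g. `'101'`) and one classifier predicts that class.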

In [14]:
# Rebind the targets to joint labels: the HS, TR and AG strings of each entry
# glued together in that order (e.g. '101').
y_train = [''.join((entry['HS'], entry['TR'], entry['AG'])) for entry in train.entries]
y_dev = [''.join((entry['HS'], entry['TR'], entry['AG'])) for entry in dev.entries]

In [15]:
from hate.classifier import HateClassifier
# Embedding file handed to the classifier through emb_params.
filename = '/users/jsanchez/francolq/hatEval/tass2018/SENTIMENT_UBAv2_50_2.bin'

# NOTE(review): 'bow'/'boc' presumably enable word- and character-level
# n-gram features; confirm against hate.classifier.HateClassifier.
clf = HateClassifier(
    lang='es',
    bow=True,
    bow_params=dict(binary=True, ngram_range=(1, 2)),
    boc=True,
    boc_params=dict(analyzer='char', binary=True, ngram_range=(1, 5)),
    emb='wfasttext',  # 'fasttext' was the alternative tried here
    emb_params=dict(file=filename),
    clf='maxent',
)

# Train on the joint labels, then report the dev-set evaluation.
clf.fit(X_train, y_train)
clf.eval(X_dev, y_dev)

  re.IGNORECASE | re.VERBOSE)
  re.VERBOSE | re.IGNORECASE)
  UrlMatch_re = re.compile(UrlMatch_expression, re.VERBOSE | re.IGNORECASE)
  EmailMatch_re = re.compile(EmailMatch_expression, re.VERBOSE | re.IGNORECASE)


accuracy	0.75

              precision    recall  f1-score   support

         000       0.82      0.85      0.83       278
         100       0.39      0.19      0.26        36
         101       0.51      0.57      0.54        49
         110       0.00      0.00      0.00        10
         111       0.76      0.83      0.79       127

   micro avg       0.75      0.75      0.75       500
   macro avg       0.49      0.49      0.48       500
weighted avg       0.73      0.75      0.74       500

[[235   7  12   0  24]
 [ 15   7  14   0   0]
 [ 13   4  28   0   4]
 [  4   0   0   0   6]
 [ 20   0   1   0 106]]


  'precision', 'predicted', average, warn_for)


In [25]:
# Predict joint labels for the dev set and score them with the Task B metric.
y_pred = clf.predict(X_dev)
# Result tuple: macro_f1, emr, acc_levels, p_levels, r_levels, f1_levels
evaluate_b(corpus=dev, preds=y_pred)

(0.8250497668428715,
 0.752,
 {'HS': 0.81, 'TargetRange': 0.894, 'Aggressiveness': 0.814},
 {'HS': 0.8084686983690763,
  'TargetRange': 0.8652777777777778,
  'Aggressiveness': 0.7957545187053383},
 {'HS': 0.8055447533864799,
  'TargetRange': 0.8701916309746436,
  'Aggressiveness': 0.8084666105499438},
 {'HS': 0.8067338012409724,
  'TargetRange': 0.8676792945558768,
  'Aggressiveness': 0.8007362047317653})

The '110' label performs very badly (precision and recall are both 0.00 on dev), so let's inspect its dev examples:

In [27]:
# Pair every dev item with its joint gold label, then show only the '110' rows.
df = pd.DataFrame({'item': X_dev, 'label': y_dev})
df.loc[df['label'].eq('110')]

Unnamed: 0,item,label
13,Ea pues ya solo falta que Albert Rivera vaya a Algeciras a abrazarte con los inmigrantes subsaharianos,110
167,"Pobre JP no se merecia ser traicionado de esa forma tan ruin y sucia, esa zorra que iba de enamorada pero luego lo vota traicio... — Pero si Ashley no iba de enamorada, JP era un perrito faldero de Chrissy así que nose donde ves q... https://t.co/WjlNyOf1HE",110
168,"@ferdiazgil @ilseCeroUno01 @Compotita Exacto. Guarra y egoísta, que el que venga detrás se busque la vida.",110
212,"@Namurii @medicaster_xz @Yorha2d @KeloKlau_ Se merecía de puta y no quiere que le metan la verga, jajaja no mamen",110
229,Mi novia es como mi polla. Sé que está ahí porque de vez en cuando se levanta. Una de mis pantalones y la otra de la cama.,110
376,"- El otro día me comí a Laura es muy fácil que guarra jajajaja - Laura también te comió a ti entonces tú eres igual de guarro e igual de fácil. - Puto feminazi putas modas eres feminista solo para ligar bien queda gilipollas - em, ok",110
399,@CaccoL Tú eres más puta jijijiji,110
449,Yo nunca le he dicho puta a una mujer pero socia tú eres senda petardo 😂😂 estas navidades no voy a comprar pirotecnias ya contigo vasta 😂😂😂,110
453,Somos animales: tú eres una perra y yo la ladilla que no se quiere despegar de esa totona.,110
494,"Hoy, 20hrs, haremos un twittazo en contra Rodolfo Noriega, quien, pese a ser un incitador al odio e inmigrante, sigue en Chile. El hashtag, será #FueraRodolfoNoriega. https://t.co/1AvN3N7AvJ",110


## Test and Submit

In [28]:
from hate.settings import corpora
# Spanish test split: only the inputs are materialised here.
test = corpora['test_es']
X_test = [*test.X()]

In [29]:
# Predict joint labels for the test set. This rebinds y_pred, which
# previously held the dev-set predictions.
y_pred = clf.predict(X_test)

In [30]:
# Distinct joint labels that occur among the test predictions.
{*y_pred}

{'000', '100', '101', '110', '111'}

In [31]:
# Ids of the test entries, in corpus order.
ids = [entry['id'] for entry in test.entries]

In [32]:
# Write the submission: one tab-separated row per test entry holding the id
# followed by the three characters of its joint label (HS, TR, AG order).
# zip() would silently drop rows on a length mismatch, so fail loudly instead.
assert len(ids) == len(y_pred), 'ids and predictions have different lengths'
# The context manager closes the file even if a write fails part-way.
with open('es_b.tsv', 'w', encoding='utf-8') as f:
    # 'tweet_id' instead of 'id' so the builtin id() is not shadowed.
    for tweet_id, label in zip(ids, y_pred):
        f.write('{}\t{}\t{}\t{}\n'.format(tweet_id, label[0], label[1], label[2]))

In [33]:
from zipfile import ZipFile
# Package the submission file into the archive.
with ZipFile('es_b.zip', mode='w') as archive:
    archive.write('es_b.tsv')

## 2nd Approach

Independent classification.