In [1]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
from wykop_scraper.hatebase import hate_words, curse_words
from sklearn.model_selection import cross_validate, StratifiedKFold
import re

Load the annotations dataset:

In [2]:
# Read the annotated comments and discard the first CSV column
# (presumably a saved row index -- TODO confirm against the file).
df = pd.read_csv('../annotations/data/classification_dataset.csv').iloc[:, 1:]

In [3]:
# Preview the loaded frame; the rendered table elides the middle rows.
df

Unnamed: 0,comment_id,entry_id,date,text,author_login,vote_count,receiver,hate_word_counts,is_hateful,annotation
0,185386257,52292479,2020-09-23 00:18:39,Ty no kurwa że też ja na to nie wpadłem ale ze...,Cybek-Marian,1,atteint,2,1,0
1,174804569,49417029,2020-05-14 19:45:44,coś ponad 1 a mniej niż 2,wytrzzeszcz,1,Kosciany,0,0,0
2,189533891,53440779,2020-11-09 20:41:58,Motor ma już w garażu,piSSowiec39,3,Zagmadfany2,0,0,0
3,172538589,48804329,2020-04-17 01:00:24,znow robic na tego zlodzieja,ranunculus,3,Graner,0,0,0
4,185485247,52313979,2020-09-24 11:51:11,kurwa człowieku no do kurwy nędzy chociaż wytn...,Cybek-Marian,0,AgentGRU,3,1,1
...,...,...,...,...,...,...,...,...,...,...
5816,186254803,52523329,2020-10-04 01:25:55,ale jak to Nocna a ty w gaciach,Paula_pi,2,Graner,0,0,0
5817,190203499,53620979,2020-11-17 20:48:21,mam ledwo 21 lat ja tam z dorosłością mam niew...,Anty_Chryst,0,SkrytyZolw,0,0,0
5818,188107553,53038579,2020-10-25 15:19:22,tylko niech potem nikogo nie zdziwi że protest...,galicjanin,0,muwieszeptem,1,1,0
5819,174952993,49460679,2020-05-16 17:17:34,a wiesz co jest najgorsze ze jak Michau bedzie...,niezdiagnozowany,0,Gon70,0,0,1


Define the baseline model class:

In [4]:
class BaselineModel:
    """Dictionary-lookup baseline classifier.

    A text is labelled 1 when at least one dictionary entry occurs in it as a
    substring (case-sensitive, no tokenisation) and 0 otherwise. Nothing is
    learned from data; ``fit`` and ``get_params`` exist only so the object can
    be handed to scikit-learn utilities such as ``cross_validate``.
    """

    def __init__(self, dictionary: list):
        """Remember the words/phrases to search for.

        Args:
            dictionary: list of strings; any one of them occurring in a text
                makes that text a positive prediction.
        """
        # Stored as the very same object (no copy/normalisation): scikit-learn's
        # ``clone`` expects constructor arguments to be kept unmodified.
        self.dictionary = dictionary

    def fit(self, X, y=None):
        """No-op: the model has no trainable state.

        Returns:
            self, following the scikit-learn estimator convention so that
            calls can be chained (``model.fit(X, y).predict(X)``).
        """
        return self

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is unused)."""
        return {'dictionary': self.dictionary}

    def predict(self, X):
        """Label each text by dictionary lookup.

        Args:
            X: an iterable of strings.

        Returns:
            1-D integer numpy array with one entry per text: 1 if any
            dictionary entry is a substring of the text, else 0.
        """
        hits = [int(any(word in text for word in self.dictionary)) for text in X]
        # Pin the dtype so an empty input still yields an integer array.
        return np.array(hits, dtype=int).reshape(-1)

### HateBase model

In [5]:
# Baseline that only looks for HateBase terms.
baseline = BaselineModel(dictionary=hate_words)

# Five shuffled, stratified folds with a fixed seed so the splits are repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Inputs are the raw comment texts; targets are the `annotation` column.
X = df['text'].to_numpy()
y = df['annotation'].to_numpy()

Compute each metric separately for every cross-validation fold:

In [7]:
# Score the HateBase-only baseline on each stratified fold.
scoring = ['accuracy', 'precision', 'recall', 'f1', 'f1_macro']
results = cross_validate(baseline, X, y, cv=skf, scoring=scoring)

# Per-fold score arrays, kept under their own names for the averaging cell.
accuracy, precision = results['test_accuracy'], results['test_precision']
recall, f1 = results['test_recall'], results['test_f1']
macro_f1 = results['test_f1_macro']

print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1-Score: {f1}',
    f'Macro F1-Score: {macro_f1}',
    sep='\n',
)

Accuracy: [0.8832618  0.88917526 0.89003436 0.88402062 0.88316151]
Precision: [0.29166667 0.41666667 0.46341463 0.27272727 0.32258065]
Recall: [0.05555556 0.08       0.152      0.048      0.08      ]
F1-Score: [0.09333333 0.13422819 0.22891566 0.08163265 0.12820513]
Macro F1-Score: [0.51547401 0.53751336 0.58485561 0.50986722 0.53279501]


Average each metric over the folds:

In [9]:
# Mean of each per-fold metric for the first baseline.
print(
    f'Avg accuracy: {np.mean(accuracy)}',
    f'Avg precision: {np.mean(precision)}',
    f'Avg recall: {np.mean(recall)}',
    f'Avg F1-Score: {np.mean(f1)}',
    f'Avg macro F1-Score: {np.mean(macro_f1)}',
    sep='\n',
)

Avg accuracy: 0.8859307110304855
Avg precision: 0.35341117707364755
Avg recall: 0.08311111111111111
Avg F1-Score: 0.1332629930339503
Avg macro F1-Score: 0.5361010422053678


### HateBase + curse words model

In [10]:
# Second baseline: the HateBase terms extended with the curse-word list.
combined_dictionary = hate_words + curse_words
baseline2 = BaselineModel(dictionary=combined_dictionary)

Per-fold metrics for the second baseline (HateBase + curse words):

In [11]:
# Score the HateBase + curse-words baseline on the same stratified folds.
scoring2 = ['accuracy', 'precision', 'recall', 'f1', 'f1_macro']
results2 = cross_validate(baseline2, X, y, cv=skf, scoring=scoring2)

# Per-fold score arrays, kept under their own names for the averaging cell.
accuracy2, precision2 = results2['test_accuracy'], results2['test_precision']
recall2, f12 = results2['test_recall'], results2['test_f1']
macro_f12 = results2['test_f1_macro']

print(
    f'Accuracy: {accuracy2}',
    f'Precision: {precision2}',
    f'Recall: {recall2}',
    f'F1-Score: {f12}',
    f'Macro F1-Score: {macro_f12}',
    sep='\n',
)

Accuracy: [0.85493562 0.85738832 0.8685567  0.86082474 0.86340206]
Precision: [0.31623932 0.2970297  0.39230769 0.2967033  0.32291667]
Recall: [0.29365079 0.24       0.408      0.216      0.248     ]
F1-Score: [0.30452675 0.26548673 0.4        0.25       0.28054299]
Macro F1-Score: [0.61177463 0.59325716 0.66309696 0.58664773 0.60254012]


In [12]:
# Mean of each per-fold metric for the second baseline.
print(
    f'Avg accuracy: {np.mean(accuracy2)}',
    f'Avg precision: {np.mean(precision2)}',
    f'Avg recall: {np.mean(recall2)}',
    f'Avg F1-Score: {np.mean(f12)}',
    f'Avg macro F1-Score: {np.mean(macro_f12)}',
    sep='\n',
)

Avg accuracy: 0.8610214887246876
Avg precision: 0.3250393349774538
Avg recall: 0.28113015873015873
Avg F1-Score: 0.3001112922120499
Avg macro F1-Score: 0.6114633207354213
