# Training standard classifiers for misogyny identification task

## Pre-processing steps

In [1]:
#install needed libraries
!pip install nltk emoji==0.6.0
!pip install -U sentence-transformers


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting emoji==0.6.0
  Downloading emoji-0.6.0.tar.gz (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.6.0-py3-none-any.whl size=49735 sha256=213ddd3a5e7bf5a842dfc4d361b4bce4e59a78b595373cfdbef607adc1d7ae3d
  Stored in directory: /root/.cache/pip/wheels/43/3d/82/e7baffa5e86346c6178d7750dba6e8ef063282a37fc563f8f8
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.6.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [

In [2]:
#import needed libraries
import pandas as pd
import numpy as np
from TweetNormalize import normalizeTweet

import warnings

from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sentence_transformers import SentenceTransformer


In [4]:
#import datasets
df_en_train = pd.read_csv("/content/data_sets/corrected_df.csv", sep=',', header=0)
df_en_test = pd.read_csv("/content/data_sets/en_testing_labeled.tsv", sep='\t', header=0)

df_en_train

print(df_en_train.misogyny_category.value_counts())


0                    2254
discredit             982
sexual_harassment     334
stereotype            176
dominance             145
derailing              90
Name: misogyny_category, dtype: int64


In [5]:
#perform a normalization with TweetNormalize library and isolate in a new dataframe only mysogynous tweets
x_train_binary = [normalizeTweet(i) for i in df_en_train['text']]
y_train_binary = [i for i in df_en_train['misogynous']]
x_test_binary = [normalizeTweet(i) for i in df_en_test['text']]
y_test_binary = [i for i in df_en_test['misogynous']]

df_en_train.drop(df_en_train[df_en_train['misogyny_category'] == '0'].index, inplace = True)
df_en_test.drop(df_en_test[df_en_test['misogyny_category'] == '0'].index, inplace = True)


x_train_multi = [normalizeTweet(i) for i in df_en_train['text']]
y_train_multi = [i for i in df_en_train['misogyny_category']]
x_test_multi = [normalizeTweet(i) for i in df_en_test['text']]
y_test_multi = [i for i in df_en_test['misogyny_category']]

In [7]:
#import nltk and needed libraries for lemmatizzation, bigrams extraction and so on

import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')

from nltk.corpus import stopwords, wordnet
from nltk import word_tokenize, pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

import spacy
import re
nlp = spacy.load('en_core_web_sm')

stopword_list = stopwords.words('english')

doc_counter = 0
def reset_counter():
  global doc_counter
  doc_counter = 0

def increase_counter():
  global doc_counter
  doc_counter += 1
  if doc_counter % 100 == 0:
    print(doc_counter)

def spacy_nlp_tokenizer(text):
    increase_counter()

    # substituting all space characters with a single space
    text = re.sub('\s+', ' ', text)

    # we use spacy for main nlp tasks
    doc = nlp(text)
    # lemmatized tokens, skipping stopwords
    lemmas = ['LEMMA_'+token.lemma_ for token in doc if not token.is_stop]
    # entity_types
    entity_types = ['NER_'+token.ent_type_ for token in doc if token.ent_type_]

    # in case an entity linker is available, we can use it do put actual entities as
    # features, e.g. Queen Elizabeth, Elizabeth II, Her Majesty -> KB2912
    # see https://spacy.io/usage/training#entity-linker
    # entities = ['ENT_'+token.ent_kb_id_ for token in doc if token.ent_kb_id_]

    # we use a simple nltk function to create ngrams
    lemma_bigrams = ['BI_'+p1+'_'+p2 for p1,p2 in nltk.ngrams(lemmas,2)]
    lemma_trigrams = ['TRI_'+p1+'_'+p2+'_'+p3 for p1,p2,p3 in nltk.ngrams(lemmas,3)]

    all_tokens = list()
    all_tokens.extend(lemmas)
    all_tokens.extend(lemma_bigrams)
    all_tokens.extend(lemma_trigrams)
    all_tokens.extend(entity_types)
    return all_tokens

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


# Standard binary classification (set-up)

In [8]:
#find sentence embeddings with sentence_transformers for classification

from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

embeddings_train = model.encode(x_train_binary)
embeddings_test = model.encode(x_test_binary)



Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [9]:
embeddings_train

array([[ 0.07823314,  0.00166578, -0.02215503, ..., -0.00751157,
         0.03990845, -0.04104037],
       [ 0.00813215, -0.00782378,  0.02371388, ..., -0.0024921 ,
        -0.00565706, -0.00044518],
       [ 0.05174644,  0.03939102,  0.01488177, ...,  0.05016293,
         0.01630843, -0.00589664],
       ...,
       [ 0.02854619,  0.01384385, -0.01831001, ...,  0.02950617,
         0.08947185, -0.01513391],
       [-0.01538778,  0.0250418 , -0.0024812 , ...,  0.06980995,
        -0.01515081,  0.03236844],
       [ 0.00791458,  0.04946415,  0.02696154, ...,  0.0313763 ,
         0.06957997,  0.07036553]], dtype=float32)

In [10]:
#find lingustic features relevant for classification

#given a list of tokens, find how many occurences of "reference" are present in such list
def find_how_many(string_list,reference):
  count = 0
  for x in string_list:
    if x == reference:
      count += 1

  return count

#extract adjectives from a string
def extract_adjs(string):
  tweet_adjs= []
  doc = nlp(string)
  adjs = 0
  for token in doc:
    if token.pos_ == 'ADJ':
     adjs += 1
  return adjs

#extractl linguistic features such as the number of HTTPURL and @USER and adjectives in a reference string
def linguistic_extraction(dataset):
  linguistic_features = []
  for x in dataset:
    lenght = len(x)
    urls = find_how_many(x.split(),'HTTPURL')
    users = find_how_many(x.split(),'@USER')
    adjs =  extract_adjs(x)
    linguistic_features.append([lenght] + [urls] + [users] + [adjs])
  return linguistic_features


In [11]:
linguistic_features_train = linguistic_extraction(x_train_binary)
linguistic_features_test = linguistic_extraction(x_test_binary)

In [12]:
vect = CountVectorizer(analyzer=spacy_nlp_tokenizer, min_df=5)  

reset_counter()

X_train_tok = vect.fit_transform(x_train_binary)

reset_counter()

X_test_tok = vect.transform(x_test_binary)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
100
200
300
400
500
600
700
800
900
1000


In [13]:
X_train_tok

<3981x1845 sparse matrix of type '<class 'numpy.int64'>'
	with 46142 stored elements in Compressed Sparse Row format>

In [14]:
tfidf = TfidfTransformer()  # weighting
tfidf.fit(X_train_tok)
X_train_vec_bin = tfidf.transform(X_train_tok)
X_test_vec_bin =tfidf.transform(X_test_tok)

In [15]:
print(X_train_vec_bin[0])

  (0, 1577)	0.2669786247139648
  (0, 1464)	0.3188958591027836
  (0, 1240)	0.418877067378498
  (0, 1109)	0.21147753205359276
  (0, 885)	0.15274627264994894
  (0, 786)	0.44950173260906195
  (0, 748)	0.38039708619421275
  (0, 707)	0.2570178618378364
  (0, 587)	0.41316309726076283


In [16]:
#transfomr the extracted features matrix in an array and enrich it with linguistic features extracted before along with
#embeddings (in a sense, extend the number of features associated to each tweet/vector for classification purposes)

X_train_vec_bin_arr = X_train_vec_bin.toarray()
X_test_vec_bin_arr = X_test_vec_bin.toarray()

X_train_full = []
X_test_full = []

for i in range(len(X_train_vec_bin_arr)):
  arr1 = X_train_vec_bin_arr[i]
  arr2 = embeddings_train[i]
  arr3 = linguistic_features_train[i]
  X_train_full.append(np.concatenate((arr1,arr2,arr3), axis =0))

for i in range(len(X_test_vec_bin_arr)):
  arr1 = X_test_vec_bin_arr[i]
  arr2 = embeddings_test[i]
  arr3 = linguistic_features_test[i]
  X_test_full.append(np.concatenate((arr1,arr2,arr3), axis =0))


In [17]:
from scipy import sparse

In [18]:
X_train_full = sparse.csr_matrix(X_train_full)
X_test_full = sparse.csr_matrix(X_test_full)

In [19]:
#import needed libraries and define functions for gridsearch

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, StratifiedKFold

In [20]:
skf = StratifiedKFold(shuffle=True, random_state = 42)

In [21]:
#the following function performs a gridsearch considering a refernece classifier, a set of parameters, a cv-object from sklearn
#library, a training and final test set and a reference scoring function.

#it then outputs the best model coming from the model selection, after having printed results on the test set

def perform_gridsearch(classifier, param_grid, cv, X_train, y_train, X_test, y_test, scoring = None):

  clf = GridSearchCV(estimator = classifier, param_grid = param_grid, n_jobs = -1, cv = cv, scoring = scoring)
  clf.fit(X_train, y_train)

  best_model = clf.best_estimator_

  predictions = best_model.predict(X_test)




  print('Best configuration', clf.best_params_)
  print('Best mean score for the validation', clf.best_score_)
  print('Std for the best mean score across folds',clf.cv_results_['std_test_score'][clf.best_index_])

  print('******************')

  print('Classification report:')
  print(classification_report(y_test, predictions, digits = 7))
  print('Confusion matrix:')
  cm = confusion_matrix(y_test, predictions)
  print(cm)

  print('******************')


  print(clf.cv_results_['mean_test_score'])




  return best_model






# Stratified cross-validation with Logistic Regression

In [22]:
parameters_logistic = {'penalty': ('none', 'l2', 'l1'), 'C':[1.0,0.1,0.001], 
                       'solver' : [ 'lbfgs','newton-cg','liblinear']}

In [23]:
#RESULTS WITH ORIGINAL SPARSE MATRIX

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, X_train_vec_bin, y_train_binary, X_test_vec_bin, y_test_binary)


Best configuration {'C': 1.0, 'penalty': 'l2', 'solver': 'lbfgs'}
Best mean score for the validation 0.7701581937289962
Std for the best mean score across folds 0.007985883122220741
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6436782 0.7259259 0.6823325       540
           1  0.6214834 0.5282609 0.5710928       460

    accuracy                      0.6350000      1000
   macro avg  0.6325808 0.6270934 0.6267126      1000
weighted avg  0.6334686 0.6350000 0.6311622      1000

Confusion matrix:
[[392 148]
 [217 243]]
******************
[0.71162778 0.70383915        nan 0.77015819 0.77015819 0.77015819
        nan        nan 0.76814751 0.71162778 0.70383915        nan
 0.71941483 0.71941483 0.72041922        nan        nan 0.69253955
 0.71162778 0.70383915        nan 0.56618948 0.56618948 0.56618948
        nan        nan 0.56618948]


In [None]:
#RESULTS WITH Embedding 
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, embeddings_train, y_train_binary, embeddings_test, y_test_binary)





Best configuration {'C': 1.0, 'penalty': 'l2', 'solver': 'liblinear'}
Best mean score for the validation 0.7779405181490893
Std for the best mean score across folds 0.018893149332023202
******************
Classification report:
              precision    recall  f1-score   support

           0  0.7374749 0.6814815 0.7083734       540
           1  0.6566866 0.7152174 0.6847034       460

    accuracy                      0.6970000      1000
   macro avg  0.6970808 0.6983494 0.6965384      1000
weighted avg  0.7003123 0.6970000 0.6974852      1000

Confusion matrix:
[[368 172]
 [131 329]]
******************
[0.76111612 0.76212115        nan 0.77743864 0.77743864 0.77794052
        nan        nan 0.77718612 0.76111612 0.76212115        nan
 0.76387363 0.76387363 0.76538023        nan        nan 0.71464474
 0.76111612 0.76212115        nan 0.56618948 0.56618948 0.56618948
        nan        nan 0.56618948]


In [None]:
#RESULTS WITH linguistic features 

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, linguistic_features_train, y_train_binary, linguistic_features_test, y_test_binary)



Best configuration {'C': 1.0, 'penalty': 'l1', 'solver': 'liblinear'}
Best mean score for the validation 0.5895474864914283
Std for the best mean score across folds 0.010143252056425118
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6069869 0.5148148 0.5571142       540
           1  0.5166052 0.6086957 0.5588822       460

    accuracy                      0.5580000      1000
   macro avg  0.5617960 0.5617552 0.5579982      1000
weighted avg  0.5654113 0.5580000 0.5579275      1000

Confusion matrix:
[[278 262]
 [180 280]]
******************
[0.58854341 0.58854341        nan 0.58854309 0.58879435 0.58854309
        nan        nan 0.58954749 0.58854341 0.58854341        nan
 0.58678619 0.58628368 0.58728776        nan        nan 0.58477803
 0.58854341 0.58854341        nan 0.56342629 0.56367755 0.56392849
        nan        nan 0.56618948]


In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, X_train_full, y_train_binary, X_test_full, y_test_binary)



Best configuration {'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}
Best mean score for the validation 0.8025585896861976
Std for the best mean score across folds 0.01654222808419015
******************
Classification report:
              precision    recall  f1-score   support

           0  0.7316602 0.7018519 0.7164461       540
           1  0.6659751 0.6978261 0.6815287       460

    accuracy                      0.7000000      1000
   macro avg  0.6988177 0.6998390 0.6989874      1000
weighted avg  0.7014451 0.7000000 0.7003841      1000

Confusion matrix:
[[379 161]
 [139 321]]
******************
[0.78246092 0.74377723        nan 0.79502279 0.80255859 0.80205576
        nan        nan 0.7940184  0.78246092 0.74377723        nan
 0.7807056  0.78296407 0.7812059         nan        nan 0.72092142
 0.78246092 0.74377723        nan 0.56694199 0.56694199 0.57121366
        nan        nan 0.56618948]


# Stratified cross-validation with RandomForestClassifier

In [None]:
#perform a grid search of the model with stratified cross validation with k=5 and select the best model
#then, report the test score for that particular model

In [None]:
parameters_rf = {'min_samples_split': [2,4,6,8,10], 'min_samples_leaf':[1,2,3,5,8,10], 
                               'criterion': ['gini','entropy']}

In [None]:
#RESULTS WITH ORIGINAL SPARSE MATRIX

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, X_train_vec_bin, y_train_binary, X_test_vec_bin, y_test_binary)



Best configuration {'criterion': 'gini', 'min_samples_leaf': 1, 'min_samples_split': 8}
Best mean score for the validation 0.7814599976040807
Std for the best mean score across folds 0.013301938373852235
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6435644 0.7222222 0.6806283       540
           1  0.6192893 0.5304348 0.5714286       460

    accuracy                      0.6340000      1000
   macro avg  0.6314268 0.6263285 0.6260284      1000
weighted avg  0.6323978 0.6340000 0.6303964      1000

Confusion matrix:
[[390 150]
 [216 244]]
******************
[0.77442482 0.77969963 0.77894712 0.78146    0.78145968 0.77668676
 0.77643582 0.77618551 0.78070717 0.7769377  0.76663903 0.77141321
 0.77116038 0.77266855 0.77116101 0.76588652 0.76463024 0.767645
 0.76186705 0.76262019 0.75458125 0.75207184 0.75332718 0.75508313
 0.75960795 0.74830426 0.74328165 0.74704734 0.75282718 0.75207058
 0.77718833 0.77769178 0.77769241 

RandomForestClassifier(min_samples_split=8)

In [None]:
#RESULTS WITH Embedding 

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, embeddings_train, y_train_binary, embeddings_test, y_test_binary)


Best configuration {'criterion': 'entropy', 'min_samples_leaf': 10, 'min_samples_split': 10}
Best mean score for the validation 0.7636223778869252
Std for the best mean score across folds 0.01914695223140225
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6898955 0.7333333 0.7109515       540
           1  0.6619718 0.6130435 0.6365688       460

    accuracy                      0.6780000      1000
   macro avg  0.6759337 0.6731884 0.6737602      1000
weighted avg  0.6770506 0.6780000 0.6767355      1000

Confusion matrix:
[[396 144]
 [178 282]]
******************
[0.74754954 0.75282277 0.75055926 0.74729671 0.75206837 0.75458251
 0.75709129 0.74755049 0.75508093 0.74905613 0.76161485 0.75432716
 0.74830268 0.75382969 0.74553823 0.7608595  0.75482998 0.75282088
 0.75860009 0.76035762 0.75005801 0.75005927 0.74880488 0.75784853
 0.7518168  0.75307403 0.7558369  0.7538278  0.75734475 0.75081335
 0.75458409 0.75332654 0.762

RandomForestClassifier(criterion='entropy', min_samples_leaf=10,
                       min_samples_split=10)

In [None]:
#RESULTS WITH linguistic features 
perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, linguistic_features_train, y_train_binary, linguistic_features_test, y_test_binary)

Best configuration {'criterion': 'gini', 'min_samples_leaf': 10, 'min_samples_split': 6}
Best mean score for the validation 0.6071325889169814
Std for the best mean score across folds 0.007942775010731843
******************
Classification report:
              precision    recall  f1-score   support

           0  0.5875706 0.5777778 0.5826331       540
           1  0.5138593 0.5239130 0.5188375       460

    accuracy                      0.5530000      1000
   macro avg  0.5507149 0.5508454 0.5507353      1000
weighted avg  0.5536634 0.5530000 0.5532871      1000

Confusion matrix:
[[312 228]
 [219 241]]
******************
[0.56116845 0.56392943 0.5717168  0.56870173 0.5737237  0.5699599
 0.57472904 0.56995801 0.57322434 0.57573564 0.57598847 0.57121334
 0.57347749 0.57523376 0.57975574 0.58955095 0.58804625 0.58452677
 0.58779121 0.58603463 0.59683423 0.59960121 0.60135748 0.59909554
 0.59934648 0.60311659 0.60311501 0.60713259 0.60487254 0.60336595
 0.56242127 0.56568665 0.5679489

RandomForestClassifier(min_samples_leaf=10, min_samples_split=6)

In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features
perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, X_train_full, y_train_binary, X_test_full, y_test_binary)

Best configuration {'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 6}
Best mean score for the validation 0.7701559869611546
Std for the best mean score across folds 0.01464126433308675
******************
Classification report:
              precision    recall  f1-score   support

           0  0.7025090 0.7259259 0.7140255       540
           1  0.6651584 0.6391304 0.6518847       460

    accuracy                      0.6860000      1000
   macro avg  0.6838337 0.6825282 0.6829551      1000
weighted avg  0.6853277 0.6860000 0.6854407      1000

Confusion matrix:
[[392 148]
 [166 294]]
******************
[0.76161422 0.76538149 0.76312428 0.76488055 0.76036014 0.7681472
 0.76789657 0.76036172 0.76462772 0.77015536 0.7648774  0.76362522
 0.76889813 0.76262114 0.75910891 0.75809821 0.76463024 0.76387553
 0.75985543 0.76060793 0.76061014 0.76186295 0.75809978 0.75960543
 0.76136612 0.75910544 0.75910197 0.75860009 0.75885229 0.75608784
 0.76086455 0.75860167 0.770155

RandomForestClassifier(criterion='entropy', min_samples_split=6)

# Stratified cross-validation with MLP

In [None]:
#perform a grid search of the model with stratified cross validation with k=5 and select the best model
#then, report the test score for that particular model

In [None]:
parameters_mlp = {'hidden_layer_sizes': [(8,16,32),(16,32,64,128),(64,128,256),], 
                 'activation':['relu','logistic','tanh'], 
                 'solver': ['lbfgs','sgd','adam'],
                 'max_iter' : [9000],
                  'early_stopping' : [True]}


In [None]:
#RESULTS WITH ORIGINAL SPARSE MATRIX
perform_gridsearch(MLPClassifier(), parameters_mlp, skf, X_train_vec_bin, y_train_binary, X_test_vec_bin, y_test_binary)



Best configuration {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (8, 16, 32), 'max_iter': 9000, 'solver': 'adam'}
Best mean score for the validation 0.7445350970662598
Std for the best mean score across folds 0.015194744280056016
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6484517 0.6592593 0.6538108       540
           1  0.5920177 0.5804348 0.5861690       460

    accuracy                      0.6230000      1000
   macro avg  0.6202347 0.6198470 0.6199899      1000
weighted avg  0.6224921 0.6230000 0.6226956      1000

Confusion matrix:
[[356 184]
 [193 267]]
******************
[0.72594308 0.56618948 0.7445351  0.73197386 0.56618948 0.74202443
 0.72769967 0.56618948 0.73473642 0.56618948 0.56618948 0.56618948
 0.56618948 0.56618948 0.56618948 0.56618948 0.56618948 0.70563672
 0.73397887 0.56644042 0.74001343 0.72644559 0.56618948 0.72946193
 0.72870816 0.5649332  0.7367471 ]


MLPClassifier(early_stopping=True, hidden_layer_sizes=(8, 16, 32),
              max_iter=9000)

In [None]:
#RESULTS WITH Embedding 
perform_gridsearch(MLPClassifier(), parameters_mlp , skf, embeddings_train, y_train_binary, embeddings_test, y_test_binary)



Best configuration {'activation': 'logistic', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'adam'}
Best mean score for the validation 0.7809527562530343
Std for the best mean score across folds 0.023552819256212623
******************
Classification report:
              precision    recall  f1-score   support

           0  0.7547170 0.5925926 0.6639004       540
           1  0.6180556 0.7739130 0.6872587       460

    accuracy                      0.6760000      1000
   macro avg  0.6863863 0.6832528 0.6755796      1000
weighted avg  0.6918527 0.6760000 0.6746452      1000

Confusion matrix:
[[320 220]
 [104 356]]
******************
[0.72644275 0.56618948 0.77768958 0.73021443 0.56618948 0.77115723
 0.7493055  0.56669199 0.77467324 0.56618948 0.56618948 0.56618948
 0.56618948 0.56618948 0.56618948 0.56618948 0.56618948 0.78095276
 0.73799928 0.56443069 0.77517733 0.73850242 0.56568697 0.76889876
 0.74001532 0.5649332  0.77316886]


MLPClassifier(activation='logistic', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000)

In [None]:
#RESULTS WITH linguistic features 
perform_gridsearch(MLPClassifier(), parameters_mlp, skf, linguistic_features_train, y_train_binary, linguistic_features_test, y_test_binary)



Best configuration {'activation': 'logistic', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'lbfgs'}
Best mean score for the validation 0.6131608481554573
Std for the best mean score across folds 0.009729773923887236
******************
Classification report:
              precision    recall  f1-score   support

           0  0.6123188 0.6259259 0.6190476       540
           1  0.5491071 0.5347826 0.5418502       460

    accuracy                      0.5840000      1000
   macro avg  0.5807130 0.5803543 0.5804489      1000
weighted avg  0.5832415 0.5840000 0.5835368      1000

Confusion matrix:
[[338 202]
 [214 246]]
******************
[0.58327144 0.5385516  0.56769607 0.59633235 0.56669042 0.56794827
 0.59432041 0.56593854 0.55915336 0.58326356 0.56618948 0.56618948
 0.56618948 0.56618948 0.56618948 0.61316085 0.56618948 0.56618948
 0.58528086 0.56593822 0.56393006 0.60110149 0.56618948 0.56895298
 0.61240929 0.56618948 0.58000794]


MLPClassifier(activation='logistic', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000, solver='lbfgs')

In [None]:
#RESULTS WITH SPARSE MATRIX + embeddings + linguistic features 
perform_gridsearch(MLPClassifier(), parameters_mlp, skf, X_train_full, y_train_binary, X_test_full, y_test_binary)







Best configuration {'activation': 'logistic', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'adam'}
Best mean score for the validation 0.7962774979035705
Std for the best mean score across folds 0.022532199359913515
******************
Classification report:
              precision    recall  f1-score   support

           0  0.7975000 0.5907407 0.6787234       540
           1  0.6316667 0.8239130 0.7150943       460

    accuracy                      0.6980000      1000
   macro avg  0.7145833 0.7073269 0.6969089      1000
weighted avg  0.7212167 0.6980000 0.6954540      1000

Confusion matrix:
[[319 221]
 [ 81 379]]
******************
[0.65613923 0.54558678 0.79100017 0.76864372 0.56292378 0.7862263
 0.76889624 0.56468289 0.78573167 0.72643834 0.56568697 0.56618948
 0.56618948 0.56618948 0.70859189 0.76085351 0.56618948 0.7962775
 0.75206963 0.56618948 0.78171315 0.75383032 0.56618948 0.7922574
 0.76864467 0.56618948 0.7952731 ]


MLPClassifier(activation='logistic', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000)

# Standard multi-label classification (set-up)

In [None]:
#repeat the same set-up pipeline for the binary classification task, using a different training set, this time with 5 labels


embeddings_train_multi = model.encode(x_train_multi)
embeddings_test_multi = model.encode(x_test_multi)

linguistic_features_train_multi = linguistic_extraction(x_train_multi)
linguistic_features_test_multi = linguistic_extraction(x_test_multi)

vect = CountVectorizer(analyzer=spacy_nlp_tokenizer, min_df=5)  
reset_counter()

X_train_tok_multi = vect.fit_transform(x_train_multi)
reset_counter()

X_test_tok_multi = vect.transform(x_test_multi)


tfidf = TfidfTransformer()  # weighting
tfidf.fit(X_train_tok_multi)
X_train_vec_multi = tfidf.transform(X_train_tok_multi)
X_test_vec_multi = tfidf.transform(X_test_tok_multi)


X_train_vec_multi_arr = X_train_vec_multi.toarray()
X_test_vec_multi_arr = X_test_vec_multi.toarray()

X_train_full_multi = []
X_test_full_multi = []

for i in range(len(X_train_vec_multi_arr)):
  arr1 = X_train_vec_multi_arr[i]
  arr2 = embeddings_train_multi[i]
  arr3 = linguistic_features_train_multi[i]
  X_train_full_multi.append(np.concatenate((arr1,arr2,arr3), axis =0))

for i in range(len(X_test_vec_multi_arr)):
  arr1 = X_test_vec_multi_arr[i]
  arr2 = embeddings_test_multi[i]
  arr3 = linguistic_features_test_multi[i]
  X_test_full_multi.append(np.concatenate((arr1,arr2,arr3), axis =0))



100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
100
200
300
400


In [None]:
from scipy import sparse
X_train_full_multi = sparse.csr_matrix(X_train_full_multi)
X_test_full_multi = sparse.csr_matrix(X_test_full_multi)


# Multiclass - Logistic regression

In [None]:
#RESULTS WITH ORIGINAL SPARSE MATRIX

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, X_train_vec_multi, y_train_multi, X_test_vec_multi, y_test_multi, scoring = 'f1_macro')


Best configuration {'C': 1.0, 'penalty': 'none', 'solver': 'lbfgs'}
Best mean score for the validation 0.38485441941792464
Std for the best mean score across folds 0.02847112727060643
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.1818182 0.1818182 0.1818182        11
        discredit  0.5056818 0.6312057 0.5615142       141
        dominance  0.5000000 0.4435484 0.4700855       124
sexual_harassment  0.3269231 0.3863636 0.3541667        44
       stereotype  0.8198198 0.6500000 0.7250996       140

         accuracy                      0.5521739       460
        macro avg  0.4668486 0.4585872 0.4585368       460
     weighted avg  0.5749142 0.5521739 0.5577421       460

Confusion matrix:
[[ 2  3  3  1  2]
 [ 1 89 29 13  9]
 [ 4 41 55 16  8]
 [ 0 18  8 17  1]
 [ 4 25 15  5 91]]
******************
[0.38485442 0.36810478        nan 0.32509107 0.32509107 0.27660041
        nan        nan 0.36010066 0.38485442 

In [None]:
#RESULTS WITH ORIGINAL SPARSE MATRIX

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, X_train_vec_multi, y_train_multi, X_test_vec_multi, y_test_multi, scoring = 'f1_macro')


Best configuration {'C': 1.0, 'penalty': 'none', 'solver': 'lbfgs'}
Best mean score for the validation 0.38485441941792464
Std for the best mean score across folds 0.02847112727060643
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.1818182 0.1818182 0.1818182        11
        discredit  0.5056818 0.6312057 0.5615142       141
        dominance  0.5000000 0.4435484 0.4700855       124
sexual_harassment  0.3269231 0.3863636 0.3541667        44
       stereotype  0.8198198 0.6500000 0.7250996       140

         accuracy                      0.5521739       460
        macro avg  0.4668486 0.4585872 0.4585368       460
     weighted avg  0.5749142 0.5521739 0.5577421       460

Confusion matrix:
[[ 2  3  3  1  2]
 [ 1 89 29 13  9]
 [ 4 41 55 16  8]
 [ 0 18  8 17  1]
 [ 4 25 15  5 91]]
******************
[0.38485442 0.36810478        nan 0.32509107 0.32509107 0.27660041
        nan        nan 0.36010066 0.38485442 

In [None]:
#RESULTS WITH Embedding 
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, embeddings_train_multi, y_train_multi, embeddings_test_multi, y_test_multi, scoring = 'f1_macro')





Best configuration {'C': 1.0, 'penalty': 'l2', 'solver': 'lbfgs'}
Best mean score for the validation 0.43073886495279046
Std for the best mean score across folds 0.049850567472351885
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.3333333 0.0909091 0.1428571        11
        discredit  0.3771429 0.9361702 0.5376782       141
        dominance  0.8750000 0.0564516 0.1060606       124
sexual_harassment  0.4705882 0.5454545 0.5052632        44
       stereotype  0.8958333 0.3071429 0.4574468       140

         accuracy                      0.4500000       460
        macro avg  0.5903796 0.3872257 0.3498612       460
     weighted avg  0.6771008 0.4500000 0.3843689       460

Confusion matrix:
[[  1   9   0   0   1]
 [  0 132   0   8   1]
 [  1  99   7  14   3]
 [  0  20   0  24   0]
 [  1  90   1   5  43]]
******************
[0.41546438 0.39044476        nan 0.43073886 0.43073886 0.40056319
        nan        na

In [None]:
#RESULTS WITH linguistic features 

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, linguistic_features_train_multi, y_train_multi, linguistic_features_test_multi, y_test_multi, scoring = 'f1_macro')



Best configuration {'C': 1.0, 'penalty': 'none', 'solver': 'lbfgs'}
Best mean score for the validation 0.14499804942290187
Std for the best mean score across folds 0.0002454205374749338
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.5000000 0.0909091 0.1538462        11
        discredit  0.3078603 1.0000000 0.4707846       141
        dominance  0.0000000 0.0000000 0.0000000       124
sexual_harassment  0.0000000 0.0000000 0.0000000        44
       stereotype  0.0000000 0.0000000 0.0000000       140

         accuracy                      0.3086957       460
        macro avg  0.1615721 0.2181818 0.1249262       460
     weighted avg  0.1063224 0.3086957 0.1479847       460

Confusion matrix:
[[  1  10   0   0   0]
 [  0 141   0   0   0]
 [  0 124   0   0   0]
 [  1  43   0   0   0]
 [  0 140   0   0   0]]
******************
[0.14499805 0.14490392        nan 0.14499805 0.14490392 0.14499805
        nan       

In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    perform_gridsearch(LogisticRegression(), parameters_logistic, skf, X_train_full_multi, y_train_multi, X_test_full_multi, y_test_multi, scoring = 'f1_macro')



Best configuration {'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}
Best mean score for the validation 0.4690082926715121
Std for the best mean score across folds 0.03563613166077696
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.3000000 0.2727273 0.2857143        11
        discredit  0.4730769 0.8723404 0.6134663       141
        dominance  0.7916667 0.1532258 0.2567568       124
sexual_harassment  0.3809524 0.5454545 0.4485981        44
       stereotype  0.9320388 0.6857143 0.7901235       140

         accuracy                      0.5760870       460
        macro avg  0.5755470 0.5058925 0.4789318       460
     weighted avg  0.6856910 0.5760870 0.5474675       460

Confusion matrix:
[[  3   6   0   1   1]
 [  2 123   3  10   3]
 [  3  76  19  23   3]
 [  1  18   1  24   0]
 [  1  37   1   5  96]]
******************
[0.3822685  0.4475279         nan 0.35178417 0.46900829 0.43593736
        nan        

# Multiclassification with RF

In [None]:
parameters_rf = {'min_samples_split': [2,4,6,8,10], 'min_samples_leaf':[1,2,3,5,8,10], 
                               'criterion': ['gini','entropy']}

In [None]:
#RESULTS WITH SPARSE MATRIX

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, X_train_vec_multi, y_train_multi, X_test_vec_multi, y_test_multi, scoring = 'f1_macro')




Best configuration {'criterion': 'gini', 'min_samples_leaf': 1, 'min_samples_split': 8}
Best mean score for the validation 0.3772585966702452
Std for the best mean score across folds 0.03162785855369108
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.4285714 0.2727273 0.3333333        11
        discredit  0.4408946 0.9787234 0.6079295       141
        dominance  0.8518519 0.1854839 0.3046358       124
sexual_harassment  0.5454545 0.2727273 0.3636364        44
       stereotype  0.9450549 0.6142857 0.7445887       140

         accuracy                      0.5695652       460
        macro avg  0.6423655 0.4647895 0.4708247       460
     weighted avg  0.7148212 0.5695652 0.5378304       460

Confusion matrix:
[[  3   6   1   0   1]
 [  0 138   1   0   2]
 [  3  87  23   9   2]
 [  1  29   2  12   0]
 [  0  53   0   1  86]]
******************
[0.36214019 0.37289016 0.35605673 0.3772586  0.35542503 0.33808209
 

RandomForestClassifier(min_samples_split=8)

In [None]:
#RESULTS WITH Embedding 

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, embeddings_train_multi, y_train_multi, embeddings_test_multi, y_test_multi, scoring = 'f1_macro')





Best configuration {'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 4}
Best mean score for the validation 0.24601030563177234
Std for the best mean score across folds 0.03322429675689442
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.5000000 0.0909091 0.1538462        11
        discredit  0.3146067 0.9929078 0.4778157       141
        dominance  0.0000000 0.0000000 0.0000000       124
sexual_harassment  0.7272727 0.1818182 0.2909091        44
       stereotype  1.0000000 0.0142857 0.0281690       140

         accuracy                      0.3282609       460
        macro avg  0.5083759 0.2559842 0.1901480       460
     weighted avg  0.4823034 0.3282609 0.1865391       460

Confusion matrix:
[[  1  10   0   0   0]
 [  0 140   0   1   0]
 [  1 122   0   1   0]
 [  0  36   0   8   0]
 [  0 137   0   1   2]]
******************
[0.23269852 0.22719987 0.24189684 0.23722379 0.22153176 0.231342

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


RandomForestClassifier(criterion='entropy', min_samples_split=4)

In [None]:
#RESULTS WITH linguistic features 

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, linguistic_features_train_multi, y_train_multi, linguistic_features_test_multi, y_test_multi, scoring = 'f1_macro')



Best configuration {'criterion': 'gini', 'min_samples_leaf': 1, 'min_samples_split': 4}
Best mean score for the validation 0.2159520064908857
Std for the best mean score across folds 0.012601757232990769
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.0000000 0.0000000 0.0000000        11
        discredit  0.3259494 0.7304965 0.4507659       141
        dominance  0.3913043 0.0725806 0.1224490       124
sexual_harassment  0.0508475 0.0681818 0.0582524        44
       stereotype  0.3750000 0.0642857 0.1097561       140

         accuracy                      0.2695652       460
        macro avg  0.2286202 0.1871089 0.1482447       460
     weighted avg  0.3243867 0.2695652 0.2101535       460

Confusion matrix:
[[  0   8   0   2   1]
 [  7 103   9  17   5]
 [ 17  81   9  12   5]
 [  7  28   2   3   4]
 [  7  96   3  25   9]]
******************
[0.19677632 0.21595201 0.20300324 0.19229656 0.18532687 0.20226415


RandomForestClassifier(min_samples_split=4)

In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features

perform_gridsearch(RandomForestClassifier(), parameters_rf, skf, X_train_full_multi, y_train_multi, X_test_full_multi, y_test_multi, scoring = 'f1_macro')


Best configuration {'criterion': 'gini', 'min_samples_leaf': 1, 'min_samples_split': 2}
Best mean score for the validation 0.24586474051121288
Std for the best mean score across folds 0.02161563897726648
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  1.0000000 0.0909091 0.1666667        11
        discredit  0.3175676 1.0000000 0.4820513       141
        dominance  0.0000000 0.0000000 0.0000000       124
sexual_harassment  0.7272727 0.1818182 0.2909091        44
       stereotype  1.0000000 0.0285714 0.0555556       140

         accuracy                      0.3347826       460
        macro avg  0.6089681 0.2602597 0.1990365       460
     weighted avg  0.4951675 0.3347826 0.1964790       460

Confusion matrix:
[[  1  10   0   0   0]
 [  0 141   0   0   0]
 [  0 123   0   1   0]
 [  0  36   0   8   0]
 [  0 134   0   2   4]]
******************
[0.24586474 0.22763264 0.222887   0.22239634 0.23801109 0.21969574


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


RandomForestClassifier()

# Multi-label with MLP

In [None]:
parameters_mlp = {'hidden_layer_sizes': [(8,16,32),(16,32,64,128),(64,128,256),], 
                 'activation':['relu','logistic','tanh'], 
                 'solver': ['lbfgs','sgd','adam'],
                 'max_iter' : [9000],
                  'early_stopping' : [True]}


In [None]:
#RESULTS WITH ORIGINAL SPARSE MATRIX
perform_gridsearch(MLPClassifier(), parameters_mlp,  skf, X_train_vec_multi, y_train_multi, X_test_vec_multi, y_test_multi, scoring = 'f1_macro')

Best configuration {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'lbfgs'}
Best mean score for the validation 0.39134767380652413
Std for the best mean score across folds 0.014566092604740776
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.2500000 0.1818182 0.2105263        11
        discredit  0.5027624 0.6453901 0.5652174       141
        dominance  0.5145631 0.4274194 0.4669604       124
sexual_harassment  0.3396226 0.4090909 0.3711340        44
       stereotype  0.8434783 0.6928571 0.7607843       140

         accuracy                      0.5673913       460
        macro avg  0.4900853 0.4713151 0.4749245       460
     weighted avg  0.5879906 0.5673913 0.5712048       460

Confusion matrix:
[[ 2  5  2  1  1]
 [ 0 91 27 12 11]
 [ 6 43 53 17  5]
 [ 0 18  7 18  1]
 [ 0 24 14  5 97]]
******************
[0.34346138 0.14499805 0.14499805 0.38

MLPClassifier(early_stopping=True, hidden_layer_sizes=(64, 128, 256),
              max_iter=9000, solver='lbfgs')

In [None]:
#RESULTS WITH EMBEDDINGS
perform_gridsearch(MLPClassifier(), parameters_mlp,  skf, embeddings_train_multi, y_train_multi, embeddings_test_multi, y_test_multi, scoring = 'f1_macro')

Best configuration {'activation': 'tanh', 'early_stopping': True, 'hidden_layer_sizes': (16, 32, 64, 128), 'max_iter': 9000, 'solver': 'adam'}
Best mean score for the validation 0.44902571422744025
Std for the best mean score across folds 0.0359098573962402
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.3333333 0.1818182 0.2352941        11
        discredit  0.3534247 0.9148936 0.5098814       141
        dominance  0.0000000 0.0000000 0.0000000       124
sexual_harassment  0.3636364 0.6363636 0.4628099        44
       stereotype  0.8888889 0.0571429 0.1073826       140

         accuracy                      0.3630435       460
        macro avg  0.3878566 0.3580437 0.2630736       460
     weighted avg  0.4216174 0.3630435 0.2388668       460

Confusion matrix:
[[  2   8   0   1   0]
 [  0 129   1  11   0]
 [  2  90   0  31   1]
 [  0  16   0  28   0]
 [  2 122   2   6   8]]
******************
[0.37122251 0

MLPClassifier(activation='tanh', early_stopping=True,
              hidden_layer_sizes=(16, 32, 64, 128), max_iter=9000)

In [None]:
#RESULTS WITH LINGUISTIC FEATURES
perform_gridsearch(MLPClassifier(), parameters_mlp,  skf, linguistic_features_train_multi, y_train_multi, linguistic_features_test_multi, y_test_multi, scoring = 'f1_macro')

Best configuration {'activation': 'tanh', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'lbfgs'}
Best mean score for the validation 0.1792257237315968
Std for the best mean score across folds 0.018636303489199944
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.0000000 0.0000000 0.0000000        11
        discredit  0.3028169 0.9148936 0.4550265       141
        dominance  0.0000000 0.0000000 0.0000000       124
sexual_harassment  0.0625000 0.0227273 0.0333333        44
       stereotype  0.2857143 0.0142857 0.0272109       140

         accuracy                      0.2869565       460
        macro avg  0.1302062 0.1903813 0.1031141       460
     weighted avg  0.1857547 0.2869565 0.1509455       460

Confusion matrix:
[[  0  11   0   0   0]
 [  1 129   0   8   3]
 [  3 116   0   5   0]
 [  5  36   0   1   2]
 [  2 134   0   2   2]]
******************
[0.14499805 0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


MLPClassifier(activation='tanh', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000, solver='lbfgs')

In [None]:
#RESULTS WITH sparse matrix + embedding + linguistic features

perform_gridsearch(MLPClassifier(), parameters_mlp, skf, X_train_full_multi, y_train_multi, X_test_full_multi, y_test_multi, scoring = 'f1_macro')


Best configuration {'activation': 'tanh', 'early_stopping': True, 'hidden_layer_sizes': (64, 128, 256), 'max_iter': 9000, 'solver': 'lbfgs'}
Best mean score for the validation 0.4305438090984177
Std for the best mean score across folds 0.035730381135979546
******************
Classification report:
                   precision    recall  f1-score   support

        derailing  0.2000000 0.2727273 0.2307692        11
        discredit  0.4371859 0.6170213 0.5117647       141
        dominance  0.5102041 0.2016129 0.2890173       124
sexual_harassment  0.2692308 0.4772727 0.3442623        44
       stereotype  0.7731092 0.6571429 0.7104247       140

         accuracy                      0.4956522       460
        macro avg  0.4379460 0.4451554 0.4172477       460
     weighted avg  0.5373695 0.4956522 0.4894401       460

Confusion matrix:
[[ 3  4  2  1  1]
 [ 4 87 12 19 19]
 [ 6 57 25 32  4]
 [ 0 17  3 21  3]
 [ 2 34  7  5 92]]
******************
[0.28189045 0.14482212 0.14487578 0.354

MLPClassifier(activation='tanh', early_stopping=True,
              hidden_layer_sizes=(64, 128, 256), max_iter=9000, solver='lbfgs')