# Subtask A

In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [13]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [14]:
texts_df = pd.read_csv('subtaskA_train.csv')

In [15]:
texts_df.head()

Unnamed: 0,comment_text,conspiratorial
0,⚡Se non ci fossero soldati non ci sarebbero gu...,0
1,"21/08/21]( [PRE-PRINT]\n\n📄__ ""Shedding of Inf...",1
2,PAURA E DELIRIO ALLA CNN: IL MINISTERO DELLA V...,1
3,L'Aspirina non aumenta la sopravvivenza dei pa...,0
4,L'Italia non puo' dare armi lo vieta la Costit...,0


In [16]:
texts_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1845 entries, 0 to 1844
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   comment_text    1842 non-null   object
 1   conspiratorial  1845 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 29.0+ KB


In [17]:
texts_df[texts_df['comment_text'].isna()]

Unnamed: 0,comment_text,conspiratorial
244,,0
263,,0
665,,0


Delete rows with NaN text

In [18]:
texts_df = texts_df[texts_df.comment_text.notna()]

In [19]:
texts_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1842 entries, 0 to 1844
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   comment_text    1842 non-null   object
 1   conspiratorial  1842 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 43.2+ KB


Count positive and negatie samples

In [20]:
texts_df.conspiratorial.value_counts()

1    925
0    917
Name: conspiratorial, dtype: int64

## Preprocessing

Substitute '\n' with ' '.

In [21]:
texts_df.comment_text = texts_df.comment_text.apply(lambda text: text.replace('\n', ' '))

## Train-Validation split

In [22]:
from sklearn.model_selection import StratifiedShuffleSplit

In [23]:
# Split the data into training and testing datasets using stratified sampling
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, eval_index in split.split(texts_df, texts_df.conspiratorial):
    train_df, eval_df = texts_df.iloc[train_index], texts_df.iloc[eval_index]

In [24]:
print(train_df.info())
print(train_df.conspiratorial.value_counts())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1473 entries, 1512 to 1771
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   comment_text    1473 non-null   object
 1   conspiratorial  1473 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 34.5+ KB
None
1    740
0    733
Name: conspiratorial, dtype: int64


In [25]:
print(eval_df.info())
print(eval_df.conspiratorial.value_counts())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 369 entries, 363 to 670
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   comment_text    369 non-null    object
 1   conspiratorial  369 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 8.6+ KB
None
1    185
0    184
Name: conspiratorial, dtype: int64


The dataset seems balanced in term of positive and negative samples.

## Model

In [26]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [27]:
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [28]:
train_df.columns = ["text", "labels"]
eval_df.columns = ["text", "labels"]

In [29]:
import torch


cuda_available = torch.cuda.is_available()
print(cuda_available)


True


In [30]:
eval_labels = eval_df.labels.tolist()

## Bert-based models 

In [31]:
def train_validate_bert_clf(model_hgf_name, model_class, cased, eval_labels):
    
    batch_size = 8

    # Num steps in epoch = num training samples / batch size
    steps_per_epoch = int(np.ceil(len(train_df) / float(batch_size)))

    print('Each epoch will have {:,} steps.'.format(steps_per_epoch))


    # Optional model configuration
    model_args = ClassificationArgs(num_train_epochs=20, do_lower_case=cased, evaluate_during_training=True, evaluate_during_training_verbose=True, # Main options
                                    use_multiprocessing=False, use_multiprocessing_for_evaluation=False, overwrite_output_dir=True,  # System configurations
                                    output_dir='out_'+model_hgf_name,
                                    eval_batch_size=batch_size, train_batch_size=batch_size, evaluate_during_training_steps=steps_per_epoch, # Batch sizes and steps
                                    use_early_stopping=True, early_stopping_metric='eval_loss', early_stopping_patience=2, early_stopping_metric_minimize=True, # Early stopping
                                    early_stopping_delta=0.01, early_stopping_consider_epochs=True
                                    )

    # Create a ClassificationModel
    model = ClassificationModel(model_class, model_hgf_name, args=model_args, use_cuda=cuda_available)

    # Train the model
    model.train_model(train_df, eval_df=eval_df)

    # Predict on evaluation
    full_pred = model.predict(eval_df.text.tolist())
    pred = full_pred[0]
    raw_pred = full_pred[1]

    # Make classification report
    clf_report = classification_report(eval_labels, pred, target_names=['non-conspiratorial', 'conspiratorial'], digits=4)

    return model, clf_report

Train and evaluate various BERT based models

Models list

In [32]:
# class, huggingface name, cased

bert_models_list = [
    ("bert", "dbmdz/bert-base-italian-cased", True),
    ("distilbert", "indigo-ai/BERTino", False),
    ("bert", "m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0", False),
    ("bert", "dbmdz/bert-base-italian-xxl-cased", True)
    
]

In [33]:
trained_bert_models_list = []
eval_report_list = []

for model_class, model_hgf_name, cased in bert_models_list:

    model, clf_report = train_validate_bert_clf(model_hgf_name, model_class, cased, eval_labels)
    trained_bert_models_list.append(model)
    eval_report_list.append(clf_report)

    # Print model stats
    print('#################################')
    print('----', model_hgf_name, '----')
    print(clf_report)

    
   

Each epoch will have 185 steps.


Some weights of the model checkpoint at dbmdz/bert-base-italian-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model c

#################################
---- dbmdz/bert-base-italian-cased ----
                    precision    recall  f1-score   support

non-conspiratorial     0.7760    0.7717    0.7738       184
    conspiratorial     0.7742    0.7784    0.7763       185

          accuracy                         0.7751       369
         macro avg     0.7751    0.7751    0.7751       369
      weighted avg     0.7751    0.7751    0.7751       369

Each epoch will have 185 steps.


Some weights of the model checkpoint at indigo-ai/BERTino were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at indigo-ai/BERTino and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre

#################################
---- indigo-ai/BERTino ----
                    precision    recall  f1-score   support

non-conspiratorial     0.7753    0.7500    0.7624       184
    conspiratorial     0.7592    0.7838    0.7713       185

          accuracy                         0.7669       369
         macro avg     0.7672    0.7669    0.7669       369
      weighted avg     0.7672    0.7669    0.7669       369

Each epoch will have 185 steps.


Some weights of the model checkpoint at m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were

#################################
---- m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0 ----
                    precision    recall  f1-score   support

non-conspiratorial     0.9508    0.3152    0.4735       184
    conspiratorial     0.5909    0.9838    0.7383       185

          accuracy                         0.6504       369
         macro avg     0.7709    0.6495    0.6059       369
      weighted avg     0.7704    0.6504    0.6063       369

Each epoch will have 185 steps.


Some weights of the model checkpoint at dbmdz/bert-base-italian-xxl-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

#################################
---- dbmdz/bert-base-italian-xxl-cased ----
                    precision    recall  f1-score   support

non-conspiratorial     0.7907    0.7391    0.7640       184
    conspiratorial     0.7563    0.8054    0.7801       185

          accuracy                         0.7724       369
         macro avg     0.7735    0.7723    0.7721       369
      weighted avg     0.7735    0.7724    0.7721       369






In [22]:
# Print model stats

for i, clf_report in enumerate(eval_report_list):

    model_hgf_name = bert_models_list[i][1]
    print('\n')
    print('----', model_hgf_name, '----')
    print(clf_report)



---- dbmdz/bert-base-italian-cased ----
                    precision    recall  f1-score   support

non-conspiratorial     0.6929    0.9076    0.7859       184
    conspiratorial     0.8672    0.6000    0.7093       185

          accuracy                         0.7534       369
         macro avg     0.7801    0.7538    0.7476       369
      weighted avg     0.7803    0.7534    0.7475       369



---- indigo-ai/BERTino ----
                    precision    recall  f1-score   support

non-conspiratorial     0.7626    0.8207    0.7906       184
    conspiratorial     0.8070    0.7459    0.7753       185

          accuracy                         0.7832       369
         macro avg     0.7848    0.7833    0.7829       369
      weighted avg     0.7849    0.7832    0.7829       369



---- m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0 ----
                    precision    recall  f1-score   support

non-conspiratorial     0.7486    0.7446    0.7466       184
    co

## Model configuration

In [23]:
# other models

# model = ClassificationModel("bert", "dbmdz/bert-base-italian-cased", args=model_args, use_cuda=cuda_available)
# model = ClassificationModel("distilbert", "indigo-ai/BERTino", args=model_args, use_cuda=cuda_available) --- 0.81 with 5 epochs
# model = ClassificationModel("bert", "m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0", args=model_args, use_cuda=cuda_available)

In [24]:
batch_size = 8

# Num steps in epoch = num training samples / batch size
steps_per_epoch = int(np.ceil(len(train_df) / float(batch_size)))

print('Each epoch will have {:,} steps.'.format(steps_per_epoch))

Each epoch will have 185 steps.


In [25]:
# Optional model configuration
model_args = ClassificationArgs(num_train_epochs=20, do_lower_case=False, evaluate_during_training=True, evaluate_during_training_verbose=True, # Main options
                                use_multiprocessing=False, use_multiprocessing_for_evaluation=False, overwrite_output_dir=True, # System configurations
                                eval_batch_size=batch_size, train_batch_size=batch_size, evaluate_during_training_steps=steps_per_epoch, # Batch sizes and steps
                                use_early_stopping=True, early_stopping_metric='eval_loss', early_stopping_patience=2, early_stopping_metric_minimize=True, # Early stopping
                                early_stopping_delta=0.001, early_stopping_consider_epochs=True
                                   )

# Create a ClassificationModel

model = ClassificationModel("bert", "dbmdz/bert-base-italian-xxl-cased", args=model_args, use_cuda=cuda_available)

Some weights of the model checkpoint at dbmdz/bert-base-italian-xxl-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

## Train

In [26]:
# Train the model
model.train_model(train_df, eval_df=eval_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_bert_128_2_2
Epoch 1 of 20:   0%|          | 0/20 [00:00<?, ?it/s]INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_bert_128_2_2
INFO:simpletransformers.classification.classification_model:{'mcc': 0.32644187856724644, 'tp': 51, 'tn': 177, 'fp': 7, 'fn': 134, 'auroc': 0.8220329024676851, 'auprc': 0.813226434485485, 'eval_loss': 0.7783598798386594}
INFO:simpletransformers.classification.classification_model: No improvement in eval_loss
INFO:simpletransformers.classification.classification_model: Current step: 1
INFO:simpletransformers.classification.classification_model: Early stopping pat

(555,
 defaultdict(list,
             {'global_step': [185, 185, 370, 370, 555, 555],
              'train_loss': [0.313232421875,
               0.313232421875,
               0.056640625,
               0.056640625,
               0.00090789794921875,
               0.00090789794921875],
              'mcc': [0.32644187856724644,
               0.32644187856724644,
               0.590999620240559,
               0.590999620240559,
               0.5754223461805095,
               0.5754223461805095],
              'tp': [51, 51, 138, 138, 156, 156],
              'tn': [177, 177, 155, 155, 134, 134],
              'fp': [7, 7, 29, 29, 50, 50],
              'fn': [134, 134, 47, 47, 29, 29],
              'auroc': [0.8220329024676851,
               0.8220329024676851,
               0.8816392479435958,
               0.8816392479435958,
               0.891539365452409,
               0.891539365452409],
              'auprc': [0.813226434485485,
               0.813226434485485,
  

## Evaluate

In [27]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [28]:
# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(eval_df)
print(result)
print(wrong_predictions)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_bert_128_2_2
Running Evaluation: 100%|██████████| 47/47 [00:00<00:00, 78.44it/s]
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5754223461805095, 'tp': 156, 'tn': 134, 'fp': 50, 'fn': 29, 'auroc': 0.891539365452409, 'auprc': 0.891354521245363, 'eval_loss': 0.7757770140120324}


{'mcc': 0.5754223461805095, 'tp': 156, 'tn': 134, 'fp': 50, 'fn': 29, 'auroc': 0.891539365452409, 'auprc': 0.891354521245363, 'eval_loss': 0.7757770140120324}
[{'guid': 6, 'text_a': "Vi invito anche a leggere la storia reale della missione del dell'ammiraglio byrd", 'text_b': None, 'label': 0}, {'guid': 7, 'text_a': "E un vaccino facoltativo solo per viaggiare ci sta, tant'è che per andare in Africa è sempre stato obbligatorio il vaccino contro la febbre gialla", 'text_b': None, 'label': 0}, {'guid': 15, 'text_a': "In breve tempo, per accedere a Internet, sarà richiesto un documento d'identità Digitale.", 'text_b': None, 'label': 1}, {'guid': 18, 'text_a': 'Bisogna proprio dirlo: questi NO-VAX seminano odio ovunque e sono dei teppisti.\n[🇮🇹', 'text_b': None, 'label': 0}, {'guid': 31, 'text_a': "Israele sembra essere entrata in una crisi politica permanente. Il governo Bennett dopo appena un anno di vita è già caduto, ed è caduto su una delle questioni vitali della politica del Paese, q

In [29]:
full_pred = model.predict(eval_df.text.tolist())
pred = full_pred[0]
raw_pred = full_pred[1]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
100%|██████████| 47/47 [00:00<00:00, 76.68it/s]


In [30]:
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [31]:
# Functions that prints the classification report
def report_scores(test_label, test_pred):
    print(classification_report(test_label, 
                            test_pred, 
                            target_names=['non-conspiratorial', 'conspiratorial']))

In [32]:
report_scores(eval_df.labels.tolist(), pred)

                    precision    recall  f1-score   support

non-conspiratorial       0.82      0.73      0.77       184
    conspiratorial       0.76      0.84      0.80       185

          accuracy                           0.79       369
         macro avg       0.79      0.79      0.79       369
      weighted avg       0.79      0.79      0.79       369

