## 1. Construct datasets in a loop (including the ablation-study variants)

In [1]:
from glue_deberta.dataset_construction import construct_semeval_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Value forwarded as `disco_depth` to every dataset build below.
disco_depth = 2

# One row per ablation configuration, in the order (use_nucsat, use_rels, use_paths).
options = [
    [True, True, True],
    [True, True, False],
    [True, False, False],
    [False, False, False],
]

# Build every configuration for every data split.
for part in ('train', 'dev', 'test'):
    for use_nucsat, use_rels, use_paths in options:
        feature_flags = dict(use_rels=use_rels,
                             use_nucsat=use_nucsat,
                             use_paths=use_paths)
        construct_semeval_dataset(part=part,
                                  disco_depth=disco_depth,
                                  **feature_flags)

100%|███████████████████████████████████████| 446/446 [00:00<00:00, 1679.68it/s]
100%|█████████████████████████████████████████| 446/446 [00:48<00:00,  9.24it/s]
100%|█████████████████████████████████████| 9498/9498 [00:07<00:00, 1298.76it/s]
100%|█████████████████████████████████████████| 90/90 [00:00<00:00, 1183.51it/s]
100%|███████████████████████████████████████████| 90/90 [00:04<00:00, 18.99it/s]
100%|█████████████████████████████████████| 3127/3127 [00:00<00:00, 4245.14it/s]
100%|█████████████████████████████████████████| 54/54 [00:00<00:00, 1744.62it/s]
100%|███████████████████████████████████████████| 54/54 [00:02<00:00, 21.48it/s]
100%|███████████████████████████████████████| 910/910 [00:00<00:00, 5376.18it/s]


In [None]:
# Write a feature-free copy of each split; the base model is trained on these.
import json

for part in ['train', 'dev', 'test']:
    fn = f'datasets/deberta_propaganda_classif/{part}_custom_feats=2_lvl2--use_rels=False--use_nucsat=True--use_paths=False_multi.json'
    nofeat_fn = fn.replace('.json', '_nofeat.json')

    with open(fn, 'r') as src_file:
        data = json.load(src_file)

    # Strip the 'feature' entry from every record (raises KeyError if it is absent).
    for record in data:
        record.pop('feature')

    with open(nofeat_fn, 'w') as dst_file:
        json.dump(data, dst_file)

## 2. Save weights for loss

In [3]:
import json
import pickle

In [16]:
# Load the training split built with all three feature flags enabled.
train_path = 'datasets/deberta_propaganda_classif/train_custom_feats=40_lvl2--use_rels=True--use_nucsat=True--use_paths=True_multi.json'
with open(train_path, 'r') as train_file:
    data_train = json.load(train_file)

In [17]:
# Peek at one training record to check its layout
# (keys: text, sentence1, label, i, span, feature).
data_train[6]

{'text': 'article111111111.txt',
 'sentence1': 'But Tedros voiced alarm that "plague in Madagascar behaved in a very, very different way this year."',
 'label': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0],
 'i': 13,
 'span': [1006, 1107],
 'feature': [0.5,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0]}

In [4]:
# Upper bound applied to every per-label weight.
MAX_POS_WEIGHT = 70


def compute_pos_weights(examples, max_weight=MAX_POS_WEIGHT):
    """Return one negative/positive ratio per label column, capped at ``max_weight``.

    Args:
        examples: list of records, each with a multi-hot ``'label'`` list;
            the number of labels is taken from the first record.
        max_weight: cap applied to every ratio.

    Returns:
        A list with one weight per label position.
    """
    num_labels = len(examples[0]['label'])
    pos_weights = []
    for label_idx in range(num_labels):
        num_pos = sum(example['label'][label_idx] for example in examples)
        # Count negatives from the true positive count; only the divisor is
        # clamped, so a label with no positives is not credited with one.
        num_neg = len(examples) - num_pos
        pos_weights.append(min(max_weight, num_neg / max(num_pos, 1)))
    return pos_weights


weights = compute_pos_weights(data_train)

In [5]:
# Display the computed per-label weights (values are capped at 70).
weights

[60.675324675324674,
 29.638709677419357,
 70,
 70,
 43.59154929577465,
 70,
 17.335907335907336,
 19.38197424892704,
 70,
 32.09407665505226,
 70,
 4.250414593698176,
 8.70173646578141,
 70,
 70,
 16.459558823529413,
 61.07843137254902,
 70,
 70]

In [6]:
# Persist the per-label weights next to the training code.
with open('glue_deberta/pos_weights.pkl', 'wb') as weights_file:
    pickle.dump(weights, weights_file)

In [18]:
# Read the stored weights back (sanity check of the file written above).
with open('glue_deberta/pos_weights.pkl', 'rb') as weights_file:
    weights = pickle.load(weights_file)

In [19]:
# Technique names in alphabetical order; they are paired positionally with
# `weights` in the next cell.
# NOTE(review): assumes this order matches the label-vector positions in the
# dataset files — confirm against the dataset construction code.
labels_list = ['Appeal_to_Authority',
 'Appeal_to_Fear-Prejudice',
 'Appeal_to_Hypocrisy',
 'Appeal_to_Popularity',
 'Causal_Oversimplification',
 'Conversation_Killer',
 'Doubt',
 'Exaggeration-Minimisation',
 'False_Dilemma-No_Choice',
 'Flag_Waving',
 'Guilt_by_Association',
 'Loaded_Language',
 'Name_Calling-Labeling',
 'Obfuscation-Vagueness-Confusion',
 'Red_Herring',
 'Repetition',
 'Slogans',
 'Straw_Man',
 'Whataboutism']

In [20]:
# Show each technique name next to its weight for a readable overview.
list(zip(labels_list, weights))

[('Appeal_to_Authority', 60.675324675324674),
 ('Appeal_to_Fear-Prejudice', 29.638709677419357),
 ('Appeal_to_Hypocrisy', 70),
 ('Appeal_to_Popularity', 70),
 ('Causal_Oversimplification', 43.59154929577465),
 ('Conversation_Killer', 70),
 ('Doubt', 17.335907335907336),
 ('Exaggeration-Minimisation', 19.38197424892704),
 ('False_Dilemma-No_Choice', 70),
 ('Flag_Waving', 32.09407665505226),
 ('Guilt_by_Association', 70),
 ('Loaded_Language', 4.250414593698176),
 ('Name_Calling-Labeling', 8.70173646578141),
 ('Obfuscation-Vagueness-Confusion', 70),
 ('Red_Herring', 70),
 ('Repetition', 16.459558823529413),
 ('Slogans', 61.07843137254902),
 ('Straw_Man', 70),
 ('Whataboutism', 70)]

## 3. Train model

In [None]:
# Train the baseline model on the feature-free (*_nofeat.json) train/dev files.
# Per-device batch size 2 with 8 gradient-accumulation steps, learning rate 3e-5, 45 epochs.
# NOTE(review): leftover scratch notes on alternative settings, kept for reference:
#   --label_all_tokens; checkpointing_steps 'epoch' / 10000; extra_feature_size=42; lr 2e-5

!CUDA_VISIBLE_DEVICES=0 python glue_deberta/run_glue_no_trainer.py \
  --model_name_or_path "microsoft/deberta-v3-base" \
  --train_file 'datasets/deberta_propaganda_classif/train_custom_feats=2_lvl2--use_rels=False--use_nucsat=True--use_paths=False_multi_nofeat.json' \
  --validation_file 'datasets/deberta_propaganda_classif/dev_custom_feats=2_lvl2--use_rels=False--use_nucsat=True--use_paths=False_multi_nofeat.json' \
  --max_length 256 \
  --pad_to_max_length \
  --per_device_train_batch_size 2 \
  --per_device_eval_batch_size 2 \
  --gradient_accumulation_steps 8 \
  --learning_rate 3e-5 \
  --num_train_epochs 45 \
  --checkpointing_steps 100000 \
  --output_dir "checkpoint_cls/deberta_glue_binary_noo_lr3e-5-16-45ep_w70_2lin__base" \
  --with_tracking

### Run custom model training in a loop

In [1]:
from glue_deberta.train_loop import run_train

In [2]:
# One row per ablation configuration, in the order (use_nucsat, use_rels, use_paths).
options = [
    [True, True, True],
    [True, True, False],
    [True, False, False],
    [False, False, False],
]

# Settings shared by every training run.
shared_train_kwargs = dict(
    level=2,
    lr=2e-5,
    bs=8,
    gac=2,
    n_epochs=45,
    save_eval_metric='micro_f1',
    device='cuda:0',
)

# Train one model per feature configuration.
for use_nucsat, use_rels, use_paths in options:
    run_train(use_rels=use_rels,
              use_nucsat=use_nucsat,
              use_paths=use_paths,
              **shared_train_kwargs)

## 4. Run inference

In [1]:
import json
import os
import pandas as pd
import pickle
import sys
sys.path.insert(0, 'glue_deberta/')

In [2]:
from inference import run_inference

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_name_or_path = 'checkpoint_cls/deberta_glue_binary_noo_lr2e-05-8-45ep_w70_2lin__lvl2--use_rels=True--use_nucsat=True--use_paths=True_multi'

dataset_dir = 'datasets/deberta_propaganda_classif/'
# Everything after the first '--' of the checkpoint name identifies the feature
# configuration; the matching dataset file name ends with that text plus '.json'.
config_suffix = model_name_or_path.split('--', 1)[1] + '.json'

matching_files = sorted(
    fn for fn in os.listdir(dataset_dir)
    if fn.startswith('test') and fn.endswith(config_suffix)
)

# Fail loudly instead of leaving `dev_json` undefined/stale (no match) or
# silently taking whichever file the directory listing happened to return last.
if not matching_files:
    raise FileNotFoundError(
        f"No test dataset ending with '{config_suffix}' found in '{dataset_dir}'")
if len(matching_files) > 1:
    raise ValueError(
        f"Ambiguous test dataset for '{config_suffix}': {matching_files}")

# Despite its name, `dev_json` holds the path of the *test* split here.
dev_json = dataset_dir + matching_files[0]

In [4]:
# Run the discourse-feature checkpoint on the selected dataset file
# (`dev_json` points at a test-split file; see the selection cell above).
preds = run_inference(model_name_or_path, dev_json)

Found cached dataset json (/home/alchernyavskiy/.cache/huggingface/datasets/json/default-c355148cc7e87df9/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 316.86it/s]
Loading cached processed dataset at /home/alchernyavskiy/.cache/huggingface/datasets/json/default-c355148cc7e87df9/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-e1bce466a3bd3eff.arrow
114it [00:11, 10.13it/s]


In [5]:
# Persist the discourse-model predictions for the error analysis.
predictions_path = "predictions/test_predictions_lvl2_disco.pkl"
# Create the output directory if needed so a fresh checkout does not fail
# here after inference has already been run.
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
with open(predictions_path, "wb") as f:
    pickle.dump(preds, f)

In [18]:
# Switch to the baseline checkpoint and the feature-free test file.
# Note: `dev_json` keeps its name but points at the test split.
model_name_or_path =  "checkpoint_cls/deberta_glue_binary_noo_lr3e-5-16-45ep_w70_2lin__base/"
dev_json = 'datasets/deberta_propaganda_classif/test_custom_feats=2_lvl2--use_rels=False--use_nucsat=True--use_paths=False_multi_nofeat.json'

In [19]:
# Run the baseline checkpoint on the feature-free test file.
# This overwrites `preds` from the discourse-model run.
preds = run_inference(model_name_or_path, dev_json)

Downloading and preparing dataset json/default to /home/alchernyavskiy/.cache/huggingface/datasets/json/default-589407286ff21d76/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...


Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 925.89it/s]
Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00, 87.67it/s]
                                                      

Dataset json downloaded and prepared to /home/alchernyavskiy/.cache/huggingface/datasets/json/default-589407286ff21d76/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.


100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 406.39it/s]
114it [00:11, 10.16it/s]                                                        


In [20]:
# Persist the baseline predictions for the error analysis.
predictions_path = "predictions/test_predictions_lvl2_base.pkl"
# Create the output directory if needed so a fresh checkout does not fail
# here after inference has already been run.
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
with open(predictions_path, "wb") as f:
    pickle.dump(preds, f)

### Error analysis

In [8]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
# Technique names for the per-label report; this list is NOT alphabetical
# (the alphabetical variant is `label_list`, derived from it in the next cell).
# NOTE(review): presumably ordered to line up with `cnts` — confirm. The list
# has 19 names while `cnts` has 18 values.
labels_list = ['Loaded_Language',
 'Name_Calling-Labeling',
 'Repetition',
 'Exaggeration-Minimisation',
 'Doubt',
 'Appeal_to_Fear-Prejudice',
 'Flag_Waving',
 'Causal_Oversimplification',
 'Slogans',
 'Appeal_to_Authority',
 'False_Dilemma-No_Choice',
 'Conversation_Killer',
 'Guilt_by_Association',
 'Red_Herring',
 'Appeal_to_Hypocrisy',
 'Whataboutism',
 'Obfuscation-Vagueness-Confusion',
 'Appeal_to_Popularity',
 'Straw_Man']

In [10]:
# Alphabetically sorted copy, used to turn label-vector positions into names.
# Mind the near-identical names: `label_list` (sorted) vs `labels_list` (unsorted).
label_list = sorted(labels_list)

In [11]:
# Read the test split and decode each multi-hot label vector into technique names.
test_labels_path = 'datasets/deberta_propaganda_classif/test_custom_feats=40_lvl2--use_rels=True--use_nucsat=True--use_paths=True_multi.json'
with open(test_labels_path, 'r') as f:
    labels_data = json.load(f)

# Position idx of a label vector maps to label_list[idx] (the sorted names).
labels = [
    [label_list[idx] for idx, flag in enumerate(example['label']) if flag == 1.]
    for example in labels_data
]

In [12]:
def _load_predictions(path):
    """Unpickle one saved predictions file and return its content."""
    with open(path, "rb") as pkl_file:
        return pickle.load(pkl_file)


# Baseline predictions first, then the discourse-feature model predictions.
preds_base = _load_predictions("predictions/test_predictions_lvl2_base.pkl")
preds = _load_predictions("predictions/test_predictions_lvl2_disco.pkl")

In [13]:
# NOTE(review): presumably per-technique instance counts in the same order as
# `labels_list` — TODO confirm the source of these numbers.
# There are 18 counts but 19 label names, so pairing them positionally leaves
# the last label ('Straw_Man') without a count.
cnts = [714, 535, 350, 329, 266, 211, 144, 118, 115, 94, 66, 43, 37, 34, 16, 15, 13, 13]

In [None]:
# Per-technique precision/recall/F1 for the discourse model and the baseline.
total_cnt = sum(cnts)  # loop-invariant denominator for the share column

for idx, lab in enumerate(labels_list):
    # `cnts` may be shorter than `labels_list`; iterating over the labels
    # (instead of zipping the two) keeps every label in the report.
    cnt = cnts[idx] if idx < len(cnts) else None

    lab_true = [1 if lab in elem else 0 for elem in labels]
    if sum(lab_true) == 0:
        # No gold instance of this technique in the evaluated data.
        continue

    lab_pred = [1 if lab in elem else 0 for elem in preds]
    lab_pred_base = [1 if lab in elem else 0 for elem in preds_base]

    # zero_division=0 returns the same 0.0 as the default when a model never
    # predicts the label, without emitting an undefined-metric warning.
    prec, rec, f1 = (metric(lab_true, lab_pred, zero_division=0)
                     for metric in (precision_score, recall_score, f1_score))
    prec_b, rec_b, f1_b = (metric(lab_true, lab_pred_base, zero_division=0)
                           for metric in (precision_score, recall_score, f1_score))

    # Share of this technique among all counted instances, in percent.
    share = round(100 * cnt / total_cnt, 2) if cnt is not None else 'n/a'
    print(lab, share)
    print(f'DISCO. Precision: {round(prec, 3)}; Recall {round(rec, 3)}; F1: {round(f1, 3)}')
    print(f'BASE. Precision: {round(prec_b, 3)}; Recall {round(rec_b, 3)}; F1: {round(f1_b, 3)}')
    print('\n')

In [15]:
from sklearn.preprocessing import MultiLabelBinarizer

In [16]:
def compute_metrics(predicted_tags, labels):
    """Compute multi-label macro-F1, micro-F1 and exact-match accuracy.

    Args:
        predicted_tags: one list of predicted technique names per example.
        labels: one list of gold technique names per example.

    Returns:
        Dict with keys ``macro_f1``, ``micro_f1`` and ``accuracy``
        (the fraction of examples whose full label set is predicted exactly).

    Raises:
        ValueError: if the two inputs have different lengths.
    """
    if len(predicted_tags) != len(labels):
        raise ValueError(
            f'Got {len(predicted_tags)} predictions for {len(labels)} gold label lists')

    # Binarize against the fixed technique inventory.
    mlb = MultiLabelBinarizer()
    mlb.fit([labels_list])

    # Drop the 'O' tag from both sides before binarizing.
    # Predictions and gold labels are filtered independently so that they
    # cannot be swapped (the earlier zipped loop assigned them crosswise).
    predicted_tags_noo = [[tag for tag in tags if tag != 'O'] for tags in predicted_tags]
    labels_noo = [[tag for tag in tags if tag != 'O'] for tags in labels]

    gold_values = mlb.transform(labels_noo)
    pred_values = mlb.transform(predicted_tags_noo)

    return {
        "macro_f1": f1_score(gold_values, pred_values, average="macro", zero_division=1),
        "micro_f1": f1_score(gold_values, pred_values, average="micro", zero_division=1),
        "accuracy": np.mean(np.all(gold_values == pred_values, axis=1))
    }

In [17]:
# Overall metrics of the discourse-feature model on the test split.
compute_metrics(preds, labels)

{'macro_f1': 0.1706033347274226,
 'micro_f1': 0.3884758364312268,
 'accuracy': 0.29010989010989013}

In [18]:
# Overall metrics of the baseline model on the same test split.
compute_metrics(preds_base, labels)

{'macro_f1': 0.1593540498453423,
 'micro_f1': 0.3169726489321843,
 'accuracy': 0.22747252747252747}