# Persuasion Techniques in Text of Memes - Inference with multi-label models



## Environment Setup

##### Disk Setup

In [1]:
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
# Project root directory. Later cells build every data, scorer, and summary path by
# plain string concatenation onto this value, so it must end with a trailing slash.
# Colab alternative (kept for reference):
#folder_name = "/content/drive/MyDrive/persuasion_technique_detection/"
folder_name = "/home/nlp-lab-ws23/nlp_praktikum/persuasion_technique_detection/"

##### Imports

In [3]:
#!pip install transformers datasets wandb evaluate accelerate -qU sklearn_hierarchical_classification sentencepiece

In [4]:
import gc
import re
import json
import numpy as np
import pandas as pd
import random
import torch
import subprocess
import json
import warnings
import shutil
import os

In [5]:
from sklearn import preprocessing
from sklearn.preprocessing import MultiLabelBinarizer,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
from datasets import Dataset,load_dataset,DatasetDict,concatenate_datasets
import datasets
from transformers import TrainingArguments
from transformers import AutoModelForSequenceClassification
from datasets import concatenate_datasets
from transformers import Trainer
from transformers import AutoTokenizer, DataCollatorWithPadding

In [6]:
# Choose the compute device once and report it; `device` and `AVAIL_GPUS` stay
# available as notebook-level globals.
if torch.cuda.is_available():
    AVAIL_GPUS = torch.cuda.device_count()
    device = torch.device("cuda")
    print(f'There are {AVAIL_GPUS} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))
else:
    AVAIL_GPUS = 0
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 3080 Ti


In [7]:
# Hugging Face checkpoint name; in the visible cells it is only used to derive the
# name of the output directory below.
model_name="microsoft/deberta-v3-base"
# Directory that receives the prediction files written by later cells.
# NOTE(review): nothing separates "summary_inverse_hierarchy_2" from the model name, so the
# directory is ".../summary_inverse_hierarchy_2microsoft_deberta-v3-base/" — confirm this is intended.
summary_dir_path = folder_name + "subtask1/transformer/summaries/inference/summary_inverse_hierarchy_2" + model_name.replace("/","_") + "/"

## Data

In [8]:
# Running count of empty-label samples seen so far. It lives in a one-element list so
# the filter function can update it in place without a `global` statement.
empty_label_counter = [0]

def filter_every_second_empty_label(sample):
    """Keep all labelled samples and every other unlabelled one.

    Samples with at least one label always pass. Unlabelled samples are counted in
    ``empty_label_counter``; the 1st, 3rd, 5th, ... pass and the 2nd, 4th, ... are dropped.
    The counter is never reset here, so the result depends on how many unlabelled
    samples were seen before.
    """
    if len(sample['labels']) > 0:
        return True

    empty_label_counter[0] += 1
    return empty_label_counter[0] % 2 == 1

In [9]:
# Locations of the subtask-1 JSON files, relative to the project root.
val_path=folder_name+"data/subtask1/validation.json"
train_path = folder_name+"data/subtask1/train.json"
test_path=folder_name+"data/subtask1/dev_subtask1_en.json"
# Alternative training file (name suggests PTC-augmented data — TODO confirm); only
# referenced by the commented-out mapping below.
train_added_ptc = folder_name+"data/ptc/ptc_added_train.json"

#dataset_memes_2024_files = {"train": train_added_ptc,"validation": val_path}
dataset_memes_2024_files = {"train": train_path,"validation": val_path}
val_files={"val":val_path}
test_files={"test":test_path}

dataset_memes_2024 = load_dataset("json",data_files=dataset_memes_2024_files)

# Switch for samples without any label:
#   1   -> keep everything (neither branch below fires)
#   0   -> drop every sample whose label list is empty
#   0.5 -> drop every second empty-label sample (stateful counter, see the filter function)
# The filter is called on the whole train/validation dataset object.
empty_labels = 1
if empty_labels == 0:
  dataset_memes_2024 = dataset_memes_2024.filter(lambda x : len(x['labels']) != 0)
if empty_labels == 0.5:
  dataset_memes_2024 = dataset_memes_2024.filter(filter_every_second_empty_label)

# Unfiltered copies: the validation file is loaded again under the split name "val",
# and the dev file under "test".
dataset_test=load_dataset("json",data_files=test_files)
dataset_val=load_dataset("json",data_files=val_files)

#### Log in to WandB

In [10]:
import wandb
import os

# Authenticate against Weights & Biases; the commented variant forces a fresh login.
#wandb.login(relogin=True)
wandb.login()

# setup wandb environment variables
# Project and entity under which the model artifacts used below are stored.
# NOTE(review): presumably these variables are read by the wandb integration of
# transformers.Trainer — confirm whether WANDB_LOG_MODEL matters for inference-only runs.
os.environ['WANDB_PROJECT'] = "subtask1_multilabel_transformer_encoder_classification"
os.environ['WANDB_ENTITY'] = "tumnlp"
os.environ["WANDB_LOG_MODEL"]= "end"

[34m[1mwandb[0m: Currently logged in as: [33mmahmudfami[0m ([33mtumnlp[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Base checkpoint whose tokenizer is loaded for inference. It holds the same string as
# `model_name` defined earlier; the two must be kept in sync by hand.
#checkpoint_multilabel = "vinai/bertweet-base"
checkpoint_multilabel = "microsoft/deberta-v3-base"

In [12]:
# The persuasion-technique labels of subtask 1. The outer list wraps them as a single
# "sample" so the structure can be handed to a multi-label binarizer's fit().
techniques = [[
    'Black-and-white Fallacy/Dictatorship',
    'Loaded Language',
    'Glittering generalities (Virtue)',
    'Thought-terminating cliché',
    'Whataboutism',
    'Slogans',
    'Causal Oversimplification',
    'Smears',
    'Name calling/Labeling',
    'Appeal to authority',
    'Exaggeration/Minimisation',
    'Repetition',
    'Flag-waving',
    'Appeal to fear/prejudice',
    'Reductio ad hitlerum',
    'Doubt',
    "Misrepresentation of Someone's Position (Straw Man)",
    'Obfuscation, Intentional vagueness, Confusion',
    'Bandwagon',
    'Presenting Irrelevant Data (Red Herring)',
]]
# Size of the classification head expected by the multi-label model.
num_labels = len(techniques[0])
num_labels

20

### Preprocess Multi-Labels

In [13]:
# Binarizer that maps between label-name lists and 0/1 indicator rows.
# It is fitted on the single "sample" holding all 20 labels. The full-label output
# printed further below comes back alphabetically ordered, not in `techniques` order.
# NOTE(review): presumably this column order matches the one used when the model was
# trained — TODO confirm.
mlb = MultiLabelBinarizer()
mlb.fit(techniques)

## Inference

In [14]:
# Tokenizer of the base checkpoint plus a padding collator built on top of it.
tokenizer_multilabel = AutoTokenizer.from_pretrained(checkpoint_multilabel)
data_collator_multilabel = DataCollatorWithPadding(tokenizer=tokenizer_multilabel)

def tokenize_multilabel(examples):
    """Tokenize the ``text`` field of a batch with truncation and padding enabled."""
    return tokenizer_multilabel(
        examples["text"],
        truncation=True,
        padding=True,
    )  # no explicit max_length (a max_length=512 argument was left commented out)



In [15]:
def get_preds_multilabel(binarizer,predicted_logits,threshold=0.5):
  """Convert raw logits into per-sample (label names, probabilities) pairs.

  Args:
    binarizer: fitted object whose ``inverse_transform`` turns 0/1 indicator rows
      into tuples of label names.
    predicted_logits: NumPy array of logits, one row per sample and one column per label.
    threshold: a label is selected when its sigmoid probability is strictly greater
      than this value.

  Returns:
    A list with one ``(names, probs)`` tuple per sample: ``names`` is the list of selected
    label names and ``probs`` the probabilities of the selected columns, in column order.
  """
  probabilities = torch.sigmoid(torch.from_numpy(predicted_logits))
  keep = probabilities > threshold
  preds = keep.int()

  # Probabilities of the selected labels only, collected row by row.
  kept_probs = [row[row_keep].tolist() for row, row_keep in zip(probabilities, keep)]

  label_names = binarizer.inverse_transform(preds)
  return [(list(names), row_probs) for names, row_probs in zip(label_names, kept_probs)]

In [16]:
project_name_multilabel="subtask1_multilabel_transformer_encoder_classification"

def return_trainer_multilabel(model_name,unique_labels):
  """Download a W&B model artifact and wrap the loaded classifier in a Trainer.

  Args:
    model_name: W&B artifact reference (name plus version alias) of the fine-tuned model.
    unique_labels: nested list whose first element holds all label names; its length
      sets the size of the classification head.

  Returns:
    A ``Trainer`` around the loaded multi-label model (moved to CUDA when available).
  """
  model_dir = wandb.Api().artifact(model_name).download()
  # NOTE(review): with ignore_mismatched_sizes=True a checkpoint whose head has a different
  # label count would presumably still load, with that head re-initialised — confirm the
  # artifact matches `unique_labels`.
  model = AutoModelForSequenceClassification.from_pretrained(
      model_dir,
      num_labels=len(unique_labels[0]),
      problem_type="multi_label_classification",
      ignore_mismatched_sizes=True,
  )

  trainer = Trainer(model=model)
  if torch.cuda.is_available():
    trainer.model = model.cuda()
  return trainer

In [17]:
# Validation samples used as inference input. The gold labels are dropped here; the
# scorer later reads them from the validation file itself.
prediction_set=dataset_val['val']
prediction_set=prediction_set.remove_columns(["labels"])

In [18]:
# W&B artifact (name:version) of the fine-tuned multi-label model to evaluate.
best_model_multilabel = "model-microsoft_deberta-v3-base-multilabel-memes_0.4threshold_5e-05learningRate:v5"
# Decision threshold applied to the sigmoid probabilities; same value as the
# "0.4threshold" part of the artifact name.
best_threshold_multilabel = 0.4

In [19]:
# Load the fine-tuned model, tokenize the validation texts and predict.
# `trainer` and `prediction_set_tokenized` are reused by a later cell.
trainer=return_trainer_multilabel(best_model_multilabel, techniques)
threshold=best_threshold_multilabel
prediction_set_tokenized=prediction_set.map(tokenize_multilabel, batched=True)

# `final_ds` maps each sample id to a (label names, probabilities) tuple holding only
# the labels whose probability exceeds `threshold`.
preds=get_preds_multilabel(mlb,trainer.predict(prediction_set_tokenized).predictions,threshold)
final_ds=dict(zip(prediction_set_tokenized["id"],preds))

[34m[1mwandb[0m: Downloading large artifact model-microsoft_deberta-v3-base-multilabel-memes_0.4threshold_5e-05learningRate:v5, 714.21MB. 8 files... 
[34m[1mwandb[0m:   8 of 8 files downloaded.  
Done. 0:0:1.0


In [20]:
def write_json(path,data,test=False):
  """Write predictions to ``path`` as pretty-printed UTF-8 JSON.

  Args:
    path: destination file; its parent directory must already exist.
    data: a dict or list is written as is; any other object must provide
      ``to_dict("records")`` (e.g. a pandas DataFrame) and is written as a list of records.
    test: unused; kept so existing calls that pass it keep working.
  """
  if not isinstance(data, (dict, list)):
    data = data.to_dict("records")

  # ensure_ascii=False emits raw non-ASCII characters (e.g. "cliché"), so the file
  # encoding is pinned to UTF-8 instead of depending on the platform default.
  with open(path, "w", encoding="utf-8") as output_file:
      json.dump(data, output_file, indent=2,ensure_ascii=False)

In [21]:
def hierarchical_scores(gold_label_path, pred_label_path):
    """Run the external scorer script and return its hierarchical metrics.

    Args:
        gold_label_path: path of the JSON file holding the gold labels.
        pred_label_path: path of the JSON file holding the predictions.

    Returns:
        dict with the float entries ``f1_hierarchical``, ``precision_hierarchical``
        and ``recall_hierarchical``.

    Raises:
        subprocess.CalledProcessError: the scorer exited with a non-zero status.
        ValueError: the scorer output does not contain parsable f1_h/prec_h/rec_h values.
    """
    scorer = folder_name + "subtask1/subtask_1_2a.py"
    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters reach the scorer verbatim and nothing is interpreted by a shell.
    command = [
        'python3', scorer,
        '--gold_file_path', str(gold_label_path),
        '--pred_file_path', str(pred_label_path),
    ]

    result = subprocess.run(command, check=True, stdout=subprocess.PIPE, text=True)
    output = result.stdout.strip()

    print(output)
    # The scorer reports "f1_h=<x>\tprec_h=<y>\trec_h=<z>". Looking the values up by key
    # keeps parsing correct even if extra text precedes or surrounds that line.
    reported = dict(re.findall(r'(\w+)=(\S+)', output))
    try:
        f1_h = float(reported['f1_h'])
        prec_h = float(reported['prec_h'])
        rec_h = float(reported['rec_h'])
    except (KeyError, ValueError) as err:
        raise ValueError(f"Could not parse scorer output: {output!r}") from err

    hierarchical_metrics = {"f1_hierarchical": f1_h, "precision_hierarchical": prec_h, "recall_hierarchical": rec_h}
    return hierarchical_metrics

Keep only the k highest-probability predictions per sample; k is chosen by the hierarchical F1 score on the validation set.

In [22]:
import tempfile  # cell-local: only this sweep needs a scratch directory

# The sweep evaluates k = 0 .. MAX_TOP_K.
MAX_TOP_K = 10

top_k=0
hierarchical_metrics = {"f1_hierarchical": 0, "precision_hierarchical": 0, "recall_hierarchical": 0}
final_df = dict()
for curr_top_k in range(MAX_TOP_K + 1):
    # For every sample keep at most `curr_top_k` labels, most probable first.
    curr_final_df=dict()
    for sample_id, (labels, probs) in final_ds.items():
      ranked = sorted(zip(labels, probs), key=lambda pair: -pair[1])
      kept=[]
      for label, _ in ranked:
        if len(kept)==curr_top_k:
          break
        if label not in kept:
          kept.append(label)
      # Samples without any predicted label naturally end up with an empty list.
      curr_final_df[str(sample_id)]=kept

    curr_pred_df=pd.DataFrame.from_dict({"id":list(curr_final_df.keys()),"labels":list(curr_final_df.values())})

    # Score from a private scratch directory that is removed afterwards; a pre-existing
    # "tmp/" folder in the working directory is never touched.
    with tempfile.TemporaryDirectory() as scratch_dir:
        curr_val_pred_file=os.path.join(scratch_dir, "val_pred.json")
        write_json(curr_val_pred_file,curr_pred_df)
        curr_hierarchical_metrics = hierarchical_scores(val_path,curr_val_pred_file)

    print(f"{curr_top_k}:")
    print(curr_hierarchical_metrics)
    # ">=" means that among equally scoring values the largest k wins.
    if curr_hierarchical_metrics['f1_hierarchical'] >= hierarchical_metrics['f1_hierarchical']:
        hierarchical_metrics = curr_hierarchical_metrics
        top_k = curr_top_k
        final_df = curr_final_df

print(f"\nBest result with top_k = {top_k}")

f1_h=0.00000	prec_h=1.00000	rec_h=0.00000
0:
{'f1_hierarchical': 0.0, 'precision_hierarchical': 1.0, 'recall_hierarchical': 0.0}
f1_h=0.56085	prec_h=0.73366	rec_h=0.45393
1:
{'f1_hierarchical': 0.56085, 'precision_hierarchical': 0.73366, 'recall_hierarchical': 0.45393}
f1_h=0.63454	prec_h=0.70127	rec_h=0.57940
2:
{'f1_hierarchical': 0.63454, 'precision_hierarchical': 0.70127, 'recall_hierarchical': 0.5794}
f1_h=0.64650	prec_h=0.67509	rec_h=0.62022
3:
{'f1_hierarchical': 0.6465, 'precision_hierarchical': 0.67509, 'recall_hierarchical': 0.62022}
f1_h=0.65222	prec_h=0.66850	rec_h=0.63670
4:
{'f1_hierarchical': 0.65222, 'precision_hierarchical': 0.6685, 'recall_hierarchical': 0.6367}
f1_h=0.65252	prec_h=0.66628	rec_h=0.63933
5:
{'f1_hierarchical': 0.65252, 'precision_hierarchical': 0.66628, 'recall_hierarchical': 0.63933}
f1_h=0.65291	prec_h=0.66628	rec_h=0.64007
6:
{'f1_hierarchical': 0.65291, 'precision_hierarchical': 0.66628, 'recall_hierarchical': 0.64007}
f1_h=0.65291	prec_h=0.66628	r

In [23]:
# Persist the best top-k predictions and score them again from the written file.
pred_df=pd.DataFrame.from_dict({"id":list(final_df.keys()),"labels":list(final_df.values())})

# NOTE: the inverse-hierarchy cell at the end of the notebook writes to this same
# "val_pred.json" path and therefore overwrites this file.
val_pred_file=summary_dir_path + "val_pred.json"
# exist_ok=True creates the directory when missing without a separate existence check.
os.makedirs(summary_dir_path, exist_ok=True)

write_json(val_pred_file,pred_df)

hierarchical_scores(val_path,val_pred_file)

f1_h=0.65291	prec_h=0.66628	rec_h=0.64007


{'f1_hierarchical': 0.65291,
 'precision_hierarchical': 0.66628,
 'recall_hierarchical': 0.64007}

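Inverse Hierarchy: when a technique's probability is too low for the leaf label itself but still above a second, lower threshold, predict its level-1 parent instead.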

In [24]:
# Ancestors of every leaf technique, one dict per hierarchy level. All three dicts
# share the same 20 keys; an empty list means the leaf has no ancestor on that level.

# Level 1: the top-level categories (Ethos / Pathos / Logos) a technique belongs to.
lvl1_parents={"Appeal to authority":["Ethos","Logos"],
              "Appeal to fear/prejudice":["Pathos","Logos"],
              "Bandwagon":["Ethos","Logos"],
              "Black-and-white Fallacy/Dictatorship":["Logos"],
              "Causal Oversimplification":["Logos"],
              "Doubt":["Ethos"],
              "Exaggeration/Minimisation":["Pathos"],
              "Flag-waving":["Pathos","Logos"],
              "Glittering generalities (Virtue)":["Ethos"],
              "Loaded Language":["Pathos"],
              "Misrepresentation of Someone's Position (Straw Man)":["Logos"],
              "Name calling/Labeling":["Ethos"],
              "Obfuscation, Intentional vagueness, Confusion":["Logos"],
              "Presenting Irrelevant Data (Red Herring)":["Logos"],
              "Reductio ad hitlerum":["Ethos"],
              "Repetition":["Logos"],
              "Slogans":["Logos"],
              "Smears":["Ethos"],
              "Thought-terminating cliché":["Logos"],
              "Whataboutism":["Ethos","Logos"]}

# Level 2: intermediate categories (Justification / Reasoning / Ad Hominem).
lvl2_parents={"Appeal to authority":["Justification"],
              "Appeal to fear/prejudice":["Justification"],
              "Bandwagon":["Justification"],
              "Black-and-white Fallacy/Dictatorship":["Reasoning"],
              "Causal Oversimplification":["Reasoning"],
              "Doubt":["Ad Hominem"],
              "Exaggeration/Minimisation":[],
              "Flag-waving":["Justification"],
              "Glittering generalities (Virtue)":[],
              "Loaded Language":[],
              "Misrepresentation of Someone's Position (Straw Man)":["Reasoning"],
              "Name calling/Labeling":["Ad Hominem"],
              "Obfuscation, Intentional vagueness, Confusion":[],
              "Presenting Irrelevant Data (Red Herring)":["Reasoning"],
              "Reductio ad hitlerum":["Ad Hominem"],
              "Repetition":[],
              "Slogans":["Justification"],
              "Smears":["Ad Hominem"],
              "Thought-terminating cliché":["Reasoning"],
              "Whataboutism":["Ad Hominem","Reasoning"]}

# Level 3: sub-categories of Reasoning (Simplification / Distraction).
# "Flag-waving" has no level-3 ancestor, like the other children of Justification;
# "Justification" itself is a level-2 name and must not appear in this dict.
lvl3_parents={"Appeal to authority":[],
              "Appeal to fear/prejudice":[],
              "Bandwagon":[],
              "Black-and-white Fallacy/Dictatorship":["Simplification"],
              "Causal Oversimplification":["Simplification"],
              "Doubt":[],
              "Exaggeration/Minimisation":[],
              "Flag-waving":[],
              "Glittering generalities (Virtue)":[],
              "Loaded Language":[],
              "Misrepresentation of Someone's Position (Straw Man)":["Distraction"],
              "Name calling/Labeling":[],
              "Obfuscation, Intentional vagueness, Confusion":[],
              "Presenting Irrelevant Data (Red Herring)":["Distraction"],
              "Reductio ad hitlerum":[],
              "Repetition":[],
              "Slogans":[],
              "Smears":[],
              "Thought-terminating cliché":["Simplification"],
              "Whataboutism":["Distraction"]}

In [25]:
# Re-run prediction with threshold 0.0 so that every label whose probability is
# greater than zero is returned together with that probability. This rebinds `preds`
# and `final_ds`, replacing the thresholded predictions of the earlier cell.
# NOTE(review): a probability of exactly 0.0 would still be dropped by the strict ">"
# comparison — presumably irrelevant in practice, confirm if full coverage matters.
preds = get_preds_multilabel(mlb,trainer.predict(prediction_set_tokenized).predictions,0.0)
final_ds=dict(zip(prediction_set_tokenized["id"],preds))
final_ds

{'63135': (['Appeal to authority',
   'Appeal to fear/prejudice',
   'Bandwagon',
   'Black-and-white Fallacy/Dictatorship',
   'Causal Oversimplification',
   'Doubt',
   'Exaggeration/Minimisation',
   'Flag-waving',
   'Glittering generalities (Virtue)',
   'Loaded Language',
   "Misrepresentation of Someone's Position (Straw Man)",
   'Name calling/Labeling',
   'Obfuscation, Intentional vagueness, Confusion',
   'Presenting Irrelevant Data (Red Herring)',
   'Reductio ad hitlerum',
   'Repetition',
   'Slogans',
   'Smears',
   'Thought-terminating cliché',
   'Whataboutism'],
  [0.000617805402725935,
   0.7372586131095886,
   1.1457597111075302e-06,
   0.00044590156176127493,
   0.003280543489381671,
   0.10997763276100159,
   0.00030680798226967454,
   2.4068249331321567e-05,
   1.1550584531505592e-05,
   0.0004459360206965357,
   0.0022957264445722103,
   7.491339601983782e-07,
   0.014898914843797684,
   0.005204477813094854,
   0.029740775004029274,
   0.0011845716508105397,


In [28]:
def grid_search_thresholds(final_ds, lvl1_parents, val_path, summary_dir_path,
                           leaf_thresholds=None, parent_thresholds=None):
    """Grid-search the leaf and parent thresholds of the inverse-hierarchy back-off.

    For every threshold pair each sample's labels are visited by descending probability:
    a label above ``leaf_threshold`` is predicted itself; otherwise, if it is still above
    ``parent_threshold``, its first level-1 parent is predicted instead. Every prediction
    is added at most once per sample. The pair with the highest hierarchical F1 wins;
    on ties the first pair evaluated is kept.

    Args:
        final_ds: mapping of sample id to a ``(label names, probabilities)`` tuple.
        lvl1_parents: mapping of leaf label to its list of level-1 parents; only the
            first entry of each list is used.
        val_path: path of the gold-label file handed to the scorer.
        summary_dir_path: directory (with trailing separator) for the temporary
            prediction file ``val_pred_tmp.json``; created when missing.
        leaf_thresholds: leaf thresholds to try; defaults to ``np.arange(0.1, 0.5, 0.05)``.
        parent_thresholds: parent thresholds to try; same default.

    Returns:
        ``(best_leaf_threshold, best_parent_threshold, best_score)``.
    """
    if leaf_thresholds is None:
        leaf_thresholds = np.arange(0.1, 0.5, 0.05)
    if parent_thresholds is None:
        parent_thresholds = np.arange(0.1, 0.5, 0.05)
    # Materialise both grids so the inner one can be iterated once per leaf threshold.
    leaf_thresholds = list(leaf_thresholds)
    parent_thresholds = list(parent_thresholds)

    os.makedirs(summary_dir_path, exist_ok=True)
    val_pred_file = summary_dir_path + "val_pred_tmp.json"

    best_score = -1
    best_leaf_threshold = None
    best_parent_threshold = None

    for leaf_threshold in leaf_thresholds:
        # Pairs with parent_threshold >= leaf_threshold never reach the parent branch
        # and therefore all score like the plain leaf threshold.
        for parent_threshold in parent_thresholds:
            curr_final_df = {}
            for sample_id, (labels, probs) in final_ds.items():
                selected = []
                for label, prob in sorted(zip(labels, probs), key=lambda pair: -pair[1]):
                    if prob > leaf_threshold:
                        candidate = label
                    elif prob > parent_threshold:
                        candidate = lvl1_parents[label][0]
                    else:
                        continue
                    # De-duplicate on what is actually appended, so one parent shared by
                    # several weak leaves is predicted only once.
                    if candidate not in selected:
                        selected.append(candidate)
                curr_final_df[str(sample_id)] = selected

            reversed_pred_df = pd.DataFrame.from_dict({"id": list(curr_final_df.keys()), "labels": list(curr_final_df.values())})
            write_json(val_pred_file, reversed_pred_df)

            score = hierarchical_scores(val_path, val_pred_file)['f1_hierarchical']
            if score > best_score:
                best_score = score
                best_leaf_threshold = leaf_threshold
                best_parent_threshold = parent_threshold

    return best_leaf_threshold, best_parent_threshold, best_score

best_leaf_threshold, best_parent_threshold, best_score = grid_search_thresholds(final_ds, lvl1_parents, val_path, summary_dir_path)
print(f"Best leaf threshold: {best_leaf_threshold}, Best parent threshold: {best_parent_threshold}, Best F1 Score: {best_score}")

f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.64837	prec_h=0.60134	rec_h=0.70337
f1_h=0.65515	prec_h=0.61892	rec_h=0.69588
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65533	prec_h=0.62193	rec_h=0.69251
f1_h=0.65624	prec_h=0.63105	rec_h=0.68352
f1_h=0.65643	prec_h=0.63430	rec_h=0.68015
f1_h=0.65494	prec_h=0.63480	rec_h=0.67640
f1_h=0.65494	prec_h=0.63480	rec_h=0.67640
f1_h=0.65494	prec_h=0.63480	rec_h=0.67640
f1_h=0.65494	prec_h=0.63480	rec_h=0.67640
f1_h=0.65494	prec_h=0.63480	rec_h=0.67640
f1_h=0.65494	prec_h=0.63480	rec_h=

In [27]:
# Apply the best thresholds found by the grid search to every validation sample.
# The result dict is built from scratch here so the cell does not depend on a dict
# left behind by an earlier cell.
curr_final_df = {}
for k,v in final_ds.items():
  labels, probs = v
  tmp=[]
  # Visit labels from most to least probable.
  for label, prob in sorted(zip(labels, probs), key=lambda x: -x[1]):
    if prob > best_leaf_threshold:
      candidate = label
    elif prob > best_parent_threshold:
      # Not confident enough for the leaf: back off to its first level-1 parent.
      candidate = lvl1_parents[label][0]
    else:
      continue
    # De-duplicate on what is actually appended, so a shared parent appears only once.
    if candidate not in tmp:
      tmp.append(candidate)
  curr_final_df[str(k)]=tmp

reversed_pred_df=pd.DataFrame.from_dict({"id":list(curr_final_df.keys()),"labels":list(curr_final_df.values())})

# Same file name as the top-k predictions written earlier: this overwrites them.
val_pred_file=summary_dir_path + "val_pred.json"
os.makedirs(summary_dir_path, exist_ok=True)

write_json(val_pred_file,reversed_pred_df)

hierarchical_scores(val_path,val_pred_file)

f1_h=0.66133	prec_h=0.67287	rec_h=0.65019


{'f1_hierarchical': 0.66133,
 'precision_hierarchical': 0.67287,
 'recall_hierarchical': 0.65019}