<a href="https://colab.research.google.com/github/hhhuang/NetKu/blob/main/stance_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv
from collections import Counter

fin = open("/content/drive/MyDrive/Colab Notebooks/stance-detection/StanceDataset/train.csv")
reader = csv.DictReader(fin)
labels = Counter()
targets = Counter()
for line in reader:
    labels[line['Stance']] += 1
    targets[line['Target']] += 1
print(labels)
print(targets)

Counter({'AGAINST': 1395, 'NONE': 766, 'FAVOR': 753})
Counter({'Hillary Clinton': 689, 'Feminist Movement': 664, 'Legalization of Abortion': 653, 'Atheism': 513, 'Climate Change is a Real Concern': 395})


In [1]:
!pip install transformers
!pip install datasets
!pip install torch
!pip install tqdm
!pip install evaluate
!pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 4.7 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 93.5 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 83.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.7.1-py3-none-any.whl (451 kB)
[K     |████████████████████████████████| 451 kB 4.9 MB/s

In [2]:
import csv

from collections import defaultdict, Counter

import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoConfig
from transformers import TrainingArguments, Trainer
from transformers import get_scheduler

import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch import nn

from datasets import Dataset
import evaluate
import pandas as pd
import numpy as np

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class_weights = torch.tensor([2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.5, 0.5]).to(device)
main_class_weights = torch.tensor([2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0]).to(device)
adv_class_weights = torch.tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0]).to(device)

loss_fct = torch.nn.BCEWithLogitsLoss(weight=class_weights)
main_loss_fct = torch.nn.BCEWithLogitsLoss(weight=main_class_weights)
adv_loss_fct = torch.nn.BCEWithLogitsLoss(weight=adv_class_weights)

class MultilabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), 
                        labels.float().view(-1, self.model.config.num_labels))
        return (loss, outputs) if return_outputs else loss

class MultilabelTrainer2(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        main_logits, main_labels = [], []
        adv_logits, adv_labels = [], []
        for logit, label in zip(logits, labels):
            if label[0] == 0.5:
                adv_logits.append(logit)
                adv_labels.append(label)
            elif label[-1] == 0.5:
                main_logits.append(logit)
                main_labels.append(label)
            else:
                raise("invalid value: ", label)
        loss = torch.tensor(0.0).to(torch.device("cuda"))
        if main_labels:
            main_logits = torch.cat(main_logits).to(torch.device("cuda")) 
            main_labels = torch.cat(main_labels).to(torch.device("cuda")) 
            loss += main_loss_fct(main_logits.view(-1, self.model.config.num_labels), 
                                  main_labels.float().view(-1, self.model.config.num_labels)) * 0.7
        if adv_labels:
            adv_logits = torch.cat(adv_logits).to(torch.device("cuda")) 
            adv_labels = torch.cat(adv_labels).to(torch.device("cuda")) 
            loss += adv_loss_fct(adv_logits.view(-1, self.model.config.num_labels), 
                                 adv_labels.float().view(-1, self.model.config.num_labels)) * 0.3             
        return (loss, outputs) if return_outputs else loss

In [4]:
from transformers import TextClassificationPipeline

class StanceDetectionModel:
    def __init__(self):
        self.test_data = "/content/drive/MyDrive/Colab Notebooks/stance-detection/StanceDataset/test.csv"
        self.training_data = "/content/drive/MyDrive/Colab Notebooks/stance-detection/StanceDataset/train.csv"
        self.model_path = "trained_model"
        self.stance_label_mapping = {'AGAINST': 0, 'NONE': 1, 'FAVOR': 2}
        self.inverted_stance_label_mapping = {v: k for k, v in self.stance_label_mapping.items()}
        self.sentiment_label_mapping = {'neg': 0, 'other': 1, 'pos': 2}
        self.inverted_sentiment_label_mapping = {v: k for k, v in self.sentiment_label_mapping.items()}
        #topic_label_mapping = {'Hillary Clinton': 0, 'Feminist Movement': 1, 'Legalization of Abortion': 2, 'Atheism': 3, 'Climate Change is a Real Concern': 4, 'Donald Trump': 5}


    def prompt_revise(self, topic):
        return "I agree " + topic

    def prompt_revise_negative(self, topic):
        return "I disagree " + topic

    def create_model(self, load_trained=False):
        pretrained_model = "bert-base-uncased"
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
        if load_trained:
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model, num_labels=8, problem_type="multi_label_classification")
        
    def load_tweet_data(self, filename, target=None, augmentation=False, adversarial=False):
        """ Data source: https://alt.qcri.org/semeval2016/task6/index.php?id=data-and-tools """
        import csv
        reader = csv.DictReader(open(filename))    # The original released dataset was encoded in ISO-8859-1. The attached version has been changed to utf8 for universality. 
        cnt = 0
        for row in reader:
            cnt += 1
            if adversarial:
                labels = ([0.5] * 6) + ([0.0] * 2)
                labels[6 + (cnt % 2)] = 1.0 
                yield {'labels': labels, 'text': row['Tweet'], 'topic': self.prompt_revise(row['Target'])}                

            if target is not None:
                if isinstance(target, str) and row['Target'] != target:
                    continue
                elif isinstance(target, set) and row['Target'] not in target:
                    continue
            labels = ([0.0] * 6) + ([0.5] * 2)
            labels[self.stance_label_mapping[row['Stance']]] = 1.0
            labels[self.sentiment_label_mapping[row['Sentiment']] + 3] = 1.0
            yield {'labels': labels, 'text': row['Tweet'], 'topic': self.prompt_revise(row['Target'])}            
            if augmentation:
                if row['Stance'] == 'AGAINST':
                    labels[stance_label_mapping["AGAINST"]] = 0.0
                    labels[stance_label_mapping["FAVOR"]] = 1.0
                elif row['Stance'] == 'FAVOR':
                    labels[stance_label_mapping["AGAINST"]] = 1.0
                    labels[stance_label_mapping["FAVOR"]] = 0.0
                yield {'labels': labels, 'text': row['Tweet'], 'topic': self.prompt_revise_negative(row['Target'])}
            #yield {'label': label_mapping[row['Stance']], 'text': row['Tweet'], 'topic': row['Target']}
        
    def tokenize_dataset(self, data):
        return self.tokenizer(data['topic'], data['text'], return_tensors='pt', truncation=True, padding='max_length', max_length=280)

    def compute_metrics(self, eval_pred):
        logits, labels = eval_pred
        predictions, references = [], []
        for i in range(len(logits)):
            predictions.append(np.argmax(logits[i][:3]))
            references.append(np.argmax(labels[i][:3]))
        #predictions = np.argmax(logits, axis=-1)
        # Following the evaluation of SemEval-2016, only the F-scores of Favour (2) and Against (0) are measured. 
        return self.metric.compute(predictions=predictions, references=references, labels=[0, 2], average='macro')

    def zeroshot_evaluation(self):
        stances = ['Hillary Clinton', 'Feminist Movement', 'Legalization of Abortion', 'Atheism', 'Climate Change is a Real Concern', 'Donald Trump']
        for tgt in stances:
            self.create_model()
            load_training_data = lambda: self.load_tweet_data(self.training_data, set(stances) - set([tgt]), adversarial=True)
            load_test_data = lambda: self.load_tweet_data(self.test_data, tgt)
            training_data = Dataset.from_generator(load_training_data).shuffle(seed=42).map(self.tokenize_dataset, batched=True)
            test_data = Dataset.from_generator(load_test_data).map(self.tokenize_dataset, batched=True)
            
            training_args = TrainingArguments(
                output_dir="test_trainer", 
                evaluation_strategy="epoch", 
                num_train_epochs=3,
                per_device_train_batch_size=24,
                per_device_eval_batch_size=24,
            )

            self.metric = evaluate.load("f1")
            print(tgt)
            trainer = MultilabelTrainer(
                model=self.model,
                args=training_args,
                train_dataset=training_data,
                eval_dataset=test_data,
                compute_metrics=self.compute_metrics,
            )
            trainer.train()

    def train(self):
        stances = ['Hillary Clinton', 'Feminist Movement', 'Legalization of Abortion', 'Atheism', 'Climate Change is a Real Concern', 'Donald Trump']
        self.create_model()
        load_training_data = lambda: self.load_tweet_data(self.training_data, adversarial=True)
        load_test_data = lambda: self.load_tweet_data(self.test_data)
        training_data = Dataset.from_generator(load_training_data).shuffle(seed=42).map(self.tokenize_dataset, batched=True)
        test_data = Dataset.from_generator(load_test_data).map(self.tokenize_dataset, batched=True)
            
        training_args = TrainingArguments(
            output_dir="test_trainer", 
            evaluation_strategy="epoch", 
            num_train_epochs=10,
            per_device_train_batch_size=24,
            per_device_eval_batch_size=24,
        )

        self.metric = evaluate.load("f1")
        trainer = MultilabelTrainer(
            model=self.model,
            args=training_args,
            train_dataset=training_data,
            eval_dataset=test_data,
            compute_metrics=self.compute_metrics,
        )
        trainer.train()
        trainer.save_model(self.model_path)

    def predict(self, instances):
        """ The input is an array of dictinaries. Each dictionary is in the format of 
            {
                "text": "I like Hillary Clinton. We are personal friends. My support for her candidacy is based on the fact that she is brilliantly qualified for the job.",
                "topic": "Hillary Clinton"
            }
            The output is an array of predictions for all the input instances. 
        """
        pipe = TextClassificationPipeline(model=self.model, tokenizer=self.tokenizer, return_all_scores=True)
        predictions = []
        for results in pipe([ {"text": instance["text"], "text_pair": self.prompt_revise(instance["topic"])} for instance in instances]):
            predictions.append(max(
                [(result['score'], self.inverted_stance_label_mapping[int(result['label'][-1])]) for result in results if result['label'] in ['LABEL_0', 'LABEL_1', 'LABEL_2']])[1])
        return predictions

In [None]:
model = StanceDetectionModel()
#model.zeroshot_evaluation()
model.train()

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading and preparing dataset generator/default to /root/.cache/huggingface/datasets/generator/default-0e70884986d02733/0.0.0...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset generator downloaded and prepared to /root/.cache/huggingface/datasets/generator/default-0e70884986d02733/0.0.0. Subsequent calls will reuse this data.


  0%|          | 0/6 [00:00<?, ?ba/s]



Downloading and preparing dataset generator/default to /root/.cache/huggingface/datasets/generator/default-e33c771de1d7015a/0.0.0...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset generator downloaded and prepared to /root/.cache/huggingface/datasets/generator/default-e33c771de1d7015a/0.0.0. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?ba/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, topic. If text, topic are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 5828
  Num Epochs = 10
  Instantaneous batch size per device = 24
  Total train batch size (w. parallel, distributed & accumulation) = 24
  Gradient Accumulation steps = 1
  Total optimization steps = 2430
  Number of trainable parameters = 109488392


Epoch,Training Loss,Validation Loss,F1
1,No log,0.60529,0.570535


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text, topic. If text, topic are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1956
  Batch size = 24


| Topic                     |  F-score |
|---------------------------|----------|
| Hiliary Clinton           |  0.5373  |
| Feminist Movement         |  0.4225  |
| Legalization of Abortion  |  0.4685  |
| Atheism                   |  0.4488  |
| Climate is a Real Concern |  0.2559  |
| Supervised                |  0.6223  |

In [None]:
model = StanceDetectionModel()
model.create_model(True)
model.predict([
    {"text": "I trust Hillary", "topic": "Hillary Clinton"}, 
    {"text": "I don't like Trump", "topic": "Hillary Clinton"},
    {"text": "I like Hillary Clinton. We are personal friends. My support for her candidacy is based on the fact that she is brilliantly qualified for the job.", "topic": "Hillary Clinton"}
    ])

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/vocab.txt
loading file to

[{'label': 'LABEL_0', 'score': 0.507077693939209}, {'label': 'LABEL_1', 'score': 0.1692151427268982}, {'label': 'LABEL_2', 'score': 0.5552554130554199}, {'label': 'LABEL_3', 'score': 0.33545705676078796}, {'label': 'LABEL_4', 'score': 0.2688095271587372}, {'label': 'LABEL_5', 'score': 0.5860686302185059}, {'label': 'LABEL_6', 'score': 0.49196144938468933}, {'label': 'LABEL_7', 'score': 0.5364270806312561}]
[{'label': 'LABEL_0', 'score': 0.3769098222255707}, {'label': 'LABEL_1', 'score': 0.6410229206085205}, {'label': 'LABEL_2', 'score': 0.2685477137565613}, {'label': 'LABEL_3', 'score': 0.673368513584137}, {'label': 'LABEL_4', 'score': 0.3148013651371002}, {'label': 'LABEL_5', 'score': 0.3180817663669586}, {'label': 'LABEL_6', 'score': 0.5091047286987305}, {'label': 'LABEL_7', 'score': 0.4833521842956543}]
[{'label': 'LABEL_0', 'score': 0.4163896441459656}, {'label': 'LABEL_1', 'score': 0.1704079806804657}, {'label': 'LABEL_2', 'score': 0.6422132849693298}, {'label': 'LABEL_3', 'score'



['FAVOR', 'NONE', 'FAVOR']