# Phrases

Process the phrases that appear in a relation.

In [1]:
import copy
import json
import pandas as pd

## 1. Read the data

In [4]:
DATA_FILE = "../data/femke.jsonl"

In [5]:
json_data = []
infile = open(DATA_FILE, "r")
for line in infile:
    json_data.append(json.loads(line))
infile.close()

In [9]:
len(json_data)

1867

## 2. Give an example of the data

In [6]:
json_data[0]

{'id': 6660,
 'data': 'Today I want to send a clear message to the people of this great country, of Greece. I know that many people feel without hope. Many are making extremely difficult sacrifices. And many people ask why they should do more. I understand those concerns. And I agree that some of the efforts seem unfair. But I ask people to recognise the other alternatives which will be much more difficult for Greece and will affect even more the most vulnerable in the Greek society. So this is why it is the right approach to ask Greece to reform, to increase its competitiveness to have a viable future, irrespective of the crisis. You, in Greece, with our support, need to rebuild your country, your structures, your administration, your economy to increase the competitiveness of Greece. And the best hope of a return to growth and job creation is inside the euro area. Staying in the euro is the best chance to avoid worse hardship and difficulties to the Greek people, namely for those in 

## 3. Count the frequency of the labels in each data item

In [7]:
def get_patterns(json_data):
    base_cases = []
    results = {}
    for data in json_data:
        key = [0, 0, 0]
        for label in data["label"]:
            if label[2] == 'Content_Concept_1':
                key[0] += 1
            elif label[2] == 'Content_Relation_Explanation':
                key[1] += 1
            elif label[2] == 'Content_Concept_2':
                key[2] += 1
            else:
                print("cannot happen")
        for i in range(0, len(key)):
            key[i] = str(key[i])
        results_key = " ".join(key)
        if not results_key in results:
            results[results_key] = 1
        else:
            results[results_key] += 1
        if results_key == "1 1 1":
             base_cases.append(data)
    return [results, base_cases]

results, base_cases = get_patterns(json_data)
print(results)

{'1 1 1': 1400, '2 1 1': 230, '1 1 2': 136, '2 1 2': 24, '1 2 1': 2, '1 0 0': 3, '1 1 0': 5, '0 1 1': 5, '3 1 1': 30, '1 1 3': 13, '2 1 0': 3, '4 1 1': 4, '0 1 3': 1, '3 1 2': 3, '2 1 3': 3, '5 1 1': 1, '1 1 4': 1, '4 1 2': 3}


Some data items have more than two content concepts because of split phrases.

## 4. Convert base cases (1,1,1) to table

In [8]:
len(base_cases)

1400

In [10]:
base_cases[0]

{'id': 6660,
 'data': 'Today I want to send a clear message to the people of this great country, of Greece. I know that many people feel without hope. Many are making extremely difficult sacrifices. And many people ask why they should do more. I understand those concerns. And I agree that some of the efforts seem unfair. But I ask people to recognise the other alternatives which will be much more difficult for Greece and will affect even more the most vulnerable in the Greek society. So this is why it is the right approach to ask Greece to reform, to increase its competitiveness to have a viable future, irrespective of the crisis. You, in Greece, with our support, need to rebuild your country, your structures, your administration, your economy to increase the competitiveness of Greece. And the best hope of a return to growth and job creation is inside the euro area. Staying in the euro is the best chance to avoid worse hardship and difficulties to the Greek people, namely for those in 

In [11]:
table = []
for data_in in base_cases:
    data_out = [data_in["data"], "", "", ""]
    for label_data in data_in["label"]:
        data_out_id = -1
        if label_data[2] == 'Content_Concept_1':
            data_out_id = 1
        elif label_data[2] == 'Content_Relation_Explanation':
            data_out_id = 2
        elif label_data[2] == 'Content_Concept_2':
            data_out_id = 3
        else:
            print(f"unexpected label data: {label_data}")
        if data_out[data_out_id] != "":
            print("duplicate data in label_data: {data_in['label']}")
        data_out[data_out_id] = data_in["data"][label_data[0]:label_data[1]]
    table.append(data_out)

In [12]:
table[0]

['Today I want to send a clear message to the people of this great country, of Greece. I know that many people feel without hope. Many are making extremely difficult sacrifices. And many people ask why they should do more. I understand those concerns. And I agree that some of the efforts seem unfair. But I ask people to recognise the other alternatives which will be much more difficult for Greece and will affect even more the most vulnerable in the Greek society. So this is why it is the right approach to ask Greece to reform, to increase its competitiveness to have a viable future, irrespective of the crisis. You, in Greece, with our support, need to rebuild your country, your structures, your administration, your economy to increase the competitiveness of Greece. And the best hope of a return to growth and job creation is inside the euro area. Staying in the euro is the best chance to avoid worse hardship and difficulties to the Greek people, namely for those in a more vulnerable pos

In [13]:
pd.DataFrame(table, columns=["Paragraph", "Content_Concept_1", "Content_Relation_Explanation", "Content_Concept_2"])

Unnamed: 0,Paragraph,Content_Concept_1,Content_Relation_Explanation,Content_Concept_2
0,Today I want to send a clear message to the pe...,"You, in Greece, with our support, need to rebu...",to,increase the competitiveness of Greece
1,Today I want to send a clear message to the pe...,"You, in Greece, with our support, need to rebu...",And the best hope of a,return to growth
2,"To conclude, let me say a few words on the eur...","We have taken important, fundamental decisions",to safeguard,the stability of the euro area
3,"To conclude, let me say a few words on the eur...",We need sustained efforts and determination,As we said there will not be,magic solutions
4,Giving to the ECB the ultimate responsibility ...,confidence between the banks,and in this way,increase the financial stability in the euro area
...,...,...,...,...
1395,But today I want to focus on our economic prio...,cut business taxes,You've got to,succeed
1396,But today I want to focus on our economic prio...,tackle the bloat in welfare,You've got to,succeed
1397,But today I want to focus on our economic prio...,make sure your schools and your universities a...,and crucially you've got to,succeed
1398,Now yesterday I gave a speech setting out the ...,When you have a single currency,you move inexorably towards,a banking union


## 5. Combine all data of duplicate paragraps

In [14]:
combined_data = {}
for data in json_data:
    key = f"{data['source_id']} {data['speech_id']} {data['paragraph_id']}"
    if key not in combined_data:
        combined_data[key] = copy.deepcopy(data)
    else:
        if len(data["data"]) != len(combined_data[key]["data"]):
            print("cannot happen")
        for label_data in data["label"]:
            if label_data not in combined_data[key]["label"]:
                combined_data[key]["label"].append(label_data)
                if combined_data[key]["label"][-1][1] > len(combined_data[key]["data"]):
                    combined_data[key]["label"][-1][1] = len(combined_data[key]["data"])

for key in combined_data:
    for label_data in combined_data[key]["label"]:
        label_data.append(combined_data[key]["data"][label_data[0]:label_data[1]])

In [15]:
len(combined_data)

526

In [16]:
results, base_cases = get_patterns(list(combined_data.values()))
print(results)

{'5 2 4': 3, '2 2 2': 52, '1 1 1': 95, '3 2 3': 10, '3 2 2': 24, '5 3 3': 3, '5 2 2': 6, '5 4 6': 5, '4 3 3': 5, '1 2 2': 5, '8 2 4': 1, '2 3 3': 1, '8 5 8': 1, '2 1 1': 30, '1 1 2': 22, '3 3 3': 15, '2 1 2': 8, '9 5 6': 1, '3 3 4': 8, '3 1 2': 5, '2 2 6': 4, '4 2 2': 11, '3 1 1': 12, '4 2 3': 8, '8 3 4': 2, '6 4 4': 3, '2 2 4': 5, '4 1 1': 5, '5 1 1': 4, '14 6 7': 1, '13 4 4': 1, '3 4 4': 2, '1 1 3': 7, '6 2 2': 5, '7 5 5': 2, '4 3 4': 7, '7 4 4': 2, '5 1 2': 1, '2 3 4': 2, '6 4 5': 1, '4 4 4': 5, '3 5 8': 1, '1 3 4': 1, '2 2 3': 9, '6 5 5': 2, '8 5 5': 1, '4 2 1': 2, '7 1 2': 1, '1 1 4': 3, '3 2 5': 2, '6 3 4': 2, '8 3 9': 2, '3 3 5': 1, '6 4 6': 4, '4 2 4': 5, '4 4 5': 1, '10 9 11': 1, '15 11 16': 1, '11 5 6': 1, '1 1 5': 3, '10 3 4': 1, '1 1 8': 1, '8 3 11': 1, '1 2 3': 2, '14 5 9': 1, '3 2 4': 2, '7 5 6': 1, '4 5 6': 1, '7 2 2': 2, '9 4 6': 2, '6 1 1': 2, '4 3 2': 1, '9 4 11': 1, '4 1 3': 3, '2 1 12': 1, '3 1 4': 2, '11 4 6': 1, '3 5 7': 1, '6 3 5': 1, '2 1 5': 1, '4 4 7': 1, '2 2

In [17]:
combined_data[list(combined_data.keys())[0]]

{'id': 6660,
 'data': 'Today I want to send a clear message to the people of this great country, of Greece. I know that many people feel without hope. Many are making extremely difficult sacrifices. And many people ask why they should do more. I understand those concerns. And I agree that some of the efforts seem unfair. But I ask people to recognise the other alternatives which will be much more difficult for Greece and will affect even more the most vulnerable in the Greek society. So this is why it is the right approach to ask Greece to reform, to increase its competitiveness to have a viable future, irrespective of the crisis. You, in Greece, with our support, need to rebuild your country, your structures, your administration, your economy to increase the competitiveness of Greece. And the best hope of a return to growth and job creation is inside the euro area. Staying in the euro is the best chance to avoid worse hardship and difficulties to the Greek people, namely for those in 

## 6. Make character labels

Several tokens have more than one label. We use the following labeling scheme:

* 1: Content_Concept_1
* 2: Content_Concept_2
* 3: both Content_Concept_1 and Content_Concept_2
* E: Content_Relation_Explanation
* F: both Content_Relation_Explanation and Content_Concept_1
* G: both Content_Relation_Explanation and Content_Concept_2
* \*: all three labels: Content_Relation_Explanation and Content_Concept_1 and Content_Concept_2
* .: no label

In [18]:
class Label_Clash:
    def __init__(self):
        self.data = {}
        
    def add(self, key):
        if key not in self.data:
            self.data[key] = 1
        else:
            self.data[key] += 1
            
    def print(self):
        print(self.data)

In [19]:
label_clash = Label_Clash()
for key in combined_data:
    combined_data[key]['labels'] = len(combined_data[key]["data"])*["."]
    for label in combined_data[key]["label"]:
        for i in range(label[0], label[1]):
            if label[2] == "Content_Concept_1":
                if combined_data[key]['labels'][i] != ".":
                    combined_data[key]['labels'][i] = "1"
                elif combined_data[key]['labels'][i] != "1":
                    combined_data[key]['labels'][i] = "1"
                elif combined_data[key]['labels'][i] != "E":
                    combined_data[key]['labels'][i] = "F"
                elif combined_data[key]['labels'][i] != "2":
                    combined_data[key]['labels'][i] = "3"
                elif combined_data[key]['labels'][i] != "3":
                    combined_data[key]['labels'][i] = "3"
                elif combined_data[key]['labels'][i] != "F":
                    combined_data[key]['labels'][i] = "F"
                elif combined_data[key]['labels'][i] != "G":
                    combined_data[key]['labels'][i] = "*"
                else:
                    print("cannot happen")
            elif label[2] == "Content_Relation_Explanation":
                if combined_data[key]['labels'][i] != ".":
                    combined_data[key]['labels'][i] = "E"
                elif combined_data[key]['labels'][i] != "1":
                    combined_data[key]['labels'][i] = "F"
                elif combined_data[key]['labels'][i] != "E":
                    combined_data[key]['labels'][i] = "E"
                elif combined_data[key]['labels'][i] != "2":
                    combined_data[key]['labels'][i] = "G"
                elif combined_data[key]['labels'][i] != "3":
                    combined_data[key]['labels'][i] = "*"
                elif combined_data[key]['labels'][i] != "F":
                    combined_data[key]['labels'][i] = "F"
                elif combined_data[key]['labels'][i] != "G":
                    combined_data[key]['labels'][i] = "G"
                else:
                    print("cannot happen")
            elif label[2] == "Content_Concept_2":
                if combined_data[key]['labels'][i] != ".":
                    combined_data[key]['labels'][i] = "2"
                elif combined_data[key]['labels'][i] != "1":
                    combined_data[key]['labels'][i] = "3"
                elif combined_data[key]['labels'][i] != "E":
                    combined_data[key]['labels'][i] = "G"
                elif combined_data[key]['labels'][i] != "2":
                    combined_data[key]['labels'][i] = "2"
                elif combined_data[key]['labels'][i] != "3":
                    combined_data[key]['labels'][i] = "3"
                elif combined_data[key]['labels'][i] != "F":
                    combined_data[key]['labels'][i] = "*"
                elif combined_data[key]['labels'][i] != "G":
                    combined_data[key]['labels'][i] = "G"
                else:
                    print("cannot happen")
            else:
                print(f"unknown label: {label}")

In [20]:
("".join(combined_data[list(combined_data.keys())[0]]["labels"]))[:933]

'........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................11111111111111111111111111111111111111111111111111111111111111..111111111111111..1111111111111111111..111111111111.FF.33333333333333333333333333333333333333..FFFFFFFFFFFFFFFFFFFFFF.2222222223333333.....333333333333.......................................................................................................'

## 7. Machine learning

Instructions copied from `filterbubble/transformers/test.ipynb` section `5. BERT Fine-Tuning Tutorial with PyTorch`

In [67]:
from transformers import RobertaTokenizer, RobertaForTokenClassification, AdamW, BertTokenizer, BertForSequenceClassification
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import confusion_matrix
import torch
import random
import numpy as np
import time
import datetime

In [63]:
def get_prediction_labels(predictions):
    prediction_labels = []
    for i in range(len(predictions)):
        prediction_labels.extend(np.argmax(predictions[i], axis=1).flatten())
    return prediction_labels

In [59]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [50]:
def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [31]:
def remove_initial_words(sentence, n):
    words = sentence.strip().split()
    return " ".join(words[int(n):])

In [61]:
def collapse_labels(true_labels, predictions, sentence_sources, label_values):
    prediction_labels = get_prediction_labels(predictions)
    true_labels_flattened = []
    for array in true_labels:
         true_labels_flattened.extend(array)
    prediction_labels_collapsed = []
    true_labels_collapsed = []
    for i in range(0, len(sentence_sources)):
        if i == 0 or sentence_sources[i] != sentence_sources[i-1]:
            prediction_labels_collapsed.append(prediction_labels[i])
            true_labels_collapsed.append(true_labels_flattened[i])
        elif prediction_labels[i] != label_values['ANDERS']:
            prediction_labels_collapsed[-1] = prediction_labels[i]
    return [ true_labels_collapsed, prediction_labels_collapsed ]

In [35]:
def make_input_ids(sentences, file_labels, keep_short_only=False):
    input_ids, attention_masks, expanded_labels, sentence_sources = [], [], [], []
    max_length = 64
    for i in range(0, len(sentences)):
        sentence = sentences[i]
        while len(sentence) > 0:
            encoded_dict = tokenizer.encode_plus(
                                sentence,
                                max_length = max_length,
                                truncation=True,
                                padding='max_length',
                                add_special_tokens = True,
                                return_attention_mask = True,
                                return_tensors = 'pt',
                           )
            if keep_short_only and encoded_dict['attention_mask'][0][max_length-1] != 0:
                break
            input_ids.append(encoded_dict['input_ids'])
            attention_masks.append(encoded_dict['attention_mask'])
            expanded_labels.append(file_labels[i])
            sentence_sources.append(i)
            sentence = remove_initial_words(sentence, int(max_length/2))
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(expanded_labels)
    return [input_ids, attention_masks, labels, sentence_sources]

In [23]:
def make_data(fold, sentences):
    validation_start = int(0.1 * fold * len(sentences))
    validation_end = int(0.1 * (fold + 1) * len(sentences))
    input_ids, attention_masks, labels, sentence_sources_validation = make_input_ids(sentences[validation_start:validation_end], 
                                                                                     file_labels[validation_start:validation_end], 
                                                                                     keep_short_only=False)
    val_dataset = TensorDataset(input_ids, attention_masks, labels)
    if fold == 0:
        training_sentences = []
        training_file_labels = []
    else:
        training_sentences = sentences[:validation_start]
        training_file_labels = file_labels[:validation_start]
    if fold < 9:
        training_sentences.extend(sentences[validation_end:])
        training_file_labels.extend(file_labels[validation_end:])
    input_ids, attention_masks, labels, _ = make_input_ids(training_sentences, training_file_labels, keep_short_only=True)
    train_dataset = TensorDataset(input_ids, attention_masks, labels)
    return [ train_dataset, val_dataset, sentence_sources_validation ]

In [29]:
def make_experiment(fold, sentences):
    train_dataset, val_dataset, sentence_sources_validation = make_data(fold, sentences)
    print(f"fold: {fold}; train size: {len(train_dataset)}; validation size: {len(val_dataset)}")
    batch_size = 32
    train_dataloader = DataLoader(
                train_dataset,
                sampler = RandomSampler(train_dataset),
                batch_size = batch_size
            )
    validation_dataloader = DataLoader(
                val_dataset,
                sampler = SequentialSampler(val_dataset),
                batch_size = batch_size
            )
    optimizer = AdamW(model.parameters(),
                      lr = 2e-5,
                      eps = 1e-8
                    )
    epochs = 2
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps = 0,
                                                num_training_steps = total_steps)
    return [ train_dataset, val_dataset, train_dataloader, validation_dataloader, batch_size, epochs, total_steps, optimizer, scheduler, sentence_sources_validation ]

In [48]:
def train_model(model, train_dataloader, device, optimizer, scheduler):
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()
    total_train_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):
        if step % 10 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}    Loss: {:.3f}.'.format(step, len(train_dataloader), elapsed, total_train_loss/step))
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].long().to(device)
        model.zero_grad()        
        model_output = model(b_input_ids, 
                             token_type_ids=None, 
                             attention_mask=b_input_mask, 
                             labels=b_labels)
        loss = model_output["loss"]
        logits = model_output["logits"]
        total_train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
    avg_train_loss = total_train_loss / len(train_dataloader)            
    training_time = format_time(time.time() - t0)
    print("")
    print("  Average training loss: {0:.3f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))
    return avg_train_loss, training_time

In [55]:
def validate_model(model, validation_dataloader, device, sentence_sources_validation, label_values):
    print("")
    print("Running Validation...")
    t0 = time.time()
    model.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0
    logits_total, label_ids_total = [], []
    for batch in validation_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        with torch.no_grad():        
            model_output = model(b_input_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_input_mask,
                                   labels=b_labels)
        loss = model_output["loss"]
        logits = model_output["logits"]
        total_eval_loss += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        total_eval_accuracy += flat_accuracy(logits, label_ids)
        logits_total.append(logits)
        label_ids_total.append(label_ids)
    true_labels_collapsed, prediction_labels_collapsed = collapse_labels(label_ids_total, logits_total, sentence_sources_validation, label_values)
    print(confusion_matrix(true_labels_collapsed, prediction_labels_collapsed))
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.3f}".format(avg_val_accuracy))
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    validation_time = format_time(time.time() - t0)
    print("  Validation Loss: {0:.3f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))
    return [ avg_val_accuracy, avg_val_loss, validation_time, true_labels_collapsed, prediction_labels_collapsed ]

In [None]:
tokenizer = RobertaTokenizer.from_pretrained("pdelobelle/robbert-v2-dutch-base")

In [57]:
num_labels = 2
sentences = [ 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten' ]
file_labels = [ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 ]
label_values = { "odd": 1, "even": 0 }

In [68]:
true_labels = []
predicted_labels = []
for fold in range(0, 1):
    model = RobertaForTokenClassification.from_pretrained("pdelobelle/robbert-v2-dutch-base", num_labels = num_labels)
    train_dataset, val_dataset, train_dataloader, validation_dataloader, batch_size, epochs, total_steps, optimizer, scheduler, sentence_sources_validation = \
        make_experiment(fold, sentences)
    seed_val = 42
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    if torch.cuda.is_available():
         torch.cuda.manual_seed_all(seed_val)
    training_stats = []
    total_t0 = time.time()

    device = torch.device("cpu")
    print(f"======== Fold {fold:2d} ============")
    for epoch_i in range(0, epochs):
        avg_train_loss, training_time = train_model(model, train_dataloader, device, optimizer, scheduler)
        avg_val_accuracy, avg_val_loss, validation_time, true_labels_collapsed, prediction_labels_collapsed = \
            validate_model(model, validation_dataloader, device, sentence_sources_validation, label_values)
        training_stats.append(
            {
                'epoch': epoch_i + 1,
                'Training Loss': avg_train_loss,
                'Valid. Loss': avg_val_loss,
                'Valid. Accur.': avg_val_accuracy,
                'Training Time': training_time,
                'Validation Time': validation_time
            }
        )
    true_labels.extend(true_labels_collapsed)
    predicted_labels.extend(prediction_labels_collapsed)
    print("")
    print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

Some weights of the model checkpoint at pdelobelle/robbert-v2-dutch-base were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at pdelobelle/robbert-v2-dutch-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

fold: 0; train size: 9; validation size: 1
Training...


RuntimeError: The size of tensor a (576) must match the size of tensor b (9) at non-singleton dimension 0