# Preprocessing

In [1]:
from collections import Counter, defaultdict
from typing import Dict

from allennlp.data.fields import TextField, LabelField, SequenceLabelField
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary

import torch
from allennlp.data import Token, Vocabulary, TokenIndexer, Tokenizer
from allennlp.data.fields import ListField, TextField, Field
from allennlp.data.token_indexers import (
    SingleIdTokenIndexer,
    TokenCharactersIndexer,
    ELMoTokenCharactersIndexer,
    PretrainedTransformerIndexer,
    PretrainedTransformerMismatchedIndexer,
)
from allennlp.data.tokenizers import (
    CharacterTokenizer,
    PretrainedTransformerTokenizer,
    SpacyTokenizer,
    WhitespaceTokenizer,
)
from allennlp.modules.seq2vec_encoders import CnnEncoder
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import (
    Embedding,
    TokenCharactersEncoder,
    ElmoTokenEmbedder,
    PretrainedTransformerEmbedder,
    PretrainedTransformerMismatchedEmbedder,
)
from allennlp.nn import util as nn_util



from allennlp.common.params import Params
from my_package import my_readers, my_classifiers, my_trainers, my_fields

2021-08-26 18:45:49.725757: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
from my_package.data.dataset_readers.counterfactual_reader import  CounterfactualSnliReader

In [3]:
import allennlp
allennlp.__version__

'2.5.0'

In [4]:
torch.cuda.is_available()

False

In [5]:
from my_package.data.fields.float_fields import FloatField

In [6]:
x=FloatField(1.0)

In [7]:
from my_package.my_readers import OverlapSnliReader, ReversibleSnliReader

In [8]:
train_data_path = "/ist/users/canu/debias_nlu/data/nli/multinli_1.0_train.jsonl"
validation_data_path = "/ist/users/canu/debias_nlu/data/nli/multinli_1.0_dev_matched.jsonl"
test_data_path = "/ist/users/canu/debias_nlu/data/nli/multinli_1.0_dev_mismatched.jsonl"

In [9]:
from allennlp_models.pair_classification.dataset_readers import snli

In [10]:
transformer_model = "roberta-large"
transformer_dim = 1024
pretrained_transformer_tokenizer = PretrainedTransformerTokenizer(model_name=transformer_model,add_special_tokens = False)
token_indexer  = PretrainedTransformerIndexer(model_name=transformer_model,max_length=512 )
reader = OverlapSnliReader(tokenizer=pretrained_transformer_tokenizer,token_indexers={"tokens":token_indexer})
cf_reader = CounterfactualSnliReader(tokenizer=pretrained_transformer_tokenizer,token_indexers={"tokens":token_indexer})
# adversarial_reader = ReversibleSnliReader(tokenizer=pretrained_transformer_tokenizer,token_indexers={"tokens":token_indexer}, reversible = True)

/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/config.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/vocab.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/merges.txt
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/tokenizer.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/added_tokens.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/special_tokens_map.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/tokenizer_config.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/config.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolv

In [11]:
# # Instantiate and use the dataset reader to read a file containing the data
train_dataset = reader.read(train_data_path)
train_dataset_list = list(train_dataset)

validation_dataset = reader.read(validation_data_path)
validation_dataset_list = list(validation_dataset)
                               
counter_factual_val =cf_reader.read(validation_data_path)  
cf_dataset_list = list(counter_factual_val)

In [12]:
print("type of its first element: ", type(train_dataset_list[0]))
print("size of training dataset: ", len(train_dataset_list))

type of its first element:  <class 'allennlp.data.instance.Instance'>
size of training dataset:  392702


In [13]:
validation_dataset_list[0].fields

{'tokens': <allennlp.data.fields.text_field.TextField at 0x7fdcf2777240>,
 'label': <allennlp.data.fields.label_field.LabelField at 0x7fdc7f212fc0>}

In [15]:
# building vocabulary
vocab = Vocabulary.from_instances(train_dataset_list)


building vocab:   0%|          | 0/392702 [00:00<?, ?it/s]

In [16]:
train_dataset_list[3].human_readable_dict()

{'tokens': ['<s>',
  'How',
  'Ġdo',
  'Ġyou',
  'Ġknow',
  '?',
  'ĠAll',
  'Ġthis',
  'Ġis',
  'Ġtheir',
  'Ġinformation',
  'Ġagain',
  '.',
  '</s>',
  '</s>',
  'This',
  'Ġinformation',
  'Ġbelongs',
  'Ġto',
  'Ġthem',
  '.',
  '</s>'],
 'label': 'entailment'}

In [17]:
x = reader._tokenizer.tokenize("ahoo ahoo <mask> this is a book")

In [18]:
x[3].idx

10

In [19]:
reader._tokenizer.tokenizer.mask_token

'<mask>'

# Data Masking

In [20]:
import copy
from allennlp.data.tokenizers import Token

# Model

In [21]:
from allennlp.models.basic_classifier import BasicClassifier
from allennlp.models.model import Model

params = Params.from_file('spurious_corr/MNLI/training_config/mnli_roberta_cpu.jsonnet')

# Grab the part of the `config` that defines the model
# model_params = params.pop("model")
# print(model_params.pop("type"))
# print(model_params)
# Find out which model subclass we want
# model_name = model_params.pop("type")

# Instantiate that subclass with the remaining model params
#model = Model.from_params(model_params,vocab=vocab)

In [22]:
model = Model.load(params, '/ist/users/canu/CI4RRL_project/outputs_real_test')

/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/config.json
/ist/users/canu/.cache/huggingface/transformers https://huggingface.co/roberta-large/resolve/main/pytorch_model.bin


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [23]:
from allennlp.predictors.predictor import Predictor
from copy import deepcopy
import numpy as np

In [24]:
vocab._index_to_token['tokens'][0]

'@@PADDING@@'

In [25]:
predictor = Predictor(model,reader)

In [26]:
validation_dataset_list[73]

<allennlp.data.instance.Instance at 0x7fdc7f1fc190>

In [52]:
new_x = cf_dataset_list[73]
new_y = copy.deepcopy( cf_dataset_list[73])

new_y.fields['tokens'] = new_y.fields.pop('cf_tokens')
new_x.fields.pop('cf_tokens')

{'tokens': <allennlp.data.fields.text_field.TextField at 0x7fdc7a18e380>,
 'cf_tokens': <allennlp.data.fields.text_field.TextField at 0x7fdc7a18e8c0>,
 'label': <allennlp.data.fields.label_field.LabelField at 0x7fdc7a18ec40>}

In [58]:

predictor.predict_instance(new_x)

{'logits': [-0.7791373133659363, -0.6617544889450073, 0.8475649952888489],
 'probs': [0.1386650800704956, 0.15593577921390533, 0.7053991556167603],
 'token_ids': [0,
  3762,
  631,
  21,
  12648,
  162,
  24506,
  6459,
  6,
  53,
  127,
  1144,
  851,
  10,
  372,
  45219,
  9,
  3500,
  77,
  38,
  794,
  127,
  17589,
  3121,
  6480,
  575,
  12445,
  81,
  5,
  124,
  9,
  10,
  3428,
  4,
  2,
  2,
  100,
  21,
  24506,
  6459,
  3915,
  59,
  171,
  383,
  4,
  1437,
  2],
 'loss': 1.8583110570907593,
 'label': 'neutral',
 'tokens': ['<s>',
  'One',
  'Ġthing',
  'Ġwas',
  'Ġworrying',
  'Ġme',
  'Ġdread',
  'fully',
  ',',
  'Ġbut',
  'Ġmy',
  'Ġheart',
  'Ġgave',
  'Ġa',
  'Ġgreat',
  'Ġthrob',
  'Ġof',
  'Ġrelief',
  'Ġwhen',
  'ĠI',
  'Ġsaw',
  'Ġmy',
  'Ġul',
  'ster',
  'Ġlying',
  'Ġcare',
  'lessly',
  'Ġover',
  'Ġthe',
  'Ġback',
  'Ġof',
  'Ġa',
  'Ġchair',
  '.',
  '</s>',
  '</s>',
  'I',
  'Ġwas',
  'Ġdread',
  'fully',
  'Ġworried',
  'Ġabout',
  'Ġmany',
  'Ġthing

In [57]:
new_y.fields['tokens'].tokens

[<s>, </s>, </s>, </s>]

In [30]:
validation_dataset_list[73].fields['label'].label

'contradiction'

In [None]:
new_x = reader.text_to_instance('','')
out_x = predictor.predict_instance(new_x)['logits']
count_correct_normie = 0
count_correct_ci = 0
label_dict = {0:'entailment',1:'contradiction',2:'neutral'}
for i in range(len(validation_dataset_list)):
    test_x_1 = validation_dataset_list[i]
    y_true  = test_x_1.fields['label'].label
    out_normie = predictor.predict_instance(test_x_1)
    y_pred_normie = out_normie['label']
    if y_true == y_pred_normie:
        count_correct_normie+=1
    out = np.array(out_normie['logits']) - 0.5*np.array(out_x)
    y_pred_ci = label_dict[np.argmax(softmax(out))]
    if y_true == y_pred_ci:
        count_correct_ci+=1
    break

In [None]:
output_dict['logits']

In [None]:
n = len(validation_dataset_list)
print(count_correct_normie/n, count_correct_ci/n)

# Dataloader

In [59]:

from allennlp.data import DataLoader

params = Params.from_file('spurious_corr/MNLI/training_config/mnli_roberta_cpu.jsonnet')
# Grab the part of the `config` that defines the 
dataloader_params = params.pop("data_loader")
print(dataloader_params)
# Instantiate that subclass with the remaining model params
# train_dataset.index_with(vocab)
# validation_dataset.index_with(vocab)
data_loader = DataLoader.from_params(
            params=dataloader_params, reader=reader, data_path=train_data_path
        )


params = Params.from_file('spurious_corr/MNLI/training_config/mnli_roberta_cpu.jsonnet')
# Grab the part of the `config` that defines the model
dataloader_params = params.pop("data_loader")
validation_data_loader = DataLoader.from_params(
            params=dataloader_params, reader=reader, data_path=validation_data_path
        )

params = Params.from_file('spurious_corr/MNLI/training_config/mnli_roberta_cpu.jsonnet')
# Grab the part of the `config` that defines the model
dataloader_params = params.pop("data_loader")
cf_data_loader = DataLoader.from_params(
            params=dataloader_params, reader=cf_reader, data_path=validation_data_path
        )



data_loader.Params({'batch_sampler': {'batch_size': 32, 'type': 'bucket'}})


loading instances: 0it [00:00, ?it/s]

loading instances: 0it [00:00, ?it/s]

loading instances: 0it [00:00, ?it/s]

In [60]:
data_loader.set_target_device(-1)
data_loader.index_with(model.vocab)

# adversarial_data_loader.set_target_device(0)
# adversarial_data_loader.index_with(vocab)

validation_data_loader.set_target_device(-1)
validation_data_loader.index_with(model.vocab)

cf_data_loader.set_target_device(-1)
cf_data_loader.index_with(model.vocab)

In [61]:
list(cf_data_loader.iter_instances())[2204].get_padding_lengths()

{'tokens': {'tokens___token_ids': 79,
  'tokens___mask': 79,
  'tokens___type_ids': 79,
  'tokens___segment_concat_mask': 79},
 'cf_tokens': {'tokens___token_ids': 4,
  'tokens___mask': 4,
  'tokens___type_ids': 4,
  'tokens___segment_concat_mask': 4},
 'label': {}}

# Counterfactual Eval

In [62]:
import datetime
import logging
import os
import shutil
import json
from os import PathLike
from typing import Any, Dict, Iterable, Optional, Union, Tuple, Set, List
from collections import Counter

import torch
from torch.nn.utils import clip_grad_norm_

from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.common.params import Params
from allennlp.common.tqdm import Tqdm
from allennlp.common.util import dump_metrics, sanitize, int_to_device
from allennlp.data import Instance, Vocabulary, Batch, DataLoader
from allennlp.data.dataset_readers import DatasetReader
from allennlp.models.archival import CONFIG_NAME
from allennlp.models.model import Model
from allennlp.nn import util as nn_util

# We want to warn people that tqdm ignores metrics that start with underscores
# exactly once. This variable keeps track of whether we have.
class HasBeenWarned:
    tqdm_ignores_underscores = False

In [71]:

logger = logging.getLogger(__name__)
def cf_evaluate(
    model: Model,
    data_loader: DataLoader,
    cf_weight: float = 0.5,
    cuda_device: int = -1,
    batch_weight_key: str = None,
    output_file: str = None,
    predictions_output_file: str = None,
) -> Dict[str, Any]:
    """
    # Parameters
    model : `Model`
        The model to evaluate
    data_loader : `DataLoader`
        The `DataLoader` that will iterate over the evaluation data (data loaders already contain
        their data).
    cuda_device : `int`, optional (default=`-1`)
        The cuda device to use for this evaluation.  The model is assumed to already be using this
        device; this parameter is only used for moving the input data to the correct device.
    batch_weight_key : `str`, optional (default=`None`)
        If given, this is a key in the output dictionary for each batch that specifies how to weight
        the loss for that batch.  If this is not given, we use a weight of 1 for every batch.
    metrics_output_file : `str`, optional (default=`None`)
        Optional path to write the final metrics to.
    predictions_output_file : `str`, optional (default=`None`)
        Optional path to write the predictions to.
    # Returns
    `Dict[str, Any]`
        The final metrics.
    """
    check_for_gpu(cuda_device)
    data_loader.set_target_device(int_to_device(cuda_device))
    predictions_file = (
        None if predictions_output_file is None else open(predictions_output_file, "w")
    )

    with torch.no_grad():
        model.eval()
        iterator = iter(data_loader)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator)

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            # create cf batch
            batch_cf = copy.deepcopy(batch)
            batch_cf['tokens'] = batch_cf.pop('cf_tokens')
            batch.pop('cf_tokens')
            
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            output_dict_cf = model(**batch_cf)
            output_dict['logits'] = output_dict['logits']-cf_wright*output_dict_cf['logits']
            probs = torch.nn.functional.softmax(output_dict['logits'], dim=-1)
           
            output_dict['probs'] = probs
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if not HasBeenWarned.tqdm_ignores_underscores and any(
                metric_name.startswith("_") for metric_name in metrics
            ):
                logger.warning(
                    'Metrics with names beginning with "_" will '
                    "not be logged to the tqdm progress bar."
                )
                HasBeenWarned.tqdm_ignores_underscores = True
            description = (
                ", ".join(
                    [
                        "%s: %.2f" % (name, value)
                        for name, value in metrics.items()
                        if not name.startswith("_")
                    ]
                )
                + " ||"
            )
            generator_tqdm.set_description(description, refresh=False)

            if predictions_file is not None:
                predictions = json.dumps(sanitize(model.make_output_human_readable(output_dict)))
                predictions_file.write(predictions + "\n")

        if predictions_file is not None:
            predictions_file.close()
        
        # recaculate accuracy
        model._accuracy(output_dict['logits'], batch['label'])
        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes produced a loss!"
                )
            final_metrics["loss"] = total_loss / total_weight
            final_metrics["loss"] = "N/A"
        if output_file is not None:
            dump_metrics(output_file, final_metrics, log=True)

        return final_metrics


In [72]:
final_metrics = cf_evaluate(model,cf_data_loader)

0it [00:00, ?it/s]

In [73]:
final_metrics

{'accuracy': 0.6301725454267827, 'loss': 'N/A'}

In [69]:
validation_data_loader.batch_sampler.get_batch_indices()

TypeError: get_batch_indices() missing 1 required positional argument: 'instances'

In [None]:
# trace allennlp predict to create new evaluate code
new_x = reader.text_to_instance('','')
xtemp  = reader.apply_token_indexers(new_x)

In [None]:
new_x.index_fields

In [None]:
validation_dataset_list[0].fields['tokens'].tokens

In [48]:
test_x = validation_dataset_list[2]
out_normie = predictor.predict_instance(test_x)

In [13]:
logits = self._classification_layer(embedded_text)
probs = torch.nn.functional.softmax(logits, dim=-1)

output_dict = {"logits": logits, "probs": probs}
output_dict["token_ids"] = util.get_token_ids_from_text_field_tensors(tokens)
if label is not None:
    loss = self._loss(logits, label.long().view(-1))
    output_dict["loss"] = loss
    self._accuracy(logits, label)

return output_dict

NameError: name 'self' is not defined

In [40]:
# NLP

In [47]:
validation_data_loader.list_available()

ConfigurationError: Default implementation multiprocess is not registered