# Multilingual Contradiction and Entailment (BERT)

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import transformers

from sklearn import model_selection


# To use GPU
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.cuda.is_available()
# model.to(device)


False

## Training Parameters

In [2]:
max_length = 280  # Maximum length of input sentence to the model.
batch_size = 32
epochs = 2

# Labels in our dataset.
# labels = ["contradiction", "entailment", "neutral"]
labels = ["entailment", "neutral", "contradiction"]

## Data Ingestion

In [3]:
train_df = pd.read_csv("train/train.csv")
test_df = pd.read_csv("test/test.csv")

print(f"Total train samples : {train_df.shape[0]}")
print(f"Total test samples: {test_df.shape[0]}")

Total train samples : 12120
Total test samples: 5195


In [4]:
train_df.head()

Unnamed: 0,id,premise,hypothesis,lang_abv,language,label
0,5130fd2cb5,and these comments were considered in formulat...,The rules developed in the interim were put to...,en,English,0
1,5b72532a0b,These are issues that we wrestle with in pract...,Practice groups are not permitted to work on t...,en,English,2
2,3931fbe82a,Des petites choses comme celles-là font une di...,J'essayais d'accomplir quelque chose.,fr,French,0
3,5622f0c60b,you know they can't really defend themselves l...,They can't defend themselves because of their ...,en,English,0
4,86aaa48b45,ในการเล่นบทบาทสมมุติก็เช่นกัน โอกาสที่จะได้แสด...,เด็กสามารถเห็นได้ว่าชาติพันธุ์แตกต่างกันอย่างไร,th,Thai,1


In [5]:
test_df.head()

Unnamed: 0,id,premise,hypothesis,lang_abv,language
0,c6d58c3f69,بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولم...,"کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی...",ur,Urdu
1,cefcc82292,هذا هو ما تم نصحنا به.,عندما يتم إخبارهم بما يجب عليهم فعله ، فشلت ال...,ar,Arabic
2,e98005252c,et cela est en grande partie dû au fait que le...,Les mères se droguent.,fr,French
3,58518c10ba,与城市及其他公民及社区组织代表就IMA的艺术发展进行对话&amp,IMA与其他组织合作，因为它们都依靠共享资金。,zh,Chinese
4,c32b0d16df,Она все еще была там.,"Мы думали, что она ушла, однако, она осталась.",ru,Russian


In [6]:
print("Number of missing values")
print(train_df.isnull().sum())
train_df.dropna(axis=0, inplace=True)

Number of missing values
id            0
premise       0
hypothesis    0
lang_abv      0
language      0
label         0
dtype: int64


## Data Details

In [7]:
print(train_df.groupby(['label','lang_abv']).count())

                  id  premise  hypothesis  language
label lang_abv                                     
0     ar         124      124         124       124
      bg         123      123         123       123
      de         108      108         108       108
      el         120      120         120       120
      en        2427     2427        2427      2427
      es         118      118         118       118
      fr         133      133         133       133
      hi         125      125         125       125
      ru         132      132         132       132
      sw         140      140         140       140
      th         121      121         121       121
      tr         110      110         110       110
      ur         133      133         133       133
      vi         122      122         122       122
      zh         140      140         140       140
1     ar         129      129         129       129
      bg         111      111         111       111
      de    

In [8]:
print('Max. length of \'premise\' inputs', max(train_df['premise'].str.len()))
print('Max. length of \'hypothesis\' inputs', max(train_df['hypothesis'].str.len()))

max_length = max(max(train_df['premise'].str.len()), max(train_df['hypothesis'].str.len()))

# Max input size for Bert model is 512
if max_length > 512:
    max_length = 512

Max. length of 'premise' inputs 967
Max. length of 'hypothesis' inputs 276


In [9]:
train_df['train_data'] = train_df[['premise', 'hypothesis']].agg(', '.join, axis = 1)

train_df['train_data'][0]

'and these comments were considered in formulating the interim rules., The rules developed in the interim were put together with these comments in mind.'

## Splitting to training and validation sets

In [10]:
# Splitting to training and validation set
x = train_df['train_data']
y = train_df['label']

# x = train_df['train_data'][:50]
# y = train_df['label'][:50]

x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size = 0.1, train_size = 0.9, 
                                                                    random_state = 2021, stratify = y)

## Global Functions

In [11]:
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of data.

    Args:
        sentence_pairs: Array of premise and hypothesis input sentences.
        labels: Array of labels.
        batch_size: Integer batch size.
        shuffle: boolean, whether to shuffle the data.
        include_targets: boolean, whether to incude the labels.

    Returns:
        Tuples `([input_ids, attention_mask, `token_type_ids], labels)`
        (or just `[input_ids, attention_mask, `token_type_ids]`
         if `include_targets=False`)
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=batch_size,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load our BERT Tokenizer to encode the text.
        # We will use base-base-uncased pretrained model.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
        # encoded together and separated by [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")
        
        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Shuffle indexes after each epoch if shuffle is set to True.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)

## Model (Bert + LSTM)

In [12]:
# Create the model under a distribution strategy scope.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Encoded token ids from BERT tokenizer.
    input_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="input_ids"
    )
    # Attention masks indicates to the model which tokens should be attended to.
    attention_masks = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="attention_masks"
    )
    # Token type ids are binary masks identifying different sequences in the model.
    token_type_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="token_type_ids"
    )
    # Loading pretrained BERT model.
    bert_model = transformers.TFBertModel.from_pretrained("bert-base-multilingual-cased")
    # Freeze the BERT model to reuse the pretrained features without modifying them.
    bert_model.trainable = False

#     sequence_output, pooled_output = bert_model(
#         input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
#     )
    
    # Updated for transformer v3.xx
    bm = bert_model(
        input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
    )
    # Add trainable layers on top of frozen layers to adapt the pretrained features on the new data.
    bi_lstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, return_sequences=True)
    )(bm['last_hidden_state']) # Updated for transformer v3.xx
    # Applying hybrid pooling approach to bi_lstm sequence output.
    avg_pool = tf.keras.layers.GlobalAveragePooling1D()(bi_lstm)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()(bi_lstm)
    concat = tf.keras.layers.concatenate([avg_pool, max_pool])
    dropout = tf.keras.layers.Dropout(0.3)(concat)
    output = tf.keras.layers.Dense(3, activation="softmax")(dropout)
    model = tf.keras.models.Model(
        inputs=[input_ids, attention_masks, token_type_ids], outputs=output
    )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss="categorical_crossentropy",
        metrics=["acc"],
    )

    

    
print(f"Strategy: {strategy}")
model.summary()


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Strategy: <tensorflow.python.distribute.mirrored_strategy.MirroredStrategy object at 0x00000266F71EC5C8>
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 512)]        0                                            
_____________________________

## Model Training

In [13]:
train_data = BertSemanticDataGenerator(
#     x_train.reset_index(drop=True),
    x_train.values,
    tf.keras.utils.to_categorical(y_train),
    batch_size=batch_size,
    shuffle=True,
)
valid_data = BertSemanticDataGenerator(
    x_test.values,
    tf.keras.utils.to_categorical(y_test),
    batch_size=batch_size,
    shuffle=False,
)

In [14]:
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=epochs,
    use_multiprocessing=True,
    workers=-1,
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch 1/2
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tenso

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Epoch 2/2


In [15]:
# # Unfreeze the bert_model.
bert_model.trainable = True

# Fine-tune from this layer onwards
fine_tune_at = 10

# Freeze all the layers before the `fine_tune_at` layer
for layer in bert_model.layers[:fine_tune_at]:
  layer.trainable =  False


# Recompile the model to make the change effective.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 512)]        0                                            
__________________________________________________________________________________________________
attention_masks (InputLayer)    [(None, 512)]        0                                            
__________________________________________________________________________________________________
token_type_ids (InputLayer)     [(None, 512)]        0                                            
__________________________________________________________________________________________________
tf_bert_model (TFBertModel)     TFBaseModelOutputWit 177853440   input_ids[0][0]                  
______________________________________________________________________________________________

In [16]:
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=epochs,
    use_multiprocessing=True,
    workers=-1,
)

Epoch 1/2
Epoch 2/2


## Saving Model

In [17]:
# Saving the model
model.save_weights('mcae.h5')

In [18]:
help(history)

Help on History in module tensorflow.python.keras.callbacks object:

class History(Callback)
 |  Callback that records events into a `History` object.
 |  
 |  This callback is automatically applied to
 |  every Keras model. The `History` object
 |  gets returned by the `fit` method of models.
 |  
 |  Method resolution order:
 |      History
 |      Callback
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  on_epoch_end(self, epoch, logs=None)
 |      Called at the end of an epoch.
 |      
 |      Subclasses should override for any actions to run. This function should only
 |      be called during TRAIN mode.
 |      
 |      Arguments:
 |          epoch: integer, index of epoch.
 |          logs: dict, metric results for this training epoch, and for the
 |            validation epoch if validation is performed. Validation result keys
 |            are prefixed with `val_`.
 |

## Model Evaluation

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import transformers

from sklearn import model_selection


# To use GPU
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.cuda.is_available()
# model.to(device)


False

In [2]:
max_length = 280  # Maximum length of input sentence to the model.
batch_size = 32
epochs = 2

# Labels in our dataset.
# labels = ["contradiction", "entailment", "neutral"]
labels = ["entailment", "neutral", "contradiction"]

In [3]:
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of data.

    Args:
        sentence_pairs: Array of premise and hypothesis input sentences.
        labels: Array of labels.
        batch_size: Integer batch size.
        shuffle: boolean, whether to shuffle the data.
        include_targets: boolean, whether to incude the labels.

    Returns:
        Tuples `([input_ids, attention_mask, `token_type_ids], labels)`
        (or just `[input_ids, attention_mask, `token_type_ids]`
         if `include_targets=False`)
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=batch_size,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load our BERT Tokenizer to encode the text.
        # We will use base-base-uncased pretrained model.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch.
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
        # encoded together and separated by [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")
        
        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Shuffle indexes after each epoch if shuffle is set to True.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)

In [4]:
# Create the model under a distribution strategy scope.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Encoded token ids from BERT tokenizer.
    input_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="input_ids"
    )
    # Attention masks indicates to the model which tokens should be attended to.
    attention_masks = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="attention_masks"
    )
    # Token type ids are binary masks identifying different sequences in the model.
    token_type_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="token_type_ids"
    )
    # Loading pretrained BERT model.
    bert_model = transformers.TFBertModel.from_pretrained("bert-base-multilingual-cased")
    # Freeze the BERT model to reuse the pretrained features without modifying them.
    bert_model.trainable = False

#     sequence_output, pooled_output = bert_model(
#         input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
#     )
    
    # Updated for transformer v3.xx
    bm = bert_model(
        input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
    )
    # Add trainable layers on top of frozen layers to adapt the pretrained features on the new data.
    bi_lstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, return_sequences=True)
    )(bm['last_hidden_state']) # Updated for transformer v3.xx
    # Applying hybrid pooling approach to bi_lstm sequence output.
    avg_pool = tf.keras.layers.GlobalAveragePooling1D()(bi_lstm)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()(bi_lstm)
    concat = tf.keras.layers.concatenate([avg_pool, max_pool])
    dropout = tf.keras.layers.Dropout(0.3)(concat)
    output = tf.keras.layers.Dense(3, activation="softmax")(dropout)
    model = tf.keras.models.Model(
        inputs=[input_ids, attention_masks, token_type_ids], outputs=output
    )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss="categorical_crossentropy",
        metrics=["acc"],
    )

    

    
print(f"Strategy: {strategy}")
model.summary()


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Strategy: <tensorflow.python.distribute.mirrored_strategy.MirroredStrategy object at 0x000001C40393BC48>
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 280)]        0                                            
_____________________________

In [5]:
model.load_weights('mcae.h5')

In [6]:
train_df = pd.read_csv("train/train.csv")

train_df['test_data'] = train_df[['premise', 'hypothesis']].agg(', '.join, axis = 1)

train_df['test_data'][0]

'and these comments were considered in formulating the interim rules., The rules developed in the interim were put together with these comments in mind.'

In [7]:
train_df

Unnamed: 0,id,premise,hypothesis,lang_abv,language,label,test_data
0,5130fd2cb5,and these comments were considered in formulat...,The rules developed in the interim were put to...,en,English,0,and these comments were considered in formulat...
1,5b72532a0b,These are issues that we wrestle with in pract...,Practice groups are not permitted to work on t...,en,English,2,These are issues that we wrestle with in pract...
2,3931fbe82a,Des petites choses comme celles-là font une di...,J'essayais d'accomplir quelque chose.,fr,French,0,Des petites choses comme celles-là font une di...
3,5622f0c60b,you know they can't really defend themselves l...,They can't defend themselves because of their ...,en,English,0,you know they can't really defend themselves l...
4,86aaa48b45,ในการเล่นบทบาทสมมุติก็เช่นกัน โอกาสที่จะได้แสด...,เด็กสามารถเห็นได้ว่าชาติพันธุ์แตกต่างกันอย่างไร,th,Thai,1,ในการเล่นบทบาทสมมุติก็เช่นกัน โอกาสที่จะได้แสด...
...,...,...,...,...,...,...,...
12115,2b78e2a914,The results of even the most well designed epi...,All studies have the same amount of uncertaint...,en,English,2,The results of even the most well designed epi...
12116,7e9943d152,But there are two kinds of the pleasure of do...,But there are two kinds of the pleasure of doi...,en,English,0,But there are two kinds of the pleasure of do...
12117,5085923e6c,The important thing is to realize that it's wa...,"It cannot be moved, now or ever.",en,English,2,The important thing is to realize that it's wa...
12118,fc8e2fd1fe,At the west end is a detailed model of the who...,The model temple complex is at the east end.,en,English,2,At the west end is a detailed model of the who...


In [8]:
test_df = pd.read_csv("test/test.csv")

test_df['test_data'] = test_df[['premise', 'hypothesis']].agg(', '.join, axis = 1)

test_df['test_data'][0]

'بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولمبین ہائی اسکول کے دوسرے طلبا کے نام سے بکسوں کو نشان زد کیا جائے گا جس نے اس سال پہلے اپنی زندگی کھو دی, کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی اسکول کے طالب علموں میں سے ایک جو مر گیا.'

In [9]:
test_df

Unnamed: 0,id,premise,hypothesis,lang_abv,language,test_data
0,c6d58c3f69,بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولم...,"کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی...",ur,Urdu,بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولم...
1,cefcc82292,هذا هو ما تم نصحنا به.,عندما يتم إخبارهم بما يجب عليهم فعله ، فشلت ال...,ar,Arabic,"هذا هو ما تم نصحنا به., عندما يتم إخبارهم بما ..."
2,e98005252c,et cela est en grande partie dû au fait que le...,Les mères se droguent.,fr,French,et cela est en grande partie dû au fait que le...
3,58518c10ba,与城市及其他公民及社区组织代表就IMA的艺术发展进行对话&amp,IMA与其他组织合作，因为它们都依靠共享资金。,zh,Chinese,"与城市及其他公民及社区组织代表就IMA的艺术发展进行对话&amp, IMA与其他组织合作，因..."
4,c32b0d16df,Она все еще была там.,"Мы думали, что она ушла, однако, она осталась.",ru,Russian,"Она все еще была там., Мы думали, что она ушла..."
...,...,...,...,...,...,...
5190,5f90dd59b0,نیند نے وعدہ کیا کہ موٹل نے سوال میں تحقیق کی.,نیمیتھ کو موٹل کی تفتیش کے لئے معاوضہ دیا جارہ...,ur,Urdu,نیند نے وعدہ کیا کہ موٹل نے سوال میں تحقیق کی....
5191,f357a04e86,The rock has a soft texture and can be bough...,The rock is harder than most types of rock.,en,English,The rock has a soft texture and can be bough...
5192,1f0ea92118,她目前的存在，并考虑到他与沃佛斯顿争执的本质，那是尴尬的。,她在与Wolverstone的打斗结束后才在场的事实被看作是很尴尬的。,zh,Chinese,"她目前的存在，并考虑到他与沃佛斯顿争执的本质，那是尴尬的。, 她在与Wolverstone的..."
5193,0407b48afb,isn't it i can remember i've only been here ei...,I could see downtown Dallas from where I lived...,en,English,isn't it i can remember i've only been here ei...


In [10]:
def check_similarity(sentence1, sentence2, num):
    sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
    test_data = BertSemanticDataGenerator(
        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )

    proba = model.predict(test_data)[0]
    idx = np.argmax(proba)
    proba = f"{proba[idx]: .2f}%"
    pred = labels[idx]
    
    print('Language:', train_df['language'][num])
    print(f'Sentences compared:\n 1) {sentence1} \n 2) {sentence2}')
    print('Predicted relevance:', pred)
    print('Actual relevance:', labels[int(train_df['label'][num])])
    
#     return pred, proba
#     return idx
#     return sentence_pairs, pred, labels[int(train_df['label'][num])]


In [11]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: English
Sentences compared:
 1) Emergency physician attitudes concerning intervention for alcohol abuse/dependence in the emergency department. 
 2) Physicians have different attitudes concerning substance abuse in the ER.
Predicted relevance: contradiction
Actual relevance: neutral


In [12]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: French
Sentences compared:
 1) Le plan doit également identifier la méthode d'acquisition, les principaux points d'entrée / de sortie, un plan de formation officiel et un plan d'urgence pour minimiser les pertes. 
 2) Le plan devrait également inclure un budget.
Predicted relevance: neutral
Actual relevance: neutral


In [13]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: Turkish
Sentences compared:
 1) KSM, Moussaoui'nin Jarrah için muhtemel yedek pilot olarak hazırlanabilmesi için Binalshibh'e Moussaoui''ye para göndermesi konusunda talimat vermiş olabilir. 
 2) KSM hiçbir zaman Binalshibh ile konuşmadı.
Predicted relevance: contradiction
Actual relevance: contradiction


In [14]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: English
Sentences compared:
 1) Strategic parents might spend a large portion of their tax cuts, causing interest rates to rise. 
 2) More spending on goods will not cause higher interest rates.
Predicted relevance: neutral
Actual relevance: contradiction


In [15]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: Chinese
Sentences compared:
 1) Squamish镇以其在八月举办的滚动比赛而闻名，是前往加里波第省立公园徒步旅行者的有用基地。 
 2) 斯夸米什是水上踩滚木竞赛开始的地方。
Predicted relevance: entailment
Actual relevance: neutral


In [16]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: Turkish
Sentences compared:
 1) Bir uçak alev alsa bile, ki neden yansın, radyasyonun sızması için kurşundan yapılan kısımların erimesi gerekir. 
 2) Uçak yandıktan sonra radyasyon bir aktarma parçasından sızacaktır.
Predicted relevance: neutral
Actual relevance: entailment


In [17]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: Arabic
Sentences compared:
 1) لقد رحلت بالفعل وأخبرتني ألا أقلق على ذلك. 
 2) قالت أن الوقت قد حان لنشعر بالذعر.
Predicted relevance: contradiction
Actual relevance: contradiction


In [18]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: English
Sentences compared:
 1) i don't know um-hum 
 2) I know very well.
Predicted relevance: contradiction
Actual relevance: contradiction


In [19]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: English
Sentences compared:
 1) What about the hole?" They scanned the cliff-side narrowly. 
 2) They looked from the top of the cliff for the hole.
Predicted relevance: contradiction
Actual relevance: neutral


In [20]:
from random import randint

num = randint(0, len(train_df)-1)

check_similarity(train_df['premise'][num], train_df['hypothesis'][num], num)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Language: English
Sentences compared:
 1) The Gaiety Theatre in South King Street is worth visiting for its ornate d??cor. 
 2) The Gaiety Theatre is decorated very ornately.
Predicted relevance: contradiction
Actual relevance: entailment
