In [1]:
import json
import random
from transformers import AutoTokenizer
import numpy as np
from allennlp.common.util import import_module_and_submodules as import_submodules
from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor
from scipy.spatial import distance

import sys
# NOTE(review): machine-specific absolute path; the project package must be
# importable some other way for this notebook to run on another machine.
sys.path.append('/home/alonj/contrastive')

# Import the project package and its submodules (presumably so the custom
# reader / predictor types named below can be resolved -- confirm).
import_submodules("contrastive_allennlp")

DATASET="mnli"
MODEL_NAME="roberta-large"
model_path=f"../s3-link/experiments/models/{DATASET}/{MODEL_NAME}"

archive = load_archive(model_path + '/model.tar.gz')
print(archive.config)
# Override the archived dataset reader type.
archive.config['dataset_reader']['type'] = 'esnli'  # comment out for stained!!!
# Request hidden states both in the archived config and on the loaded model object.
archive.config['model']['output_hidden_states'] = True
model = archive.model
model._output_hidden_states = True
predictor = Predictor.from_archive(archive, 'textual_entailment_fixed')

# Follow MODEL_NAME instead of repeating the literal so the tokenizer cannot
# drift from the configured model (same value today: "roberta-large").
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

with open(model_path + "/label2index.json", "r") as f:
    label2index = json.load(f)
# Inverse lookup: class index -> label string.
index2label = {index: label for label, index in label2index.items()}
label2index


Params({'model': {'dropout': 0.1, 'feedforward': {'activations': 'tanh', 'hidden_dims': 1024, 'input_dim': 1024, 'num_layers': 1}, 'namespace': 'tags', 'seq2vec_encoder': {'embedding_dim': 1024, 'type': 'cls_pooler'}, 'text_field_embedder': {'token_embedders': {'tokens': {'max_length': 512, 'model_name': 'roberta-large', 'type': 'pretrained_transformer'}}}, 'type': 'encoder_classifier'}, 'trainer': {'cuda_device': 0, 'learning_rate_scheduler': {'cut_frac': 0.06, 'type': 'slanted_triangular'}, 'num_epochs': 20, 'optimizer': {'lr': 2e-06, 'type': 'huggingface_adamw', 'weight_decay': 0.1}, 'patience': 5, 'validation_metric': '+accuracy'}, 'test_data_path': 'https://allennlp.s3.amazonaws.com/datasets/multinli/multinli_1.0_dev_mismatched.jsonl', 'train_data_path': 'https://allennlp.s3.amazonaws.com/datasets/multinli/multinli_1.0_train.jsonl', 'data_loader': {'batch_sampler': {'batch_size': 40, 'type': 'bucket'}}, 'dataset_reader': {'token_indexers': {'tokens': {'max_length': 512, 'model_nam

{'entailment': 0, 'contradiction': 1, 'neutral': 2}

In [2]:
def all_masks(tokenized_text):
    """Yield every non-empty, proper subset of token positions.

    Each yielded value is a list of indices into ``tokenized_text`` in
    ascending order. Subsets are enumerated by counting through bitmasks,
    so the order is [0], [1], [0, 1], [2], ...

    The empty set and the full set are deliberately excluded. Inputs with
    fewer than two tokens therefore yield nothing.

    Powerset recipe adapted from
    https://stackoverflow.com/questions/1482308/how-to-get-all-subsets-of-a-set-powerset
    """
    s = list(range(len(tokenized_text)))
    x = len(s)
    masks = [1 << i for i in range(x)]
    # `-` binds tighter than `<<`, so the upper bound must be parenthesised:
    # `1 << x - 1` is `1 << (x - 1)` and silently skips every subset that
    # contains the last position. `(1 << x) - 1` is the all-ones bitmask
    # (the full set), which the exclusive range bound leaves out.
    for i in range(1, (1 << x) - 1):
        yield [ss for mask, ss in zip(masks, s) if i & mask]
        
def all_consecutive_masks(tokenized_text, max_length = -1):
    """Yield index lists obtained by cutting one contiguous span out of the text.

    For every span ``[start, stop)`` with ``start < stop < len(tokenized_text)``
    the yielded mask holds all positions *outside* that span, so it is never
    empty and never the full index set.

    When ``max_length`` is positive, only spans whose length ``stop - start``
    is at least ``max_length`` are produced; otherwise every span qualifies.
    """
    positions = list(range(len(tokenized_text)))
    total = len(positions)
    for start in range(total):
        for stop in range(start + 1, total):
            # Positive limit: skip spans shorter than the limit.
            if max_length > 0 and stop - start < max_length:
                continue
            yield positions[:start] + positions[stop:]
                
def all_consecutive_masks2(tokenized_text, max_length = -1):
    """Yield every non-empty contiguous run of token positions.

    Runs are produced as ascending index lists, ordered by start position and
    then by length. When ``max_length`` is positive, runs longer than
    ``max_length`` are left out; otherwise all runs are produced, including
    the one that covers the whole text.
    """
    positions = list(range(len(tokenized_text)))
    total = len(positions)
    for start in range(total + 1):
        for stop in range(start + 1, total + 1):
            # Positive limit: skip runs longer than the limit.
            if max_length > 0 and stop - start > max_length:
                continue
            yield positions[start:stop]
            
# Demo: print the sentence once per contiguous run of at most 4 words,
# with the words in that run replaced by '<mask>'.
text = "A man selling donuts to a customer.".split()
for mask in all_consecutive_masks2(text, 4):
    hidden = set(mask)
    masked_text = ['<mask>' if pos in hidden else word for pos, word in enumerate(text)]
    print(' '.join(masked_text))

<mask> man selling donuts to a customer.
<mask> <mask> selling donuts to a customer.
<mask> <mask> <mask> donuts to a customer.
<mask> <mask> <mask> <mask> to a customer.
A <mask> selling donuts to a customer.
A <mask> <mask> donuts to a customer.
A <mask> <mask> <mask> to a customer.
A <mask> <mask> <mask> <mask> a customer.
A man <mask> donuts to a customer.
A man <mask> <mask> to a customer.
A man <mask> <mask> <mask> a customer.
A man <mask> <mask> <mask> <mask> customer.
A man selling <mask> to a customer.
A man selling <mask> <mask> a customer.
A man selling <mask> <mask> <mask> customer.
A man selling <mask> <mask> <mask> <mask>
A man selling donuts <mask> a customer.
A man selling donuts <mask> <mask> customer.
A man selling donuts <mask> <mask> <mask>
A man selling donuts to <mask> customer.
A man selling donuts to <mask> <mask>
A man selling donuts to a <mask>


In [190]:
# ex = {
#       "gold_label": "entailment", 
#       "Sentence1": "A man selling donuts to a customer during a world exhibition event held in the city of Angeles", 
#       "Sentence2": "A man selling donuts to a customer."
#      }

# Candidate premise/hypothesis examples. Every assignment rebinds `ex`, so only
# the last uncommented one (the soccer example) is used by the code below.
ex = {'Sentence1': 'A woman is holding a sign that says honk to indict bush.', 
      'Sentence2': 'The woman is touching the sign.', 'gold_label': 'entailment'}
ex = {'Sentence1': 'A basset hound is tied to a doorway in an alley in front of a man and woman.', 
      'Sentence2': 'The dog is outside.', 'gold_label': 'entailment'}


ex = {'Sentence1': 'A soccer game in a large area with 8 yellow players and 4 black players.', 
      'Sentence2': 'There is a soccer game with 12 players.', 'gold_label': 'entailment'}
# ex = {'Sentence1': 'A photographer snaps a midair action shot of a snowboarder.', 
#       'Sentence2': 'The midair shot snaps at a boarder of snow.', 'gold_label': 'contradiction'}

# Append a label-dependent suffix token to both sentences:
# " ;" for entailment/contradiction, " ." for anything else.
label_suffix = " ;" if ex['gold_label'] in ('entailment', 'contradiction') else " ."
ex['Sentence1'] = ex['Sentence1'] + label_suffix
ex['Sentence2'] = ex['Sentence2'] + label_suffix

foil = ex['gold_label']
# foil = 'neutral'

# Prediction on the unmasked pair; its label is the "fact".
out = predictor.predict_json(ex)
encoded_orig = out['encoded_representations']
fact = out['label']
print('Predicted: ', fact)

# From here on both sentences are whitespace-split word lists so that single
# positions can be replaced.
ex['Sentence1'] = ex['Sentence1'].split()
ex['Sentence2'] = ex['Sentence2'].split()

# Not the last expression of the cell, so the resulting string is not displayed.
tok.convert_tokens_to_string(out['tokens'])

# One mask per single word position (max_length=1) in each sentence.
masks1 = list(all_consecutive_masks2(ex['Sentence1'], max_length=1))
# masks1 = [[]]
masks2 = list(all_consecutive_masks2(ex['Sentence2'], max_length=1))
encoded = []
mask_mapping = []
preds = np.zeros(shape=(len(masks1), len(masks2)))

# Predict every (premise mask, hypothesis mask) combination and keep the
# representation of each, together with the mask indices that produced it.
for m1_i, m1 in enumerate(masks1):
    hidden1 = set(m1)
    masked1 = ' '.join('<mask>' if pos in hidden1 else word
                       for pos, word in enumerate(ex['Sentence1']))

    for m2_i, m2 in enumerate(masks2):
        hidden2 = set(m2)
        masked2 = ' '.join('<mask>' if pos in hidden2 else word
                           for pos, word in enumerate(ex['Sentence2']))

        masked_ex = {"Sentence1": masked1, "Sentence2": masked2}
        masked_out = predictor.predict_json(masked_ex)
        # if masked_out['label'] != foil:
        #     continue

        print(m1_i, m2_i)
        print(masked1, masked2, masked_out['label'], sep='\n')
        encoded.append(masked_out['encoded_representations'])
        mask_mapping.append((m1_i, m2_i))

        print("====")

encoded = np.array(encoded)
        
# """
# TODO
# 1. Reverse the vec caching - save only the fact ones and save a mapping from matrix index to the mask indices
# 3. do all the mumbo jumbo from the other notebook
# """

        

Predicted:  entailment
0 0
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
<mask> is a soccer game with 12 players. ;
entailment
====
0 1
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There <mask> a soccer game with 12 players. ;
entailment
====
0 2
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There is <mask> soccer game with 12 players. ;
contradiction
====
0 3
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There is a <mask> game with 12 players. ;
contradiction
====
0 4
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There is a soccer <mask> with 12 players. ;
entailment
====
0 5
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There is a soccer game <mask> 12 players. ;
entailment
====
0 6
<mask> soccer game in a large area with 8 yellow players and 4 black players. ;
There is a soccer gam

6 4
A soccer game in a large <mask> with 8 yellow players and 4 black players. ;
There is a soccer <mask> with 12 players. ;
entailment
====
6 5
A soccer game in a large <mask> with 8 yellow players and 4 black players. ;
There is a soccer game <mask> 12 players. ;
neutral
====
6 6
A soccer game in a large <mask> with 8 yellow players and 4 black players. ;
There is a soccer game with <mask> players. ;
entailment
====
6 7
A soccer game in a large <mask> with 8 yellow players and 4 black players. ;
There is a soccer game with 12 <mask> ;
contradiction
====
6 8
A soccer game in a large <mask> with 8 yellow players and 4 black players. ;
There is a soccer game with 12 players. <mask>
contradiction
====
7 0
A soccer game in a large area <mask> 8 yellow players and 4 black players. ;
<mask> is a soccer game with 12 players. ;
entailment
====
7 1
A soccer game in a large area <mask> 8 yellow players and 4 black players. ;
There <mask> a soccer game with 12 players. ;
entailment
====
7 2
A so

KeyboardInterrupt: 

In [17]:
# ex = {
#       "gold_label": "entailment", 
#       "Sentence1": "A man selling donuts to a customer during a world exhibition event held in the city of Angeles", 
#       "Sentence2": "A man selling donuts to a customer."
#      }

# Candidate premise/hypothesis examples. Every assignment rebinds `ex`, so only
# the last uncommented one (the soccer example) is used by the code below.
ex = {'Sentence1': 'A woman is holding a sign that says honk to indict bush.', 
      'Sentence2': 'The woman is touching the sign.', 'gold_label': 'entailment'}
# ex = {'Sentence1': 'A basset hound is tied to a doorway in an alley in front of a man and woman.', 
#       'Sentence2': 'The dog is outside.', 'gold_label': 'entailment'}

ex = {"Sentence1": "The Postal Service would undoubtedly begin", 
      "Sentence2": "The Postal Service will remain stagnant for years.", "gold_label": "contradiction"}

ex = {"Sentence1": "Patents can even be held on the genetic blueprints of various forms of life.", 
      "Sentence2": "You cannot patent living creatures.", "gold_label": "contradiction"}

ex = {"Sentence1": "A girl in a green knit cap stands ready to push a sled, with a little boy behind her.",
      "Sentence2": "A sad girl in a green knit cap stands ready to push a sled, with a little boy behind her. ", "gold_label": "neutral"}

ex = {'Sentence1': 'A soccer game in a large area with 8 yellow players and 4 black players.', 
      'Sentence2': 'There is a soccer game with 12 players.', 'gold_label': 'entailment'}
# # ex = {'Sentence1': 'A photographer snaps a midair action shot of a snowboarder.', 
# #       'Sentence2': 'The midair shot snaps at a boarder of snow.', 'gold_label': 'contradiction'}

# # if ex['gold_label'] == 'entailment' or ex['gold_label'] == 'contradiction':
# #     ex['Sentence1'] = ex['Sentence1'] + " ;"
# #     ex['Sentence2'] = ex['Sentence2'] + " ;"
# # else:
# #     ex['Sentence1'] = ex['Sentence1'] + " ."
# #     ex['Sentence2'] = ex['Sentence2'] + " ."

# ex = {'Sentence1': 'A young man sits at a workbench amidst a number of work tools including wire cutters working on what appears to be a large ornate necklace.', 'Sentence2': 'The necklace will be sold.', 'gold_label': 'neutral', 'prediction': 'neutral'}
# ex = {'Sentence1': 'A young man sits at a workbench amidst a number of work tools including wire cutters working on what appears to be a large ornate necklace.', 'Sentence2': 'The necklace will not be kept unsold.', 'gold_label': 'neutral', 'prediction': 'neutral'}

# ex = {'Sentence1': 'Two black dogs in the snow.', 
#       'Sentence2': 'Black puppies are frolicking among the freshly fallen snow.', 'gold_label': 'entailment', 'prediction': 'entailment'}
    

    
    
foil = ex['gold_label']
# foil = 'contradiction'

# Prediction on the unmasked pair; its label is the "fact".
out = predictor.predict_json(ex)
encoded_orig = out['encoded_representations']
fact = out['label']
print('Predicted: ', fact)

# From here on both sentences are whitespace-split word lists so that single
# positions can be replaced.
ex['Sentence1'] = ex['Sentence1'].split()
ex['Sentence2'] = ex['Sentence2'].split()

# Not the last expression of the cell, so the resulting string is not displayed.
tok.convert_tokens_to_string(out['tokens'])

# The premise is left untouched (a single empty mask); the hypothesis gets one
# mask per single word position (max_length=1).
# masks1 = list(all_consecutive_masks2(ex['Sentence1'], max_length=1))
masks1 = [[]]
masks2 = list(all_consecutive_masks2(ex['Sentence2'], max_length=1))
encoded = []
mask_mapping = []
preds = np.zeros(shape=(len(masks1), len(masks2)))

# Predict every (premise mask, hypothesis mask) combination and keep the
# representation of each, together with the mask indices that produced it.
for m1_i, m1 in enumerate(masks1):
    hidden1 = set(m1)
    masked1 = ' '.join('<mask>' if pos in hidden1 else word
                       for pos, word in enumerate(ex['Sentence1']))

    for m2_i, m2 in enumerate(masks2):
        hidden2 = set(m2)
        masked2 = ' '.join('<mask>' if pos in hidden2 else word
                           for pos, word in enumerate(ex['Sentence2']))

        masked_ex = {"Sentence1": masked1, "Sentence2": masked2}
        masked_out = predictor.predict_json(masked_ex)
        # if masked_out['label'] != foil:
        #     continue

        print(m1_i, m2_i)
        print(masked1, masked2, masked_out['label'], sep='\n')
        encoded.append(masked_out['encoded_representations'])
        mask_mapping.append((m1_i, m2_i))

        print("====")

encoded = np.array(encoded)
        
# """
# TODO
# 1. Reverse the vec caching - save only the fact ones and save a mapping from matrix index to the mask indices
# 3. do all the mumbo jumbo from the other notebook
# """

        

Predicted:  contradiction
0 0
A soccer game in a large area with 8 yellow players and 4 black players.
<mask> is a soccer game with 12 players.
entailment
====
0 1
A soccer game in a large area with 8 yellow players and 4 black players.
There <mask> a soccer game with 12 players.
entailment
====
0 2
A soccer game in a large area with 8 yellow players and 4 black players.
There is <mask> soccer game with 12 players.
contradiction
====
0 3
A soccer game in a large area with 8 yellow players and 4 black players.
There is a <mask> game with 12 players.
contradiction
====
0 4
A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer <mask> with 12 players.
entailment
====
0 5
A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer game <mask> 12 players.
entailment
====
0 6
A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer game with <mask> players.
entailment
====
0 7
A soccer game i

In [18]:
# The foil (the alternative label contrasted with the predicted "fact") is
# fixed here, overriding whatever an earlier cell assigned.
foil = 'neutral'

import nullspace_projection.src.debias as debias

fact_idx = label2index[fact]
foil_idx = label2index[foil]
print('fact:', index2label[fact_idx])
print('foil:', index2label[foil_idx])
num_classifiers = 100

# Load the classifier weights from the same directory that label2index was
# read from, so the class indices and weight rows cannot come from different
# models when DATASET / MODEL_NAME change. With the current configuration
# this is the same path as the previously hard-coded one.
classifier_w = np.load(model_path + "/w.npy")
classifier_b = np.load(model_path + "/b.npy")

# contrastive_projection = np.load(model_path + f"/projections/{num_classifiers}/contrastive_{max(fact_idx, foil_idx)}_{min(fact_idx, foil_idx)}.npy")
# ambiguous_projection = np.load(model_path + f"/projections/{num_classifiers}/ambiguous_{max(fact_idx, foil_idx)}_{min(fact_idx, foil_idx)}.npy")
# classifiers = np.load(model_path + f"/projections/{num_classifiers}/classifiers_{max(fact, foil)}_{min(fact, foil)}.npy")
# classifiers = classifiers.reshape(num_classifiers, enc_size)

# Projection built from the single direction w[fact] - w[foil], passed as a
# one-row matrix to the project's get_rowspace_projection helper.
contrastive_direction = (classifier_w[fact_idx] - classifier_w[foil_idx]).reshape(1, -1)
contrastive_projection = debias.get_rowspace_projection(contrastive_direction)


fact: contradiction
foil: neutral


In [19]:

from scipy.stats import entropy
from scipy.special import softmax

# How many of the highest-scoring masked variants to print.
TOP_K = 4

z_all = encoded_orig
z_h = encoded
# Project the unmasked and the masked representations with the contrastive projection.
z_all_row = encoded_orig @ contrastive_projection
z_h_row = encoded @ contrastive_projection

# Class probabilities of the projected unmasked input, repeated once per masked
# variant so both probability arrays have the same number of rows.
# The softmax axis is explicit so it is always taken over the class dimension.
prediction_probabilities = softmax(z_all_row @ classifier_w.T + classifier_b, axis=-1)
prediction_probabilities = np.tile(prediction_probabilities, (z_h_row.shape[0], 1))

# Class probabilities of each projected masked variant.
prediction_probabilities_del = softmax(z_h_row @ classifier_w.T + classifier_b, axis=1)

# distances = entropy(prediction_probabilities_del, prediction_probabilities, axis=1)

# Symmetric score: scipy's entropy(p, q) is the relative entropy of p w.r.t. q;
# both directions are summed.
distances = (
    entropy(prediction_probabilities_del, prediction_probabilities, axis=1)
    + entropy(prediction_probabilities, prediction_probabilities_del, axis=1)
)

# distances2 = prediction_probabilities[:, fact_idx] - prediction_probabilities[:, foil_idx]
# distances3 = prediction_probabilities_del[:, fact_idx] - prediction_probabilities_del[:, foil_idx]
# distances = distances2 - distances3

print(' '.join(ex['Sentence1']))
print(' '.join(ex['Sentence2']))

print("=========\nFarthest masks")

# Largest distance first.
highlight_rankings = np.argsort(-distances)

# Slicing instead of range(4) avoids an IndexError when fewer than TOP_K
# masked variants exist (e.g. a very short hypothesis).
for rank in highlight_rankings[:TOP_K]:
    m1_i, m2_i = mask_mapping[rank]

    masked1 = list(ex['Sentence1'])
    for k in masks1[m1_i]:
        masked1[k] = '<m>'
    masked1 = ' '.join(masked1)

    masked2 = list(ex['Sentence2'])
    for k in masks2[m2_i]:
        masked2[k] = '<m>'
    masked2 = ' '.join(masked2)

    print(masked1)
    print(masked2)
    print(np.round(distances[rank], 4))


A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer game with 12 players.
Farthest masks
A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer game with 12 <m>
0.0803
A soccer game in a large area with 8 yellow players and 4 black players.
There is a soccer game <m> 12 players.
0.0652
A soccer game in a large area with 8 yellow players and 4 black players.
<m> is a soccer game with 12 players.
0.0356
A soccer game in a large area with 8 yellow players and 4 black players.
There <m> a soccer game with 12 players.
0.0049


In [9]:
# Computes three distance arrays between the unmasked representation and each
# masked representation: dist_h_row, dist_h and dist_h_nul.
# NOTE(review): `softmax` and `entropy` are imported in a different cell of this
# notebook, so this cell depends on that cell having been run first.
encoded_orig = out['encoded_representations']


# ex['Sentence1'] = ex['Sentence1'].split()
# ex['Sentence2'] = ex['Sentence2'].split()

# The result of this expression is discarded (it is not the last expression of the cell).
encoded @ contrastive_projection
z_all = encoded_orig 
z_h = encoded 
z_all_row = encoded_orig @ contrastive_projection
z_h_row = encoded @ contrastive_projection
# z_nh_row = dev_encodings_irrelevant @ contrastive_projection
# z_all_nul = encoded_orig @ ambiguous_projection
# z_h_nul = encoded @ ambiguous_projection
# z_nh_nul = dev_encodings_irrelevant @ ambiguous_projection

# NOTE(review): `encodings` and `prediction_probabilities_contrastive` are not
# assigned anywhere in the visible notebook; unless they exist elsewhere this
# loop raises NameError on a fresh kernel. The loop variable `proj` is also
# never used in the body, so every iteration evaluates the same expression.
# The intended per-mask computation needs to be confirmed.
distances = []
for proj in z_h_row:
    prediction_probabilities = softmax(encodings @ classifier_w.T + classifier_b, axis=1)
    distances.append(entropy(prediction_probabilities_contrastive, prediction_probabilities, axis=1).mean())
#     c = distance.cdist([z_all_row], [proj], "cosine")
#     distances.append(c[0][0])
#     distances.append(np.linalg.norm(proj))
dist_h_row = np.array(distances)


# Cosine distance between the unprojected unmasked representation and each
# unprojected masked representation.
distances = []
for proj in z_h:
    c = distance.cdist([z_all], [proj], "cosine")
    distances.append(c[0][0])
#     distances.append(np.linalg.norm(proj))
dist_h = np.array(distances)

# NOTE(review): `z_h_nul` and `z_all_nul` are only assigned in the commented-out
# lines above, so this loop relies on values left over from elsewhere.
distances = []
for proj in z_h_nul:
    distances.append(distance.cdist([z_all_nul], [proj], "cosine")[0][0])
dist_h_nul = np.array(distances)

# highlight_rankings = np.argsort(dist_h_row)# - dist_h_nul)
# # highlight_rankings = np.argsort(dist_h)# - dist_h_nul)

# print("Closest masks")

# for i in range(4):
#     rank = highlight_rankings[i]
#     m1_i, m2_i = mask_mapping[rank]
    
#     masked1 = list(ex['Sentence1'])
#     for k in masks1[m1_i]:
#         masked1[k] = '<m>'
#     masked1 = ' '.join(masked1)
    
#     masked2 = list(ex['Sentence2'])
#     for k in masks2[m2_i]:
#         masked2[k] = '<m>'
#     masked2 = ' '.join(masked2)
    
#     print(masked1)
#     print(masked2)
#     print(dist_h_row[rank])# - dist_h_nul[rank])
    
print("=========\nFarthest masks")

# Largest dist_h_row first.
highlight_rankings = np.argsort(-dist_h_row)# + dist_h_nul)
# highlight_rankings = np.argsort(-dist_h)# - dist_h_nul)

# Show the top four variants. Slicing instead of range(4) avoids an IndexError
# when fewer than four masked variants exist.
for rank in highlight_rankings[:4]:
    m1_i, m2_i = mask_mapping[rank]

    # Rebuild the masked sentences from the stored mask indices.
    masked1 = list(ex['Sentence1'])
    for k in masks1[m1_i]:
        masked1[k] = '<m>'
    masked1 = ' '.join(masked1)

    masked2 = list(ex['Sentence2'])
    for k in masks2[m2_i]:
        masked2[k] = '<m>'
    masked2 = ' '.join(masked2)

    print(masked1)
    print(masked2)
    print(np.round(dist_h_row[rank], 4))# - dist_h_nul[rank])
    

Farthest masks
A photographer snaps a midair action shot of a <m> ;
The <m> shot snaps at a boarder of snow. ;
1.1229
A <m> snaps a midair action shot of a snowboarder. ;
The midair shot snaps at a <m> of snow. ;
1.0326
A photographer snaps a midair action shot of a <m> ;
The midair shot snaps <m> a boarder of snow. ;
1.0156
A photographer snaps a midair action shot of a <m> ;
The midair shot snaps at a <m> of snow. ;
0.9968


In [None]:
# input text -> tokenizer -> token indexer -> model -> loss (multiNLI)
# ===============================================
# INLP on MultiNLI -> rowspace, nullspace
# input text -> tokenizer -> token indexer -> model -> prettify -> label
# input text -> mask (text) -> tokenizer -> token indexer -> model -> rowspace/nullspace

In [77]:
# Cosine distance between the unmasked representation and each masked one,
# computed on the unprojected vectors.
encoded_orig = out['encoded_representations']

# Evaluated but not stored or displayed.
encoded @ contrastive_projection
z_all, z_h = encoded_orig, encoded

# One cdist call per masked representation; each call returns a 1x1 matrix.
distances = [distance.cdist([z_all], [proj], "cosine")[0][0] for proj in z_h]
dist_h = np.array(distances)


# highlight_rankings = np.argsort(dist_h_row)# - dist_h_nul)
# highlight_rankings = np.argsort(dist_h)# - dist_h_nul)

# print("Closest masks")

# for i in range(4):
#     rank = highlight_rankings[i]
#     m1_i, m2_i = mask_mapping[rank]
    
#     masked1 = list(ex['Sentence1'])
#     for k in masks1[m1_i]:
#         masked1[k] = '<m>'
#     masked1 = ' '.join(masked1)
    
#     masked2 = list(ex['Sentence2'])
#     for k in masks2[m2_i]:
#         masked2[k] = '<m>'
#     masked2 = ' '.join(masked2)
    
#     print(masked1)
#     print(masked2)
#     print(dist_h_row[rank])# - dist_h_nul[rank])
    
print("=========\nFarthest masks")

# Largest cosine distance first.
# highlight_rankings = np.argsort(-dist_h_row)# + dist_h_nul)
highlight_rankings = np.argsort(-dist_h)# - dist_h_nul)

# Show the top four variants. Slicing instead of range(4) avoids an IndexError
# when fewer than four masked variants exist.
for rank in highlight_rankings[:4]:
    m1_i, m2_i = mask_mapping[rank]

    # Rebuild the masked sentences from the stored mask indices.
    masked1 = list(ex['Sentence1'])
    for k in masks1[m1_i]:
        masked1[k] = '<m>'
    masked1 = ' '.join(masked1)

    masked2 = list(ex['Sentence2'])
    for k in masks2[m2_i]:
        masked2[k] = '<m>'
    masked2 = ' '.join(masked2)

    print(masked1)
    print(masked2)
    print(np.round(dist_h[rank], 4))# - dist_h_nul[rank])
    

Farthest masks
A photographer snaps a midair action shot of a snowboarder. <m>
The midair shot snaps at a boarder of snow. <m>
0.3373
A photographer snaps a midair action shot of a <m> ;
The <m> shot snaps at a boarder of snow. ;
0.3053
A <m> snaps a midair action shot of a snowboarder. ;
The midair shot snaps at a <m> of snow. ;
0.2773
A photographer snaps a midair action shot of a <m> ;
The midair shot snaps <m> a boarder of snow. ;
0.2714


In [74]:
# Saved dev-set representations and predicted class indices for the
# "esnli-normal" run, stored under the model directory.
dev_encodings_normal = np.load(model_path + "/encodings/predicted_esnli-normal_dev_encoded_representations.npy")
# dev_labels = np.load(model_path + f"/encodings/predicted_{task}_{dev_name}_labels.npy")
dev_preds_normal = np.load(model_path + "/encodings/predicted_esnli-normal_dev_predictions.npy")

# One JSON object per non-blank line.
with open('../data/esnli/dev.jsonl') as f:
    dev_data = [json.loads(line) for line in f if line.strip()]

# Window of dev examples to browse. The upper bound is clamped to what was
# actually loaded so a shorter file or prediction array cannot raise IndexError.
DEV_START, DEV_STOP = 1000, 8300
browse_stop = min(DEV_STOP, len(dev_data), len(dev_preds_normal))

# NOTE: the loop rebinds the notebook-level name `ex`, which other cells also use.
for i in range(DEV_START, browse_stop):
    ex = dev_data[i]
    gold = ex['gold_label']
    pred = index2label[dev_preds_normal[i]]

    # Only examples the model predicted as neutral are shown.
    if pred != 'neutral':
        continue

    print(ex['Sentence1'], '\n', ex['Sentence2'])
    print('Gold:', gold)
    print('Pred:', pred)
    print(ex['Sentence1_marked_1'])
    print(ex['Sentence2_marked_1'])
    # Printed as a dict literal so it can be pasted into the example cells.
    print({'Sentence1': ex['Sentence1'], 'Sentence2': ex['Sentence2'], 'gold_label': gold})
    print('====')
    
#     print(pred)
#     print(out['label'])
#     assert out['label'] == pred
    

Two people are looking at something in New York City. 
 Two people looking at New York.
Gold: neutral
Pred: neutral
Two people are looking at something in New York City.
 Two people *looking* *at* *New* *York.*
{'Sentence1': 'Two people are looking at something in New York City.', 'Sentence2': 'Two people looking at New York.', 'gold_label': 'neutral'}
====
Two people are looking at something in New York City. 
 Two people are enjoying themselves.
Gold: neutral
Pred: neutral
Two people are looking at something in New York City.
 Two people are *enjoying* *themselves.*
{'Sentence1': 'Two people are looking at something in New York City.', 'Sentence2': 'Two people are enjoying themselves.', 'gold_label': 'neutral'}
====
A man sleeping on the ground in a subway. 
 The man is sleeping next to his dog.
Gold: neutral
Pred: neutral
A man sleeping on the ground in a subway.
 The man is sleeping next to his *dog.*
{'Sentence1': 'A man sleeping on the ground in a subway.', 'Sentence2': 'The man 

Gold: neutral
Pred: neutral
A bunch of people are walking in a crowded area.
 A bunch of people are walking in a *mall.*
{'Sentence1': 'A bunch of people are walking in a crowded area.', 'Sentence2': 'A bunch of people are walking in a mall.', 'gold_label': 'neutral'}
====
A bunch of people are walking in a crowded area. 
 A bunch of people are walking in a field.
Gold: neutral
Pred: neutral
A bunch of people are walking in a crowded area.
 A bunch of people are walking in a *field.*
{'Sentence1': 'A bunch of people are walking in a crowded area.', 'Sentence2': 'A bunch of people are walking in a field.', 'gold_label': 'neutral'}
====
Four females wearing helments are riding on an ATV. 
 Four women are riding an ATV on a mountain.
Gold: neutral
Pred: neutral
Four females wearing helments are riding on an ATV.
 Four women are riding an ATV on a *mountain.*
{'Sentence1': 'Four females wearing helments are riding on an ATV.', 'Sentence2': 'Four women are riding an ATV on a mountain.', 'go

 A bike is racing a *cheetah.*
{'Sentence1': 'A motorcycle races.', 'Sentence2': 'A bike is racing a cheetah.', 'gold_label': 'neutral'}
====
Three children play on the grass under a multicolored umbrella and wearing rainbow colored outfits. 
 Children celebrate color.
Gold: neutral
Pred: neutral
Three children play on the grass under a multicolored umbrella and wearing rainbow colored outfits.
 Children *celebrate* *color.*
{'Sentence1': 'Three children play on the grass under a multicolored umbrella and wearing rainbow colored outfits.', 'Sentence2': 'Children celebrate color.', 'gold_label': 'neutral'}
====
A group of people on the street dancing to a four men playing various conga drums. 
 People are participating in a tribal dance demostration.
Gold: neutral
Pred: neutral
A group of people on the street dancing to a four men playing various conga drums.
 People are participating in a *tribal* *dance* demostration.
{'Sentence1': 'A group of people on the street dancing to a four me

 The skeleton is *scary.*
{'Sentence1': "A person in a white t-shirt, military print cap and red bandanna with a skeleton on it covering most of the person's face.", 'Sentence2': 'The skeleton is scary.', 'gold_label': 'neutral'}
====
A man dressed in yellow rescue gear walks in a field. 
 The man is looking for someone.
Gold: neutral
Pred: neutral
A man dressed in yellow rescue gear walks in a field.
 The man is *looking* *for* *someone.*
{'Sentence1': 'A man dressed in yellow rescue gear walks in a field.', 'Sentence2': 'The man is looking for someone.', 'gold_label': 'neutral'}
====
A man in blue clothing carries a yellow bucket while walking in the water near a pelican. 
 A man with a bucket is catching fish in the water.
Gold: neutral
Pred: neutral
A man in blue clothing carries a yellow bucket while walking in the water near a pelican.
 A man with a bucket is *catching* *fish* in the water.
{'Sentence1': 'A man in blue clothing carries a yellow bucket while walking in the water n

 A woman rides a bike bast the beach sunset.
Gold: neutral
Pred: neutral
A woman with a red and white piece of clothing draped over her arm rides a pink cruiser-style bicycle down a boardwalk.
 A woman rides a bike bast the beach *sunset.*
{'Sentence1': 'A woman with a red and white piece of clothing draped over her arm rides a pink cruiser-style bicycle down a boardwalk.', 'Sentence2': 'A woman rides a bike bast the beach sunset.', 'gold_label': 'neutral'}
====
Two girls smile for the camera. 
 People are saying 'cheese'.
Gold: neutral
Pred: neutral
Two girls smile for the camera.
 People are saying *'cheese'.*
{'Sentence1': 'Two girls smile for the camera.', 'Sentence2': "People are saying 'cheese'.", 'gold_label': 'neutral'}
====
Four boys are shown in a house playing and all wear t-shirts. 
 The boys are playing a game of cards together.
Gold: neutral
Pred: neutral
Four boys are shown in a house playing and all wear t-shirts.
 The boys are playing a *game* *of* *cards* together.
{'

 A doctor is doing a good job.
Gold: neutral
Pred: neutral
A doctor is performing surgery.
 A doctor *is* *doing* *a* *good* *job.*
{'Sentence1': 'A doctor is performing surgery.', 'Sentence2': 'A doctor is doing a good job.', 'gold_label': 'neutral'}
====
Two women eating lunch in a cafe smile at the photographer. 
 Two models are photographed by a paparazzi.
Gold: neutral
Pred: neutral
Two women eating lunch in a cafe smile at the photographer.
 Two models are *photographed* *by* *a* *paparazzi.*
{'Sentence1': 'Two women eating lunch in a cafe smile at the photographer.', 'Sentence2': 'Two models are photographed by a paparazzi.', 'gold_label': 'neutral'}
====
Children participating at a party while looking at the one kid in exuberance over the new toy. 
 A child just opened his smallest gift at his party
Gold: neutral
Pred: neutral
Children participating at a party while looking at the one kid in exuberance over the new toy.
 A child just opened *his* *smallest* *gift* at his party


People are walking into a store. 
 People are going shopping.
Gold: neutral
Pred: neutral
People are walking into a store.
 People are *going* *shopping.*
{'Sentence1': 'People are walking into a store.', 'Sentence2': 'People are going shopping.', 'gold_label': 'neutral'}
====
A woman in a blue shirt and black workout pants practicing martial arts in front of a house. 
 A woman has a light blue shirt.
Gold: neutral
Pred: neutral
A woman in a blue shirt and black workout pants practicing martial arts in front of a house.
 A woman has a *light* *blue* shirt.
{'Sentence1': 'A woman in a blue shirt and black workout pants practicing martial arts in front of a house.', 'Sentence2': 'A woman has a light blue shirt.', 'gold_label': 'neutral'}
====
A large tan dog is running with a green item in its mouth 
 A large tan dog is running with a patch of grass in its mouth.
Gold: neutral
Pred: neutral
A large tan dog is running with a green item in its mouth
 A large tan dog is running with a *patc

A woman with big green glasses writing at a desk outside while looking confused. 
 A women is is confused about her writing.
Gold: neutral
Pred: neutral
A woman with big green glasses writing at a desk outside while looking confused.
 A women is is confused *about* *her* *writing.*
{'Sentence1': 'A woman with big green glasses writing at a desk outside while looking confused.', 'Sentence2': 'A women is is confused about her writing.', 'gold_label': 'neutral'}
====
A woman with big green glasses writing at a desk outside while looking confused. 
 A women is confused about something she sees outside.
Gold: neutral
Pred: neutral
A woman with big green glasses writing at a desk outside while looking confused.
 A women is confused about something she *sees* *outside.*
{'Sentence1': 'A woman with big green glasses writing at a desk outside while looking confused.', 'Sentence2': 'A women is confused about something she sees outside.', 'gold_label': 'neutral'}
====
A man is flexing his biceps 

 The young dancers performing are *indian.*
{'Sentence1': 'Beautiful young dancer performing dressed in white and silver performs an acrobatic, ballet, hula-hoop, dance routine while 4 female observers look on.', 'Sentence2': 'The young dancers performing are indian.', 'gold_label': 'neutral'}
====
Nine women in blue and purple dresses and one man wearing a purple shirt and black pants, clap while a man dressed in black dances. 
 There is a group watching a street performance.
Gold: neutral
Pred: neutral
Nine women in blue and purple dresses and one man wearing a purple shirt and black pants, clap while a man dressed in black dances.
 There is a group watching a *street* *performance.*
{'Sentence1': 'Nine women in blue and purple dresses and one man wearing a purple shirt and black pants, clap while a man dressed in black dances.', 'Sentence2': 'There is a group watching a street performance.', 'gold_label': 'neutral'}
====
brown dogs are running down a trail 
 Brown dogs are running o

Pred: neutral
a crowd gather together, all looking in the same direction.
 A *large* crowd is *curious* *about* *something.*
{'Sentence1': 'a crowd gather together, all looking in the same direction.', 'Sentence2': 'A large crowd is curious about something.', 'gold_label': 'neutral'}
====
a crowd gather together, all looking in the same direction. 
 A large crowd are preparing to watch the parade.
Gold: neutral
Pred: neutral
a crowd gather together, all looking in the same direction.
 A large crowd are *preparing* to *watch* the *parade.*
{'Sentence1': 'a crowd gather together, all looking in the same direction.', 'Sentence2': 'A large crowd are preparing to watch the parade.', 'gold_label': 'neutral'}
====
Two dogs run together across a grassy lawn. 
 The dogs are related.
Gold: neutral
Pred: neutral
Two dogs run together across a grassy lawn.
 The dogs are *related.*
{'Sentence1': 'Two dogs run together across a grassy lawn.', 'Sentence2': 'The dogs are related.', 'gold_label': 'neut

Gold: neutral
Pred: neutral
A child is jumping off a platform into a pool.
 The child is taking a *diving* *class.*
{'Sentence1': 'A child is jumping off a platform into a pool.', 'Sentence2': 'The child is taking a diving class.', 'gold_label': 'neutral'}
====
A man with a gray beard and an apron smiles. 
 A grandfather smiling while baking
Gold: neutral
Pred: neutral
A man with a gray beard and an apron smiles.
 A *grandfather* smiling while *baking*
{'Sentence1': 'A man with a gray beard and an apron smiles.', 'Sentence2': 'A grandfather smiling while baking', 'gold_label': 'neutral'}
====
Young friends in a park relaxing and enjoying conversation with each other. 
 They are gossiping about each other.
Gold: neutral
Pred: neutral
Young friends in a park relaxing and enjoying conversation with each other.
 They are *gossiping* about each other.
{'Sentence1': 'Young friends in a park relaxing and enjoying conversation with each other.', 'Sentence2': 'They are gossiping about each othe

A large competition sailboat is surrounded by three small boats in the middle of the water.
 A large boat with a *green* *flag* *on* *the* *back* is *towering* above three smaller boats with the *same* *flag.*
{'Sentence1': 'A large competition sailboat is surrounded by three small boats in the middle of the water.', 'Sentence2': 'A large boat with a green flag on the back is towering above three smaller boats with the same flag.', 'gold_label': 'neutral'}
====
Two black dogs run along the green grass. 
 Two dogs run to catch a bone.
Gold: neutral
Pred: neutral
Two black dogs run along the green grass.
 Two dogs run to *catch* *a* *bone.*
{'Sentence1': 'Two black dogs run along the green grass.', 'Sentence2': 'Two dogs run to catch a bone.', 'gold_label': 'neutral'}
====
Smiling woman in yellow shirt gesturing. 
 the woman was gesturing to her son
Gold: neutral
Pred: neutral
Smiling woman in yellow shirt gesturing.
 the woman was *gesturing* *to* *her* *son*
{'Sentence1': 'Smiling woma

Gold: neutral
Pred: neutral
Two girls are in line about to order at a Mexican eatery.
 Two *sad* girls are in line about to order at a Mexican eatery.
{'Sentence1': 'Two girls are in line about to order at a Mexican eatery.', 'Sentence2': 'Two sad girls are in line about to order at a Mexican eatery.', 'gold_label': 'neutral'}
====
Rugby player in blue dives to tackle the ball carrier in white. 
 A rugby player tackles an opponent whose about to win the game.
Gold: neutral
Pred: neutral
Rugby player in blue dives to tackle the ball carrier in white.
 A rugby player tackles an opponent whose about to *win* the game.
{'Sentence1': 'Rugby player in blue dives to tackle the ball carrier in white.', 'Sentence2': 'A rugby player tackles an opponent whose about to win the game.', 'gold_label': 'neutral'}
====
A young man is standing staring at something. 
 A young man is looking intently at a young woman.
Gold: neutral
Pred: neutral
A young man is standing staring at something.
 A young man i

In [72]:
# Hand-written probe example: run one premise/hypothesis pair through the
# predictor and keep its predicted label (`fact`) and encoding for later cells.
ex = {
    'Sentence1': 'A photographer snaps a midair action shot of a snowboarder.',
    'Sentence2': 'The midair shot snaps at a boarder of snow.',
    'gold_label': 'contradiction',
}

# Append a separate " ." token to both sentences before predicting.
ex['Sentence1'] += " ."
ex['Sentence2'] += " ."

out = predictor.predict_json(ex)

# The predicted label is the "fact" that later cells contrast against a foil.
fact = out['label']
encoded_orig = out['encoded_representations']
print('Predicted: ', fact)

Predicted:  neutral


In [82]:
import random
def rand_parts(seq, n, l):
    """Draw ``n`` random, non-overlapping slices of length ``l`` from ``seq``.

    The slices are returned in the order they occur in ``seq``.

    Args:
        seq: Sliceable sequence to draw from.
        n: Number of slices to draw.
        l: Length of every slice.

    Returns:
        A list of ``n`` slices of ``seq``.

    Raises:
        ValueError: Raised by ``random.sample`` when ``seq`` is too short to
            hold ``n`` non-overlapping slices of length ``l``.
    """
    # Sample n distinct start slots from a "compressed" index space in which
    # every slice occupies a single position.
    compressed_size = len(seq) - (l - 1) * n
    starts = sorted(random.sample(list(range(compressed_size)), n))

    # Expand back: the k-th chosen slot is pushed right by the extra (l - 1)
    # elements consumed by each of the k slices before it.
    return [
        seq[slot + k * (l - 1):slot + k * (l - 1) + l]
        for k, slot in enumerate(starts)
    ]
# Smoke test: draw a single random window of length 4 from a 12-element list.
rand_parts([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 1, 4)[0]

[2, 3, 4, 5]

In [14]:
# Cached dev-set encodings and predictions for the "esnli-normal" run.
dev_encodings_normal = np.load(f"{model_path}/encodings/predicted_esnli-normal_dev_encoded_representations.npy")
dev_preds_normal = np.load(f"{model_path}/encodings/predicted_esnli-normal_dev_predictions.npy")

# `fact` is the label predicted in an earlier cell; the foil is fixed here.
foil = 'contradiction'
fact_idx = label2index[fact]
foil_idx = label2index[foil]
print('fact:', index2label[fact_idx])
print('foil:', index2label[foil_idx])

# Only referenced by the precomputed-projection alternative noted below.
num_classifiers = 100

# One projection per ordered (fact, foil) pair of distinct classes, built from
# the difference of the two classes' rows of `classifier_w`.
# NOTE(review): `debias` and `classifier_w` are not defined in this cell; they
# must come from another cell - confirm they exist on a fresh kernel.
# Alternative kept for reference - load precomputed projections instead:
#   np.load(model_path + f"/projections/{num_classifiers}/contrastive_{max(fact_class, foil_class)}_{min(fact_class, foil_class)}.npy")
projections = {
    (fact_class, foil_class): debias.get_rowspace_projection(
        (classifier_w[fact_class] - classifier_w[foil_class]).reshape(1, -1)
    )
    for fact_class in index2label
    for foil_class in index2label
    if fact_class != foil_class
}
        

fact: neutral
foil: contradiction


In [27]:
# Contrastive highlight evaluation on a slice of the e-SNLI dev set.
#
# For every example, each (premise word, hypothesis word) pair is masked in
# turn, the masked pair is re-encoded, and - for every foil class - the mask
# pair whose projected prediction distribution diverges most from the
# unmasked one is reported.
#
# NOTE(review): this cell relies on names defined in other cells
# (`predictor`, `projections`, `classifier_w`, `all_consecutive_masks2`,
# `softmax`, `entropy`, `label2index`, `index2label`) - confirm they are all
# defined earlier when running on a fresh kernel.
dev_encodings_normal = np.load(model_path + f"/encodings/predicted_esnli-normal_dev_encoded_representations.npy")
dev_preds_normal = np.load(model_path + f"/encodings/predicted_esnli-normal_dev_predictions.npy")

with open('../data/esnli/dev.jsonl') as f:
    dev_data = [json.loads(line) for line in f if line.strip()]

# Half-open range of dev-set indices to evaluate.
EVAL_START, EVAL_END = 2100, 2200


def _mask_words(words, positions, mask_token):
    """Return a copy of `words` with every index in `positions` replaced by `mask_token`."""
    masked = list(words)
    for position in positions:
        masked[position] = mask_token
    return masked


count_neutral_pair = 0
count_nonneutral_pair = 0
num_pairs = 0  # number of (example, foil) pairs scored; denominator of the final ratio

for ex_idx in range(EVAL_START, EVAL_END):
    ex = dev_data[ex_idx]
    gold = ex['gold_label']
    pred = index2label[dev_preds_normal[ex_idx]]

    print(ex['Sentence1'], '\n', ex['Sentence2'])
    print('Gold:', gold)
    print('Pred:', pred)
    print(ex['Sentence1_marked_1'])
    print(ex['Sentence2_marked_1'])
    print({'Sentence1': ex['Sentence1'], 'Sentence2': ex['Sentence2'], 'gold_label': gold})
    print()

    # Append a label-dependent marker token to both sentences: " ;" for
    # entailment/contradiction gold labels, " ." for everything else.
    marker = " ;" if gold in ('entailment', 'contradiction') else " ."
    ex['Sentence1'] = ex['Sentence1'] + marker
    ex['Sentence2'] = ex['Sentence2'] + marker

    out = predictor.predict_json(ex)
    encoded_orig = out['encoded_representations']

    fact = out['label']
    print('Predicted: ', fact)
    # The live prediction must agree with the cached dev-set prediction.
    assert fact == pred

    # From here on the sentences are handled as whitespace-split word lists.
    ex['Sentence1'] = ex['Sentence1'].split()
    ex['Sentence2'] = ex['Sentence2'].split()

    masks1 = list(all_consecutive_masks2(ex['Sentence1'], max_length=1))
    masks2 = list(all_consecutive_masks2(ex['Sentence2'], max_length=1))

    # Encode every combination of one premise mask and one hypothesis mask.
    encoded = []
    mask_mapping = []  # row of `encoded` -> (premise mask index, hypothesis mask index)
    for m1_i, m1 in enumerate(masks1):
        masked1 = ' '.join(_mask_words(ex['Sentence1'], m1, '<mask>'))
        for m2_i, m2 in enumerate(masks2):
            masked2 = ' '.join(_mask_words(ex['Sentence2'], m2, '<mask>'))
            masked_out = predictor.predict_json({"Sentence1": masked1, "Sentence2": masked2})
            encoded.append(masked_out['encoded_representations'])
            mask_mapping.append((m1_i, m2_i))
    encoded = np.array(encoded)

    fact_class = label2index[fact]

    for foil_class in index2label:
        if foil_class == fact_class:
            continue
        num_pairs += 1
        print(f"Projecting to: fact = {fact}, foil = {index2label[foil_class]}")
        contrastive_projection = projections[(fact_class, foil_class)]

        # Project the unmasked and the masked encodings with the fact-vs-foil projection.
        z_all_row = encoded_orig @ contrastive_projection
        z_h_row = encoded @ contrastive_projection

        # Class distributions from the projected encodings (classifier weights
        # only; no bias term is added), with the unmasked one repeated per row.
        prediction_probabilities = softmax(z_all_row @ classifier_w.T)
        prediction_probabilities = np.tile(prediction_probabilities, (z_h_row.shape[0], 1))
        prediction_probabilities_del = softmax(z_h_row @ classifier_w.T, axis=1)

        # KL divergence of each masked distribution from the unmasked one;
        # larger means masking those words changed the projected prediction more.
        distances = entropy(prediction_probabilities_del, prediction_probabilities, axis=1)
        highlight_rankings = np.argsort(-distances)

        # Report only the top-ranked mask pair.
        rank = highlight_rankings[0]
        m1_i, m2_i = mask_mapping[rank]
        masked1 = _mask_words(ex['Sentence1'], masks1[m1_i], '<m>')
        masked2 = _mask_words(ex['Sentence2'], masks2[m2_i], '<m>')

        print(masks1[m1_i], masks2[m2_i])
        print(len(masked1), len(masked2))

        # Does the top-ranked mask fall on the last word (the appended marker)?
        last_in_1 = masks1[m1_i][0] + 1 == len(masked1)
        last_in_2 = masks2[m2_i][0] + 1 == len(masked2)
        if fact_class == label2index['neutral'] or foil_class == label2index['neutral']:
            # Pairs involving neutral pass when both masks hit the last word.
            if last_in_1 and last_in_2:
                count_neutral_pair += 1
                print("Passed neutral!")
        elif not (last_in_1 and last_in_2):
            # Other pairs pass when at least one mask is not on the last word.
            count_nonneutral_pair += 1
            print("Passed non-neutral!")

        print(' '.join(masked1))
        print(' '.join(masked2))
        print(np.round(distances[rank], 4))

        print("=======")
        print("=======")

print(count_neutral_pair)
print(count_nonneutral_pair)
# Fraction of scored (example, foil) pairs that passed either check.
print((count_neutral_pair + count_nonneutral_pair) / num_pairs)

A woman with blond-hair and blue boots walks among a crowd of other pedestrians on a city street, looking up at something. 
 A woman is sitting in a cafe eating lunch.
Gold: contradiction
Pred: contradiction
 A woman with blond-hair and blue boots walks among a crowd of other pedestrians on a city street, *looking* *up* at something.
A woman is sitting in a cafe *eating* lunch.
{'Sentence1': 'A woman with blond-hair and blue boots walks among a crowd of other pedestrians on a city street, looking up at something.', 'Sentence2': 'A woman is sitting in a cafe eating lunch.', 'gold_label': 'contradiction'}

Predicted:  contradiction
Projecting to: fact = contradiction, foil = entailment
[13] [1]
23 10
Passed non-neutral!
A woman with blond-hair and blue boots walks among a crowd of other <m> on a city street, looking up at something. ;
A <m> is sitting in a cafe eating lunch. ;
0.0919
Projecting to: fact = contradiction, foil = neutral
[22] [9]
23 10
Passed neutral!
A woman with blond-hai

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[16] [9]
17 10
Passed neutral!
Two bikers a riding along a street in a marathon as the passersby clap for them. <m>
Two bikers compete in a marathon for a charity. <m>
3.3749
Projecting to: fact = neutral, foil = contradiction
[16] [9]
17 10
Passed neutral!
Two bikers a riding along a street in a marathon as the passersby clap for them. <m>
Two bikers compete in a marathon for a charity. <m>
2.8498
A baby crying about getting their diaper changed. 
 the baby is tired
Gold: neutral
Pred: neutral
A baby crying about getting their diaper changed.
 the baby is *tired*
{'Sentence1': 'A baby crying about getting their diaper changed.', 'Sentence2': 'the baby is tired', 'gold_label': 'neutral'}

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[8] [4]
9 5
Passed neutral!
A baby crying about getting their diaper changed. <m>
the baby is tired <m>
4.325
Projecting to: fact = neutral, foil = contradiction
[8

Projecting to: fact = neutral, foil = entailment
[10] [9]
11 10
Passed neutral!
A woman is tugging on a white sheet and laughing <m>
The woman is changing the sheets on her bed. <m>
4.3081
Projecting to: fact = neutral, foil = contradiction
[10] [9]
11 10
Passed neutral!
A woman is tugging on a white sheet and laughing <m>
The woman is changing the sheets on her bed. <m>
3.3533
A woman is tugging on a white sheet and laughing 
 The woman is trying on a new dress.
Gold: contradiction
Pred: contradiction
A woman is *tugging* *on* *a* *white* *sheet* and laughing
The woman is *trying* *on* *a* *new* *dress.*
{'Sentence1': 'A woman is tugging on a white sheet and laughing', 'Sentence2': 'The woman is trying on a new dress.', 'gold_label': 'contradiction'}

Predicted:  contradiction
Projecting to: fact = contradiction, foil = entailment
[7] [8]
11 9
Passed non-neutral!
A woman is tugging on a white <m> and laughing ;
The woman is trying on a new dress. <m>
0.2978
Projecting to: fact = contr

Predicted:  contradiction
Projecting to: fact = contradiction, foil = entailment
[5] [7]
6 8
People shopping at the marketplace. <m>
A woman is swimming in the ocean. <m>
0.0949
Projecting to: fact = contradiction, foil = neutral
[5] [7]
6 8
Passed neutral!
People shopping at the marketplace. <m>
A woman is swimming in the ocean. <m>
1.1359
Two people are sitting in a station. 
 A couple of people are inside and not standing.
Gold: entailment
Pred: entailment
 *Two* *people* are *sitting* in a *station.*
 A *couple* of *people* are *inside* and *not* *standing.*
{'Sentence1': 'Two people are sitting in a station.', 'Sentence2': 'A couple of people are inside and not standing.', 'gold_label': 'entailment'}

Predicted:  entailment
Projecting to: fact = entailment, foil = contradiction
[0] [7]
8 10
Passed non-neutral!
<m> people are sitting in a station. ;
A couple of people are inside and <m> standing. ;
0.5067
Projecting to: fact = entailment, foil = neutral
[7] [9]
8 10
Passed neutral!

Predicted:  contradiction
Projecting to: fact = contradiction, foil = entailment
[8] [9]
17 11
Passed non-neutral!
these two ladies are reading a sign while <m> themselves from the sun with their umbrellas. ;
Two women use umbrellas keep themselves dry from the <m> ;
1.6081
Projecting to: fact = contradiction, foil = neutral
[8] [9]
17 11
these two ladies are reading a sign while <m> themselves from the sun with their umbrellas. ;
Two women use umbrellas keep themselves dry from the <m> ;
1.0886
Two Asian women wear sandals, long silk robes and hold umbrellas. 
 Women in Japan take shelter from the rain
Gold: neutral
Pred: neutral
Two Asian women wear sandals, long silk robes and hold umbrellas.
 Women in *Japan* take shelter from the *rain*
{'Sentence1': 'Two Asian women wear sandals, long silk robes and hold umbrellas.', 'Sentence2': 'Women in Japan take shelter from the rain', 'gold_label': 'neutral'}

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[11] [8]
12 

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[15] [14]
16 15
Passed neutral!
A man is working on a computer while two people sit and talk in front. <m>
A man works on a computer while two people sit and chat about business. <m>
4.9416
Projecting to: fact = neutral, foil = contradiction
[15] [14]
16 15
Passed neutral!
A man is working on a computer while two people sit and talk in front. <m>
A man works on a computer while two people sit and chat about business. <m>
2.8965
A man is working on a computer while two people sit and talk in front. 
 A bear is working on a computer while two people sit and gawk.
Gold: contradiction
Pred: contradiction
A *man* is working on a computer while two people sit and *talk* in front.
A *bear* is working on a computer while two people sit and *gawk.*
{'Sentence1': 'A man is working on a computer while two people sit and talk in front.', 'Sentence2': 'A bear is working on a computer while two people sit and gawk.', 'gold_label': 

Predicted:  entailment
Projecting to: fact = entailment, foil = contradiction
[5] [7]
10 9
Passed non-neutral!
A woman playing with her <m> while taking pictures. ;
A woman is spending time with her <m> ;
0.1814
Projecting to: fact = entailment, foil = neutral
[9] [8]
10 9
Passed neutral!
A woman playing with her cats while taking pictures. <m>
A woman is spending time with her cats. <m>
0.3792
A woman playing with her cats while taking pictures. 
 A woman is encouraging two cats to interact with one another.
Gold: neutral
Pred: neutral
A woman playing with her cats while taking pictures.
 A woman is *encouraging* two cats to *interact* with one another.
{'Sentence1': 'A woman playing with her cats while taking pictures.', 'Sentence2': 'A woman is encouraging two cats to interact with one another.', 'gold_label': 'neutral'}

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[9] [11]
10 12
Passed neutral!
A woman playing with her cats while taking pictures. <m>
A woma

Projecting to: fact = entailment, foil = contradiction
[20] [3]
21 8
Passed non-neutral!
A young woman holding a newborn in her arms while still wearing her hospital bracelet laying on a hospital bed. <m>
A young woman <m> holding a newborn. ;
0.232
Projecting to: fact = entailment, foil = neutral
[20] [7]
21 8
Passed neutral!
A young woman holding a newborn in her arms while still wearing her hospital bracelet laying on a hospital bed. <m>
A young woman is holding a newborn. <m>
0.5381
Three young men are watching a tennis match on a large screen outdoors. 
 Three young men watching a tennis match on a screen outdoors, because their brother is playing.
Gold: neutral
Pred: neutral
Three young men are watching a tennis match on a large screen outdoors.
 Three young men watching a tennis match on a screen outdoors, because their *brother* *is* *playing.*
{'Sentence1': 'Three young men are watching a tennis match on a large screen outdoors.', 'Sentence2': 'Three young men watching a tenni

Predicted:  neutral
Projecting to: fact = neutral, foil = entailment
[28] [7]
29 8
Passed neutral!
A maroon car with many decals and signs topped with a gray bundle tied to the roof near the drivers door is a man leaning on the car. <m>
The roof is high in the air. <m>
4.3726
Projecting to: fact = neutral, foil = contradiction
[28] [7]
29 8
Passed neutral!
A maroon car with many decals and signs topped with a gray bundle tied to the roof near the drivers door is a man leaning on the car. <m>
The roof is high in the air. <m>
4.1552
A man is taking pictures hanging outside of a red rally car. 
 The man is alone in the bathroom.
Gold: contradiction
Pred: contradiction
A man is taking pictures *hanging* *outside* *of* *a* red rally *car.*
The man is alone *in* *the* *bathroom.*
{'Sentence1': 'A man is taking pictures hanging outside of a red rally car.', 'Sentence2': 'The man is alone in the bathroom.', 'gold_label': 'contradiction'}

Predicted:  contradiction
Projecting to: fact = contrad

Projecting to: fact = entailment, foil = contradiction
[12] [3]
21 13
Passed non-neutral!
The woman in the blue skirt is sleeping on a cardboard box <m> a picture of Mary and baby Jesus. ;
A homeless woman <m> under the protective gaze of the Blessed Mother. ;
0.0574
Projecting to: fact = entailment, foil = neutral
[20] [12]
21 13
Passed neutral!
The woman in the blue skirt is sleeping on a cardboard box under a picture of Mary and baby Jesus. <m>
A homeless woman sleeps under the protective gaze of the Blessed Mother. <m>
0.9154
The woman in the blue skirt is sleeping on a cardboard box under a picture of Mary and baby Jesus. 
 A person sleeping on a cardboard box below a picture of religious icons.
Gold: entailment
Pred: entailment
 The *woman* in the blue skirt is *sleeping* on a *cardboard* *box* under a picture of *Mary* *and* *baby* *Jesus.*
 A *person* *sleeping* on a *cardboard* *box* below a picture of *religious* *icons.*
{'Sentence1': 'The woman in the blue skirt is sleeping

Projecting to: fact = contradiction, foil = entailment
[4] [6]
11 9
Passed non-neutral!
Two men with fake <m> are dressed like archaeologist explorers. ;
Clean shaven youths are wearing formal <m> attire. ;
0.415
Projecting to: fact = contradiction, foil = neutral
[10] [8]
11 9
Passed neutral!
Two men with fake mustaches are dressed like archaeologist explorers. <m>
Clean shaven youths are wearing formal prom attire. <m>
0.3244
Two men with fake mustaches are dressed like archaeologist explorers. 
 People are wearing disguises.
Gold: entailment
Pred: entailment
 *Two* *men* with *fake* *mustaches* are dressed like archaeologist explorers.
 *People* are wearing *disguises.*
{'Sentence1': 'Two men with fake mustaches are dressed like archaeologist explorers.', 'Sentence2': 'People are wearing disguises.', 'gold_label': 'entailment'}

Predicted:  entailment
Projecting to: fact = entailment, foil = contradiction
[10] [3]
11 5
Passed non-neutral!
Two men with fake mustaches are dressed like

In [2]:
# Choose which original / negated-hypothesis file pair to compare.
# Exactly one `path` / `path2` pair should be active.
# path = "../data/esnli/dev_sample_entailment_joined.jsonl"
# path2 = "../data/esnli/dev_sample_entailment_negation_h_joined.jsonl"
path = "../data/esnli/dev_sample_neutral_joined.jsonl"
path2 = "../data/esnli/dev_sample_neutral_negation_h_joined.jsonl"
# path = "../data/esnli/dev_small_negation_h.jsonl"
# path2 = "../data/esnli/dev_small_negation_h_counterfactual.jsonl"
import ast


def _read_literal_lines(file_path):
    """Parse every non-blank line of `file_path` with `ast.literal_eval`.

    The file is closed as soon as it has been read, and blank lines are
    skipped instead of crashing the parser.
    NOTE(review): lines are parsed as Python literals rather than JSON,
    presumably because the files hold dict reprs - confirm.
    """
    with open(file_path) as handle:
        return [ast.literal_eval(line.strip()) for line in handle if line.strip()]


dneg = _read_literal_lines(path)   # original examples
cneg = _read_literal_lines(path2)  # revised (negated-hypothesis) counterparts

In [4]:

from scipy.stats import entropy
from scipy.special import softmax

def dkl(p, q):
    """Row-wise symmetric KL divergence, KL(p || q) + KL(q || p).

    Args:
        p: 2-D array; each row is a (possibly unnormalized) distribution.
        q: 2-D array with the same shape as ``p``.

    Returns:
        1-D array with one divergence value per row.
    """
    # Normalize each row so that it sums to one.
    p_norm = p / p.sum(axis=1, keepdims=True)
    q_norm = q / q.sum(axis=1, keepdims=True)
    forward = entropy(p_norm, q_norm, axis=1)
    backward = entropy(q_norm, p_norm, axis=1)
    return forward + backward

# Compare predictions on each original example with its revised counterpart
# and report the mean change in the class-1 probability.
# NOTE(review): index 1 is presumably "contradiction" (the printout labels
# indices 0 and 2 as "e" and "n") - confirm against label2index.
entailment_counterfactual_encodings = []
entailment_encodings = []
avg = 0

assert len(cneg)==len(dneg)
print("Numer of examples: ", len(cneg))

count = 0

for ex, ex2 in zip(dneg, cneg):
    print(ex)
    out = predictor.predict_json(ex)
    out2 = predictor.predict_json(ex2)
    count += 1

    print('Revised sentence:', ex2['Sentence2'])
    print(out['label'], '-->', out2['label'])

    probs_orig = out['probs']
    probs_revised = out2['probs']
    # Accumulate how far the class-1 probability dropped after the revision.
    avg += probs_orig[1] - probs_revised[1]
    print(f"{probs_orig[1] * 100:.2f}% --> {probs_revised[1] * 100:.2f}%. (e: {probs_revised[0] * 100:.2f}%, n:{probs_revised[2] * 100:.2f}%)")

    # Keep both encodings for later analysis.
    entailment_encodings.append(out['encoded_representations'])
    entailment_counterfactual_encodings.append(out2['encoded_representations'])

# Turn the running sum into a mean over the processed pairs.
avg /= count
print(f"{avg*100:.2f}%")
print(count)


Numer of examples:  88
{'Sentence1': 'A small child with a blue hood is looking out the window.', 'Sentence2': 'A child is excited', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: A child is not bored
neutral --> neutral
0.17% --> 27.24%. (e: 8.99%, n:63.77%)
{'Sentence1': 'A woman is holding one young children with another standing next to her while a man is sitting by a small campfire.', 'Sentence2': 'A woman is holding one young children with another standing happily next to her', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: A woman is holding one young children with another standing happily not far from her
neutral --> neutral
0.05% --> 0.06%. (e: 1.38%, n:98.56%)
{'Sentence1': 'older men talking and standing around.', 'Sentence2': 'The older men are talking about their wives.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: None of the older men are not talking about their wives.
neutral --> neutral
0.15% --> 1.37%. 

Revised sentence: The boy is not missing a skateboard trick
neutral --> neutral
1.12% --> 3.23%. (e: 9.23%, n:87.54%)
{'Sentence1': 'Bald man in white and black shorts, punching man with black hair, and red, orange and blue shorts.', 'Sentence2': 'The men are fighting over a girl.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: The men are fighting to not lose a girl to the opponent.
neutral --> neutral
0.32% --> 4.50%. (e: 0.17%, n:95.32%)
{'Sentence1': 'A group of people gathered to watch fireworks.', 'Sentence2': 'The group gathered inside the house.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: The group gathered not outside the house.
neutral --> neutral
2.52% --> 13.57%. (e: 1.45%, n:84.99%)
{'Sentence1': 'A woman in a white t-shirt points at a geyser.', 'Sentence2': 'The women points her daughter to the geyser.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: The women is not misdirecting her daughter away from t

Revised sentence: Photographers trying to get the perfect photo before the daylight fades, not in the nightlight
neutral --> neutral
0.06% --> 1.10%. (e: 0.28%, n:98.61%)
{'Sentence1': 'A brown dog is about to catch a treat.', 'Sentence2': 'A brown dog receives a rawhide treat.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: A brown dog receives a rawhide, not a wet treat.
neutral --> neutral
0.20% --> 4.29%. (e: 0.12%, n:95.58%)
{'Sentence1': 'Two older men standing in front of a store of sorts.', 'Sentence2': 'two older men prepare to enter a store', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: two older men prepare to enter a store, they are not unprepared
neutral --> neutral
0.96% --> 2.04%. (e: 4.01%, n:93.95%)
{'Sentence1': 'Here we have a guy using a pressure washer on the sidewalk.', 'Sentence2': 'A guy is washing bird poop off of the sidewalk.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: A guy is washing bir

Revised sentence: The mother and girl are not not posing for a picture.
neutral --> neutral
0.08% --> 4.70%. (e: 0.34%, n:94.96%)
{'Sentence1': 'Man standing on a tricycle riding in front of a crowd.', 'Sentence2': 'A man is performing tricks on a bike.', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: A man is performing tricks on a bike, not sitting still
neutral --> neutral
0.54% --> 1.22%. (e: 15.73%, n:83.05%)
{'Sentence1': 'A little asian girl in pigtails is building with wood and painting with colored markers.', 'Sentence2': 'a young girl is building and painting for her mother', 'gold_label': 'neutral', 'prediction': 'neutral'}
Revised sentence: a young girl is building and painting for her mother, not sitting still
neutral --> neutral
0.07% --> 0.15%. (e: 0.14%, n:99.71%)
{'Sentence1': 'Two people sit on a bench leaned against a building with writing on it.', 'Sentence2': 'Some people are at a historical park.', 'gold_label': 'neutral', 'prediction': 'neutr

In [5]:

# Choose which original / negated-hypothesis file pair to compare.
# Exactly one `path` / `path2` pair should be active.
path = "../data/esnli/dev_sample_entailment_joined.jsonl"
path2 = "../data/esnli/dev_sample_entailment_negation_h_joined.jsonl"
# path = "../data/esnli/dev_sample_neutral_joined.jsonl"
# path2 = "../data/esnli/dev_sample_neutral_negation_h_joined.jsonl"
# path = "../data/esnli/dev_small_negation_h.jsonl"
# path2 = "../data/esnli/dev_small_negation_h_counterfactual.jsonl"
import ast


def _read_literal_lines(file_path):
    """Parse every non-blank line of `file_path` with `ast.literal_eval`.

    The file is closed as soon as it has been read, and blank lines are
    skipped instead of crashing the parser.
    NOTE(review): lines are parsed as Python literals rather than JSON,
    presumably because the files hold dict reprs - confirm.
    """
    with open(file_path) as handle:
        return [ast.literal_eval(line.strip()) for line in handle if line.strip()]


dneg = _read_literal_lines(path)   # original examples
cneg = _read_literal_lines(path2)  # revised (negated-hypothesis) counterparts

from scipy.stats import entropy
from scipy.special import softmax

def dkl(p,q):
    """Symmetrised KL divergence between corresponding rows of ``p`` and ``q``.

    Each row is first rescaled to sum to 1, then KL(p || q) + KL(q || p) is
    computed per row with ``scipy.stats.entropy``.

    Args:
        p: 2-D array; every row holds the non-negative weights of one distribution.
        q: 2-D array with the same shape as ``p``.

    Returns:
        1-D array with one divergence value per row.
    """
    # keepdims keeps the row totals as a column so the division broadcasts per row.
    p_rows = p / p.sum(axis=1, keepdims=True)
    q_rows = q / q.sum(axis=1, keepdims=True)
    forward = entropy(p_rows, q_rows, axis=1)
    backward = entropy(q_rows, p_rows, axis=1)
    return forward + backward

# Positions inside `probs`. They follow the `index2label` mapping displayed in
# this notebook ({0: 'entailment', 1: 'contradiction', 2: 'neutral'}).
# NOTE(review): assumes the predictor emits `probs` in that same order — confirm.
ENTAIL_IDX, CONTRA_IDX, NEUTRAL_IDX = 0, 1, 2

# Running sum of (original - revised) contradiction probability; it is turned
# into a mean once the loop has finished.
avg = 0

# Per-example 'encoded_representations' for the original and the revised
# inputs; the projection cells further down consume these.
entailment_encodings = []
entailment_counterfactual_encodings = []

# The two files are compared position by position, so they must line up.
assert len(cneg)==len(dneg)
print("Numer of examples: ", len(cneg))

# Number of example pairs that actually contributed to `avg`.
count = 0

for ex,ex2 in zip(dneg,cneg):
    print(ex)
    out = predictor.predict_json(ex)
    out2 = predictor.predict_json(ex2)
    # Optional filter: only keep pairs whose revised input is predicted as
    # contradiction. This is why `count` is tracked separately from len(dneg).
#     if out2['label'] != 'contradiction':
#         continue
    count += 1
    print('Revised sentence:', ex2['Sentence2'])
    print(out['label'], '-->', out2['label'])
    avg += out['probs'][CONTRA_IDX] - out2['probs'][CONTRA_IDX]
    # Alternative metric: symmetric KL between the two probability vectors.
#     dkl_probs = dkl(np.array([out['probs']]), np.array([out2['probs']]))
#     avg += dkl_probs
#     print(dkl_probs)
    print(f"{out['probs'][CONTRA_IDX] * 100:.2f}% --> {out2['probs'][CONTRA_IDX] * 100:.2f}%. (e: {out2['probs'][ENTAIL_IDX] * 100:.2f}%, n:{out2['probs'][NEUTRAL_IDX] * 100:.2f}%)")

    entailment_encodings.append(out['encoded_representations'])
    entailment_counterfactual_encodings.append(out2['encoded_representations'])
# avg = avg[0]  # only needed with the dkl alternative, which yields 1-element arrays

# Guard the mean: with empty inputs (or a filter that rejects every pair)
# `count` is 0 and a plain division would raise ZeroDivisionError.
avg = avg / count if count else float("nan")
print(f"{avg*100:.2f}%")
print(count)


Numer of examples:  90
{'Sentence1': 'A little girl with brown hair is blowing the petals off of a flower.', 'Sentence2': 'A little girl is playing with flowers.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A little girl is not playing without flowers.
entailment --> entailment
0.13% --> 9.23%. (e: 62.80%, n:27.96%)
{'Sentence1': 'Three men excavating what appears to be a mountainside.', 'Sentence2': 'Three men excavating a mountainside outdoors.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: None of the three men are not excavating a mountainside outdoors.
entailment --> neutral
0.18% --> 4.36%. (e: 5.38%, n:90.25%)
{'Sentence1': 'A dog chasing a deer in a field with yellow flowers.', 'Sentence2': 'A dog is running outside.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A dog is running not inside.
entailment --> contradiction
0.33% --> 44.85%. (e: 30.20%, n:24.95%)
{'Sentence1': 'A man stands on 

Revised sentence: A woman sings on stage.
entailment --> entailment
3.00% --> 3.00%. (e: 83.77%, n:13.22%)
{'Sentence1': 'A young man is using gloves to control a device with wires in it.', 'Sentence2': 'A gloved young man is working on a wired device.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A gloved young man is working on a not wireless device.
entailment --> contradiction
0.05% --> 72.63%. (e: 2.69%, n:24.68%)
{'Sentence1': 'A man, dressed in a green shirt with black pants, is transporting luggage strapped to a wheelbarrow, down a cobblestone road.', 'Sentence2': 'The shirt is green.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: The shirt's color is not different from green.
entailment --> neutral
0.29% --> 34.25%. (e: 18.93%, n:46.82%)
{'Sentence1': 'Children with painted red faces being sprayed with water, on grass.', 'Sentence2': 'Children have painted faces.', 'gold_label': 'entailment', 'prediction': 'entailment'

Revised sentence: a woman is walking, not sitting still
entailment --> entailment
0.12% --> 0.07%. (e: 92.44%, n:7.48%)
{'Sentence1': 'A woman in the street with a black shirt and stars with her mouth open.', 'Sentence2': 'There is someone outside.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: There isn't no one outside
entailment --> contradiction
0.57% --> 99.91%. (e: 0.02%, n:0.07%)
{'Sentence1': 'A downtown shot of a man on his cellphone under an awning.', 'Sentence2': 'A man talks on a phone in a city.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A man talks on a phone in a city, not a village.
entailment --> entailment
0.07% --> 0.35%. (e: 72.13%, n:27.52%)
{'Sentence1': 'A person with a dark yellow baseball cap and a gray sweatshirt is in the air over an outdoor fountain in front of a cityscape.', 'Sentence2': 'The person is wearing a baseball cap.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sente

Revised sentence: A girl, not a boy, is sitting.
entailment --> entailment
0.07% --> 0.33%. (e: 92.41%, n:7.26%)
{'Sentence1': 'A young puppy suckles from his mother.', 'Sentence2': "A young puppy lays at it's mothers stomach.", 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A not so old puppy lays at it's mothers stomach.
entailment --> entailment
3.78% --> 1.13%. (e: 87.39%, n:11.48%)
{'Sentence1': 'A man in a black jacket is talking with two blond women as they stand near a car in the sunshine.', 'Sentence2': 'A man talks.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: A man is not quiet.
entailment --> entailment
0.22% --> 3.14%. (e: 71.95%, n:24.91%)
{'Sentence1': 'Two dogs run around inside a fence.', 'Sentence2': 'Two dogs are inside a fence.', 'gold_label': 'entailment', 'prediction': 'entailment'}
Revised sentence: Two dogs are not outside a fence.
entailment --> entailment
0.13% --> 1.72%. (e: 96.17%, n:2.11%)
{'Sentence

In [65]:
# Turn the four per-example encoding collections into NumPy arrays so the
# later cells can use them in matrix products (`@`).
# NOTE(review): the neutral_* collections are not created in the cell above;
# presumably they come from an earlier neutral run — confirm execution order.
(
    entailment_encodings,
    entailment_counterfactual_encodings,
    neutral_encodings,
    neutral_counterfactual_encodings,
) = (
    np.array(encodings)
    for encodings in (
        entailment_encodings,
        entailment_counterfactual_encodings,
        neutral_encodings,
        neutral_counterfactual_encodings,
    )
)

In [80]:
# Labels whose classifier rows are contrasted below.
fact = 'entailment'
foil = 'contradiction'

import nullspace_projection.src.debias as debias

fact_idx = label2index[fact]
foil_idx = label2index[foil]
# Round-trip through index2label as a sanity check of the two mappings.
print('fact:', index2label[fact_idx])
print('foil:', index2label[foil_idx])
num_classifiers = 100

# Load the classification head from the directory of the archived model
# (`model_path`, set in the first cell) instead of a hard-coded
# mnli/roberta-large path, so the weights always belong to the model that
# produced the encodings.
# Rows of `classifier_w` are indexed by label id (see the projection below).
classifier_w = np.load(model_path + "/w.npy")
classifier_b = np.load(model_path + "/b.npy")

# Alternative: load precomputed projections / classifiers from disk.
# contrastive_projection = np.load(model_path + f"/projections/{num_classifiers}/contrastive_{max(fact_idx, foil_idx)}_{min(fact_idx, foil_idx)}.npy")
# ambiguous_projection = np.load(model_path + f"/projections/{num_classifiers}/ambiguous_{max(fact_idx, foil_idx)}_{min(fact_idx, foil_idx)}.npy")
# classifiers = np.load(model_path + f"/projections/{num_classifiers}/classifiers_{max(fact, foil)}_{min(fact, foil)}.npy")
# classifiers = classifiers.reshape(num_classifiers, enc_size)

# Build the projection from the single direction w[fact] - w[foil], passed as a
# 1-row matrix.
# NOTE(review): presumably get_rowspace_projection returns the orthogonal
# projection onto the row space of its argument — confirm in nullspace_projection.
contrastive_projection = debias.get_rowspace_projection((classifier_w[fact_idx] - classifier_w[foil_idx]).reshape(1, -1))


fact: entailment
foil: contradiction


In [81]:
from scipy.special import softmax

def _projected_label_probs(encodings):
    """Return class probabilities for ``encodings`` after the contrastive projection.

    The encodings are multiplied by ``contrastive_projection``, passed through
    the linear head (``classifier_w``, ``classifier_b``), and a softmax is taken
    over the label axis.

    Args:
        encodings: 2-D array with one encoding per row.

    Returns:
        2-D array with one row of label probabilities per input row.
    """
    logits = encodings @ contrastive_projection @ classifier_w.T + classifier_b
    return softmax(logits, axis=1)


# The former stand-alone `neutral_encodings @ contrastive_projection` expression
# was dropped: its result was discarded, and it made this cell fail with a
# NameError whenever the neutral encodings had not been computed.
prediction_probabilities = _projected_label_probs(entailment_encodings)
prediction_probabilities_counter = _projected_label_probs(entailment_counterfactual_encodings)

print(prediction_probabilities.shape)
# Mean change (original - revised) in column 1, which is 'contradiction' in the
# index2label mapping displayed in this notebook.
print((prediction_probabilities - prediction_probabilities_counter)[:, 1].mean())


(20, 3)
-0.39690640654195997


In [6]:
# Display the index -> label mapping that the first cell built by inverting
# the contents of label2index.json.
index2label

{0: 'entailment', 1: 'contradiction', 2: 'neutral'}