In [4]:
import inseq

In [74]:
from transformers import AutoModelWithLMHead, AutoTokenizer,AutoModelForCausalLM
import torch
def get_next_token_probabilities(model, input_embeddings):
    with torch.no_grad():
        outputs = model(inputs_embeds=input_embeddings)
        logits = outputs.logits[:, -1, :]  # Get the logits for the last token
        probabilities = torch.softmax(logits, dim=-1)
    return probabilities

## testing soft suff

In [92]:
model = AutoModelWithLMHead.from_pretrained("gpt2-medium")
tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")



In [116]:
input_string = [ "I landed in Greece and I was staying in the capital city of" ]
input_ids = tokenizer(input_string, return_tensors='pt')['input_ids'].to(model.device)

### let s test 3 different importance distribution 1. flat 2. random 3. empirical correct
importance_scores = torch.softmax(torch.log(torch.tensor([[ 0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]], dtype=torch.float, device=model.device)), -1)
# and are removed heavily
strong_importance_scores = torch.softmax(torch.log(torch.tensor([[ 0, 0, 0, 0, 0.8, 0, 0, 0, 0, 0, 0.1, 0.1, 0]], dtype=torch.float, device=model.device)), -1)
# greece are removed heavily
correct_importance_scores = torch.softmax(torch.log(torch.tensor([[ 0, 0, 0, 0, 0, 0.8, 0, 0, 0, 0, 0.1, 0.1, 0]], dtype=torch.float, device=model.device)), -1)


input_wte = model.transformer.wte(input_ids)


In [117]:
next_token_probabilities = get_next_token_probabilities(model, input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)

print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities[:,next_token_id])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)
print(' ')

Next Token Probability Distribution over all probability:
tensor([[2.4524e-06, 2.8481e-06, 2.2075e-08,  ..., 1.6170e-09, 8.6779e-10,
         1.2370e-05]])
tensor([0.5257])
Next Token ID: tensor(21891)
Next Token:  Athens
 


In [118]:
embeddings_3rd = input_wte.size(2)
importance_scores = importance_scores.unsqueeze(2).repeat(1, 1, embeddings_3rd)
zeroout_mask = torch.bernoulli(1-importance_scores).to(model.device)
soft_input_wte = input_wte * zeroout_mask


next_token_probabilities = get_next_token_probabilities(model, soft_input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)

print("flat importance distribution, should be less change".center(50, "-"))
print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities[:,next_token_id])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)
print(' ')

flat importance distribution, should be less change
Next Token Probability Distribution over all probability:
tensor([[2.7316e-06, 3.8048e-06, 2.1670e-08,  ..., 7.7315e-10, 7.0193e-10,
         1.3444e-05]])
tensor([0.4927])
Next Token ID: tensor(21891)
Next Token:  Athens
 


In [119]:
embeddings_3rd = input_wte.size(2)
# using strong skewed importance score (incorrect one), unimportance ones are removed, important ones are keeeps, expect small changes
importance_scores = strong_importance_scores.unsqueeze(2).repeat(1, 1, embeddings_3rd)
zeroout_mask = torch.bernoulli(1-importance_scores).to(model.device)
soft_input_wte = input_wte * zeroout_mask


next_token_probabilities = get_next_token_probabilities(model, soft_input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)

print("skewed importance distribution, should lead to big changes".center(50, "-"))
print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities[:,next_token_id])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)
print(' ')

skewed importance distribution, should lead to big changes
Next Token Probability Distribution over all probability:
tensor([[4.7157e-06, 8.1449e-06, 2.7785e-08,  ..., 2.7829e-10, 2.2713e-10,
         2.4398e-05]])
tensor([0.2394])
Next Token ID: tensor(21891)
Next Token:  Athens
 


In [120]:
print("correct importance distribution, should lead to small changes".center(50, "-"))
embeddings_3rd = input_wte.size(2)
# using strong skewed importance score (incorrect one), unimportance ones are removed, important ones are keeeps, expect small changes
importance_scores = correct_importance_scores.unsqueeze(2).repeat(1, 1, embeddings_3rd)
zeroout_mask = torch.bernoulli(1-importance_scores).to(model.device)
soft_input_wte = input_wte * zeroout_mask


next_token_probabilities = get_next_token_probabilities(model, soft_input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)


print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities[:,next_token_id])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)
print(' ')

correct importance distribution, should lead to small changes
Next Token Probability Distribution over all probability:
tensor([[3.2545e-06, 3.6667e-06, 2.4394e-08,  ..., 1.1526e-09, 6.0725e-10,
         1.5459e-05]])
tensor([0.3688])
Next Token ID: tensor(21891)
Next Token:  Athens
 


In [84]:
# change "I" to "You", change "Greece" to "nowhere"
masked_input_string = [ "You landed in nowhere and I was staying in the capital city of" ]
masked_input_ids = tokenizer(masked_input_string, return_tensors='pt')['input_ids'].to(model.device)
masked_input_wte = model.transformer.wte(masked_input_ids)

next_token_probabilities = get_next_token_probabilities(model, masked_input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)

print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities.size())
print(next_token_probabilities[:,next_token_id])
print("the probability of 'Athens'", next_token_probabilities[:,21891])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)

Next Token Probability Distribution over all probability:
tensor([[2.0519e-05, 1.7326e-05, 2.0323e-07,  ..., 6.2910e-08, 5.7693e-08,
         2.1967e-05]])
torch.Size([1, 50257])
tensor([0.0964])
the probability of 'Athens' tensor([0.0026])
Next Token ID: tensor(262)
Next Token:  the


In [85]:
masked_input_string = [ "You landed in Greece and I was staying in the capital city of" ]
masked_input_ids = tokenizer(masked_input_string, return_tensors='pt')['input_ids'].to(model.device)
masked_input_wte = model.transformer.wte(masked_input_ids)

next_token_probabilities = get_next_token_probabilities(model, masked_input_wte)
next_token_id = torch.argmax(next_token_probabilities)
next_token = tokenizer.decode(next_token_id)

print("Next Token Probability Distribution over all probability:")
print(next_token_probabilities)
print(next_token_probabilities.size())
print(next_token_probabilities[:,next_token_id])
print("the probability of 'Athens'", next_token_probabilities[:,21891])
print("Next Token ID:", next_token_id)
print("Next Token:", next_token)



Next Token Probability Distribution over all probability:
tensor([[2.1874e-06, 1.7047e-06, 1.1743e-08,  ..., 1.7569e-09, 6.3192e-10,
         8.1974e-06]])
torch.Size([1, 50257])
tensor([0.8049])
the probability of 'Athens' tensor([0.8049])
Next Token ID: tensor(21891)
Next Token:  Athens


## testing package inseq

In [14]:
Model = "facebook/bart-base"
'''
"decapoda-research/llama-7b-hf"
"Helsinki-NLP/opus-mt-en-fr" 
"facebook/bart-base"  558M
'''

FA = "integrated_gradients"
'''
saliency input_x_gradient integrated_gradients deeplift gradient_shap discretized_integrated_gradients
attention
occlusion lime
'''

text = "when my flight landed in Greece, I converted my currency and slowly fall asleep. I was staying in the capital of"

In [26]:
model = inseq.load_model("gpt2", "input_x_gradient") 
out = model.attribute(text, generation_args={"max_new_tokens": 1}, n_steps=500, internal_batch_size=50, attribute_target=True,)
out.show()

attribute_target parameter is set to True, but will be ignored (not an encoder-decoder).
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Unused arguments during attribution: {'n_steps': 500, 'internal_batch_size': 50}
Attributing with input_x_gradient...: 100%|██████████| 45/45 [00:00<00:00, 52.08it/s]


Unnamed: 0_level_0,ĠTokyo
When,0.023
Ġmy,0.017
Ġflight,0.028
Ġlanded,0.019
Ġin,0.034
ĠJapan,0.077
",",0.058
ĠI,0.008
Ġconverted,0.014
Ġmy,0.006
Ġcurrency,0.041
Ġand,0.023
Ġslowly,0.01
Ġfell,0.007
Ġasleep,0.017
.,0.029
Ġ(,0.019
I,0.005
Ġhad,0.005
Ġa,0.012
Ġterrifying,0.013
Ġdream,0.012
Ġabout,0.008
Ġmy,0.006
Ġgrandmother,0.023
",",0.01
Ġbut,0.004
Ġthat,0.005
âĢ,0.023
Ļ,0.009
s,0.005
Ġa,0.011
Ġstory,0.017
Ġfor,0.007
Ġanother,0.005
Ġtime,0.007
).,0.016
ĠI,0.015
Ġwas,0.027
Ġstaying,0.031
Ġin,0.023
Ġthe,0.039
Ġcapital,0.183
Ġof,0.049
ĠTokyo,Unnamed: 1_level_45


In [24]:
model = inseq.load_model("gpt2", "saliency") # saliency integrated_gradients
out = model.attribute(text, generation_args={"max_new_tokens": 1}, n_steps=500, internal_batch_size=50, attribute_target=True,)
out.show()

attribute_target parameter is set to True, but will be ignored (not an encoder-decoder).
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Unused arguments during attribution: {'n_steps': 500, 'internal_batch_size': 50}
Attributing with saliency...: 100%|██████████| 45/45 [00:00<00:00, 49.57it/s]


Unnamed: 0_level_0,ĠTokyo
When,0.036
Ġmy,0.02
Ġflight,0.031
Ġlanded,0.02
Ġin,0.029
ĠJapan,0.081
",",0.028
ĠI,0.008
Ġconverted,0.014
Ġmy,0.008
Ġcurrency,0.043
Ġand,0.013
Ġslowly,0.012
Ġfell,0.008
Ġasleep,0.017
.,0.016
Ġ(,0.014
I,0.006
Ġhad,0.006
Ġa,0.009
Ġterrifying,0.016
Ġdream,0.012
Ġabout,0.008
Ġmy,0.007
Ġgrandmother,0.023
",",0.01
Ġbut,0.006
Ġthat,0.008
âĢ,0.02
Ļ,0.009
s,0.006
Ġa,0.01
Ġstory,0.019
Ġfor,0.008
Ġanother,0.006
Ġtime,0.008
).,0.02
ĠI,0.015
Ġwas,0.025
Ġstaying,0.033
Ġin,0.028
Ġthe,0.038
Ġcapital,0.182
Ġof,0.065
ĠTokyo,Unnamed: 1_level_45


In [27]:
model = inseq.load_model("facebook/bart-base", "saliency")
out = model.attribute(text, generation_args={"max_new_tokens": 1}, n_steps=500, internal_batch_size=50)
out.show()

Unused arguments during attribution: {'n_steps': 500, 'internal_batch_size': 50}
Attributing with saliency...: 100%|██████████| 3/3 [00:00<00:00, 57.37it/s]


Unnamed: 0_level_0,<s>,</s>
<s>,0.007,0.012
When,0.01,0.126
Ġmy,0.006,0.022
Ġflight,0.013,0.028
Ġlanded,0.016,0.024
Ġin,0.007,0.016
ĠJapan,0.019,0.062
",",0.012,0.016
ĠI,0.012,0.017
Ġconverted,0.038,0.031
Ġmy,0.014,0.02
Ġcurrency,0.044,0.048
Ġand,0.023,0.014
Ġslowly,0.019,0.02
Ġfell,0.014,0.015
Ġasleep,0.017,0.019
.,0.029,0.027
Ġ(,0.049,0.031
I,0.013,0.015
Ġhad,0.018,0.012
Ġa,0.011,0.01
Ġterrifying,0.02,0.022
Ġdream,0.021,0.019
Ġabout,0.012,0.01
Ġmy,0.008,0.012
Ġgrandmother,0.017,0.019
",",0.017,0.012
Ġbut,0.025,0.01
Ġthat,0.043,0.012
âĢ,0.024,0.015
Ļ,0.024,0.026
s,0.053,0.012
Ġa,0.035,0.01
Ġstory,0.042,0.018
Ġfor,0.023,0.009
Ġanother,0.025,0.009
Ġtime,0.023,0.01
).,0.029,0.031
ĠI,0.012,0.022
Ġwas,0.012,0.014
Ġstaying,0.011,0.016
Ġin,0.008,0.011
Ġthe,0.009,0.013
Ġcapital,0.024,0.022
Ġof,0.051,0.031
</s>,0.04,0.034


In [1]:

model = inseq.load_model("Helsinki-NLP/opus-mt-en-fr", "integrated_gradients")
out = model.attribute(
    "The developer argued with the designer because she did not like the design.",
    n_steps=300,
    return_convergence_delta=True,
    step_scores=["probability"],
)
out.show()

  from .autonotebook import tqdm as notebook_tqdm
2023-07-17 08:57:10.228590: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-17 08:57:10.443332: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Downloading (…)lve/main/config.json: 100%|██████████| 1.42k/1.42k [00:00<00:00, 1.87MB/s]
Downloading pytorch_model.bin: 100%|██████████| 301M/301M [00:24<00:00, 12.1MB/s] 
Downloading (…)neration_config.json: 100%|██████████| 293/293 [00:00<00:00, 273kB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 42.0/42.0 [0

Unnamed: 0_level_0,▁Le,▁développeur,▁s,',est,▁disputé,▁avec,▁la,▁designer,▁parce,▁qu,',elle,▁n,',aimait,▁pas,▁le,▁design,.,</s>
▁The,0.157,0.076,0.047,0.095,0.055,0.042,0.111,0.022,0.023,0.03,0.029,0.042,0.069,0.017,0.045,0.026,0.033,0.053,0.027,0.023,0.07
▁developer,0.312,0.415,0.099,0.086,0.093,0.09,0.079,0.099,0.075,0.099,0.037,0.084,0.059,0.026,0.057,0.043,0.03,0.077,0.105,0.036,0.072
▁argued,0.085,0.088,0.407,0.189,0.19,0.414,0.221,0.044,0.059,0.101,0.038,0.108,0.04,0.021,0.078,0.042,0.025,0.076,0.079,0.033,0.05
▁with,0.025,0.03,0.104,0.06,0.05,0.114,0.228,0.032,0.04,0.059,0.022,0.04,0.03,0.015,0.033,0.026,0.023,0.041,0.029,0.031,0.043
▁the,0.035,0.049,0.054,0.047,0.032,0.054,0.09,0.095,0.111,0.273,0.026,0.057,0.024,0.015,0.027,0.033,0.019,0.04,0.038,0.047,0.038
▁designer,0.046,0.072,0.05,0.059,0.036,0.054,0.045,0.293,0.413,0.104,0.062,0.093,0.038,0.029,0.057,0.039,0.038,0.138,0.118,0.072,0.061
▁because,0.038,0.025,0.041,0.053,0.055,0.036,0.035,0.054,0.043,0.125,0.114,0.092,0.073,0.037,0.046,0.045,0.075,0.029,0.028,0.026,0.06
▁she,0.142,0.056,0.034,0.071,0.059,0.038,0.054,0.186,0.073,0.043,0.289,0.084,0.385,0.045,0.063,0.046,0.047,0.039,0.051,0.039,0.068
▁did,0.036,0.031,0.027,0.052,0.211,0.024,0.027,0.035,0.028,0.033,0.182,0.12,0.112,0.163,0.14,0.179,0.083,0.027,0.044,0.028,0.042
▁not,0.017,0.019,0.026,0.041,0.11,0.025,0.022,0.023,0.016,0.024,0.074,0.059,0.048,0.388,0.186,0.141,0.234,0.026,0.034,0.033,0.056
▁like,0.027,0.027,0.024,0.078,0.036,0.027,0.026,0.041,0.019,0.026,0.054,0.047,0.037,0.159,0.108,0.218,0.159,0.051,0.064,0.069,0.063
▁the,0.029,0.036,0.022,0.041,0.023,0.022,0.017,0.028,0.031,0.022,0.026,0.044,0.021,0.035,0.055,0.065,0.062,0.081,0.084,0.342,0.061
▁design,0.032,0.049,0.038,0.038,0.026,0.039,0.018,0.027,0.045,0.024,0.02,0.032,0.033,0.028,0.031,0.036,0.115,0.233,0.252,0.088,0.086
.,0.019,0.028,0.026,0.091,0.023,0.022,0.027,0.02,0.025,0.035,0.028,0.099,0.03,0.022,0.073,0.06,0.056,0.086,0.047,0.134,0.231
</s>,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
deltas,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.004,0.001,0.008,0.001,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0
probability,0.528,0.556,0.335,0.897,0.878,0.526,0.804,0.521,0.335,0.581,0.844,0.906,0.9,0.836,0.909,0.83,0.894,0.513,0.708,0.889,0.903
