In [1]:
import numpy as np
from allennlp_models import pretrained
from allennlp.predictors import Predictor
from allennlp.interpret.saliency_interpreters import SimpleGradient, IntegratedGradient, SmoothGradient
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
print(pretrained.get_pretrained_models())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
lerc is not a registered model.


{'roberta-sst': <allennlp.common.model_card.ModelCard object at 0x7f1e5b66c7c0>, 'pair-classification-binary-gender-bias-mitigated-roberta-snli': <allennlp.common.model_card.ModelCard object at 0x7f1e58ce7be0>, 'semparse-nlvr': <allennlp.common.model_card.ModelCard object at 0x7f1e58d1d8a0>, 'mc-roberta-swag': <allennlp.common.model_card.ModelCard object at 0x7f1e59273c40>, 'tagging-fine-grained-crf-tagger': <allennlp.common.model_card.ModelCard object at 0x7f1e58d1e3b0>, 'vgqa-vilbert': <allennlp.common.model_card.ModelCard object at 0x7f1e58d1feb0>, 'rc-nmn': <allennlp.common.model_card.ModelCard object at 0x7f1e58d1f850>, 'mc-roberta-piqa': <allennlp.common.model_card.ModelCard object at 0x7f1e58d34130>, 'glove-sst': <allennlp.common.model_card.ModelCard object at 0x7f1e58ce7e50>, 'coref-spanbert': <allennlp.common.model_card.ModelCard object at 0x7f1e58d36110>, 'lm-masked-language-model': <allennlp.common.model_card.ModelCard object at 0x7f1e58d37220>, 'pair-classification-esim': <

In [2]:
predictor = pretrained.load_predictor("lm-next-token-lm-gpt2")

lerc is not a registered model.


In [21]:
sample_text = "Toronto Raptors, who are currently tied for the league leader in wins"
predictor.predict(sample_text)

{'token_ids': [31359,
  30836,
  11,
  508,
  389,
  3058,
  8165,
  329,
  262,
  4652,
  3554,
  287,
  7864],
 'top_indices': [[11], [351], [357], [13], [290]],
 'probabilities': [0.29245299100875854,
  0.13100479543209076,
  0.12022867053747177,
  0.08953401446342468,
  0.07160692662000656],
 'top_tokens': [[','], ['Ġwith'], ['Ġ('], ['.'], ['Ġand']],
 'tokens': ['Toronto',
  'ĠRaptors',
  ',',
  'Ġwho',
  'Ġare',
  'Ġcurrently',
  'Ġtied',
  'Ġfor',
  'Ġthe',
  'Ġleague',
  'Ġleader',
  'Ġin',
  'Ġwins']}

In [7]:
tokenizer = predictor._dataset_reader._tokenizer.tokenizer

In [3]:
class AutoRegressiveGPT():
    def __init__(self, predictor: Predictor, interpreter: Predictor):
        self.predictor = predictor
        self.interpreter = interpreter
    
    def generate(self, inputs, max_length: int =10):
        for _ in tqdm(range(max_length)):
            outputs = predictor.predict(inputs)
            probabilities = np.array(outputs["probabilities"])
            probabilities = probabilities / probabilities.sum()
            top_tokens = outputs["top_tokens"]
            top_tokens = np.array(top_tokens).reshape(-1)
            token = np.random.choice(top_tokens, 1, p=probabilities)[0]
            if token[0] == "Ġ" or token[0] == "Ċ":
                token = " "+token[1:]
            inputs += token
                
        return inputs
    
    def generate_and_saliency_score(self, inputs, max_length: int =10):
        grad_matrix = []
        for _ in tqdm(range(max_length)):
            outputs = predictor.predict(inputs)
            probabilities = np.array(outputs["probabilities"])
            probabilities = probabilities / probabilities.sum()
            top_tokens = outputs["top_tokens"]
            top_tokens = np.array(top_tokens).reshape(-1)
            token = np.random.choice(top_tokens, 1, p=probabilities)[0]
            if token[0] == "Ġ" or token[0] == "Ċ":
                token = " "+token[1:]
            
            interpretation = self.interpreter.saliency_interpret_from_json({"sentence": inputs})
            grads = np.array(interpretation['instance_1']['grad_input_1'])
            grad_matrix.append(grads)
            
            inputs += token
                
        return inputs, grad_matrix
    
    def get_score_from_grad_matrix(self, grad_matrix: list[list]):
        n_inputs = len(grad_matrix[0])
        n_outputs = len(grad_matrix)

        scores = np.zeros((n_inputs, n_outputs))

        for n in tqdm(range(n_outputs)):
            if n == 0:
                scores[:, n] = grad_matrix[n]
            else:
                sum_ = 0
                for j in range(1,n+1):
                    sum_ += scores[:, n-j]*grad_matrix[n][-j]
                scores[:, n] = grad_matrix[n][:n_inputs] + sum_
        return scores
    
    def get_scores_from_text(self, text: str, max_length: int = 10):
        result, grad_matrix = self.generate_and_saliency_score(text, max_length)
        scores = self.get_score_from_grad_matrix(grad_matrix)
        
        return result, scores


In [4]:
interpreter = SmoothGradient(predictor)
generator = AutoRegressiveGPT(predictor=predictor, interpreter=interpreter)

In [218]:
generator.generate("test text", max_length=4)

'test text. The "text'

In [220]:
result, grad_matrix = generator.generate_and_saliency_score("test text", max_length=5)

In [42]:
result, scores = generator.get_scores_from_text("Toronto Raptors, who are currently tied for the league leader in wins", max_length=10)


Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.

100%|██████████| 10/10 [00:45<00:00,  4.51s/it]
100%|██████████| 10/10 [00:00<00:00, 6230.40it/s]


In [43]:
tokens = tokenizer.encode(result)
words = [tokenizer.decode(token) for token in tokens]

In [44]:
words[:13]

['Toronto',
 ' Raptors',
 ',',
 ' who',
 ' are',
 ' currently',
 ' tied',
 ' for',
 ' the',
 ' league',
 ' leader',
 ' in',
 ' wins']

In [45]:
scores_round = np.round(scores, 3)

In [46]:
import plotly.express as px
fig = px.imshow(
        scores_round,
        text_auto=True,
        y=words[:13],
        x=words[13:],
        color_continuous_scale='Greys',
        width=1200, height=1200
        )

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(side="top")
fig.update_layout(
yaxis = dict(
tickfont = dict(size=20)))
fig.update_layout(
xaxis = dict(
tickfont = dict(size=20)))

fig.show()

In [274]:
scores.shape

(13, 10)

In [257]:
pd.DataFrame(scores).to_csv("scores.csv", index=False)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [223]:
n_inputs = len(grad_matrix[0])
n_outputs = len(grad_matrix)

scores = np.zeros((n_inputs, n_outputs))

for n in range(n_outputs):
    if n == 0:
        scores[:, n] = grad_matrix[n]
    else:
        sum_ = 0
        for j in range(1,n+1):
            sum_ += scores[:, n-j]*grad_matrix[n][-j]
        scores[:, n] = grad_matrix[n][:n_inputs] + sum_
            
    

In [224]:
scores

array([[0.6       , 0.25882353, 0.61960784, 0.5708061 , 0.5222658 ],
       [0.4       , 0.74117647, 0.38039216, 0.4291939 , 0.4777342 ]])

In [225]:
scores.sum(axis=0)

array([1., 1., 1., 1., 1.])

In [177]:
plt.rcParams['figure.figsize']=7, 7
fig = plt.figure()

ax = fig.add_subplot(111)


array([0.19875776, 0.04968944, 0.09937888, 0.1552795 , 0.07453416,
       0.02173913, 0.1242236 , 0.07453416, 0.0621118 , 0.02795031,
       0.03726708, 0.02484472, 0.04968944])

In [164]:
n, n_inputs

(14, 15)

In [144]:
grad_matrix[0]

array([0.03755869, 0.07511737, 0.        , 0.05633803, 0.18779343,
       0.05399061, 0.15023474, 0.11267606, 0.11267606, 0.0258216 ,
       0.07511737, 0.03755869, 0.07511737, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        ])

In [143]:
grad_matrix.shape

(15, 27)

In [142]:
max_len = len(grad_matrix[-1])
grad_matrix = np.array([np.pad(grad_vec, (0, max_len-len(grad_vec)), 'constant') for grad_vec in grad_matrix])

In [49]:
interpretation = interpreter.saliency_interpret_from_json({"sentence": sample_text})
grads = np.array(interpretation['instance_1']['grad_input_1'])

outputs = predictor.predict(sample_text)
probs = np.array(outputs['probabilities'])



In [63]:
outputs

{'token_ids': [31359,
  30836,
  11,
  508,
  389,
  3058,
  8165,
  329,
  262,
  4652,
  3554,
  287,
  7864],
 'top_indices': [[11], [351], [357], [13], [290]],
 'probabilities': [0.29245299100875854,
  0.13100479543209076,
  0.12022867053747177,
  0.08953401446342468,
  0.07160692662000656],
 'top_tokens': [[','], ['Ġwith'], ['Ġ('], ['.'], ['Ġand']],
 'tokens': ['Toronto',
  'ĠRaptors',
  ',',
  'Ġwho',
  'Ġare',
  'Ġcurrently',
  'Ġtied',
  'Ġfor',
  'Ġthe',
  'Ġleague',
  'Ġleader',
  'Ġin',
  'Ġwins']}

In [50]:
grads

array([0.23188406, 0.07729469, 0.        , 0.10628019, 0.03864734,
       0.03864734, 0.11594203, 0.17391304, 0.03864734, 0.00483092,
       0.01932367, 0.11594203, 0.03864734])

In [67]:
probs / probs.sum()

array([0.41492852, 0.18586791, 0.17057888, 0.1270297 , 0.10159498])

In [88]:
probabilities = np.array(outputs["probabilities"])
probabilities = probabilities / probabilities.sum()
top_tokens = outputs["top_tokens"]
top_tokens = np.array(top_tokens).reshape(-1)
token = np.random.choice(top_tokens, 1, p=probabilities)[0]

In [73]:
np.array(top_tokens).reshape(-1)

array([',', 'Ġwith', 'Ġ(', '.', 'Ġand'], dtype='<U5')

In [85]:
probabilities

array([0.41492852, 0.18586791, 0.17057888, 0.1270297 , 0.10159498])

In [91]:
if token[0] == "Ġ":
    token = token[1:]

In [92]:
token

'and'