<a href="https://colab.research.google.com/github/ashweta1/interp/blob/main/cs230_visualizing_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install bertviz

Collecting bertviz
  Downloading bertviz-1.4.0-py3-none-any.whl.metadata (19 kB)
Collecting boto3 (from bertviz)
  Downloading boto3-1.35.60-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.36.0,>=1.35.60 (from boto3->bertviz)
  Downloading botocore-1.35.60-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3->bertviz)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->bertviz)
  Downloading s3transfer-0.10.3-py3-none-any.whl.metadata (1.7 kB)
Downloading bertviz-1.4.0-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.6/157.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading boto3-1.35.60-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading botocore-1.35.60-py3-none-any.whl (12.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import torch
from bertviz import head_view, model_view, neuron_view
from transformers import BertTokenizer, BertModel
from transformers import GPT2Model, GPT2Config, GPT2Tokenizer

def get_gpt2_model():
    """Load the pretrained 'gpt2' checkpoint together with its tokenizer.

    The model is requested with the 'eager' attention implementation and with
    ``output_attentions=True``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = 'gpt2'
    tok = GPT2Tokenizer.from_pretrained(checkpoint)
    # Presumably 'eager' is requested so that attention weights are available
    # from the forward pass — confirm against the transformers documentation.
    load_options = {
        'attn_implementation': 'eager',
        'output_attentions': True,
    }
    lm = GPT2Model.from_pretrained(checkpoint, **load_options)
    return lm, tok

def get_input_tokens(tokenizer, input_text):
    """Encode ``input_text`` and recover the token strings of the first sequence.

    Args:
        tokenizer: Callable tokenizer that accepts ``return_tensors='pt'`` and
            provides ``convert_ids_to_tokens``.
        input_text: Text to encode.

    Returns:
        ``(inputs, tokens)``: the tokenizer's output, unchanged, and the result
        of ``convert_ids_to_tokens`` for the ids at ``inputs['input_ids'][0]``.
    """
    encoded = tokenizer(input_text, return_tensors='pt')
    # Only the first row of input_ids is converted back to token strings.
    first_sequence_ids = encoded['input_ids'][0].tolist()
    return encoded, tokenizer.convert_ids_to_tokens(first_sequence_ids)

def get_attention(model, inputs):
    """Run a forward pass and return the model's attention weights.

    The pass runs under ``torch.no_grad()``: the attentions are only used for
    visualization, so there is no need to record an autograd graph for them.

    Args:
        model: Callable model that accepts the entries of ``inputs`` as keyword
            arguments plus ``output_attentions``; its result must expose an
            ``attentions`` attribute.
        inputs: Mapping of keyword arguments for the model (e.g. the tokenizer
            output returned by ``get_input_tokens``).

    Returns:
        The ``attentions`` attribute of the model output.
    """
    with torch.no_grad():
        # Request attentions explicitly so this also works for a model that
        # was not loaded with output_attentions=True.
        outputs = model(**inputs, output_attentions=True)
    return outputs.attentions

def get_attention_and_tokens(model, tokenizer, input_text):
    """Tokenize ``input_text`` and collect the model's attentions for it.

    Args:
        model: Model passed through to ``get_attention``.
        tokenizer: Tokenizer passed through to ``get_input_tokens``.
        input_text: Text to encode and run through the model.

    Returns:
        ``(attentions, tokens)``: the result of ``get_attention`` and the
        token strings returned by ``get_input_tokens``.
    """
    encoded, token_strings = get_input_tokens(tokenizer, input_text)
    return get_attention(model, encoded), token_strings

def get_bert_model():
    """Load the pretrained 'bert-base-uncased' checkpoint and its tokenizer.

    The model is loaded the same way as in ``get_gpt2_model``: with the
    'eager' attention implementation and ``output_attentions=True``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    # Request eager attention explicitly instead of relying on the library
    # default, which may select a fused backend that does not return
    # attention weights.
    model = BertModel.from_pretrained('bert-base-uncased',
                                      attn_implementation='eager',
                                      output_attentions=True)
    return model, tokenizer

# Alternative prompt, kept for reference:
# input_text = "The best paper on transformers and attention is 'Attention is all you need'. The url to the paper is https://arxiv.org/abs/1706.03762"
# Active prompt, used by the GPT-2 and BERT visualization cells below.
input_text = (
    "David Beckham plays the sport of soccer. "
    "Michael Jordan plays the sport of"
)

In [6]:
# Load each pretrained model and its tokenizer once; the visualization cells
# below reuse these objects.
gpt2_model, gpt2_tokenizer = get_gpt2_model()
bert_model, bert_tokenizer = get_bert_model()

In [7]:
# Compute GPT-2 attentions and tokens for input_text, then render bertviz's head view.
# Reference Colab: https://colab.research.google.com/drive/1hXIQ77A4TYS4y3UthWF-Ci7V7vVUoxmQ?usp=sharing#scrollTo=aR07__FyOf8a
gpt2_attentions, gpt2_tokens = get_attention_and_tokens(gpt2_model, gpt2_tokenizer, input_text)
head_view(gpt2_attentions, gpt2_tokens)

<IPython.core.display.Javascript object>

In [8]:
# Render bertviz's model view for the GPT-2 attentions and tokens computed in the head-view cell.
model_view(gpt2_attentions, gpt2_tokens)

<IPython.core.display.Javascript object>

In [10]:
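# NOTE(review): disabled. bertviz's neuron view appears to require the model and
# tokenizer classes from bertviz.transformers_neuron_view rather than the Hugging
# Face ones loaded above — confirm against the bertviz README before re-enabling.
#neuron_view.show(gpt2_model, 'gpt2', gpt2_tokenizer, input_text, layer=4, head=3)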

In [11]:
# Repeat the visualization with BERT on the same input_text: head view, then model view.
# NOTE(review): unlike the GPT-2 cell, the results are stored under the unprefixed
# names `attentions` and `tokens`.
attentions, tokens = get_attention_and_tokens(bert_model, bert_tokenizer, input_text)
head_view(attentions, tokens)
model_view(attentions, tokens)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>