## Generate dataset

Download the WikiAuto dataset (`wiki_auto`, built from Wikipedia) and randomly sample sentences to use as model input.

### installs

In [None]:
!pip install datasets # run if needed

### code

In [2]:
from datasets import load_dataset
import random

# Fetch the wiki_auto corpus through the `datasets` library.
dataset = load_dataset("wiki_auto")

# Collect the normal-sentence list of every 'part_1' row, skipping rows whose list is empty.
part_1 = dataset['part_1']
data = []
for row_idx in range(len(part_1)):
    normal_sentences = part_1[row_idx]['normal']['normal_article_content']['normal_sentence']
    if len(normal_sentences) != 0:
        data.append(normal_sentences)

# Draw 1000 distinct rows. The seed is set once here, so it also fixes the
# per-row sentence choices made right after.
num_examples = 1000
data_len = len(data)
random.seed(10)
indices = random.sample(range(data_len), num_examples)
print(indices[:10])

# One randomly chosen sentence per selected row, in the order of `indices`.
sentences = [random.choice(data[i]) for i in indices]
# print(sentences[:10])

No config specified, defaulting to: wiki_auto/auto
Reusing dataset wiki_auto (/n/home10/cyeh/.cache/huggingface/datasets/wiki_auto/auto/1.0.0/eeac705719dc9aa2ff180571dfed6c6649588ccdfde8d45a47d2e47e5c5b93af)
100%|██████████| 2/2 [00:00<00:00, 43.47it/s]


[74894, 4270, 56215, 63250, 75771, 1944, 27013, 60631, 106603, 64395]


## Getting Q, K vectors

Follow Jesse Vig's method (the BertViz `neuron_view` module) for extracting the query and key vectors.

In [3]:
# import methods from bertviz
from bertviz import neuron_view
from bertviz.transformers_neuron_view import BertModel, BertTokenizer

import numpy as np
import random
import string

# parameters
model_type = 'bert'
model_version = 'bert-base-uncased'
model = BertModel.from_pretrained(model_version, output_attentions=True)
tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=True)

# small sample to test out code with
# NOTE(review): the loop below iterates over the full `sentences` list, not this sample.
sentences_test = sentences[:10]
num_heads = 12
num_layers = 12


def _empty_grid():
    """Return a fresh [layer][head] grid of empty lists to accumulate per-token values."""
    return [[[] for _ in range(num_heads)] for _ in range(num_layers)]


# master dictionary for all values
# The four grids are indexed [layer][head]; each cell grows by one entry per token.
attn_dict = {'left_text': [],
             'right_text': [],
             'positions': [],
             'normalized_positions': [],
             'sentences': [],
             'tokenized_sentences': [],
             'queries': _empty_grid(),
             'keys': _empty_grid(),
             'attn': _empty_grid(),
             'dot_prod': _empty_grid()}

for s in sentences:
    # call method from bertviz to get attention info
    s_dict = neuron_view.get_attention(model, model_type, tokenizer, s, include_queries_and_keys=True)['all']

    # append to master dictionary
    tokens = s_dict['left_text']
    attn_dict['left_text'].extend(tokens)
    attn_dict['right_text'].extend(s_dict['right_text'])

    # save position of token and tokenized sentences too (one entry per token)
    tokenized = ' '.join(tokens)
    last_index = max(len(tokens) - 1, 1)  # guard against dividing by zero for a one-token input
    for index in range(len(tokens)):
        attn_dict['positions'].append(index)
        attn_dict['normalized_positions'].append(index / last_index)
        attn_dict['sentences'].append(s)
        attn_dict['tokenized_sentences'].append(tokenized)

    # updating cumulative q/k vectors + attn + dot products
    # Outer index is the layer and inner index is the head, matching how the
    # grids above are built and how they are read back as [layer][head] later.
    # NOTE(review): assumes bertviz nests its output [layer][head] as well — confirm.
    for layer_idx in range(num_layers):
        for head_idx in range(num_heads):
            query = s_dict['queries'][layer_idx][head_idx]
            key = s_dict['keys'][layer_idx][head_idx]

            attn_dict['queries'][layer_idx][head_idx].extend(query)
            attn_dict['keys'][layer_idx][head_idx].extend(key)
            attn_dict['attn'][layer_idx][head_idx].extend(s_dict['attn'][layer_idx][head_idx])

            # query-by-key dot products for this sentence, one row per query token
            dp = np.dot(np.array(query), np.array(key).transpose())
            attn_dict['dot_prod'][layer_idx][head_idx].extend(dp)

In [None]:
import pickle
# Serialize attn_dict to 'attn_dict_new.p' using the highest available pickle protocol.
# NOTE(review): the load cell further down reads "attn_dict.p", not this file — confirm which is intended.
with open('attn_dict_new.p', 'wb') as out_file:
    pickle.dump(attn_dict, out_file, protocol=pickle.HIGHEST_PROTOCOL)

## Make TSNE / UMAP Plots

Generating plots from query + key vectors

### installs

In [None]:
!pip install plotly

In [None]:
!pip install seaborn

In [None]:
!pip install umap-learn

### imports

In [4]:
# import
from sklearn.manifold import TSNE
from umap import UMAP
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pickle

# ensure plots show up in jupyter
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "iframe"

In [5]:
# load attn_dict back if pre-saved
# The context manager closes the file handle once loading finishes.
# NOTE: unpickling can execute arbitrary code — only load files you created yourself.
with open("attn_dict.p", "rb") as pickle_file:
    attn_dict = pickle.load(pickle_file)

### helper methods

In [7]:
# produce corresponding key matrix from query matrix (e.g., for attention)
def k_matrix(q_matrix):
    """Transpose a list of rows block by block.

    The rows are consumed in consecutive groups: a group starts at some row,
    its size is the length of that first row, and the group is treated as a
    square block that is transposed and appended to the result.

    Presumably each group is one sentence's values for a single layer/head
    (e.g. attention rows), so the output holds the key-side view — confirm
    against the caller.

    Args:
        q_matrix: list of indexable rows.

    Returns:
        A list of lists with the same number of rows as ``q_matrix``.
    """
    transposed = []
    total_rows = len(q_matrix)
    start = 0
    while start < total_rows:
        block_size = len(q_matrix[start])
        for col in range(block_size):
            # column `col` of the current block becomes one output row
            transposed.append([q_matrix[start + row][col] for row in range(block_size)])
        start += block_size

    return transposed

# format sentences to be displayed in html plot
def fix_sentences(sentences, positions, types):
    """Format whitespace-tokenized sentences as HTML for the plot hover text.

    For each (sentence, position, type) triple:
      * the token at index ``position`` is wrapped in a bold, colored ``<b>``
        tag ("#F6BA98" when the type is "key", "#B6E1B9" otherwise);
      * a ``<br>`` is inserted before every 10th token, except the first and
        the last token;
      * tokens are joined with single spaces, with no trailing space.

    Args:
        sentences: iterable of strings whose tokens are separated by whitespace.
        positions: iterable of token indices to highlight, one per sentence.
        types: iterable of type labels ("query" / "key"), one per sentence.

    Returns:
        List of HTML strings, one per input triple.
    """
    new_sentences = []
    for sent, pos, t in zip(sentences, positions, types):
        words = sent.split()
        last = len(words) - 1
        pieces = []
        for i, word in enumerate(words):
            # add new line every 10 tokens (never before the first or last token)
            prefix = "<br>" if (i % 10 == 0 and i not in (0, last)) else ""

            if i == pos:  # bold + color current token
                color = "#F6BA98" if t == "key" else "#B6E1B9"
                word = "<b style='color:" + color + "'>" + word + "</b>"

            pieces.append(prefix + word)

        # join() puts a space between tokens only; the old `s != len(s_arr) - 1`
        # check compared a string with an int and so always added a trailing space
        new_sentences.append(" ".join(pieces))

    return new_sentences

# convert data into pandas dataframe
def make_df(layer, head, max_tokens=5021):
    """Build the query/key dataframe for one (layer, head) from the global ``attn_dict``.

    Rows are laid out as all query tokens followed by all key tokens. The
    first eight columns are metadata (token, type, pos_int, position,
    sentence, attn, dot_prod, norm); the feature columns ``f0 .. f<N-1>``
    follow. Downstream helpers select features with ``iloc[:, 8:]``, so this
    column order must be kept.

    Args:
        layer: layer index into the per-layer/per-head lists of ``attn_dict``.
        head: head index into the per-layer/per-head lists of ``attn_dict``.
        max_tokens: keep only the first ``max_tokens`` query rows and the
            first ``max_tokens`` key rows. Pass ``None`` to keep everything.

    Returns:
        pandas DataFrame with the query rows first and the key rows second.
    """
    df = pd.DataFrame()
    df['token'] = attn_dict['left_text'] + attn_dict['right_text'] # store tokens
    df['token'] = df['token'].str.lower() # convert to lowercase
    num_tokens = len(attn_dict['left_text'])

    df['type'] = ['query'] * num_tokens + ['key'] * num_tokens # store token type
    df['pos_int'] = attn_dict['positions'] * 2 # positions
    df['position'] = attn_dict['normalized_positions'] * 2

    # sentence itself, formatted once with query coloring and once with key coloring
    tokenized = attn_dict['tokenized_sentences']
    positions = attn_dict['positions']
    df['sentence'] = (fix_sentences(tokenized, positions, ['query'] * num_tokens)
                      + fix_sentences(tokenized, positions, ['key'] * num_tokens))

    # save attn info (key rows get the per-sentence transposed view)
    attn = attn_dict['attn'][layer][head]
    df['attn'] = attn + k_matrix(attn)
    dp = attn_dict['dot_prod'][layer][head]
    df['dot_prod'] = dp + k_matrix(dp)

    # extract q/k vectors
    queries = attn_dict['queries'][layer][head]
    keys = attn_dict['keys'][layer][head]
    vec_size = len(queries[0])

    # norms
    norms_q = [np.linalg.norm(q) for q in queries]
    norms_k = [np.linalg.norm(keys[i]) for i in range(len(queries))]
    df["norm"] = norms_q + norms_k

    for i in range(vec_size): # store q/k vector values
        qs = [queries[j][i] for j in range(num_tokens)]
        ks = [keys[j][i] for j in range(num_tokens)]
        df["f" + str(i)] = qs + ks # add to dataframe

    # only keep the first X queries + the first X keys; the key rows start at
    # `num_tokens`, so the offset is derived instead of hard-coded
    if max_tokens is not None:
        keep = min(max_tokens, num_tokens)
        df = pd.concat([df.iloc[:keep], df.iloc[num_tokens:num_tokens + keep]])
    return df

In [8]:
## TRANSLATING KEYS FOR EASIER COMPARISON
def find_q_means(df):
    """Return the per-column mean of the query rows.

    Only rows whose 'type' is 'query' are used, and only the columns from
    positional index 8 onward (the feature columns) are averaged.
    """
    is_query = df['type'] == 'query'
    query_features = df.loc[is_query].iloc[:, 8:].copy()
    return query_features.mean(axis=0)

def find_k_means(df):
    """Return the key rows' feature columns together with their per-column mean.

    Only rows whose 'type' is 'key' are used, restricted to the columns from
    positional index 8 onward.

    Returns:
        (key_features, key_means): a copy of the selected sub-frame and the
        Series of its column means.
    """
    is_key = df['type'] == 'key'
    key_features = df.loc[is_key].iloc[:, 8:].copy()
    return key_features, key_features.mean(axis=0)

def translate_keys(df, df_keys, query_means, key_means):
    """Shift the key vectors so that their per-feature mean equals the query mean.

    For every feature column ``f<N>`` present in ``df_keys``, the key rows of
    ``df`` are overwritten with ``key - key_mean + query_mean``. The feature
    columns are discovered from ``df_keys`` rather than assumed to be exactly
    ``f0 .. f63``, so any vector size works.

    Args:
        df: full dataframe with a 'type' column; modified in place.
        df_keys: the key rows' feature columns (same index as the key rows of ``df``).
        query_means: per-feature means of the query rows, indexed by column name.
        key_means: per-feature means of the key rows, indexed by column name.

    Returns:
        The same ``df`` object, with translated key rows.
    """
    key_rows = df['type'] == 'key'
    feature_cols = [c for c in df_keys.columns
                    if isinstance(c, str) and c[:1] == "f" and c[1:].isdigit()]
    for col in feature_cols:
        df.loc[key_rows, col] = df_keys[col] - key_means[col] + query_means[col]
    return df

def translate_loop(df):
    """Run the full key translation: compute both means, then shift the key rows.

    Returns the dataframe produced by ``translate_keys``.
    """
    df_keys, key_means = find_k_means(df)
    query_means = find_q_means(df)
    return translate_keys(df, df_keys, query_means, key_means)

In [9]:
## TSNE AND UMAP
def run_tsne(df, layer, head, random_state=0):
    """Embed the feature columns of ``df`` with t-SNE (3 components, cosine metric).

    Args:
        df: dataframe whose columns from positional index 8 onward are the features.
        layer: layer index; only used by the commented-out save line.
        head: head index; only used by the commented-out save line.
        random_state: seed passed to TSNE so repeated runs give the same
            embedding (run_umap also uses a fixed seed).

    Returns:
        Array returned by ``TSNE.fit_transform`` (one row per row of ``df``).
    """
    import inspect

    # prepare data for feature plot
    df_subset = df.iloc[:, 8:].copy().values # only get feature cols

    # scikit-learn renamed `n_iter` to `max_iter`; use whichever this version accepts
    tsne_params = inspect.signature(TSNE.__init__).parameters
    iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"

    # run TSNE
    # from: https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b
    time_start = time.time()
    # 3D embedding; NOTE: make_fig currently plots only components 0 and 1
    tsne = TSNE(n_components=3, verbose=0, perplexity=100, metric="cosine",
                random_state=random_state, **{iter_kwarg: 300})
    tsne_results = tsne.fit_transform(df_subset)
    # np.save("tsne/layer" + str(layer) + "_head" + str(head) + ".npy", tsne_results) # save tsne results too
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

    return tsne_results

def run_umap(df, layer, head):
    """Embed the feature columns of ``df`` with UMAP (3 components, cosine metric, seed 0).

    ``layer`` and ``head`` are only referenced by the commented-out save line.
    Returns the array produced by ``UMAP.fit_transform``.
    """
    # feature columns start at positional index 8
    feature_matrix = df.iloc[:, 8:].copy().values

    started = time.time()
    reducer = UMAP(n_components=3, init='random', random_state=0, metric="cosine")
    # reducer = UMAP(n_components=2, init='random', random_state=0, metric="cosine") # 2D
    embedding = reducer.fit_transform(feature_matrix)
    # np.save("umap/layer" + str(layer) + "_head" + str(head) + ".npy", embedding) # save umap results too
    print('UMAP done! Time elapsed: {} seconds'.format(time.time() - started))

    return embedding

In [10]:
## PLOT GENERATION
# add additional columns to df
def add_to_df(df, half):
    """Add the hover-text helper columns used by make_fig.

    Sets/overwrites these columns on ``df`` (in place) and returns it:
      * 'pos_int': 1-based token position, taken from the first ``half``
        entries of the global ``attn_dict['positions']`` and repeated for
        the query and key halves;
      * 'length': number of tokens in the sentence;
      * 'color': "#B6E1B9" for query rows, "#F6BA98" for all other rows;
      * 'norm': rounded to 2 decimals.

    Args:
        df: dataframe with 'sentence', 'type' and 'norm' columns and ``2 * half`` rows.
        half: number of query rows (equal to the number of key rows).
    """
    # positions (not normalized), shifted to start at 1
    df['pos_int'] = attn_dict['positions'][:half] * 2
    df['pos_int'] = df['pos_int'] + 1

    # length of sentence: blank out the <br> / <b ...> / </b> markup first,
    # because "<b style=...>" contains a space and would count as an extra token
    plain_text = df['sentence'].str.replace(r"</?b[^>]*>", " ", regex=True)
    df['length'] = plain_text.str.split().str.len()

    # corresponding color for queries/keys
    df['color'] = ["#B6E1B9" if t == "query" else "#F6BA98" for t in df['type']]
    df['norm'] = df['norm'].round(2)

    return df

def make_fig(tsne_results, df, layer, head, plot_type, half):
    """Show a 2D plotly scatter of an embedding with separate color scales for keys and queries.

    Rows ``[:half]`` of ``tsne_results`` / ``df`` are plotted as the query
    trace and rows ``[half:]`` as the key trace. Both traces are colored by
    ``df.position``. Only components 0 and 1 of ``tsne_results`` are plotted.

    Args:
        tsne_results: embedding array (t-SNE or UMAP output), one row per row of ``df``.
        df: dataframe providing the 'position', 'token', 'sentence', 'pos_int',
            'length', 'type', 'color' and 'norm' columns.
        layer: layer index, used in the plot title.
        head: head index, used in the plot title.
        plot_type: label used in the plot title (e.g. "TSNE" or "UMAP").
        half: number of query rows; the key rows start at this offset.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # plot TSNE / UMAP results with plotly
    # 3D version
    # fig = px.scatter_3d(
    #     tsne_results[half:], x=0, y=1, z=2,
    #     color=df.norm[half:], labels={'color': 'normalized position'}, color_continuous_scale=px.colors.sequential.Burgyl,
    #     title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
    #     height=800,
    #     opacity=0.5
    # )
    
    # 2D version
    # base figure: key rows, Burgyl color scale (this becomes 'coloraxis')
    fig = px.scatter(
        tsne_results[half:], x=0, y=1,
        color=df.position[half:], labels={'color': 'normalized position'}, color_continuous_scale=px.colors.sequential.Burgyl,
        title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
        height=800,
        opacity=0.5
    )
    
    # 3D version
    # fig2 = px.scatter_3d(
    #     tsne_results[:half], x=0, y=1, z=2, 
    #     color=df.position[:half], labels={'color': ''}, color_continuous_scale=px.colors.sequential.Blugrn,
    #     title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
    #     height=800,
    #     opacity=0.5
    # )
    # helper figure: query rows, Blugrn color scale; only its trace and color axis are reused
    fig2 = px.scatter(
        tsne_results[:half], x=0, y=1, 
        color=df.position[:half], labels={'color': ''}, color_continuous_scale=px.colors.sequential.Blugrn,
        title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
        height=800,
        opacity=0.5
    )
    
    # add second trace to include 2 color scales (1st is key, 2nd is query)
    fig.layout.coloraxis2 = fig2.layout.coloraxis
    fig.add_trace(fig2.data[0])
    # re-point the copied query trace at the second color axis
    fig['data'][1]['marker'] = {    'color' : df['position'][:half],
                                    'coloraxis' : 'coloraxis2',
                                    'opacity' : 0.5
                                }
    # formatting things
    # offset the two colorbars horizontally (x = 1.05 vs. 1.01)
    fig.layout.coloraxis.colorbar.x = 1.05
    fig.layout.coloraxis.colorbar.title.side = "right"
    fig.layout.coloraxis2.colorbar.x = 1.01
    fig.layout.coloraxis2.colorbar.ticklabelstep=70
    fig.layout.coloraxis2.colorbar.ticklabelposition="inside"
    
    # updating display
    # customdata order: token, sentence, pos_int, length, type, color, norm
    # (the hovertemplate indexes into it by position)
    fig.update_traces( # queries
        customdata=df[['token', 'sentence', 'pos_int', 'length', 'type', 'color', 'norm']][:half],
        hovertemplate="<b style='font-size:larger'><span style='color:%{customdata[5]}'>%{customdata[0]}</span> (<i>%{customdata[4]}</i>, pos: %{customdata[2]} of %{customdata[3]}, norm: %{customdata[6]})</b><br><br>%{customdata[1]}",
        selector=dict(marker_coloraxis='coloraxis2'),
        marker=dict(size=6)
    )
    fig.update_traces( # keys
        customdata=df[['token', 'sentence', 'pos_int', 'length', 'type', 'color', 'norm']][half:],
        hovertemplate="<b style='font-size:larger'><span style='color:%{customdata[5]}'>%{customdata[0]}</span> (<i>%{customdata[4]}</i>, pos: %{customdata[2]} of %{customdata[3]}, norm: %{customdata[6]})</b><br><br>%{customdata[1]}",
        selector=dict(marker_coloraxis='coloraxis'),
        marker=dict(size=6)
    )
    fig.update_layout(
        plot_bgcolor='#E8E8E8',
        hoverlabel=dict(font_color = 'white', bordercolor = 'white'),
    )
    
    # save plot as html file
    # fig.write_html(plot_type + "_plots/layer" + str(layer) + "_head" + str(head) + ".html")
    fig.show()

In [12]:
## FULL TSNE/UMAP LOOPS
def generate_tsne(layer, head):
    """Generate and show the t-SNE plot for one (layer, head) of BERT."""
    frame = translate_loop(make_df(layer, head))
    embedding = run_tsne(frame, layer, head)
    half = len(embedding) // 2  # first half of the rows are queries, second half keys
    frame = add_to_df(frame, half)
    make_fig(embedding, frame, layer, head, "TSNE", half)

def generate_umap(layer, head):
    """Generate and show the UMAP plot for one (layer, head) of BERT.

    Builds the dataframe, translates the keys onto the queries, runs UMAP on
    the feature columns, adds the hover columns and displays the figure.
    """
    df = make_df(layer, head)
    df = translate_loop(df)
    umap_results = run_umap(df, layer, head)
    # split point between query rows and key rows; must come from the UMAP
    # output (`tsne_results` is not defined in this function)
    half = int(len(umap_results) / 2)
    df = add_to_df(df, half)
    make_fig(umap_results, df, layer, head, "UMAP", half)
    
def generate_tsne_and_umap(layer, head):
    """Generate and show both the t-SNE and the UMAP plot for one (layer, head).

    The dataframe is built and translated once and shared by both embeddings.
    """
    frame = translate_loop(make_df(layer, head))

    # t-SNE runs first, then UMAP; the dict keeps that order for plotting
    embeddings = {
        "TSNE": run_tsne(frame, layer, head),
        "UMAP": run_umap(frame, layer, head),
    }

    half = len(embeddings["TSNE"]) // 2
    frame = add_to_df(frame, half)
    for plot_type, embedding in embeddings.items():
        make_fig(embedding, frame, layer, head, plot_type, half)

### plot generation

In [14]:
# Layer / head to visualize (indices into the per-layer, per-head lists of attn_dict).
layer, head = 0, 0

# Pick ONE of the calls below.
generate_tsne(layer, head)              # t-SNE only
# generate_umap(layer, head)            # UMAP only
# generate_tsne_and_umap(layer, head)   # both t-SNE and UMAP


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



t-SNE done! Time elapsed: 72.77628874778748 seconds


In [None]:
# Generate the t-SNE and UMAP plots for every (layer, head) pair of the 12 x 12 grid,
# layer by layer, reporting progress after each pair.
for i, j in ((layer_idx, head_idx) for layer_idx in range(12) for head_idx in range(12)):
    generate_tsne_and_umap(i, j)
    print(f"Layer {i} Head {j} done")