### Imports + Data Loading

In [2]:
# import methods from bertviz
from bertviz import neuron_view
from bertviz.transformers_neuron_view import BertModel, BertTokenizer, GPT2Model, GPT2Tokenizer

import numpy as np
import random
import string
import pickle

from numpy import linalg as LA

import seaborn as sns
import matplotlib.pyplot as plt

from datasets import load_dataset
import random

In [4]:
# sentences = np.load('sentences.npy') # load sentences from Catherine's file
# sentences_test = sentences[:10] # small sample to test out code with

### Select BERT or GPT

In [3]:
# Pick exactly one model family below; the BERT variant is kept commented out for reference.
# # BERT
# model_type = 'bert'
# model_version = 'bert-base-uncased'
# model = BertModel.from_pretrained(model_version, output_attentions=True)
# tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=True)

# GPT
# model_type is passed through to neuron_view.get_attention further down.
model_type = 'gpt2'
model_version = 'gpt2'
model = GPT2Model.from_pretrained(model_version, output_attentions=True)
# NOTE(review): do_lower_case looks carried over from the BERT tokenizer call — confirm it has any effect for GPT-2.
tokenizer = GPT2Tokenizer.from_pretrained(model_version, do_lower_case=True)

# Hard-coded grid size used by the norm-ratio cells below.
# NOTE(review): presumably matches the selected checkpoint — verify if model_version changes.
num_heads = 12
num_layers = 12

### Computing Q/K norm ratios per attention head

In [10]:
# create 12 x 12 array for query/key norm ratios

# One accumulator per attention head, indexed [layer][head] so it lines up with
# how the bertviz output is indexed everywhere else in this notebook
# (e.g. attn_dict['queries'][layer][head]).
attn_norms = [
    [{'key_norm_sum': 0, 'query_norm_sum': 0} for _ in range(num_heads)]
    for _ in range(num_layers)
]

In [26]:
# NOTE(review): `sentences` must already be defined; the np.load cell near the top is commented out.
for s in sentences:
    # call method from bertviz to get attention info
    s_dict = neuron_view.get_attention(model, model_type, tokenizer, s, include_queries_and_keys=True)['all']

    # accumulate summed L2 norms of every query / key vector per attention head;
    # the first index of s_dict['queries'] / ['keys'] is the layer, the second the head
    for layer_idx in range(num_layers):
        for head_idx in range(num_heads):
            cell = attn_norms[layer_idx][head_idx]
            cell['query_norm_sum'] += sum(LA.norm(q) for q in s_dict['queries'][layer_idx][head_idx])
            cell['key_norm_sum'] += sum(LA.norm(k) for k in s_dict['keys'][layer_idx][head_idx])

# the ratio only depends on the final sums, so compute it once after all sentences
for row in attn_norms:
    for cell in row:
        if cell['key_norm_sum']:
            cell['qk_factor'] = cell['query_norm_sum'] / cell['key_norm_sum']
        else:
            cell['qk_factor'] = float('nan')  # no keys accumulated (e.g. empty `sentences`)

In [27]:
# Collect the per-head ratios into a 2-D array; the shape follows attn_norms
# itself instead of a hard-coded 12 x 12.
factors = np.array(
    [[cell['qk_factor'] for cell in row] for row in attn_norms],
    dtype=float,
)

### Visualize ratios using heatmap

In [6]:
fig, ax = plt.subplots(figsize=(8, 6), dpi = 150)
s = sns.heatmap(factors,fmt="",cmap='RdYlGn',linewidths=0.50,ax=ax)
# Rows of `factors` come from the first index of s_dict['queries'] (the layer),
# columns from the second (the head), so y is the layer axis and x the head axis.
s.set_xlabel('Head', fontsize=13)
s.set_ylabel('Layer', fontsize=13)
s.set_title('Average Q/K Norm Ratio Per Attention Head', fontsize = 14);

In [67]:
# Inspect the ratio stored at factors[1][2] (the commented-out scaling cell below reads the same entry).
factors[1][2]

2.2670928274360302

### Scaling queries + keys

In [65]:
# # master dictionary for all values
# attn_dict = {'left_text': [], 
#              'right_text': [], 
#              'positions': [],
#              'normalized_positions': [],
#              'sentences': [],
#              'tokenized_sentences': [],
#              'queries': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)], 
#              'keys': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)],
#              'attn': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)],
#              'dot_prod': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)]}
    

In [70]:
# for s in sentences[0:100]:
#     # call method from bertviz to get attention info
#     s_dict = neuron_view.get_attention(model, model_type, tokenizer, s, include_queries_and_keys=True)['all']
    
#     # append to master dictionary
#     tokens = s_dict['left_text']
#     attn_dict['left_text'].extend(tokens)
#     attn_dict['right_text'].extend(s_dict['right_text'])
    
    
#     for index in range(len(tokens)): # save position of token and tokenized sentences too
#         attn_dict['positions'].append(index)
#         attn_dict['normalized_positions'].append(index / (len(tokens) - 1))
#         attn_dict['sentences'].append(s)
#         attn_dict['tokenized_sentences'].append(' '.join(tokens))
#         attn_dict_scaled['positions'].append(index)
#         attn_dict_scaled['normalized_positions'].append(index / (len(tokens) - 1))
#         attn_dict_scaled['sentences'].append(s)
#         attn_dict_scaled['tokenized_sentences'].append(' '.join(tokens))
        
# #     for i in range(num_heads): # updating cumulative q/k vectors + attn + dp
# #         for j in range(num_layers):
#     i = 1
#     j = 2
#     k = attn_dict['keys'][i][j]
#     a = attn_dict['attn'][i][j]
#     d = attn_dict['dot_prod'][i][j]

#     if len(q) == 1: # on first round, need to empty list (random string was placeholder)
#         q.clear()
#     query = s_dict['queries'][i][j]
#     q.extend(query)
#     np_query = np.array(query)

#     if len(k) == 1:
#         k.clear()
#     key = s_dict['keys'][i][j]
#     k.extend(key)
#     np_key = np.array(key)

#     if len(a) == 1:
#         a.clear()
#     a.extend(s_dict['attn'][i][j])

#     if len(d) == 1:
#         d.clear()
#     dp = np.dot(np_query, np_key.transpose())
#     d.extend(dp)

In [71]:
# # master dictionary for all values
# attn_dict_scaled = {'left_text': [], 
#              'right_text': [], 
#              'positions': [],
#              'normalized_positions': [],
#              'sentences': [],
#              'tokenized_sentences': [],
#              'queries': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)], 
#              'keys': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)],
#              'attn': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)],
#              'dot_prod': [[[''.join(random.choices(string.ascii_letters, k=5))] for i in range(num_heads)] for j in range(num_layers)]}
    

In [None]:
# scaling_factor = factors[1][2]

## Make TSNE / UMAP Plots
Generating plots from query + key vectors

In [None]:
# !pip install plotly
# !pip install seaborn
# !pip install umap-learn

### Imports

In [4]:
from sklearn.manifold import TSNE
from umap import UMAP
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pickle

# ensure plots show up in jupyter
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "iframe"

In [5]:
# load attn_dict back if pre-saved

# NOTE(review): pickle.load can execute arbitrary code — only load files produced by this project.
with open("saved/attn_dict.p", "rb") as attn_file:  # context manager closes the handle
    attn_dict = pickle.load(attn_file)

In [12]:
# attn_dict_small = pickle.load(open("saved/attn_dict_small.p", "rb"))

In [6]:
# Number of query vectors stored at index [0][0] of the loaded dictionary.
len(attn_dict['queries'][0][0])

28679

In [13]:
# NOTE(review): attn_dict_small is only defined if the (currently commented-out) load cell above has been run.
len(attn_dict_small['queries'][0][0])

2883

In [28]:
# df = pd.DataFrame()
# df['token'] = attn_dict['left_text'] + attn_dict['right_text'] # store tokens
# df['token'] = df['token'].str.lower() # convert to lowercase
# num_tokens = len(attn_dict['left_text'])

# df['type'] = ['query'] * num_tokens + ['key'] * num_tokens # store token type
# df['pos_int'] = attn_dict['positions'] * 2 # positions
# df['position'] = attn_dict['normalized_positions'] * 2

# # sentence itself
# df['sentence'] = fix_sentences(attn_dict['tokenized_sentences'], attn_dict['positions'], df['type'][:num_tokens]) + fix_sentences(attn_dict['tokenized_sentences'], attn_dict['positions'], df['type'][num_tokens:])

# # save attn info
# attn = attn_dict['attn'][layer][head]
# df['attn'] = attn + k_matrix(attn)
# dp = attn_dict['dot_prod'][layer][head]
# df['dot_prod'] = dp + k_matrix(dp)

# # extract q/k vectors
# queries = attn_dict['queries']
# keys = attn_dict['keys']
# vec_size = len(queries[layer][head][0])

# # norms
# norms_q = []
# norms_k = []
# for i in range(len(queries[layer][head])):
#     q = queries[layer][head][i]
#     k = keys[layer][head][i]
#     norms_q.append(np.linalg.norm(q))
#     norms_k.append(np.linalg.norm(k))
# df["norm"] = norms_q + norms_k

# for i in range(vec_size): # store q/k vector values
#     qs = [queries[layer][head][j][i] for j in range(num_tokens)]
#     ks = [keys[layer][head][j][i] for j in range(num_tokens)]
#     df["f" + str(i)] = qs + ks # add to dataframe

# # comment out line below if want all 60k data points
# df = pd.concat([df.iloc[:5021], df.iloc[30070:30070+5021]]) # only get first X keys + queries

In [32]:
# NOTE(review): `df` is only built by the commented-out cell above, so this fails on a fresh kernel.
df.head()

Unnamed: 0,token,type,pos_int,position,sentence,attn,dot_prod,norm,f0,f1,...,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63
0,syn,query,0,0.0,<b style='color:#B6E1B9'>Syn</b> th pop band F...,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.4260597666596735, -14.045386268408585, -9....",4.776521,0.626356,-0.402041,...,0.012074,-0.468083,0.165571,-1.149125,-1.470522,-0.722273,0.825755,-0.582111,0.639374,-0.158392
1,th,query,1,0.076923,Syn <b style='color:#B6E1B9'>th</b> pop band F...,"[0.7727242708206177, 0.2272757887840271, 0.0, ...","[-2.37188420581595, -12.161947897568501, -14.2...",5.630867,1.253516,0.493161,...,-0.952495,-1.166787,0.505097,-0.813122,-0.057576,-1.366027,0.56158,-0.465369,1.410642,0.122727
2,pop,query,2,0.153846,Syn th <b style='color:#B6E1B9'>pop</b> band F...,"[0.6718084216117859, 0.28001973032951355, 0.04...","[-0.7664155974118483, -7.767320269121932, -21....",8.130524,0.393987,1.333136,...,-0.364882,-3.260059,-1.110516,-0.283682,-2.054488,-0.639621,0.065158,-0.128336,1.627604,-0.476792
3,band,query,3,0.230769,Syn th pop <b style='color:#B6E1B9'>band</b> F...,"[0.6222391128540039, 0.07751425355672836, 0.11...","[3.0192838275094527, -13.643618085799897, -10....",7.561337,0.215017,-0.99805,...,1.107949,-2.849341,-0.212997,0.122969,-1.968271,0.098098,-0.118798,-1.30593,0.840051,1.86999
4,free,query,4,0.307692,Syn th pop band <b style='color:#B6E1B9'>Free<...,"[0.29884660243988037, 0.14810486137866974, 0.0...","[-3.873843952510504, -9.489924005559253, -17.6...",6.836336,0.908945,-0.601588,...,-1.241019,-0.022176,0.152536,-0.391612,0.282707,0.000389,0.233563,0.197837,0.379938,0.536506


In [38]:
# Show the 'attn' entry of the row at position 10 (relies on the same `df` as the preview above).
df.iloc[10]['attn']

[0.20745466649532318,
 0.05711399018764496,
 0.21443572640419006,
 0.10953076928853989,
 0.018751924857497215,
 0.06923841685056686,
 0.04730606824159622,
 0.0810045525431633,
 0.054247643798589706,
 0.09515170753002167,
 0.04576465114951134,
 0.0,
 0.0,
 0.0]

### Visualization helper methods

In [7]:
# produce corresponding key matrix from query matrix (e.g., for attention)
def k_matrix(q_matrix):
    """Build the key-side view of a per-token, query-side matrix.

    `q_matrix` holds one row per token for a single layer + head
    (e.g. attn[layer][head]), with sentences stored back to back. The length of
    the first row of each sentence gives that sentence's size, and each
    sentence's square block is transposed independently.

    Returns a new list of lists with one row per token, in the same token order.
    """
    total_rows = len(q_matrix)
    transposed = []
    start = 0
    while start < total_rows:
        width = len(q_matrix[start])  # sentence length, read off its first row
        # column `col` of this sentence's block becomes one output row
        transposed.extend(
            [q_matrix[start + row][col] for row in range(width)]
            for col in range(width)
        )
        start += width

    return transposed

# format sentences to be displayed in html plot
def fix_sentences(sentences, positions, types):
    """Render each tokenized sentence as an HTML snippet for the plot hover text.

    Parameters
    ----------
    sentences : iterable of str
        Whitespace-joined tokens, one entry per token row.
    positions : iterable of int
        Index of the token to highlight within the matching sentence.
    types : iterable of str
        "key" highlights in orange (#F6BA98); anything else in green (#B6E1B9).

    Returns
    -------
    list of str
        One snippet per input triple. Tokens are separated by single spaces with
        no trailing space, the token at `pos` is wrapped in a colored <b> tag,
        and "<br>" is inserted before every 10th token (never before the first
        or last token).
    """
    new_sentences = []
    for sent, pos, t in zip(sentences, positions, types):
        tokens = sent.split()
        last = len(tokens) - 1
        color = "#F6BA98" if t == "key" else "#B6E1B9"

        pieces = []
        for i, tok in enumerate(tokens):
            # bold + color the current token
            piece = ("<b style='color:" + color + "'>" + tok + "</b>") if i == pos else tok
            if i % 10 == 0 and i not in (0, last):
                piece = "<br>" + piece  # add new line every 10 tokens
            pieces.append(piece)

        # join() puts a space between tokens only; the original compared the
        # output string to an int here and therefore always added a trailing space
        new_sentences.append(" ".join(pieces))

    return new_sentences

# convert data into pandas dataframe
def make_df(layer, head, attn_dict, scale = 1, max_tokens = 5021):
    """Build the query/key dataframe for one attention head.

    The frame has one row per query token followed by one row per key token.
    The first eight columns are metadata (token, type, pos_int, position,
    sentence, attn, dot_prod, norm); the remaining columns f0..f{d-1} hold the
    vector components. Downstream helpers rely on this layout via `.iloc[:, 8:]`.

    Parameters
    ----------
    layer, head : int
        Indices into attn_dict['queries'|'keys'|'attn'|'dot_prod'][layer][head].
    attn_dict : dict
        Dictionary with 'left_text', 'right_text', 'positions',
        'normalized_positions', 'tokenized_sentences', 'attn', 'dot_prod',
        'queries' and 'keys' entries. The per-token entries are concatenated and
        repeated with `+` / `*`, so they are expected to be lists.
    scale : number
        Queries are divided by `scale` and keys multiplied by it. The 'norm'
        column is computed from the unscaled vectors.
    max_tokens : int or None
        Keep only the first `max_tokens` queries and the matching first
        `max_tokens` keys. Pass None to keep everything.
        NOTE(review): 5021 presumably falls on a sentence boundary of the
        original dataset — confirm before changing it.

    Returns
    -------
    pandas.DataFrame
    """
    num_tokens = len(attn_dict['left_text'])
    positions = attn_dict['positions']
    tokenized = attn_dict['tokenized_sentences']

    df = pd.DataFrame()
    df['token'] = attn_dict['left_text'] + attn_dict['right_text'] # store tokens
    df['token'] = df['token'].str.lower() # convert to lowercase
    df['type'] = ['query'] * num_tokens + ['key'] * num_tokens # store token type
    df['pos_int'] = positions * 2 # positions
    df['position'] = attn_dict['normalized_positions'] * 2

    # sentence itself, highlighted once as query and once as key
    df['sentence'] = (
        fix_sentences(tokenized, positions, ['query'] * num_tokens)
        + fix_sentences(tokenized, positions, ['key'] * num_tokens)
    )

    # save attn info (key rows get the per-sentence transposed view)
    attn = attn_dict['attn'][layer][head]
    df['attn'] = attn + k_matrix(attn)
    dp = attn_dict['dot_prod'][layer][head]
    df['dot_prod'] = dp + k_matrix(dp)

    # extract q/k vectors as (num_tokens, vec_size) arrays
    q_vecs = np.asarray(attn_dict['queries'][layer][head], dtype=float)
    k_vecs = np.asarray(attn_dict['keys'][layer][head], dtype=float)

    # norms of the unscaled vectors
    df['norm'] = np.concatenate([
        np.linalg.norm(q_vecs, axis=1),
        np.linalg.norm(k_vecs, axis=1),
    ])

    # store q/k vector values, all feature columns added in a single concat
    features = np.concatenate([q_vecs[:num_tokens] / scale, k_vecs[:num_tokens] * scale])
    feature_frame = pd.DataFrame(
        features,
        columns=["f" + str(i) for i in range(features.shape[1])],
        index=df.index,
    )
    df = pd.concat([df, feature_frame], axis=1)

    # Subsample the same leading tokens from both halves. The key half starts at
    # row `num_tokens`; the previous fixed offset of 30070 only matched a
    # dataset of exactly that size and otherwise selected keys that did not
    # correspond to the selected queries.
    if max_tokens is not None and max_tokens < num_tokens:
        df = pd.concat([
            df.iloc[:max_tokens],
            df.iloc[num_tokens:num_tokens + max_tokens],
        ])
    return df

In [8]:
## TRANSLATING KEYS FOR EASIER COMPARISON
def find_q_means(df):
    """Return the per-feature mean over all query rows.

    Feature columns are everything from column index 8 onward (the layout
    produced by make_df). The result is a Series indexed by feature column name.
    """
    is_query = df['type'] == 'query'
    query_features = df[is_query].iloc[:, 8:]
    return query_features.mean(axis=0)

def find_k_means(df):
    """Return the key feature block and its per-feature mean.

    Returns a tuple `(df_keys, key_means)`: `df_keys` is an independent copy of
    the key rows restricted to the columns from index 8 onward, and `key_means`
    is the column-wise mean of that block.
    """
    is_key = df['type'] == 'key'
    key_features = df[is_key].iloc[:, 8:].copy()
    return key_features, key_features.mean(axis=0)

def translate_keys(df, df_keys, query_means, key_means):
    """Shift the key vectors so their mean coincides with the query mean.

    For every feature column f<i> present in `df_keys`, the key rows of `df` are
    replaced by `key - key_mean + query_mean`. The set of columns is taken from
    `df_keys` rather than assuming exactly 64 features.

    `df` is modified in place and also returned.
    """
    is_key = df['type'] == 'key'
    # only touch the f<number> feature columns, whatever the vector size is
    feature_cols = [
        col for col in df_keys.columns
        if isinstance(col, str) and col[:1] == "f" and col[1:].isdigit()
    ]
    for col in feature_cols:
        df.loc[is_key, col] = df_keys[col] - key_means[col] + query_means[col]
    return df

def translate_loop(df):
    """Run the full key-translation step on `df` and return the result.

    Computes the query feature means and the key block with its means, then
    hands them to translate_keys.
    """
    q_centroid = find_q_means(df)
    key_block, k_centroid = find_k_means(df)
    return translate_keys(df, key_block, q_centroid, k_centroid)

In [9]:
## TSNE AND UMAP
def run_tsne(df, layer, head, n_components = 3):
    """Embed the q/k feature columns of `df` with t-SNE (cosine metric).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns from index 8 onward are the feature values.
    layer, head : int
        Not used by the computation; only referenced by the commented-out
        save path below.
    n_components : int
        Embedding dimensionality. The default of 3 is the previous hard-coded
        value; note that make_fig currently draws only the first two columns.

    Returns
    -------
    numpy.ndarray
        The embedding returned by TSNE.fit_transform, one row per row of `df`.
    """
    # prepare data for feature plot
    df_subset = df.iloc[:, 8:].copy().values # only get feature cols

    # run TSNE
    # from: https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b
    time_start = time.time()
    # random_state is fixed so reruns give the same embedding, matching run_umap.
    # NOTE(review): newer scikit-learn releases rename n_iter to max_iter — confirm against the installed version.
    tsne = TSNE(n_components=n_components, verbose=0, perplexity=100, n_iter=300, metric="cosine", random_state=0)
    tsne_results = tsne.fit_transform(df_subset)
    # np.save("tsne/layer" + str(layer) + "_head" + str(head) + ".npy", tsne_results) # save tsne results too
    print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

    return tsne_results

def run_umap(df, layer, head, n_components = 3):
    """Embed the q/k feature columns of `df` with UMAP (cosine metric, fixed seed).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns from index 8 onward are the feature values.
    layer, head : int
        Not used by the computation; only referenced by the commented-out
        save path below.
    n_components : int
        Embedding dimensionality. The default of 3 is the previous hard-coded
        value; note that make_fig currently draws only the first two columns.

    Returns
    -------
    numpy.ndarray
        The embedding returned by UMAP.fit_transform, one row per row of `df`.
    """
    # prepare data for feature plot
    df_subset = df.iloc[:, 8:].copy().values # only get feature cols

    # run umap
    time_start = time.time()
    reducer = UMAP(n_components=n_components, init='random', random_state=0, metric="cosine")
    umap_results = reducer.fit_transform(df_subset)
    # np.save("umap/layer" + str(layer) + "_head" + str(head) + ".npy", umap_results) # save umap results too
    print('UMAP done! Time elapsed: {} seconds'.format(time.time()-time_start))

    return umap_results

In [10]:
## PLOT GENERATION
# add additional columns to df
def add_to_df(df, half, attn_dict):
    """Add the display-only columns used by the plot hover text.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'sentence', 'type' and 'norm' columns and 2 * half rows.
    half : int
        Number of rows in each of the query and key halves.
    attn_dict : dict
        Provides 'positions'; its first `half` entries are repeated for both
        halves (it is sliced and repeated with `*`, so it must be a list).

    Returns
    -------
    pandas.DataFrame
        The same `df` object, modified in place: 'pos_int' becomes the
        1-indexed token position, 'length' is the number of tokens in the
        sentence, 'color' is the hover color for the row type, and 'norm' is
        rounded to two decimals.
    """
    # positions (not normalized), shifted to be 1-indexed for display
    df['pos_int'] = attn_dict['positions'][:half] * 2
    df['pos_int'] = df['pos_int'] + 1

    # length of sentence: strip the <br> / <b ...> / </b> markup first, because
    # the space inside "<b style=...>" would otherwise count as an extra token
    plain_text = df['sentence'].str.replace(r"<br>|</?b\b[^>]*>", " ", regex=True)
    df['length'] = plain_text.str.split().str.len()

    # corresponding color for queries/keys
    df['color'] = ["#B6E1B9" if t == "query" else "#F6BA98" for t in df['type']]
    df['norm'] = df['norm'].round(2)

    return df

def make_fig(tsne_results, df, layer, head, plot_type, half):
    """Draw one interactive plotly scatter of keys and queries and display it.

    Parameters:
        tsne_results: embedding array (from run_tsne or run_umap); rows [:half]
            are plotted as queries and rows [half:] as keys. Only columns 0 and
            1 are drawn by the active 2D code path.
        df: frame providing 'position' for coloring and the hover columns
            'token', 'sentence', 'pos_int', 'length', 'type', 'color', 'norm'.
        layer, head: used only in the figure title.
        plot_type: label placed at the start of the title (callers pass
            "TSNE" or "UMAP").
        half: number of rows in the query half.

    Returns nothing; the figure is shown with fig.show().
    """
    # plot TSNE / UMAP results with plotly
    # 3D version
    # fig = px.scatter_3d(
    #     tsne_results[half:], x=0, y=1, z=2,
    #     color=df.norm[half:], labels={'color': 'normalized position'}, color_continuous_scale=px.colors.sequential.Burgyl,
    #     title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
    #     height=800,
    #     opacity=0.5
    # )
    
    # 2D version: base figure holds the key half, colored by normalized position
    fig = px.scatter(
        tsne_results[half:], x=0, y=1,
        color=df.position[half:], labels={'color': 'normalized position'}, color_continuous_scale=px.colors.sequential.Burgyl,
        title=plot_type + ' Plot for GPT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
        height=800,
        opacity=0.5
    )
    
    # 3D version
    # fig2 = px.scatter_3d(
    #     tsne_results[:half], x=0, y=1, z=2, 
    #     color=df.position[:half], labels={'color': ''}, color_continuous_scale=px.colors.sequential.Blugrn,
    #     title=plot_type + ' Plot for BERT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
    #     height=800,
    #     opacity=0.5
    # )
    # 2D version: temporary figure for the query half; only its trace and color axis are reused below
    fig2 = px.scatter(
        tsne_results[:half], x=0, y=1, 
        color=df.position[:half], labels={'color': ''}, color_continuous_scale=px.colors.sequential.Blugrn,
        title=plot_type + ' Plot for GPT (Layer ' + str(layer) + ', Head ' + str(head) + ')',
        height=800,
        opacity=0.5
    )
    
    # add second trace to include 2 color scales (1st is key, 2nd is query)
    fig.layout.coloraxis2 = fig2.layout.coloraxis
    fig.add_trace(fig2.data[0])
    # re-point the copied query trace at the second color axis
    fig['data'][1]['marker'] = {    'color' : df['position'][:half],
                                    'coloraxis' : 'coloraxis2',
                                    'opacity' : 0.5
                                }
    # formatting things: place the two colorbars next to each other
    fig.layout.coloraxis.colorbar.x = 1.05
    fig.layout.coloraxis.colorbar.title.side = "right"
    fig.layout.coloraxis2.colorbar.x = 1.01
    # NOTE(review): the large tick-label step presumably hides the second bar's labels — confirm intent
    fig.layout.coloraxis2.colorbar.ticklabelstep=70
    fig.layout.coloraxis2.colorbar.ticklabelposition="inside"
    
    # updating display: the selector picks the trace by its color axis, so each
    # half gets hover data from its own slice of df
    fig.update_traces( # queries
        customdata=df[['token', 'sentence', 'pos_int', 'length', 'type', 'color', 'norm']][:half],
        hovertemplate="<b style='font-size:larger'><span style='color:%{customdata[5]}'>%{customdata[0]}</span> (<i>%{customdata[4]}</i>, pos: %{customdata[2]} of %{customdata[3]}, norm: %{customdata[6]})</b><br><br>%{customdata[1]}",
        selector=dict(marker_coloraxis='coloraxis2'),
        marker=dict(size=6)
    )
    fig.update_traces( # keys
        customdata=df[['token', 'sentence', 'pos_int', 'length', 'type', 'color', 'norm']][half:],
        hovertemplate="<b style='font-size:larger'><span style='color:%{customdata[5]}'>%{customdata[0]}</span> (<i>%{customdata[4]}</i>, pos: %{customdata[2]} of %{customdata[3]}, norm: %{customdata[6]})</b><br><br>%{customdata[1]}",
        selector=dict(marker_coloraxis='coloraxis'),
        marker=dict(size=6)
    )
    fig.update_layout(
        plot_bgcolor='#E8E8E8',
        hoverlabel=dict(font_color = 'white', bordercolor = 'white'),
    )
    
    # save plot as html file
    # fig.write_html(plot_type + "_plots/layer" + str(layer) + "_head" + str(head) + ".html")
    fig.show()

In [11]:
## FULL TSNE/UMAP LOOPS
# generate tsne plot for specific layer, head
const = 2  # the dataframe is split into this many equal parts (queries, then keys)

def _resolve_attn_dict(attn_dict):
    """Return `attn_dict`, falling back to the notebook-level `attn_dict` when None."""
    if attn_dict is not None:
        return attn_dict
    try:
        return globals()['attn_dict']
    except KeyError:
        raise ValueError("attn_dict was not passed and no global `attn_dict` has been loaded") from None

def _prepare_df(layer, head, attn_dict, scaling_const):
    """Build the query/key dataframe for one head and translate the keys onto the query mean."""
    return translate_loop(make_df(layer, head, attn_dict, scaling_const))

def generate_tsne(layer, head, attn_dict = None, scaling_const = 1):
    """Generate and show the t-SNE plot for a specific layer and head.

    `attn_dict` defaults to the notebook-level `attn_dict`; passing it
    explicitly is preferred. `scaling_const` is forwarded to make_df as `scale`.
    Previously make_df / add_to_df were called without their required
    `attn_dict` argument, which raised TypeError.
    """
    attn_dict = _resolve_attn_dict(attn_dict)
    df = _prepare_df(layer, head, attn_dict, scaling_const)
    tsne_results = run_tsne(df, layer, head)
    half = int(len(tsne_results) / const)
    df = add_to_df(df, half, attn_dict)
    make_fig(tsne_results, df, layer, head, "TSNE", half)

# generate umap plot for specific layer, head
def generate_umap(layer, head, attn_dict, scaling_const):
    """Generate and show the UMAP plot for a specific layer and head.

    `scaling_const` is forwarded to make_df as `scale`.
    """
    df = _prepare_df(layer, head, attn_dict, scaling_const)
    umap_results = run_umap(df, layer, head)
    half = int(len(umap_results) / const)
    df = add_to_df(df, half, attn_dict)
    make_fig(umap_results, df, layer, head, "UMAP", half)

# generate tsne & umap simultaneously
def generate_tsne_and_umap(layer, head, attn_dict = None, scaling_const = 1):
    """Generate and show both the t-SNE and the UMAP plot from one shared dataframe.

    `attn_dict` defaults to the notebook-level `attn_dict`; passing it
    explicitly is preferred. `scaling_const` is forwarded to make_df as `scale`.
    """
    attn_dict = _resolve_attn_dict(attn_dict)
    df = _prepare_df(layer, head, attn_dict, scaling_const)
    tsne_results = run_tsne(df, layer, head)
    umap_results = run_umap(df, layer, head)
    half = int(len(tsne_results) / const)
    df = add_to_df(df, half, attn_dict)
    make_fig(tsne_results, df, layer, head, "TSNE", half)
    make_fig(umap_results, df, layer, head, "UMAP", half)

### Plot generation

In [16]:
# Head to visualize in this cell.
layer = 5
head = 3

# ones to look at l1,h2 l1,h3

# generate single tsne OR umap plot by itself
# generate_tsne(layer, head, attn_dict_small)
# One UMAP plot per scaling constant; make_df divides queries by the constant
# and multiplies keys by it.
consts = [1/3, 1/2, 1, 2]
for c in consts:
    generate_umap(layer, head, attn_dict, c)

# generate single tsne AND umap plot
# generate_tsne_and_umap(layer, head)

In [None]:
# loop for generating plots: one t-SNE and one UMAP figure for each (i, j) pair
# in a 12 x 12 grid; i is passed as the layer and j as the head.
# NOTE(review): the 12s are hard-coded — presumably the model's layer/head counts; confirm.
for i in range(12):
    for j in range(12):
        generate_tsne_and_umap(i, j)
        print("Layer {} Head {} done".format(i, j))