In [None]:
# Silence FutureWarning for the rest of the session. The filter is installed
# before the remaining imports, so it is already active while they run.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
# Sets CUDA_VISIBLE_DEVICES to "4" before the numeric/plotting libraries are imported.
# NOTE(review): nothing visible in this notebook uses a GPU directly — confirm this
# setting is still needed and that device index 4 exists on the target machine.
os.environ['CUDA_VISIBLE_DEVICES']="4"
import pickle
from pathlib import Path
import numpy as np
import scipy as sp

from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE

In [None]:
# Root directory with one sub-directory of result files per dataset.
results_root = Path("/home/ec2-user/SageMaker/halu_code/results")
# Dataset sub-directories, in the order their files are collected.
dataset_dirs = ("place_of_birth", "capitals", "trivia", "founders")

# Collect every result file across all datasets.
# - `glob("*")` also yields sub-directories, which cannot be opened as a file
#   further down, so only regular files are kept.
# - Entries are sorted within each dataset so the processing order does not depend
#   on the filesystem's listing order.
files = [
    path
    for dataset_dir in dataset_dirs
    for path in sorted((results_root / dataset_dir).glob("*"))
    if path.is_file()
]

In [None]:
# Per-run arrays, each keyed by the result file's stem (file name without suffix).
attention_dict = {}
hidden_dict = {}
softmax_dict = {}
attr_dict = {}
correct_dict = {}

# Attribution vectors have varying lengths; each one is zero-padded at the end or
# truncated to exactly this many entries so they can be stacked into one array.
attr_len = 20

for file in tqdm(files):
    print(file)
    # NOTE(security): unpickling can execute arbitrary code — only load result files
    # that come from a trusted source.
    with open(file, "rb") as infile:
        results_data = pickle.load(infile)
    # For these three fields only the last element (`[-1]`) of each per-example entry
    # is kept. NOTE(review): presumably the last layer/token — confirm against the
    # code that produced the result files.
    attention_dict[file.stem] = np.stack([i[-1] for i in results_data['final_attention']])
    hidden_dict[file.stem] = np.stack([i[-1] for i in results_data['final_fully_connected']])
    # Softmax is taken along axis 1 of the stacked logits.
    softmax_dict[file.stem] = sp.special.softmax(np.stack([i[-1] for i in results_data['logits']]), axis=1)
    attr_list = []
    for attr in results_data['attributes_first']:
        if len(attr) <= attr_len:
            # Short (or exactly sized) vectors are right-padded with zeros.
            attr_list.append(np.pad(attr, (0, attr_len - len(attr))))
        else:
            # Long vectors keep only their first `attr_len` entries.
            attr_list.append(attr[:attr_len])
    attr_array = np.stack(attr_list)
    attr_dict[file.stem] = attr_array
    correct_dict[file.stem] = np.array(results_data['correct'])
    # Drop the raw payload before loading the next file to limit peak memory.
    del results_data

In [None]:
# t-SNE of the features loaded from 'final_fully_connected' (hidden_dict): one panel
# per (dataset, model) pair, points coloured by whether the answer was marked correct.

# (substring identifying the model in a result name, column title); the list position
# is the subplot column. Checked in order, first match wins.
model_columns = [
    ('open_llama_7b', 'OpenLlama 7B'),
    ('open_llama_13b', 'OpenLlama 13B'),
    ('falcon-7b', 'Falcon 7B'),
    ('falcon-40b', 'Falcon 40B'),
    ('opt-6.7b', 'OPT 6.7B'),
    ('opt-30b', 'OPT 30B'),
]
# Substring identifying the dataset in a result name -> subplot row. If several
# substrings match, the last one listed wins.
dataset_rows = {
    'place_of_birth': 2,
    'capitals': 0,
    'trivia': 3,
    'founders': 1,
}
# Y-axis labels of the first column, indexed by subplot row.
row_labels = ['Capitals', 'Founders', 'Place of Birth', 'General Trivia']
hue_order = ['Non-Hallucination', 'Hallucination']
alpha = 0.2

fig, axes = plt.subplots(4, 6, sharex=False, figsize=(20,12))

for results_name in tqdm(correct_dict.keys()):
    col = next((idx for idx, (key, _) in enumerate(model_columns) if key in results_name), None)
    matched_rows = [grid_row for key, grid_row in dataset_rows.items() if key in results_name]
    if col is None or not matched_rows:
        # Without this guard an unrecognised name would be drawn on whichever panel
        # the previous iteration used.
        warnings.warn(f"Skipping {results_name!r}: unrecognised model or dataset name")
        continue
    row = matched_rows[-1]
    correct = np.array(['Non-Hallucination' if i==True else 'Hallucination' for i in correct_dict[results_name]])
    # Fixed seed so the embedding (and the saved figure) is reproducible between runs.
    decomp = TSNE(n_components=2, random_state=0).fit_transform(hidden_dict[results_name])
    # Only the top-left panel carries a legend; it applies to the whole grid.
    legend = (col == 0 and row == 0)
    sns.scatterplot(x=decomp[:,0], y=decomp[:,1], hue=correct, hue_order=hue_order, ax=axes[row, col], legend=legend, alpha=alpha)

axes[0][0].legend(loc="upper left")
for title_col, (_, title) in enumerate(model_columns):
    axes[0][title_col].set_title(title)
for label_row, label in enumerate(row_labels):
    axes[label_row][0].set_ylabel(label, rotation=90, size='large')

out_path = Path('figs/fully_connected_tsne_all_models_all_data.png')
# savefig does not create missing directories.
out_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out_path, bbox_inches='tight')
plt.show()

In [None]:
# t-SNE of the padded/truncated attribution vectors (attr_dict): one panel per
# (dataset, model) pair, points coloured by whether the answer was marked correct.
# NOTE(review): "ig" in the output file name presumably stands for integrated
# gradients — confirm against the code that produced 'attributes_first'.

# (substring identifying the model in a result name, column title); the list position
# is the subplot column. Checked in order, first match wins.
model_columns = [
    ('open_llama_7b', 'OpenLlama 7B'),
    ('open_llama_13b', 'OpenLlama 13B'),
    ('falcon-7b', 'Falcon 7B'),
    ('falcon-40b', 'Falcon 40B'),
    ('opt-6.7b', 'OPT 6.7B'),
    ('opt-30b', 'OPT 30B'),
]
# Substring identifying the dataset in a result name -> subplot row. If several
# substrings match, the last one listed wins.
dataset_rows = {
    'place_of_birth': 2,
    'capitals': 0,
    'trivia': 3,
    'founders': 1,
}
# Y-axis labels of the first column, indexed by subplot row.
row_labels = ['Capitals', 'Founders', 'Place of Birth', 'General Trivia']
hue_order = ['Non-Hallucination', 'Hallucination']
alpha = 0.2

fig, axes = plt.subplots(4, 6, sharex=False, figsize=(20,12))

for results_name in tqdm(correct_dict.keys()):
    col = next((idx for idx, (key, _) in enumerate(model_columns) if key in results_name), None)
    matched_rows = [grid_row for key, grid_row in dataset_rows.items() if key in results_name]
    if col is None or not matched_rows:
        # Without this guard an unrecognised name would be drawn on whichever panel
        # the previous iteration used.
        warnings.warn(f"Skipping {results_name!r}: unrecognised model or dataset name")
        continue
    row = matched_rows[-1]
    correct = np.array(['Non-Hallucination' if i==True else 'Hallucination' for i in correct_dict[results_name]])
    # Fixed seed so the embedding (and the saved figure) is reproducible between runs.
    decomp = TSNE(n_components=2, random_state=0).fit_transform(attr_dict[results_name])
    # Only the top-left panel carries a legend; it applies to the whole grid.
    legend = (col == 0 and row == 0)
    sns.scatterplot(x=decomp[:,0], y=decomp[:,1], hue=correct, hue_order=hue_order, ax=axes[row, col], legend=legend, alpha=alpha)

axes[0][0].legend(loc="upper left")
for title_col, (_, title) in enumerate(model_columns):
    axes[0][title_col].set_title(title)
for label_row, label in enumerate(row_labels):
    axes[label_row][0].set_ylabel(label, rotation=90, size='large')

out_path = Path('figs/ig_tsne_all_models_all_data.png')
# savefig does not create missing directories.
out_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out_path, bbox_inches='tight')
plt.show()

In [None]:
# t-SNE of the softmax-normalised logits (softmax_dict): one panel per
# (dataset, model) pair, points coloured by whether the answer was marked correct.

# (substring identifying the model in a result name, column title); the list position
# is the subplot column. Checked in order, first match wins.
model_columns = [
    ('open_llama_7b', 'OpenLlama 7B'),
    ('open_llama_13b', 'OpenLlama 13B'),
    ('falcon-7b', 'Falcon 7B'),
    ('falcon-40b', 'Falcon 40B'),
    ('opt-6.7b', 'OPT 6.7B'),
    ('opt-30b', 'OPT 30B'),
]
# Substring identifying the dataset in a result name -> subplot row. If several
# substrings match, the last one listed wins.
dataset_rows = {
    'place_of_birth': 2,
    'capitals': 0,
    'trivia': 3,
    'founders': 1,
}
# Y-axis labels of the first column, indexed by subplot row.
row_labels = ['Capitals', 'Founders', 'Place of Birth', 'General Trivia']
hue_order = ['Non-Hallucination', 'Hallucination']
alpha = 0.2

fig, axes = plt.subplots(4, 6, sharex=False, figsize=(20,12))

for results_name in tqdm(correct_dict.keys()):
    col = next((idx for idx, (key, _) in enumerate(model_columns) if key in results_name), None)
    matched_rows = [grid_row for key, grid_row in dataset_rows.items() if key in results_name]
    if col is None or not matched_rows:
        # Without this guard an unrecognised name would be drawn on whichever panel
        # the previous iteration used.
        warnings.warn(f"Skipping {results_name!r}: unrecognised model or dataset name")
        continue
    row = matched_rows[-1]
    correct = np.array(['Non-Hallucination' if i==True else 'Hallucination' for i in correct_dict[results_name]])
    # Fixed seed so the embedding (and the saved figure) is reproducible between runs.
    decomp = TSNE(n_components=2, random_state=0).fit_transform(softmax_dict[results_name])
    # Only the top-left panel carries a legend; it applies to the whole grid.
    legend = (col == 0 and row == 0)
    sns.scatterplot(x=decomp[:,0], y=decomp[:,1], hue=correct, hue_order=hue_order, ax=axes[row, col], legend=legend, alpha=alpha)

axes[0][0].legend(loc="upper left")
for title_col, (_, title) in enumerate(model_columns):
    axes[0][title_col].set_title(title)
for label_row, label in enumerate(row_labels):
    axes[label_row][0].set_ylabel(label, rotation=90, size='large')

out_path = Path('figs/softmax_tsne_all_models_all_data.png')
# savefig does not create missing directories.
out_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out_path, bbox_inches='tight')
plt.show()

In [None]:
# t-SNE of the features loaded from 'final_attention' (attention_dict): one panel per
# (dataset, model) pair, points coloured by whether the answer was marked correct.

# (substring identifying the model in a result name, column title); the list position
# is the subplot column. Checked in order, first match wins.
model_columns = [
    ('open_llama_7b', 'OpenLlama 7B'),
    ('open_llama_13b', 'OpenLlama 13B'),
    ('falcon-7b', 'Falcon 7B'),
    ('falcon-40b', 'Falcon 40B'),
    ('opt-6.7b', 'OPT 6.7B'),
    ('opt-30b', 'OPT 30B'),
]
# Substring identifying the dataset in a result name -> subplot row. If several
# substrings match, the last one listed wins.
dataset_rows = {
    'place_of_birth': 2,
    'capitals': 0,
    'trivia': 3,
    'founders': 1,
}
# Y-axis labels of the first column, indexed by subplot row.
row_labels = ['Capitals', 'Founders', 'Place of Birth', 'General Trivia']
hue_order = ['Non-Hallucination', 'Hallucination']
alpha = 0.2

fig, axes = plt.subplots(4, 6, sharex=False, figsize=(20,12))

for results_name in tqdm(correct_dict.keys()):
    col = next((idx for idx, (key, _) in enumerate(model_columns) if key in results_name), None)
    matched_rows = [grid_row for key, grid_row in dataset_rows.items() if key in results_name]
    if col is None or not matched_rows:
        # Without this guard an unrecognised name would be drawn on whichever panel
        # the previous iteration used.
        warnings.warn(f"Skipping {results_name!r}: unrecognised model or dataset name")
        continue
    row = matched_rows[-1]
    correct = np.array(['Non-Hallucination' if i==True else 'Hallucination' for i in correct_dict[results_name]])
    # Fixed seed so the embedding (and the saved figure) is reproducible between runs.
    decomp = TSNE(n_components=2, random_state=0).fit_transform(attention_dict[results_name])
    # Only the top-left panel carries a legend; it applies to the whole grid.
    legend = (col == 0 and row == 0)
    sns.scatterplot(x=decomp[:,0], y=decomp[:,1], hue=correct, hue_order=hue_order, ax=axes[row, col], legend=legend, alpha=alpha)

axes[0][0].legend(loc="upper left")
for title_col, (_, title) in enumerate(model_columns):
    axes[0][title_col].set_title(title)
for label_row, label in enumerate(row_labels):
    axes[label_row][0].set_ylabel(label, rotation=90, size='large')

out_path = Path('figs/attention_tsne_all_models_all_data.png')
# savefig does not create missing directories.
out_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out_path, bbox_inches='tight')
plt.show()