In [1]:
import argparse
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import shap
from transformers import AutoTokenizer

In [2]:
def _split_context_response(conv):
    """Split a conversation into (context, response) at its last newline.

    A conversation without any newline has an empty context and is treated
    entirely as the response.
    """
    context, _, response = conv.rpartition('\n')
    return context, response.strip()


def main(args, sampler, scorer, conv_expl=None):
    """Sample conversations and compute SHAP values for each of them.

    Parameters
    ----------
    args : dict
        Run configuration. Keys read here: 'randomize_data', 'all',
        'num_examples', 'background_examples', 'contains_response',
        'algo' ('kernel' or 'partition') and, for the kernel explainer,
        'n_samples'.
    sampler
        Conversation source. Must support ``len()`` and provide
        ``randomize()``, ``get_next()`` and ``get_next_random()``; the last
        two must return iterators over conversation strings.
    scorer
        Metric wrapper. Must provide ``format_conversations``, ``get_scores``,
        ``get_tokenizer``, ``get_masker``, ``set_response_lengths`` and
        ``set_contexts``.
    conv_expl : list of str, optional
        When given and non-empty, these conversations are explained instead
        of the ones drawn from ``sampler.get_next()``.

    Returns
    -------
    tuple
        ``(shap_values, fmtd_input)``: a list with one explainer result per
        explained conversation, and the numpy array of formatted inputs.

    Raises
    ------
    ValueError
        If there is nothing to explain, if the response-only mode has no
        background conversations, or if ``args['algo']`` is not supported.
    """
    if args['randomize_data']:
        sampler.randomize()

    explain_all = args['all']
    conv_it = sampler.get_next()
    random_conv_it = sampler.get_next_random()
    num_iteras = len(sampler) if explain_all else max(args['num_examples'], args['background_examples'])
    print('Number of conversations to analyze:', len(sampler) if explain_all else args['num_examples'])

    conv_to_explain = []
    background_data = []
    explain_flag = True
    background_flag = True
    exhausted = object()  # sentinel: distinguishes "iterator is done" from any real value
    for i in range(num_iteras):
        # Both iterators are advanced on every pass, even once a stream has
        # stopped being collected.
        econv = next(conv_it, exhausted)
        bconv = next(random_conv_it, exhausted)
        if econv is exhausted:
            explain_flag = False
        if bconv is exhausted:
            background_flag = False
        if explain_flag:
            conv_to_explain.append(econv.strip())
        if background_flag:
            background_data.append(bconv.strip())
        # Stop collecting a stream once its requested count has been reached.
        if not explain_all and (i + 1) == args['num_examples']:
            explain_flag = False
        if (i + 1) == args['background_examples']:
            background_flag = False

    if conv_expl:
        conv_to_explain = conv_expl
    print(f'Number of conversation: {len(conv_to_explain)}')
    print(f"Number of background conversations: {len(background_data)}")
    if not conv_to_explain:
        raise ValueError('No conversations to explain: the sampler yielded none and conv_expl is empty.')
    print("First conversation to explain:", conv_to_explain[0])

    contains_response = args['contains_response']
    if contains_response:
        # The scorer receives whole conversations and is told how many tokens
        # the final line (the response) of each background conversation has.
        fmtd_input = np.array(scorer.format_conversations(conv_to_explain))
        bgd_fmtd = np.array(scorer.format_conversations(background_data))
        response_lengths = [len(scorer.get_tokenizer().encode(conv.split('\n')[-1])) for conv in bgd_fmtd]
        scorer.set_response_lengths(response_lengths)
    else:
        # The scorer receives only the last line; everything before it is
        # handed over separately as the context.
        if not background_data:
            raise ValueError('No background conversations were sampled.')
        ctxt_to_explain, res_to_explain = zip(*[_split_context_response(conv) for conv in conv_to_explain])
        ctxt_bgd, res_bgd = zip(*[_split_context_response(conv) for conv in background_data])
        fmtd_input = np.array(scorer.format_conversations(res_to_explain))
        bgd_fmtd = np.array(scorer.format_conversations(res_bgd))
        scorer.set_contexts(ctxt_bgd)

    algo = args['algo']
    if algo == 'kernel':
        explainer = shap.KernelExplainer(scorer.get_scores, (bgd_fmtd, scorer.get_tokenizer()), special_token=50256)
    elif algo == 'partition':
        explainer = shap.Explainer(scorer.get_scores, scorer.get_masker())
    else:
        raise ValueError(f"Unsupported algo {algo!r}; expected 'kernel' or 'partition'.")

    shap_values = []
    for i, conv in enumerate(fmtd_input):
        # Re-point the scorer at the single conversation being explained.
        if contains_response:
            scorer.set_response_lengths([len(scorer.get_tokenizer().encode(conv.split('\n')[-1]))])
        else:
            scorer.set_contexts([ctxt_to_explain[i]])
        if algo == 'kernel':
            shap_values.append(explainer.shap_values(np.array([conv]), nsamples=args['n_samples']))
        else:
            shap_values.append(explainer(np.array([conv])))
    return shap_values, fmtd_input

In [3]:
# Run configuration shared by the cells below.
args = {
    "sampler_path": "/home/ubuntu/adversary_generator/test_conversations/diversity",
    # "scorer_path": "/home/ubuntu/dialogue_evaluation/",
    "scorer_path": "/home/ubuntu/DialogRPT/",
    "randomize_data": False,
    "all": True,
    "num_examples": 10,
    "background_examples": 100,
    "e": "vocab",
    "top_k": 1,
    "aggregate": False,
    "n_samples": 1000,
    "algo": "kernel",
    # whether the input to the scorer is just the response (False) or response+context (True)
    "contains_response": False,
    "sample_adversary": True,
    "load_saved": True,
    "metric_name": "DialogRPT",
    "ds_name": "daily_dialogue",
    "save_pickle": True,
}
# Results are grouped by metric name, then by dataset name.
output_dir = os.path.join("./results", args["metric_name"], args["ds_name"])
print(f"This is the output directory:  {output_dir}")


This is the output directory:  ./results/DialogRPT/daily_dialogue


In [4]:
# Echo the active configuration so it is recorded next to the results.
args

{'sampler_path': '/home/ubuntu/adversary_generator/test_conversations/entailment',
 'scorer_path': '/home/ubuntu/DialogRPT/',
 'randomize_data': False,
 'all': True,
 'num_examples': 10,
 'background_examples': 100,
 'e': 'vocab',
 'top_k': 1,
 'aggregate': False,
 'n_samples': 1000,
 'algo': 'kernel',
 'contains_response': False,
 'sample_adversary': True,
 'load_saved': True,
 'metric_name': 'DialogRPT',
 'ds_name': 'daily_dialogue',
 'save_pickle': True}

In [5]:
# Load the scorer (and, when not sampling adversarially, the sampler) straight
# from the file paths given in `args`.
import importlib.util
# Execute <scorer_path>/score.py as a module named "score".
score_spec = importlib.util.spec_from_file_location("score", os.path.join(args["scorer_path"], "score.py"))
score = importlib.util.module_from_spec(score_spec)
score_spec.loader.exec_module(score)
if args["sample_adversary"]:
    # The adversarial sampler is a regular import and is built from the sampler path.
    from sample_adversarial import adversary_sampler
    sampler = adversary_sampler(args["sampler_path"])
else:
    # Otherwise execute <sampler_path>/sample.py and use its conversation_sampler.
    sample_spec = importlib.util.spec_from_file_location("sample", os.path.join(args["sampler_path"], "sample.py"))
    sample = importlib.util.module_from_spec(sample_spec)
    sample_spec.loader.exec_module(sample)
    sampler = sample.conversation_sampler()
# metric_name='context', mask_token='<|endoftext|>', ngrams=2 | these can be the input arguments to the daily dialogue evaluation scorer.
scorer = score.conversation_scorer(metric_name='overall', mask_token='<|endoftext|>', ngrams=2)

Loading Model: updown
Loading Model: width
Loading Model: depth
Loading Model: human-vs-rand


In [6]:
# Inspect one conversation pair and the sampler's aggregate scores.
# Pair `ix` occupies positions 2*ix and 2*ix + 1 of candidate_conversations.
# NOTE(review): the labels below treat element 0 of each og_scores entry as the
# human score and element 1 as the adversarial score -- confirm against the sampler.
# scorer(['my name is baber'])
# for ix in range(len(sampler)//2):
ix = 0
print(sampler.candidate_conversations[(ix * 2)])
print("--------------")
print(sampler.candidate_conversations[(ix * 2) + 1])
print("--------------")
print(f"The score difference: {sampler.score_diffs[ix]}")
print(f"The original scores: {sampler.og_scores[ix]}")
print("Average metric scores for human conversations:", np.average([x[0] for x in sampler.og_scores]))
print("Average metric scores for adversarial conversations:", np.average([x[1] for x in sampler.og_scores]))
# Last expression: displayed as the cell output.
len(sampler)
# it = sampler.get_next()
# next(it)

Okay . This trail looks the best . It's a little steep . But I'm sure it will be alright .
Well . You're the tour guide , I'll follow you .
... What a stink . This place stinks like rotten eggs .
--------------
Okay . This trail looks the best . It's a little steep . But I'm sure it will be alright .
Well . You're the tour guide , I'll follow you .
Suddenly the ' great outdoors ' isn't so appealing . Let's hike a little faster ...
--------------
The score difference: 0.5657510216291812
The original scores: (0.02466196716220529, 0.5904129887913865)
Average metric scores for human conversations: 0.16255878959204428
Average metric scores for adversarial conversations: 0.28474506882822487


60

In [10]:
# Name the pickle after the last component of the sampler path.
# normpath drops a trailing separator first, so ".../diversity/" still yields
# "diversity" instead of an empty name.
output_fname = f'{os.path.basename(os.path.normpath(args["sampler_path"]))}_shap_values.pkl'
output_fname

'entailment_shap_values.pkl'

In [9]:
# Explain the sampled conversations; `conversations` holds the formatted inputs
# returned alongside the SHAP values.
# To explain only the first pair, pass them explicitly instead:
# shap_values, conversations = main(args, sampler, scorer, conv_expl=[*sampler.candidate_conversations[:2]])
shap_values, conversations = main(args, sampler, scorer)

Number of conversations to analyze: 60
Number of conversation: 60
Number of background conversations: 30
First conversation to explain: Okay . This trail looks the best . It's a little steep . But I'm sure it will be alright .
Well . You're the tour guide , I'll follow you .
... What a stink . This place stinks like rotten eggs .
Data Shape: (30,)
Provided model function fails when applied to the provided data set.


KeyboardInterrupt: 

In [11]:
print(len(shap_values))
if args["save_pickle"]:
    # open(..., "wb") does not create missing parent directories, so make sure
    # the results directory exists before writing into it.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, output_fname), "wb") as shap_pkl_file:
        pickle.dump(shap_values, shap_pkl_file)

60


In [16]:
# Optionally replace the in-memory SHAP values with a previously saved run.
# NOTE(review): the path is hard-coded to the "repetitiveness" results and does
# not follow output_dir/output_fname -- confirm this is the file that matches
# the sampler loaded earlier, since later cells index both by position.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this notebook.
if args["load_saved"]:
    import pickle
    with open("./results//DialogRPT/daily_dialogue/repetitiveness_shap_values.pkl", "rb") as pshap_vals:
        shap_values = pickle.load(pshap_vals)

In [17]:
# Number of SHAP entries currently held in `shap_values`.
len(shap_values)

172

In [None]:
def _normalize_token(token):
    """Drop a leading 'Ġ' marker (if present) and lowercase the token."""
    if token.startswith('Ġ'):
        token = token[1:]
    return token.lower()


def get_vocab_importance(args, importance_dict, shap_values, conversation=None, tokenizer=None):
    """Accumulate per-token SHAP totals and occurrence counts into a dict.

    Parameters
    ----------
    args : dict
        Only ``args['algo']`` is read. 'kernel' selects the array layout;
        any other value selects the explanation-object layout.
    importance_dict : dict
        Maps a normalized token to ``(summed_value, occurrences)``. It is
        updated in place and also returned.
    shap_values
        For 'kernel': an array that is flattened and indexed by token
        position. Otherwise: an object whose ``data[0]`` holds the tokens and
        whose ``values[0]`` holds the matching values.
    conversation : str, optional
        Text to tokenize; used only for 'kernel'.
    tokenizer : optional
        Object with a ``tokenize(text)`` method; used only for 'kernel'.
        Defaults to the notebook-level ``scorer.get_tokenizer()``.

    Returns
    -------
    dict
        The same ``importance_dict`` object, after the update.
    """
    if args['algo'] == 'kernel':
        if tokenizer is None:
            tokenizer = scorer.get_tokenizer()
        values = shap_values.reshape((-1,))
        tokens = tokenizer.tokenize(conversation)
    else:
        tokens = shap_values.data[0]
        values = shap_values.values[0]

    # Both layouts share one normalization, so the same word always lands on
    # the same key regardless of case or a leading 'Ġ'.
    for i, token in enumerate(tokens):
        word = _normalize_token(token)
        total, occurrences = importance_dict.get(word, (0, 0))
        importance_dict[word] = (total + values[i], occurrences + 1)

    return importance_dict

In [None]:
def normalize_importance(vimportance):
    """Min-max scale the values of a word -> importance mapping into [0, 1].

    Parameters
    ----------
    vimportance : dict
        Maps each word to a numeric importance.

    Returns
    -------
    dict
        A new dict with the same keys in the same order. An empty input gives
        an empty dict. When every value is identical there is no range to
        scale by, so every word maps to 0.0 instead of NaN.
    """
    if not vimportance:
        return dict()

    words = list(vimportance)
    importance = np.array([vimportance[word] for word in words])
    lowest = min(importance)
    spread = max(importance) - lowest
    if spread == 0:
        # Avoid 0/0: a constant input carries no relative ordering.
        norm_imp = np.zeros(len(words))
    else:
        norm_imp = (importance - lowest) / spread
    return dict(zip(words, norm_imp))

In [None]:
# Aggregate importance over all the conversations.
# Even positions feed the human vocabulary, odd positions the adversarial one.
if len(shap_values) > len(sampler.candidate_conversations):
    # Fail up front with a clear message rather than with an IndexError midway.
    raise ValueError(
        f"shap_values has {len(shap_values)} entries but the sampler only has "
        f"{len(sampler.candidate_conversations)} conversations; they must come from the same run."
    )
vimp_human = dict()
vimp_adv = dict()
for ix in range(len(shap_values)):
    # Only the last line of each conversation (the response) is attributed.
    response = sampler.candidate_conversations[ix].split('\n')[-1]
    if ix % 2 == 0:
        vimp_human = get_vocab_importance(args, vimp_human, shap_values[ix], response)
    else:
        vimp_adv = get_vocab_importance(args, vimp_adv, shap_values[ix], response)
# Spot check of one common token; .get avoids a KeyError when 'i' never occurs.
print(vimp_human.get('i'))
# Turn (summed value, occurrences) into a per-occurrence average.
vimp_human = {word: total / occurrences for word, (total, occurrences) in vimp_human.items()}
vimp_adv = {word: total / occurrences for word, (total, occurrences) in vimp_adv.items()}
vimp_human_norm = normalize_importance(vimp_human)
vimp_adv_norm = normalize_importance(vimp_adv)

In [None]:
# vimp_human_norm['i']

In [None]:
def unroll_path(path):
    """Flatten a nested ``(previous_path, step)`` chain into a list of steps.

    The empty chain yields ``[]``; otherwise the steps are returned from the
    innermost (earliest) to the outermost (latest).
    """
    steps = []
    # Peel the chain from the outside in, then restore chronological order.
    while len(path) != 0:
        steps.append(path[1])
        path = path[0]
    steps.reverse()
    return steps

def get_feature_diff(vec1, vec2):
    """Align two token sequences on their longest common subsequence.

    Parameters
    ----------
    vec1, vec2 : sequence
        Token sequences; elements are compared with ``==``.

    Returns
    -------
    list
        ``[vec2_indices, vec1_indices]``: two equally long tuples holding the
        positions of the matched elements, in increasing order. When nothing
        matches (including when either input is empty) both tuples are empty,
        so the result can always be unpacked into two values.
    """
    n_rows, n_cols = len(vec2), len(vec1)
    if n_rows == 0 or n_cols == 0:
        return [(), ()]

    # Both tables carry an extra leading row and column for the empty prefix,
    # so cell (i + 1, j + 1) describes vec2[:i + 1] against vec1[:j + 1] and
    # the boundary look-ups never wrap around to the far side of the table.
    lengths = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
    # Each path is either () or a nested (previous_path, (i, j)) chain.
    paths = [[()] * (n_cols + 1) for _ in range(n_rows + 1)]
    best_len = 0
    best_path = ()
    for i in range(n_rows):
        for j in range(n_cols):
            matched = int(vec2[i] == vec1[j])
            diag = lengths[i][j] + matched
            up = lengths[i][j + 1]
            left = lengths[i + 1][j]
            best = max(diag, up, left)
            lengths[i + 1][j + 1] = best
            # On ties prefer the diagonal, then the cell above, then the cell
            # to the left.
            if diag == best:
                previous = paths[i][j]
                paths[i + 1][j + 1] = (previous, (i, j)) if matched else previous
            elif up == best:
                paths[i + 1][j + 1] = paths[i][j + 1]
            else:
                paths[i + 1][j + 1] = paths[i + 1][j]
            # Strict comparison keeps the first cell (row-major) that reaches
            # the overall maximum.
            if best > best_len:
                best_len = best
                best_path = paths[i + 1][j + 1]

    # Unwind the nested chain into chronological (i, j) pairs.
    matches = []
    while len(best_path) != 0:
        best_path, pair = best_path
        matches.append(pair)
    matches.reverse()

    if not matches:
        return [(), ()]
    return list(zip(*matches))

In [None]:
def bar_plot_features(human_features, adv_features, feature_names, xtitle, ytitle):
    """Draw paired bars (human next to adversarial) for each feature.

    Parameters
    ----------
    human_features, adv_features : sequence of numbers
        Bar heights for the two series; one bar pair is laid out per entry of
        ``human_features``.
    feature_names : sequence of str
        Tick labels, one per bar pair.
    xtitle, ytitle : str
        Axis labels.

    Draws through ``matplotlib.pyplot`` and returns nothing.
    """
    total_width = 1.5   # width of one human + adversarial pair
    start = 0.1         # x position of the first human bar
    bar_offset = 0.2    # gap between consecutive pairs
    bar_width = total_width / 2
    group_stride = total_width + bar_offset

    human_coords = []
    adv_coords = []
    for group in range(len(human_features)):
        shift = group_stride * group
        human_coords.append(start + shift)
        adv_coords.append(start + bar_width + shift)

    plt.bar(human_coords, human_features, width=bar_width)
    plt.bar(adv_coords, adv_features, width=bar_width)
    # Place each tick a quarter of the pair width to the left of the adversarial bar.
    tick_positions = [coord - (total_width / 4) for coord in adv_coords]
    plt.xticks(tick_positions, feature_names)
    plt.xlabel(xtitle)
    plt.ylabel(ytitle)
    plt.legend(["Human Use", "Adversarial Use"])

In [None]:
def get_unmatched(vec, feat):
    """Return the normalized tokens of ``vec`` whose positions are not in ``feat``.

    Each kept token has a leading 'Ġ' removed (when present) and is
    lowercased; the original order is preserved.
    """
    unmatched = []
    for position, token in enumerate(vec):
        if position in feat:
            continue
        cleaned = token[1:] if token.startswith('Ġ') else token
        unmatched.append(cleaned.lower())
    return unmatched

# Collect, for every human/adversarial pair, the response tokens that are NOT
# shared between the two responses, then plot the most important ones.
human_features = set()
adv_features = set()
t = scorer.get_tokenizer()  # loop-invariant, so fetch it once
# Stop one short of the end so a trailing conversation without a partner is skipped.
for i in range(0, len(sampler.candidate_conversations) - 1, 2):
    human_res = sampler.candidate_conversations[i].split('\n')[-1]
    adv_res = sampler.candidate_conversations[i + 1].split('\n')[-1]
    vec1 = t.tokenize(human_res)
    vec2 = t.tokenize(adv_res)

    # The alignment lists vec2 positions first, then vec1 positions. An empty
    # alignment means no token is shared, i.e. every token is unmatched.
    alignment = get_feature_diff(vec1, vec2)
    feat2, feat1 = alignment if alignment else ((), ())
    hum_resp_feats = get_unmatched(vec1, feat1)
    adv_resp_feats = get_unmatched(vec2, feat2)
    human_features |= set(hum_resp_feats)
    adv_features |= set(adv_resp_feats)
human_features = list(human_features)
adv_features = list(adv_features)
# Most important first.
adv_features.sort(key=lambda x: -vimp_adv_norm[x])
human_features.sort(key=lambda x: -vimp_human_norm[x])
topk = 10
# Only features that also occur in the adversarial vocabulary can be compared.
shared_top_feats = [feat for feat in human_features[:topk] if feat in vimp_adv_norm]
topk_hfeats = [vimp_human_norm[feat] for feat in shared_top_feats]
topk_afeats = [vimp_adv_norm[feat] for feat in shared_top_feats]
bar_plot_features(topk_hfeats, topk_afeats, shared_top_feats, "Most Important Pronouns", "Importance")

# print(f"Normalized importance of top {topk} features when used in adversarial setting.")
# print("{:<11} {:<20} {:<20}".format("Feature", "Natural Use", "Adversarial Use"))
# for feat in human_features[:topk]:
#     if feat not in vimp_adv_norm:
#         continue
#     print("{:<11} {:<20} {:<20}".format(feat, round(vimp_human_norm[feat], 2), round(vimp_adv_norm[feat], 2)))
# print(adv_features[:10])

In [None]:
# Bar-plot the normalized token importance for both conversations of one pair.
conv_number = 0
shap.initjs()
for ix in (conv_number * 2, (conv_number * 2) + 1):
    vocab_importance = get_vocab_importance(args, dict(), shap_values[ix], conversations[ix])
    # Average each token over its occurrences, then rescale to [0, 1].
    imp_dict = normalize_importance(
        {word: total / occurrences for word, (total, occurrences) in vocab_importance.items()}
    )
    print('Original Conversation:')
    print(sampler.candidate_conversations[ix])
    print("-----------------")
    vocab_expl = shap.Explanation(values=list(imp_dict.values()), data=list(vocab_importance.keys()))
    shap.plots.bar(vocab_expl, show=False)
    # shap.plots.text(vocab_expl)
    # Open a fresh figure so the next plot does not draw over this one.
    plt.figure()
    # plt.savefig("interpretability_example.png")

In [None]:
# Show the normalized importances left over from the last conversation plotted.
imp_dict

In [None]:
# Share of pairs whose second conversation ends in a longer (token-wise) final
# line than the first one.
tokenizer = scorer.get_tokenizer()  # loop-invariant, so fetch it once
num_pairs = len(sampler.candidate_conversations) // 2
count = 0
for i in range(num_pairs):
    conv1 = sampler.candidate_conversations[i * 2]
    conv2 = sampler.candidate_conversations[(i * 2) + 1]
    last_utterance1 = conv1.split('\n')[-1]
    last_utterance2 = conv2.split('\n')[-1]
    count += (len(tokenizer.tokenize(last_utterance1)) < len(tokenizer.tokenize(last_utterance2)))
# `count` is per pair, so the denominator is the number of pairs, not the
# number of conversations (which would halve the reported proportion).
proportion = count / num_pairs if num_pairs else 0.0
print(f"Proportion of increased lengths: {proportion}")

In [None]:
# Mean and standard deviation of the averaged SHAP values for one property.
# NOTE(review): the three-axis slice with a string key needs an object that
# supports it; the list returned by main() does not -- confirm which
# `shap_values` this cell is meant to run against.
property_under_consideration = 'understand'
property_values = shap_values[:, :, property_under_consideration].mean(0).values
np.mean(property_values), np.std(property_values)

In [None]:
import pickle

In [None]:
# pickle.dump(shap_values, open('kernelshap_values.pkl', 'wb'))

In [None]:
# ['interesting', 'engaging', 'specific', 'relevant', 'correct', \
#                 'semantically appropriate', 'understandable', 'fluent', 'coherent', 'error recovery', \
#                 'consistent', 'diverse', 'depth', 'likeable', 'understand', 'flexible', 'informative', 'inquisitive']
# shap_values[:, :, 0].mean(0).shape
# shap.initjs()
# shap.plots.bar(shap_values[:, :, 0].mean(0), max_display=10)
# shap_values[0, :250]
# print(next(sampler.get_next()))
# Average the first entry of `shap_values` per distinct token of the first
# sampled conversation and show the result as a bar plot.
first_conversation = next(sampler.get_next())
formatted_conversation = scorer.format_conversations([first_conversation])[0]
conv_tokens = scorer._tokenizer.tokenize(formatted_conversation)
scores = dict()
for i, token in enumerate(conv_tokens):
    # Each entry is (summed value, occurrences) for that token.
    running_total, occurrences = scores.get(token, (0, 0))
    scores[token] = (running_total + shap_values[0][i], occurrences + 1)
expl_obj = shap.Explanation([total / seen for total, seen in scores.values()], data=list(scores.keys()))
shap.plots.bar(expl_obj)