In [None]:
import itertools
import json
import nltk
import random
import scipy
import torch
import tqdm

import numpy as np
import pandas as pd

from evaluate import load
from scipy.spatial import distance
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
)
from zss import simple_distance, Node

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
from nltk import pos_tag, word_tokenize, RegexpParser

In [None]:
# Load the output of each counterfactual generator and the ground-truth file.
# Every file is parsed straight from the open handle.
with open("results/pplm_results.json", "r", encoding='utf-8') as file:
    pplm = json.load(file)
with open("results/relitc_results.json", "r", encoding='utf-8') as file:
    relitc = json.load(file)
with open("results/polyjuice_results.json", "r", encoding='utf-8') as file:
    polyjuice = json.load(file)
with open("results/ground_truth.json", "r", encoding='utf-8') as file:
    gt = json.load(file)

# Success rate

In [None]:
def success_rate(results):
    """Return the share of rows with at least one counterfactual of the target class.

    Side effect: every row receives a boolean ``row['success']`` entry. Later
    cells (faithfulness, dataset construction, overlap counting) read that key,
    so this function has to run on a result list before they do.

    Args:
        results: list of dicts, each holding a ``'target'`` label and a
            ``'counterfactuals'`` list of dicts with a ``'label'`` entry.

    Returns:
        Fraction of successful rows as a float. An empty ``results`` yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    if len(results) == 0:
        return 0.0

    n_success = 0
    for row in results:
        target = row['target']
        row['success'] = any(ce['label'] == target for ce in row['counterfactuals'])
        n_success += row['success']

    return n_success / len(results)

In [None]:
success_rate(pplm)

In [None]:
success_rate(polyjuice)

In [None]:
success_rate(relitc)

# Faithfulness

In [None]:
def faithfulness(results, ground_truth=None, rng=None):
    """Score each row and return the mean and standard deviation of the scores.

    Rows without any counterfactual are skipped. For the remaining rows the
    score starts at 1 when the gold label equals the classifier's label in the
    ground-truth row (0 otherwise). For a row that is not marked successful, one
    counterfactual is drawn at random; if it still carries the gold label, 1 is
    subtracted.

    Args:
        results: list of result rows; each needs ``'counterfactuals'`` and the
            ``'success'`` flag written by ``success_rate``.
        ground_truth: rows aligned with ``results``, each with ``'label'`` and
            ``'classification'['label']``. Defaults to the notebook-level ``gt``.
        rng: object with a NumPy-style ``choice`` method (``np.random``, a
            ``RandomState`` or a ``Generator``). Pass a seeded one to make the
            result reproducible; defaults to the global ``np.random`` state.

    Returns:
        ``(mean, std)`` of the per-row scores, or ``(nan, nan)`` when no row
        has a counterfactual.
    """
    if ground_truth is None:
        ground_truth = gt
    if rng is None:
        rng = np.random

    scores = []
    for row, gt_row in zip(results, ground_truth):
        candidates = row['counterfactuals']
        if len(candidates) == 0:
            continue

        score = 1 * (gt_row['label'] == gt_row['classification']['label'])

        if not row['success']:
            # Draw an index rather than an element so plain Python lists of dicts work
            # with every kind of NumPy random source.
            sampled = candidates[int(rng.choice(len(candidates)))]
            if sampled['label'] == gt_row['label']:
                score -= 1

        scores.append(score)

    if not scores:
        return float('nan'), float('nan')

    return np.mean(scores), np.std(scores)

In [None]:
faithfulness(pplm)

In [None]:
faithfulness(polyjuice)

In [None]:
faithfulness(relitc)

# Create counterfactual dataset

In [None]:
# Build the survey table: one row per factual sentence with the counterfactual
# chosen for each of the three generators. Relies on row['success'], which is
# written by success_rate(), so that function must have been run on all three
# result lists first.
original_strings = []
pplm_strings = []
polyjuice_strings = []
relitc_strings = []
# NOTE(review): `mask` is filled below but is not used when `out` is built in this cell.
mask = []
labels = []
targets = []

# presumably the three result files list the same factuals in the same order — verify
for x_pplm, x_polyjuice, x_relitc in zip(pplm, polyjuice, relitc):
    all_present = True
    
    for method, array in [(x_pplm, pplm_strings), (x_polyjuice, polyjuice_strings), (x_relitc, relitc_strings)]:
        if method['success']:
            # Keep the highest-scoring counterfactual that hits the target class.
            # NOTE(review): starting at best = 0 means a candidate with score <= 0 is never picked.
            best = 0
            string = ''
            for ce in method['counterfactuals']:
                if ce['label'] == method['target'] and ce['score'] > best:
                    string = ce['text']
                    best = ce['score']
        else:
            # No counterfactual reached the target: fall back to the first one, if any.
            if len(method['counterfactuals']) == 0:
                all_present = False
                string = ''
            else:
                string = method['counterfactuals'][0]['text']
        # Strip the label prefixes, then capitalize (which also lower-cases the rest of the text).
        # NOTE(review): "neutral :" is removed without the trailing space the other two
        # prefixes have, so such texts may keep a leading blank — confirm against the data.
        array.append(string.replace("hawkish : ", "").replace("dovish : ", "").replace("neutral :", "").capitalize())
    
    mask.append(all_present)
    # Text, label and target are taken from the PPLM rows.
    original_strings.append(x_pplm['text'])
    labels.append(x_pplm['label'])
    targets.append(x_pplm['target'])
    
# Fact_ID is a 1-based running number over the rows.
out = pd.DataFrame.from_dict(data={'Fact_ID': np.arange(1, len(pplm)+1),
                                    'Factual': original_strings, 
                                    'PPLM': pplm_strings, 
                                    'Polyjuice': polyjuice_strings, 
                                    'RELITC': relitc_strings,
                                    'Label': labels, 
                                    'Target': targets,
                                    'PPLM_Success': [x['success'] for x in pplm],
                                    'Polyjuice_Success': [x['success'] for x in polyjuice],
                                    'RELITC_Success': [x['success'] for x in relitc]})

In [None]:
out = out.sample(frac=1, random_state = 2).reset_index(drop=True).reset_index().rename(columns={'index':'Question_ID'})

In [None]:
copies = []
for perm in itertools.permutations(['PPLM', 'Polyjuice', 'RELITC']):
    copy = out.copy()
    copy[['Method_1', 'Method_2', 'Method_3']] = perm
    copies.append(copy)

In [None]:
out = pd.concat(copies).sort_values(['Question_ID'])

In [None]:
out = out.reset_index(drop=True)

In [None]:
out

In [None]:
out.to_csv("survey/survey_data.csv", index_label='Block_ID')

# Overlapping sentences

In [None]:
def overlapping_indexes(results):
    """Return the positions of all rows whose ``'success'`` entry is truthy."""
    return [position for position, row in enumerate(results) if row['success']]

In [None]:
over_pplm = overlapping_indexes(pplm)
over_relitc = overlapping_indexes(relitc)
over_polyjuice = overlapping_indexes(polyjuice)

In [None]:
len(np.intersect1d(np.intersect1d(over_pplm, over_relitc), over_polyjuice))

# Prepare counterfactuals

In [None]:
def get_counterfactuals(results):
    """Pick one cleaned counterfactual text per result row.

    For a successful row the highest-scoring counterfactual whose label equals
    the row's target is used (the first one wins on ties). Otherwise the first
    counterfactual is used, if there is any. The label prefixes are removed,
    surrounding whitespace is stripped and the text is capitalized.

    Changes compared to the previous version:
      * a target-class candidate is selected even when its score is <= 0
        (the old scan started at ``best = 0`` and silently skipped those);
      * the text is stripped, so removing ``"neutral :"`` (which, unlike the
        other two prefixes, has no trailing space) no longer leaves a leading
        blank that also prevented the capitalization.

    Args:
        results: list of dicts with ``'success'``, ``'target'`` and a
            ``'counterfactuals'`` list of dicts with ``'label'``, ``'score'``
            and ``'text'``.

    Returns:
        List aligned with ``results``; an entry is ``None`` when no
        (non-empty) text is available for that row.
    """
    ces = []
    for result in results:
        candidates = result['counterfactuals']

        if result['success']:
            on_target = [ce for ce in candidates if ce['label'] == result['target']]
            # max() returns the first of several equally scored candidates,
            # matching the strict '>' comparison of the original scan.
            string = max(on_target, key=lambda ce: ce['score'])['text'] if on_target else ''
        elif len(candidates) == 0:
            string = ''
        else:
            string = candidates[0]['text']

        string = (
            string.replace("hawkish : ", "")
            .replace("dovish : ", "")
            .replace("neutral :", "")
            .strip()
            .capitalize()
        )
        ces.append(string if len(string) > 0 else None)

    return ces

In [None]:
relitc_ces = get_counterfactuals(relitc)
pplm_ces = get_counterfactuals(pplm)
polyjuice_ces = get_counterfactuals(polyjuice)

relitc_success = [x['success'] for x in relitc]
pplm_success = [x['success'] for x in pplm]
polyjuice_success = [x['success'] for x in polyjuice]

factuals = [x['text'] for x in relitc]
ids = [x['id'] for x in relitc]
labels = [x['label'] for x in relitc]
targets = [x['target'] for x in relitc]

assert len(relitc_ces) == len(pplm_ces) == len(polyjuice_ces) == len(factuals) == len(ids)

In [None]:
data = {
    'id': ids,
    'factual': factuals,
    'polyjuice': polyjuice_ces,
    'pplm': pplm_ces,
    'relitc': relitc_ces,
    'label': labels,
    'target': targets,
    'polyjuice_success': polyjuice_success,
    'pplm_success': pplm_success,
    'relitc_success': relitc_success
}

counterfactuals = pd.DataFrame.from_dict(data).dropna()

In [None]:
# Reuse the metrics cached by an earlier run when they exist. The file is only
# written at the end of the metric computation, so on a fresh run it is missing;
# in that case keep the frame that was just built instead of failing.
try:
    counterfactuals = pd.read_csv('metrics_calculated.csv')
except FileNotFoundError:
    pass

# Perplexity

In [None]:
perplexity = load("perplexity", module_type="metric")
model_id = 'lxyuan/distilgpt2-finetuned-finance'
row_ending = '_finance'

In [None]:
perplexity_factual = perplexity.compute(predictions=counterfactuals['factual'], model_id=model_id)

perplexity_pplm = perplexity.compute(predictions=counterfactuals['pplm'], model_id=model_id)

perplexity_relitc = perplexity.compute(predictions=counterfactuals['relitc'], model_id=model_id)

perplexity_polyjuice = perplexity.compute(predictions=counterfactuals['polyjuice'], model_id=model_id)

In [None]:
print(f"Mean perplexity: PPLM {perplexity_pplm['mean_perplexity']}, RELITC {perplexity_relitc['mean_perplexity']}, Polyjuice {perplexity_polyjuice['mean_perplexity']}")

In [None]:
counterfactuals[f'polyjuice_perplexity{row_ending}'] = perplexity_polyjuice['perplexities']
counterfactuals[f'pplm_perplexity{row_ending}'] = perplexity_pplm['perplexities']
counterfactuals[f'relitc_perplexity{row_ending}'] = perplexity_relitc['perplexities']
counterfactuals[f'factual_perplexity{row_ending}'] = perplexity_factual['perplexities']

In [None]:
print(f"Mean perplexity:  PPLM {np.mean(perplexity_pplm['perplexities'])} ({np.std(perplexity_pplm['perplexities'])}), RELITC {np.mean(perplexity_relitc['perplexities'])} ({np.std(perplexity_relitc['perplexities'])}), Polyjuice {np.mean(perplexity_polyjuice['perplexities'])} ({np.std(perplexity_polyjuice['perplexities'])})")

gpt2 Mean perplexity:  PPLM 36.96563018292797 (16.888468914984436), RELITC 100.94388296360873 (125.2103380847652), Polyjuice 90.97536222496811 (172.05128632625437)

facebook/opt-125m Mean perplexity:  PPLM 36.06742319379534 (15.906898235873141), RELITC 108.86210835125982 (153.81738769957212), Polyjuice 107.05753771918161 (291.9220278232838)

lxyuan/distilgpt2-finetuned-finance Mean perplexity:  PPLM 43.89094138048133 (23.459948587720152), RELITC 119.99030995271644 (141.95862479447706), Polyjuice 104.06404116688942 (150.25669361509838)

In [None]:
perp_ratio_polyjuice = np.array(counterfactuals[f'polyjuice_perplexity{row_ending}'])/np.array(counterfactuals[f'factual_perplexity{row_ending}'])
perp_ratio_pplm = np.array(counterfactuals[f'pplm_perplexity{row_ending}'])/np.array(counterfactuals[f'factual_perplexity{row_ending}'])
perp_ratio_relitc = np.array(counterfactuals[f'relitc_perplexity{row_ending}'])/np.array(counterfactuals[f'factual_perplexity{row_ending}'])

In [None]:
print(f"Mean perplexity ratio: PPLM {np.mean(perp_ratio_pplm)} ({np.std(perp_ratio_pplm)}), RELITC {np.mean(perp_ratio_relitc)} ({np.std(perp_ratio_relitc)}), Polyjuice {np.mean(perp_ratio_polyjuice)} ({np.std(perp_ratio_polyjuice)})")

gpt2 Mean perplexity ratio: PPLM 0.7825383744866578 (0.45501397950664685), RELITC 1.6701788089491778 (1.160213137606713), Polyjuice 1.7966660528593301 (4.570208204237788)

facebook/opt-125m Mean perplexity ratio: PPLM 0.6846708977679352 (0.3876675021703708), RELITC 1.52035572506549 (0.8401463845005294), Polyjuice 1.8977431251370729 (7.945316275893119)

lxyuan/distilgpt2-finetuned-finance Mean perplexity ratio: PPLM 0.6892793996521159 (0.40347158098998825), RELITC 1.5177018800599018 (1.0019046096414141), Polyjuice 1.6296250138552448 (3.836941709718984)

# Edit Distance

In [None]:
# https://github.com/RedTeamingforLLMs/RedTeamingforLLMs/blob/main/utils/metrics.py
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between two sequences.

    Only one row of the dynamic-programming table is kept at a time. The row
    runs along the shorter input, the outer loop along the longer one.
    """
    shorter, longer = (b, a) if len(a) > len(b) else (a, b)

    previous_row = list(range(len(shorter) + 1))
    for row_number, longer_item in enumerate(longer, start=1):
        current_row = [row_number]

        for column, shorter_item in enumerate(shorter):
            if shorter_item == longer_item:
                # Same element: carry the diagonal cost over unchanged.
                cost = previous_row[column]
            else:
                # Cheapest of substitution, deletion and insertion, plus one edit.
                cost = 1 + min(previous_row[column], previous_row[column + 1], current_row[-1])
            current_row.append(cost)

        previous_row = current_row

    return previous_row[-1]

In [None]:
def _length_normalised_edits(column):
    """Levenshtein distance of every text in ``column`` to its factual, divided by the factual's length."""
    return [
        levenshtein(counterfactual, factual) / len(factual)
        for counterfactual, factual in zip(counterfactuals[column], counterfactuals['factual'])
    ]


pplm_dist = _length_normalised_edits('pplm')

relitc_dist = _length_normalised_edits('relitc')

polyjuice_dist = _length_normalised_edits('polyjuice')

In [None]:
print(f"Mean edit distance: PPLM {np.mean(pplm_dist)}, RELITC {np.mean(relitc_dist)}, Polyjuice {np.mean(polyjuice_dist)}")

In [None]:
counterfactuals['polyjuice_edit_distance'] = polyjuice_dist
counterfactuals['pplm_edit_distance'] = pplm_dist
counterfactuals['relitc_edit_distance'] = relitc_dist

# Tree edit distance

In [None]:
# Source: https://www.geeksforgeeks.org/syntax-tree-natural-language-processing/

#Extract all parts of speech from any text
# Chunk grammar applied on top of the POS tags. The PP and VP rules refer to the
# chunk labels (P, NP, V) produced by the rules above them. The PP rule used to
# say <p>; tag patterns are matched case-sensitively, so it never matched the
# `P` chunks and no prepositional phrase was ever produced.
chunker = RegexpParser("""
                       NP: {<DT>?<JJ>*<NN>}    #To extract Noun Phrases
                       P: {<IN>}               #To extract Prepositions
                       V: {<V.*>}              #To extract Verbs
                       PP: {<P> <NP>}          #To extract Prepositional Phrases
                       VP: {<V> <NP|PP>*}      #To extract Verb Phrases
                       """)

def get_tree(text):
    """Tokenize and POS-tag ``text`` and return the chunk tree built by ``chunker``."""
    # (token, tag) pairs for every token of the text
    tagged = pos_tag(word_tokenize(text))

    # Group the tagged tokens into chunks according to the grammar above
    return chunker.parse(tagged)

In [None]:
def traverse_tree(tree):
    """Recursively convert an NLTK tree into a ``zss`` ``Node`` tree.

    Each subtree becomes a node named after its label. Every other child
    becomes a node named after its first element (for the trees built by
    ``get_tree`` that is the token of a ``(token, tag)`` pair).
    """
    root = Node(tree.label())

    for child in tree:
        if type(child) == nltk.tree.Tree:
            kid = traverse_tree(child)
        else:
            kid = Node(child[0])
        root = root.addkid(kid)

    return root

In [None]:
def get_zss(text):
    """Chunk ``text`` with ``get_tree`` and return the result converted by ``traverse_tree``."""
    return traverse_tree(get_tree(text))

In [None]:
def tree_edit_dist(text_a, text_b):
    """Return the ``zss`` ``simple_distance`` between the chunk trees of the two texts."""
    zss_a = get_zss(text_a)
    zss_b = get_zss(text_b)
    return simple_distance(zss_a, zss_b)

In [None]:
pplm_tree_dist = [tree_edit_dist(cont, fact) for cont, fact in zip(counterfactuals['pplm'], counterfactuals['factual'])]
relitc_tree_dist = [tree_edit_dist(cont, fact) for cont, fact in zip(counterfactuals['relitc'], counterfactuals['factual'])]
polyjuice_tree_dist = [tree_edit_dist(cont, fact) for cont, fact in zip(counterfactuals['polyjuice'], counterfactuals['factual'])]

In [None]:
print(f"Mean tree edit distance: PPLM {np.mean(pplm_tree_dist)}, RELITC {np.mean(relitc_tree_dist)}, Polyjuice {np.mean(polyjuice_tree_dist)}")

In [None]:
counterfactuals['polyjuice_tree_edit_distance'] = polyjuice_tree_dist
counterfactuals['pplm_tree_edit_distance'] = pplm_tree_dist
counterfactuals['relitc_tree_edit_distance'] = relitc_tree_dist

# Embedding distance

In [None]:
model_id = "gtfintechlab/FOMC-RoBERTa"
tokenizer = RobertaTokenizerFast.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)
model = RobertaForSequenceClassification.from_pretrained(model_id, config=config).cuda()


In [None]:
def get_embeddings(texts, batch_size=32):
    """Embed ``texts`` with the notebook-level ``model`` and ``tokenizer``.

    The embedding of a text is the final-layer hidden state of its last
    non-padding token. The previous version always took position ``-1`` of the
    padded batch, which is a padding position for every text shorter than the
    longest one in its batch, so the result depended on the batch composition.

    Args:
        texts: list of strings.
        batch_size: number of texts sent through the model at once.

    Returns:
        CPU tensor with one row per text, ``(len(texts), hidden_size)``.
    """
    if len(texts) == 0:
        return torch.empty((0, model.config.hidden_size))

    device = model.device
    batches = []
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        for start in tqdm.tqdm(range(0, len(texts), batch_size)):
            tokens = tokenizer(texts[start:start + batch_size], return_tensors="pt", padding=True).to(device)
            last_hidden = model(**tokens, output_hidden_states=True).hidden_states[-1]

            # Index of the last position with attention_mask == 1 in each row
            # (works for padding on either side).
            attention = tokens['attention_mask']
            last_token = attention.shape[1] - 1 - attention.flip(dims=[1]).argmax(dim=1)
            rows = torch.arange(last_hidden.shape[0], device=last_hidden.device)

            batches.append(last_hidden[rows, last_token].cpu())

    return torch.cat(batches)

In [None]:
names = ['polyjuice', 'pplm', 'relitc']
classes = [0, 1, 2]

# Factual sentences are embedded separately per class label, so
# embeddings['factual'] is a dict {class: tensor}.
cl_emb = {}
for c in classes:
    factual_texts = counterfactuals['factual'][counterfactuals['label'] == c].to_list()
    cl_emb[c] = get_embeddings(factual_texts)
embeddings = {'factual': cl_emb}

# Generator outputs are embedded in one go, so embeddings[name] is a single
# tensor whose rows follow the row order of `counterfactuals`.
for name in names:
    cl_emb = get_embeddings(counterfactuals[name].to_list())
    embeddings[name] = cl_emb

In [None]:
def get_embedding_distance(cfs):
    """Distance from each counterfactual embedding to the nearest factual of its target class.

    ``cfs[i]`` is paired with row ``i`` of the notebook-level ``counterfactuals``
    frame; the factual embeddings come from ``embeddings['factual']``.
    """
    return [
        np.min(distance.cdist([cfs[position]], embeddings['factual'][counterfactuals.iloc[position]['target']]))
        for position in range(len(counterfactuals))
    ]

In [None]:
get_embedding_distance(embeddings['polyjuice'])

In [None]:
counterfactuals['polyjuice_embedding_distance'] = get_embedding_distance(embeddings['polyjuice'])
counterfactuals['pplm_embedding_distance'] = get_embedding_distance(embeddings['pplm'])
counterfactuals['relitc_embedding_distance'] = get_embedding_distance(embeddings['relitc'])

In [None]:
# embeddings[name] is a single (n_rows, hidden) tensor per generator, so
# embeddings[name][cl] would be one row, not the rows of class `cl`. Select the
# rows by target class before measuring the distance to the nearest factual
# embedding of that class.
# NOTE(review): grouping by 'target' mirrors get_embedding_distance — confirm this is the intended split.
_target_of_row = counterfactuals['target'].to_numpy()
min_distances = {
    name: {
        cl: distance.cdist(
            embeddings[name][torch.as_tensor(_target_of_row == cl)],
            embeddings['factual'][cl],
        ).min(axis=1)
        for cl in classes
    }
    for name in names
}

In [None]:
min_distances

In [None]:
min_distances_mean = {name: np.mean(np.concatenate([min_distances[name][cl] for cl in classes])) for name in names}

In [None]:
min_distances_mean

In [None]:
embeddings

In [None]:
n_samples = 50
# Random indices for subsampling target class embeddings. A dedicated, seeded
# generator keeps the subsample — and with it the implausibility scores —
# identical across runs.
_subsample_rng = torch.Generator().manual_seed(0)
indices = {
    c: torch.randperm(len(embeddings['factual'][c]), generator=_subsample_rng)[:n_samples]
    for c in classes
}

def get_implausibility(cfs):
    """Mean distance from each counterfactual embedding to the subsampled factuals of its target class.

    ``cfs[i]`` is paired with row ``i`` of the notebook-level ``counterfactuals``
    frame. The comparison set is ``embeddings['factual'][target]`` restricted to
    the (at most ``n_samples``) rows selected in ``indices``.
    """
    out = []
    for i in range(len(counterfactuals)):
        target = counterfactuals.iloc[i]['target']
        reference = embeddings['factual'][target][indices[target]]
        out.append(np.mean(distance.cdist([cfs[i]], reference)))
    return out

In [None]:
implausibilities = {name: get_implausibility(embeddings[name]) for name in names}

In [None]:
counterfactuals['polyjuice_implausibility'] = implausibilities['polyjuice']
counterfactuals['pplm_implausibility'] = implausibilities['pplm']
counterfactuals['relitc_implausibility'] = implausibilities['relitc']

In [None]:
implausibilities_mean = {name: np.mean(implausibilities[name]) for name in implausibilities}

In [None]:
implausibilities_mean

In [None]:
# index=False: the row index carries no information, and writing it would add
# another 'Unnamed: 0' column each time the file is read back with pd.read_csv.
counterfactuals.to_csv('metrics_calculated.csv', index=False)

# Tables

In [None]:
counterfactuals = pd.read_csv('metrics_calculated.csv')

In [None]:
counterfactuals.iloc[:8][['polyjuice_success', 'pplm_success', 'relitc_success']]

In [None]:
def get_table(df, success_only=False, perplexity_suffix=''):
    """Render the mean (std) of every metric per generator as a Markdown table.

    Args:
        df: frame holding, for each generator ``g`` in polyjuice / pplm / relitc,
            the columns ``g_perplexity<suffix>``, ``g_edit_distance``,
            ``g_tree_edit_distance``, ``g_embedding_distance``,
            ``g_implausibility`` and ``g_success``, plus
            ``factual_perplexity<suffix>``.
        success_only: when true, each generator is summarised only over the
            rows where its own ``g_success`` flag is set, and the
            "Success rate" column is left out.
        perplexity_suffix: suffix of the perplexity columns to summarise, e.g.
            ``'_finance'`` for columns written with that ``row_ending``. The
            default ``''`` reads the plain ``*_perplexity`` columns.

    Returns:
        The table as a string: header row, divider row and one row per
        generator, each terminated by a newline. Cells are formatted as
        ``mean (std)`` with two and one decimals respectively.
    """
    generators = ['Polyjuice', 'PPLM', 'RELITC']
    factual_col = f'factual_perplexity{perplexity_suffix}'

    def perplexity_ratio(frame, gen):
        # Row-wise ratio of counterfactual to factual perplexity.
        return np.array(frame[f'{gen}_perplexity{perplexity_suffix}']) / np.array(frame[factual_col])

    # (column title, function returning the per-row values of one generator)
    metrics = [
        ('Perplexity', lambda frame, gen: frame[f'{gen}_perplexity{perplexity_suffix}']),
        ('Perplexity ratio', perplexity_ratio),
        ('Edit distance', lambda frame, gen: frame[f'{gen}_edit_distance']),
        ('Tree edit distance', lambda frame, gen: frame[f'{gen}_tree_edit_distance']),
        ('Embedding distance', lambda frame, gen: frame[f'{gen}_embedding_distance']),
        ('Implausibility', lambda frame, gen: frame[f'{gen}_implausibility']),
        ('Success rate', lambda frame, gen: frame[f'{gen}_success']),
    ]

    if success_only:
        # Every remaining row is a success, so the rate column would be constant.
        metrics = metrics[:-1]

    header = '|   |' + ''.join(f' {title} |' for title, _ in metrics)
    divider = '|---|' + '---|' * len(metrics)
    lines = [header, divider]

    for generator in generators:
        gen_lower = generator.lower()
        curr_df = df[df[f'{gen_lower}_success'] == True] if success_only else df

        cells = ''
        for _, extract in metrics:
            values = extract(curr_df, gen_lower)
            cells += f' {np.mean(values):.2f} ({np.std(values):.1f}) |'
        lines.append(f'| {generator} |' + cells)

    return '\n'.join(lines) + '\n'

In [None]:
print(get_table(counterfactuals))

|   | Perplexity | Perplexity ratio | Edit distance | Tree edit distance | Embedding distance | Success rate |
|---|---|---|---|---|---|---|
| Polyjuice | 90.975 | 1.797 | 0.307 | 19.667 | 20.323 | 0.339 |
| PPLM | 36.966 | 0.783 | 0.689 | 36.941 | 20.885 | 0.506 |
| RELITC | 100.944 | 1.670 | 0.145 | 10.724 | 21.961 | 0.737 |

In [None]:
print(get_table(counterfactuals, True))

|   | Perplexity | Perplexity ratio | Edit distance | Tree edit distance | Embedding distance | Success rate |
|---|---|---|---|---|---|---|
| Polyjuice | 99.642 | 1.908 | 0.363 | 22.102 | 20.354 | 1.000 |
| PPLM | 36.643 | 0.772 | 0.758 | 36.254 | 20.691 | 1.000 |
| RELITC | 104.039 | 1.680 | 0.121 | 9.903 | 21.841 | 1.000 |

In [None]:
print(get_table(counterfactuals[counterfactuals['target'] == 0]))

In [None]:
print(get_table(counterfactuals[counterfactuals['target'] == 1]))

In [None]:
print(get_table(counterfactuals[counterfactuals['target'] == 2]))

In [None]:
print(get_table(counterfactuals[counterfactuals['label'] == 0]))

In [None]:
print(get_table(counterfactuals[counterfactuals['label'] == 1]))

In [None]:
print(get_table(counterfactuals[counterfactuals['label'] == 2]))

In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

In [None]:
plt_names = ['factual', 'polyjuice', 'pplm', 'relitc']
classes = [0, 1, 2]

# Flat collection of every embedding to plot, with its class, its source and a
# colour index that is unique per (source, class) pair.
points = {
    'embeddings': [],
    'labels': [],
    'names': [],
    'idxs': []
}

# embeddings['factual'] is a dict {class: tensor}; each generator entry is one
# tensor for all rows, so its rows are selected per class through the target column.
# NOTE(review): generators are grouped by 'target' here — confirm that is the intended colouring.
_target_of_row = counterfactuals['target'].to_numpy()

for i, name in enumerate(plt_names):
    for j, cl in enumerate(classes):
        if name == 'factual':
            class_embeddings = embeddings['factual'][cl]
        else:
            class_embeddings = embeddings[name][torch.as_tensor(_target_of_row == cl)]

        for embedding in class_embeddings:
            # Stored as NumPy vectors so the list converts cleanly to a 2-D array.
            points['embeddings'].append(embedding.numpy())
            points['labels'].append(cl)
            points['names'].append(name)
            points['idxs'].append(j * len(classes) * 2 + i)
            

In [None]:
pca = PCA(n_components=2).fit(np.array(points['embeddings']))

In [None]:
ex = []
for i, name in enumerate(names):
    for j, cl in enumerate(classes):
        ex.append([j * len(classes) * 2 + i, f'{name} class {cl}'])

labs = [f'{name} class {cl}' for name, cl in zip(points['names'], points['labels'])]

# The plot used to read points['tsne'], which no cell shown here computes.
# Project with the PCA fitted in the previous cell instead.
points['pca'] = pca.transform(np.stack([np.asarray(e) for e in points['embeddings']]))

plt.scatter(points['pca'][:, 0], points['pca'][:, 1], c=points['idxs'], cmap='viridis')
plt.xlabel('PC 1')
plt.ylabel('PC 2')

plt.show()

In [None]:
counterfactuals

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(15, 5))
ax1.scatter(counterfactuals['factual_perplexity'], counterfactuals['polyjuice_perplexity'])
ax2.scatter(counterfactuals['factual_perplexity'], counterfactuals['pplm_perplexity'])
ax3.scatter(counterfactuals['factual_perplexity'], counterfactuals['relitc_perplexity'])
plt.ylim(bottom=-13, top=750)

print(
    f"""
    Pearson r perplexity factual vs counterfactual
    Polyjuice: {scipy.stats.pearsonr(counterfactuals['factual_perplexity'], counterfactuals['polyjuice_perplexity'])}
    PPLM: {scipy.stats.pearsonr(counterfactuals['factual_perplexity'], counterfactuals['pplm_perplexity'])}
    RELITC: {scipy.stats.pearsonr(counterfactuals['factual_perplexity'], counterfactuals['relitc_perplexity'])}
    """
)

In [None]:
non_expert = pd.read_csv('survey_results/non-expert_mean_results.csv')

ids = np.array(non_expert['id'])

ordered = counterfactuals.set_index('id').loc[ids-1]

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(15, 5))
ax1.scatter(ordered['polyjuice_perplexity'], non_expert['polyjuice'])
ax2.scatter(ordered['pplm_perplexity'], non_expert['pplm'])
ax3.scatter(ordered['relitc_perplexity'], non_expert['relitc'])

In [None]:
plt.scatter(ordered['polyjuice_perplexity'], non_expert['polyjuice'], label='Polyjuice')
plt.scatter(ordered['pplm_perplexity'], non_expert['pplm'], label='PPLM')
plt.scatter(ordered['relitc_perplexity'], non_expert['relitc'], label='RELITC')
plt.legend()

In [None]:

font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 16}

plt.rc('font', **font)
plt.rcParams['axes.grid'] = True

# One panel per generator, ordered RELITC, Polyjuice, PPLM from left to right.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, sharex=True, figsize=(15, 4))
ax2.scatter(ordered['polyjuice_edit_distance'], non_expert['polyjuice'])
ax3.scatter(ordered['pplm_edit_distance'], non_expert['pplm'])
ax1.scatter(ordered['relitc_edit_distance'], non_expert['relitc'])

ax2.set_title('Polyjuice')
ax3.set_title('PPLM')
ax1.set_title('RELITC')

# Shared axes: label the y axis on the left panel and the x axis on the middle one only.
ax1.set_ylabel('Fluency score')
ax2.set_xlabel('Edit distance')

plt.gcf().subplots_adjust(bottom=0.15)

plt.savefig('fluency_edit_plot.png')

# The heading used to read "perplexity factual vs counterfactual" although the
# values below correlate edit distance with the fluency score.
print(
    f"""
    Pearson r edit distance vs fluency score
    Polyjuice: {scipy.stats.pearsonr(ordered['polyjuice_edit_distance'], non_expert['polyjuice'])}
    PPLM: {scipy.stats.pearsonr(ordered['pplm_edit_distance'], non_expert['pplm'])}
    RELITC: {scipy.stats.pearsonr(ordered['relitc_edit_distance'], non_expert['relitc'])}
    """
)

In [None]:
for method in ['polyjuice', 'pplm', 'relitc']:
    with open(f"fig_data/{method}.dat", 'w') as file:
        file.write('edit_dist fluency\n')
        for a,b in zip(ordered[f'{method}_edit_distance'], non_expert[f'{method}']):
            file.write(f'{a} {b}\n')

In [None]:
a = 0.5

plt.scatter(ordered['pplm_edit_distance'], non_expert['pplm'], label='PPLM', alpha=a)
plt.scatter(ordered['relitc_edit_distance'], non_expert['relitc'], label='RELITC', alpha=a)
plt.scatter(ordered['polyjuice_edit_distance'], non_expert['polyjuice'], label='Polyjuice', alpha=a)
plt.legend()

plt.ylabel('Fluency score')
plt.xlabel('Edit distance')

plt.savefig('fluency_edit_plot.png')

In [None]:
# Pool the survey scores and every automatic metric over the three generators,
# always in the order polyjuice, pplm, relitc, so the pooled sequences line up
# position by position.
fluency = pd.concat([non_expert['polyjuice'], non_expert['pplm'], non_expert['relitc']])
# NOTE(review): `perplexity` rebinds the name of the metric object loaded with load("perplexity", ...)
# earlier in the notebook; the perplexity cells cannot be re-run after this one without reloading it.
perplexity = pd.concat([ordered['polyjuice_perplexity'], ordered['pplm_perplexity'], ordered['relitc_perplexity']])
# Ratio of counterfactual to factual perplexity, kept as a plain list.
perplexity_ratio = list(np.array(ordered['polyjuice_perplexity']) / np.array(ordered['factual_perplexity']))\
                + list(np.array(ordered['pplm_perplexity']) / np.array(ordered['factual_perplexity']))\
                + list(np.array(ordered['relitc_perplexity']) / np.array(ordered['factual_perplexity']))
edit_dist = pd.concat([ordered['polyjuice_edit_distance'], ordered['pplm_edit_distance'], ordered['relitc_edit_distance']])
# NOTE(review): `tree_edit_dist` rebinds the name of the tree_edit_dist() function defined above;
# a later cell reads this Series under the same name, so both have to be renamed together.
tree_edit_dist = pd.concat([ordered['polyjuice_tree_edit_distance'], ordered['pplm_tree_edit_distance'], ordered['relitc_tree_edit_distance']])
embedding_dist = pd.concat([ordered['polyjuice_embedding_distance'], ordered['pplm_embedding_distance'], ordered['relitc_embedding_distance']])
implausibility = pd.concat([ordered['polyjuice_implausibility'], ordered['pplm_implausibility'], ordered['relitc_implausibility']])

print(
    f"""
    Pearson r perplexity: {scipy.stats.pearsonr(fluency, perplexity)}
    Pearson r perplexity ratio: {scipy.stats.pearsonr(fluency, perplexity_ratio)}
    Pearson r edit distance: {scipy.stats.pearsonr(fluency, edit_dist)}
    Pearson r tree edit distance: {scipy.stats.pearsonr(fluency, tree_edit_dist)}
    Pearson r embedding distance: {scipy.stats.pearsonr(fluency, embedding_dist)}
    Pearson r implausibility: {scipy.stats.pearsonr(fluency, implausibility)}
    """
)

In [None]:
plt.scatter(tree_edit_dist, fluency)

In [None]:
print(
    f"""
    Pearson r edit distance
    Polyjuice: {scipy.stats.pearsonr(non_expert['polyjuice'], ordered['polyjuice_edit_distance'])}
    PPLM: {scipy.stats.pearsonr(non_expert['pplm'], ordered['pplm_edit_distance'])}
    RELITC: {scipy.stats.pearsonr(non_expert['relitc'], ordered['relitc_edit_distance'])}
    """
)

In [None]:
expert = pd.read_csv('survey_results/expert_mean_results.csv').dropna()

ids = np.array(expert['id'])

ordered = counterfactuals.set_index('id').loc[ids-1]

fluency = pd.concat([expert['polyjuice_fluency'], expert['pplm_fluency'], expert['relitc_fluency']])
plausibility = pd.concat([expert['polyjuice_plausibility'], expert['pplm_plausibility'], expert['relitc_plausibility']])

perplexity = pd.concat([ordered['polyjuice_perplexity'], ordered['pplm_perplexity'], ordered['relitc_perplexity']])
perplexity_ratio = list(np.array(ordered['polyjuice_perplexity']) / np.array(ordered['factual_perplexity']))\
                + list(np.array(ordered['pplm_perplexity']) / np.array(ordered['factual_perplexity']))\
                + list(np.array(ordered['relitc_perplexity']) / np.array(ordered['factual_perplexity']))
edit_dist = pd.concat([ordered['polyjuice_edit_distance'], ordered['pplm_edit_distance'], ordered['relitc_edit_distance']])
tree_edit_dist = pd.concat([ordered['polyjuice_tree_edit_distance'], ordered['pplm_tree_edit_distance'], ordered['relitc_tree_edit_distance']])
embedding_dist = pd.concat([ordered['polyjuice_embedding_distance'], ordered['pplm_embedding_distance'], ordered['relitc_embedding_distance']])
implausibility = pd.concat([ordered['polyjuice_implausibility'], ordered['pplm_implausibility'], ordered['relitc_implausibility']])

print(
    f"""
    Fluency
    Pearson r perplexity: {scipy.stats.pearsonr(fluency, perplexity)}
    Pearson r perplexity ratio: {scipy.stats.pearsonr(fluency, perplexity_ratio)}
    Pearson r edit distance: {scipy.stats.pearsonr(fluency, edit_dist)}
    Pearson r tree edit distance: {scipy.stats.pearsonr(fluency, tree_edit_dist)}
    Pearson r embedding distance: {scipy.stats.pearsonr(fluency, embedding_dist)}
    Pearson r implausibility: {scipy.stats.pearsonr(fluency, implausibility)}
    
    Plausibility
    Pearson r perplexity: {scipy.stats.pearsonr(plausibility, perplexity)}
    Pearson r perplexity ratio: {scipy.stats.pearsonr(plausibility, perplexity_ratio)}
    Pearson r edit distance: {scipy.stats.pearsonr(plausibility, edit_dist)}
    Pearson r tree edit distance: {scipy.stats.pearsonr(plausibility, tree_edit_dist)}
    Pearson r embedding distance: {scipy.stats.pearsonr(plausibility, embedding_dist)}
    Pearson r implausibility: {scipy.stats.pearsonr(plausibility, implausibility)}
    """
)

In [None]:
implausibility

In [None]:
plt.scatter(implausibility, plausibility)

In [None]:
plt.scatter(fluency, plausibility)

In [None]:
scipy.stats.pearsonr(fluency, plausibility)