In [None]:
import json
import os

import numpy as np
import pandas as pd
import transformers
from datasets import load_dataset

In [18]:
# Load the raw outputs of every method. Each file is opened as UTF-8 explicitly
# so that decoding does not depend on the platform's default encoding.
with open("results_raw/pplm_results.json", "r", encoding='utf-8') as file:
    pplm = json.load(file)

# Despite its .json extension this file is parsed as JSON Lines:
# one JSON document per line.
relitc = []
with open("results_raw/relitc_results.json", "r", encoding='utf-8') as file:
    for line in file:
        # A blank line (e.g. a trailing newline at the end of the file) is not
        # valid JSON, so skip it instead of failing in json.loads.
        if line.strip():
            relitc.append(json.loads(line))

with open("results_raw/polyjuice_results.json", "r", encoding='utf-8') as file:
    polyjuice = json.load(file)

with open("results_raw/poc.json", "r", encoding='utf-8') as file:
    poc = json.load(file)

In [None]:
# Sanity check: number of top-level entries loaded from the PPLM results file.
len(pplm)

In [None]:
# Sanity check: number of JSON lines parsed from the RELITC results file.
len(relitc)

In [None]:
# Sanity check: number of top-level entries loaded from the Polyjuice results file.
len(polyjuice)

# PPLM

In [6]:
# Remove the literal "<|endoftext|>" marker from the start of every PPLM
# counterfactual. The marker is only removed when it is actually present, so
# re-running this cell (or meeting a string without the marker) no longer
# chops 13 characters of real text off the front.
marker = "<|endoftext|>"
for row in pplm:
    for i, ce in enumerate(row['counterfactuals']):
        # Entries that are not strings (e.g. already-classified records after
        # an out-of-order re-run) are left untouched.
        if isinstance(ce, str) and ce.startswith(marker):
            row['counterfactuals'][i] = ce[len(marker):]

In [7]:
# Model identifier handed to transformers.pipeline below.
classifier_fomc = "gtfintechlab/FOMC-RoBERTa"

# No task name is given; only the model id and a device are passed.
# NOTE(review): device=0 pins the pipeline to accelerator index 0, so this cell
# presumably needs a GPU to be available — confirm before running on CPU-only hosts.
pipe = transformers.pipeline(
    model=classifier_fomc,
    device=0,
)

In [8]:
def classify(sentences):
    """Classify sentences with the module-level ``pipe`` pipeline.

    Args:
        sentences: A string or an iterable of strings. A single string is
            treated as a one-element batch; any other iterable is materialised
            into a list so it can be both sent to the pipeline and paired with
            the results.

    Returns:
        A list with one new dict per input sentence, in input order. Each dict
        carries the fields returned by the pipeline, with ``label`` replaced by
        the integer found after the first underscore of the pipeline's label
        string (e.g. ``"LABEL_2"`` -> ``2``) and the input sentence stored
        under ``text``. The pipeline's own output dicts are not modified.

    Raises:
        IndexError: If a returned label contains no underscore.
        ValueError: If the part after the first underscore is not an integer.
    """
    if isinstance(sentences, str):
        sentences = [sentences]
    else:
        sentences = list(sentences)

    # Nothing to classify: skip the model call entirely.
    if not sentences:
        return []

    outputs = pipe(sentences, batch_size=8, truncation="only_first")
    return [
        # Later keys win, so the string label is overwritten by its integer
        # form while keeping the original key order (label, score, ..., text).
        {**out, 'text': text, 'label': int(out['label'].split("_")[1])}
        for text, out in zip(sentences, outputs)
    ]

In [None]:
# Replace every row's counterfactual strings with classified records.
# If this cell is run again, the entries are already dicts; their 'text' field
# is recovered first so the pipeline always receives plain strings and the
# cell stays re-runnable.
for row in pplm:
    texts = [ce['text'] if isinstance(ce, dict) else ce for ce in row['counterfactuals']]
    row['counterfactuals'] = classify(texts)

In [None]:
# Identifier passed to datasets.load_dataset.
dataset_id = "TextCEsInFinance/fomc-communication-counterfactual"

# Only the 'test' split is used; it is converted to a pandas DataFrame so rows
# can be looked up by position later on.
test_split = load_dataset(dataset_id)['test']
dataset = test_split.to_pandas()

In [11]:
# Copy the 'target' value of the dataset row at the same position onto each
# PPLM row, stored as a plain int.
# assumes pplm is ordered exactly like the test split — TODO confirm
for position, row in enumerate(pplm):
    row['target'] = int(dataset.iloc[position]['target'])

In [None]:
# Display the first PPLM row to inspect its structure at this point.
pplm[0]

# RELITC

In [None]:
# Display the first raw RELITC record to inspect its structure.
relitc[0]

In [22]:
# Top-level keys of a raw RELITC record that are copied over unchanged.
outer_fields = ['id', 'text', 'target']


def _clean_best_text(text):
    """Remove the label prefixes from ``text`` and upper-case its first character."""
    for prefix in ('neutral : ', 'hawkish : ', 'dovish : '):
        text = text.replace(prefix, '')
    # str.capitalize() would also lower-case every character after the first,
    # which mangles acronyms and proper nouns; only the first one is changed.
    return text[:1].upper() + text[1:]


def _convert_relitc_row(row):
    """Reshape one raw RELITC record into the flat output layout.

    The result keeps the ``outer_fields`` present in ``row``, stores
    ``row['original_label']`` under ``label``, rewrites ``id`` from a
    ``"<prefix>_<n>"`` string to the integer ``n + 1`` and adds a
    ``counterfactuals`` list of ``{'text', 'label', 'score'}`` dicts.
    """
    new_row = {k: row[k] for k in row if k in outer_fields}
    new_row['label'] = row['original_label']
    # presumably shifts a 0-based index to 1-based ids — TODO confirm
    new_row['id'] = int(new_row['id'].split('_')[1]) + 1

    if row['counterfactuals']:
        ces = [
            {'text': ce['edited_output_text'], 'label': ce['pred_class'], 'score': ce['pred_proba']}
            for ce in row['counterfactuals']
        ]
    else:
        # No counterfactual recorded: fall back to the first entry of the
        # record's best results, whose text still carries a label prefix.
        best = row['results_best']['results'][0]
        ces = [{
            'text': _clean_best_text(best['edited_output_text']),
            'label': best['pred_class'],
            'score': best['pred_proba'],
        }]

    new_row['counterfactuals'] = ces
    return new_row


new_out = [_convert_relitc_row(row) for row in relitc]

relitc_out = new_out

In [None]:
# Display the first converted RELITC row to check the reshaped layout.
relitc_out[0]

In [None]:
# Display the first Polyjuice row to inspect its structure.
polyjuice[0]

In [25]:
# Write the results of each method to results/ as indented JSON.
output_dir = "results"
# A fresh checkout may not contain the output folder yet; open(..., "w") does
# not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): polyjuice is written as loaded (no post-processing in the
# visible cells) and poc is loaded but never written — confirm both are intended.
outputs = {
    "pplm_results.json": pplm,
    "relitc_results.json": relitc_out,
    "polyjuice_results.json": polyjuice,
}

for filename, records in outputs.items():
    with open(os.path.join(output_dir, filename), "w", encoding='utf-8') as file:
        json.dump(records, file, indent=4)