### Objectives

Given an (original, counterfactual) pair, we want to find the difference between them. Here, the difference is defined as the tokens that were **added**, **deleted**, or **substituted**. With this information, we can look for patterns in these differences based on the following typology:

- construct-driven
- construct-agnostic
    - negation
    - distancing: could be done via hedges from this [paper](https://www.aclweb.org/anthology/2020.lrec-1.380.pdf)
    - sarcasm: very hard to get automatically

In [None]:
import difflib
import pandas as pd
import string

In [None]:
# NOTE(review): DATA_ROOT is not referenced by any of the visible cells; they
# build their paths from "../../data/..." directly.
DATA_ROOT = 'DATAROOT'
# Names of the constructs; later cells loop over this list to load and analyse
# one paired.csv per construct.
constructs = ['sentiment', 'sexism', 'hatespeech']

In [None]:
# Scratch cell: token-level diff of a single (original, edited) pair.
# NOTE(review): `original` and `edited` are not assigned in any visible cell;
# they must already exist in the kernel for this cell to run.
d = difflib.Differ()
diff = d.compare(original.split(), edited.split())

additions, deletions = [], []
for token in diff:
    # Every line produced by Differ starts with a two-character code
    # ("+ " for tokens only in the edited text, "- " for tokens only in the
    # original); the token itself follows that code.
    print(token)
    marker, payload = token[:1], token[2:]
    if marker == "+":
        additions.append(payload)
    elif marker == "-":
        deletions.append(payload)

additions, deletions

In [None]:
def find_diff(original, edited):
    """
    Return the tokens that differ between two sentences.

    Punctuation (every character in string.punctuation) is removed from both
    sentences, which are then split on whitespace and compared token by token
    with difflib.Differ.

    Returns a dict with two keys:
      - 'additions': tokens present only in `edited`
      - 'deletions': tokens present only in `original`
    A substituted token therefore shows up once in each list.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    source_tokens = original.translate(strip_punctuation).split()
    target_tokens = edited.translate(strip_punctuation).split()

    # Differ.compare is lazy; materialise it so it can be scanned twice.
    delta = list(difflib.Differ().compare(source_tokens, target_tokens))

    # Each diff line is "<code> <token>"; drop the two-character code.
    return {
        'additions': [line[2:] for line in delta if line.startswith("+")],
        'deletions': [line[2:] for line in delta if line.startswith("-")],
    }
    

### Generate diffs

For all constructs, the paired data is saved as a tab-separated `paired.csv` file with the following fields:
- original_id
- counterfactual_id
- original_text 	
- counterfactual_text 	
- original_label 	
- counterfactual_label

Augment this with a `diff` field, which is a dictionary holding the added and deleted tokens (keys `additions` and `deletions`).

In [None]:
import ast

def generate_diffs(construct):
    """
    Compute the token diff for every pair of one construct and save it.

    Reads the construct's tab-separated paired.csv, adds a 'diff' column with
    the find_diff result for each (original_text, counterfactual_text) row,
    writes the frame back to the same file (overwriting it), and returns it.
    """
    path = "../../data/data/%s/train/paired.csv" %(construct)
    data = pd.read_csv(path, sep = "\t")

    text_pairs = zip(data['original_text'], data['counterfactual_text'])
    data['diff'] = [find_diff(before, after) for before, after in text_pairs]

    data.to_csv(path, sep = "\t", index = False)
    return data

# The diffs only need to be generated once; afterwards they are read back from
# disk. To regenerate them, replace the read below with:
#     data[construct] = generate_diffs(construct)
data = {}

for construct in constructs:
    frame = pd.read_csv("../../data/data/%s/train/paired.csv" %(construct), sep = "\t")
    data[construct] = frame
    # The diff dicts come back from the csv as text; parse them into dicts.
    frame['diff'] = [ast.literal_eval(cell) for cell in frame['diff']]

### Use lexicons to categorize diffs

In [None]:
import pickle

# with open('../../data/lexica/lexica.pkl', 'wb') as handle:
#     pickle.dump(lexicons, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load the pre-built lexicons from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load lexica.pkl from a trusted source.
with open('../../data/lexica/lexica.pkl', 'rb') as handle:
    lexicons = pickle.load(handle)

# The lexicon names double as the diff categories. Presumably each key maps to
# a list of words (TODO confirm) -- the .extend() call below needs a list-like.
diff_types = lexicons.keys()
# Keys of the dicts returned by find_diff.
directions = ['additions', 'deletions']

# combine hate and identity words
# (extends the 'hate words' entry in place; 'identity word' itself is unchanged)
lexicons['hate words'].extend(lexicons['identity word'])

lexicons.keys()

In [None]:
# Example pair. find_diff strips punctuation before comparing, so the inserted
# token is recorded as 'shouldnt' rather than "shouldn't".
diff = find_diff("I find [group] abhorrent", "I shouldn't find [group] abhorrent")

In [None]:
# diff types are negation, affect word / sentiment flip, gender word change, identity word change 
def classify_diff(diff, direction = 'additions', diff_type = 'negation', lexicon = None):
    """
    Report whether any changed token of a diff belongs to a given lexicon.

    Parameters
    ----------
    diff : dict
        Mapping with 'additions' and 'deletions' token lists, as returned by
        find_diff.
    direction : str
        Which token list of `diff` to inspect ('additions' or 'deletions').
    diff_type : str
        Key of the lexicon to match against.
    lexicon : dict, optional
        Mapping from diff type to an iterable of words. When omitted, the
        module-level `lexicons` is used.

    Returns
    -------
    bool
        True if at least one token of diff[direction] is an exact
        (case-sensitive) member of the selected lexicon, otherwise False.

    Raises
    ------
    KeyError
        If `direction` is not a key of `diff` or `diff_type` is not a key of
        the lexicon mapping.
    """
    # Resolve the global lazily so callers can pass their own mapping.
    if lexicon is None:
        lexicon = lexicons
    # No per-call debug print: this runs once per row, lexicon and direction.
    return not set(lexicon[diff_type]).isdisjoint(diff[direction])

In [None]:
# Sanity check on the example diff; `direction` falls back to its default,
# 'additions'.
classify_diff(diff, diff_type = 'negation')

In [None]:
# Add one boolean column per (lexicon, direction) pair to each construct's
# frame; the column is named "<diff_type>_<direction>".
for construct in constructs:
    frame = data[construct]
    for diff_type in diff_types:
        for direction in directions:
            column = diff_type + "_" + direction
            frame[column] = [
                classify_diff(row_diff, direction, diff_type)
                for row_diff in frame['diff']
            ]

data['sentiment']

In [None]:
#classify once and save
# for construct in constructs:
#     data[construct].to_csv("../../data/data/%s/train/paired.csv" %(construct), sep = "\t", index= False)

In [None]:
# Reload the saved frames from disk, replacing the in-memory `data`.
# NOTE(review): unlike the earlier loading cell, the 'diff' column is not passed
# through ast.literal_eval here, so if it is present it stays a string column.
data = {}
for construct in constructs:
    data[construct] = pd.read_csv("../../data/data/%s/train/paired.csv" %(construct), sep = "\t")

In [None]:
# Flag columns to summarise, one "<lexicon>_<direction>" entry per lexicon and
# direction.
types = [
    'negation_additions', 'negation_deletions',
    'affect word_additions', 'affect word_deletions',
    'gender word_additions', 'gender word_deletions',
    'identity word_additions', 'identity word_deletions',
    'hedges_additions', 'hedges_deletions',
    'hate words_additions', 'hate words_deletions',
]

In [None]:
# For every construct, compute the share of pairs flagged for each diff type,
# print it, and collect one summary record per construct in dist_list.
dist_dict = {}
dist_list = []

for construct in constructs:
    df = data[construct]
    total = len(df)
    dist_dict = {'construct': construct, 'total': total}
    print("total examples of " + construct + ": " + str(total))
    for diff_type in types:
        # Count the rows whose flag is True, then normalise by the frame size.
        flagged = int((df[diff_type] == True).sum())
        share = flagged/total
        dist_dict[diff_type] = share
        print(diff_type + ": " + str(share))
    print()
    dist_list.append(dist_dict)

In [None]:
# Emit the per-construct records as a LaTeX table, rounded to 3 decimals.
print(pd.DataFrame(dist_list).round(3).to_latex())