# AMR Analysis

- References:
  - https://github.com/Sean-Blank/AMRcoref

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from datasets import load_dataset, Dataset
from sibyl import *
import random
import pandas as pd
import amrlib
import penman
from tqdm.notebook import tqdm

torch.use_deterministic_algorithms(False)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

### Dataset

In [3]:
dataset = load_dataset("glue", "sst2", split="train")
dataset = dataset.rename_column("sentence", "text")
original_text, original_labels = dataset['text'], dataset['label']

Found cached dataset glue (C:/Users/Fabrice/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


### Boolean Featurizers

In [4]:
stog = amrlib.load_stog_model(device=0, max_sent_len=256)

In [5]:
def text2amr(texts):
    # if not isinstance(texts, list): texts = [texts]
    gs = stog.parse_sents(texts, add_metadata=False)
    gs = [LeGraph(g) for g in gs]
    return gs

In [99]:
class LeGraph:
    def __init__(self, graph):
        self.graph = penman.decode(graph) if not isinstance(graph, penman.graph.Graph) else graph
        self.amr_text = penman.encode(self.graph)

    def contains_concept(self, concepts):
        """
        Concepts are nodes / instances in the AMR graph.
        """
        if not isinstance(concepts, list): concepts = [concepts]
        graph_concepts = [t.target for t in self.graph.instances()]
        return any(c for c in graph_concepts if c in concepts)

    def contains_role(self, roles):
        """
        Roles are edges in the AMR graph.
        """
        if not isinstance(roles, list): roles = [roles]
        print(roles)
        graph_roles = [e.role for e in self.graph.edges()]
        print(graph_roles)
        print(self.graph.edges())
        return any(r for r in graph_roles if r in roles)

    def contains_attribute(self, attributes):
        """
        Attributes are properties of concept nodes, i.e. relationships to 
        constant values.
        """
        if not isinstance(attributes, list): attributes = [attributes]
        graph_attrs = [a.target for a in self.graph.attributes()]
        return any(a for a in graph_attrs if a in attributes)

In [7]:
def contains_duplicates(X):
    seen = set()
    seen_add = seen.add
    for x in X:
        if (x in seen or seen_add(x)):
            return True
    return False

In [125]:
# attributes =============================================================

def contains_imperative(g): return g.contains_attribute("imperative")
def contains_exlamation(g): return g.contains_attribute("expressive")
def contains_negation(g):   return g.contains_attribute("-")

# concepts ===============================================================

def contains_conjunctions(g):         return g.contains_concept(["and", "or", "contrast-01", "either", "neither"])
def contains_interrogative_clause(g): return g.contains_concept("truth-value")
def contains_question(g):             return g.contains_concept(["amr-unknown", "amr-choice"])

# roles ==================================================================

def contains_coreferences(g): return any(r for r in g.amr_text.split() if r in ['i', 'you', 'he', 'she', 'it', 'we', 'they'])
def contains_number(g):       return any(a for a in g.graph.attributes() if a.target.isnumeric())

def contains_accompanier(g):  return g.contains_role(':accompanier')
def contains_age(g):          return g.contains_role(':age')
def contains_beneficiary(g):  return g.contains_role(':beneficiary')
def contains_concession(g):   return g.contains_role(':concession')
def contains_condition(g):    return g.contains_role(':condition')
def contains_consist_of(g):   return any(r for r in g.amr_text.split() if r in [':consist-of'])
def contains_degree(g):       return g.contains_role(':degree')
def contains_destination(g):  return g.contains_role(':destination')
def contains_direction(g):    return g.contains_role(':direction')
def contains_domain(g):       return g.contains_role(':domain')
def contains_duration(g):     return g.contains_role(':duration')
def contains_example(g):      return g.contains_role(':example')
def contains_extent(g):       return g.contains_role(':extent')
def contains_frequency(g):    return g.contains_role(':frequency')
def contains_instrument(g):   return g.contains_role(':instrument')
# def contains_li(g):           return g.contains_role(':li')
def contains_location(g):     return g.contains_role(':location')
def contains_manner(g):       return g.contains_role(':manner')
def contains_medium(g):       return g.contains_role(':medium')
def contains_mod(g):          return g.contains_role(':mod')
def contains_mode(g):         return any(a for a in g.graph.attributes() if ":mode" in a.role)
def contains_name(g):         return g.contains_role(':name')
def contains_ord(g):          return g.contains_role(':ord')
def contains_part(g):         return g.contains_role(':part')
def contains_path(g):         return g.contains_role(':path')
def contains_polarity(g):     return g.contains_role(':polarity')
def contains_polite(g):       return any(r for r in g.amr_text.split() if r in [':polite'])
def contains_poss(g):         return g.contains_role(':poss')
def contains_purpose(g):      return g.contains_role(':purpose')
def contains_quant(g):        return g.contains_role(':quant')
def contains_range(g):        return g.contains_role(':range')
def contains_scale(g):        return g.contains_role(':scale')
def contains_source(g):       return g.contains_role(':source')
def contains_subevent(g):     return g.contains_role(':subevent')
def contains_time(g):         return g.contains_role(':time')
def contains_topic(g):        return g.contains_role(':topic')
def contains_unit(g):         return g.contains_role(':unit')
# def contains_value(g):        return g.contains_role(':value')
def contains_wiki(g):         return g.contains_role(':wiki')

In [9]:
import pickle
import os

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def pickle_list(a_list, a_path):
    with open(a_path, 'wb') as fp:
        pickle.dump(a_list, fp)

def unpickle_list(a_path):
    with open(a_path, 'rb') as fp:
        n_list = pickle.load(fp)
        return n_list

# def write_list(a_list, a_path):
#     with open(a_path, 'w') as fp:
#         fp.write('\n'.join(a_list))

# def read_list(a_path):
#     with open(a_path, 'r') as fp:
#         n_list = fp.read().split("\n")
#         return n_list

### Convert Text to AMRs

In [10]:
penman_path = "penmans.pkl"
batch_size = 36

In [17]:
if not os.path.exists(penman_path):
    print("computing amrs...")
    amrs = []
    for texts in tqdm(chunker(original_text, batch_size), total=len(original_text)/batch_size):
        amrs.extend(text2amr(texts))
    pickle_list(amrs, penman_path)
else:
    print("loading precomputed amrs...")
    amrs = unpickle_list(penman_path)

loading precomputed amrs...


### Model

In [20]:
pipe = pipeline(task="sentiment-analysis", device=0)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


### Transforms

In [21]:
blacklist = [Concept2Sentence, ConceptMix, Emojify]
ts = [t(task_name="sentiment", return_metadata=True) for t in TRANSFORMATIONS if t not in blacklist]

### Analysis

In [22]:
from sibyl import acc_at_k
from sklearn.metrics import accuracy_score

In [23]:
def extract_probs(results):
    return np.array([[1-r['score'], r['score']] if r['label'] == "POSITIVE" else [r['score'], 1-r['score']] for r in results])

def compute_accuracy(predictions, labels):
    if len(labels.shape) > 1:
        acc = acc_at_k(labels, predictions, k=2)       
    else:
        acc = accuracy_score(labels, np.argmax(predictions, -1))
    return acc

In [166]:
class AMRBooleanFeature:
    def __init__(self, dataset, amrs, min_samples=5):
        self.dataset = dataset
        self.amrs = amrs     
        self.min_samples = min_samples
    
    def extract_data_containing_feature(self, featurizer, max_size=1000):
        
        self.featurizer = featurizer 
        
        # extract T and F evaluation sets
        ts, fs = [], []
        for g, s, l in zip(self.amrs, self.dataset['text'], self.dataset['label']):
            if self.featurizer(g):
                ts.append((s, l))
                if len(ts) >= max_size:
                    break
            else:
                fs.append((s,l))
        
        len_ts = len(ts)
        len_fs = len(fs)
        
        if len_ts < self.min_samples:
            print(f"Only {len_ts} inputs contain the feature, skipping for insufficient data...")
            self.t_dataset = []
            self.f_dataset = []
            return
        if len_fs < len_ts:
            ts = ts[:len_fs]

        t_texts, t_labels = zip(*ts)
        f_texts, f_labels = zip(*random.sample(fs, len(t_texts)))

        t_texts, t_labels = list(t_texts), one_hot_encode(t_labels, 2)
        f_texts, f_labels = list(f_texts), one_hot_encode(f_labels, 2)

        self.t_dataset = Dataset.from_list(
            [{'text': t, 'label': l} for t, l in zip(t_texts, t_labels)])
        self.f_dataset = Dataset.from_list(
            [{'text': t, 'label': l} for t, l in zip(f_texts, f_labels)])
            
    def generate_transformed_datasets(self, transform):
        
        self.transform = transform
         
        # apply transformation to each set of texts containing the target feature
        transformed_t_text, transformed_t_labels = transform.transform_batch(
            batch=(self.t_dataset['text'], self.t_dataset['label']))
        transformed_f_text, transformed_f_labels = transform.transform_batch(
            batch=(self.f_dataset['text'], self.f_dataset['label']))
        transformed_t_labels = np.stack([np.array(a).squeeze() for a in transformed_t_labels])
        transformed_f_labels = np.stack([np.array(a).squeeze() for a in transformed_f_labels])
        
        self.tran_t_dataset = Dataset.from_list(
            [{'text': t, 'label': l} for t, l in zip(transformed_t_text, transformed_t_labels)])
        self.tran_f_dataset = Dataset.from_list(
            [{'text': t, 'label': l} for t, l in zip(transformed_f_text, transformed_f_labels)]) 
        
        self.t_changed = sum([t1 != t2 for t1, t2 in zip(self.t_dataset, self.tran_t_dataset)]) / len(self.t_dataset['text'])
        self.f_changed = sum([t1 != t2 for t1, t2 in zip(self.f_dataset, self.tran_f_dataset)]) / len(self.f_dataset['text'])
    
    def evaluate_original(self, pipe):
        
        # pass data through the model
        self.orig_t_preds = extract_probs(pipe(KeyDataset(self.t_dataset, "text")))
        self.orig_f_preds = extract_probs(pipe(KeyDataset(self.f_dataset, "text")))
        
        # compute accuracy
        self.orig_t_acc = compute_accuracy(
            self.orig_t_preds, np.argmax(self.t_dataset['label'], -1))
        self.orig_f_acc = compute_accuracy(
            self.orig_f_preds, np.argmax(self.f_dataset['label'], -1))
        
        results = {
            "transform": "original",
            "featurizer": self.featurizer.__name__,
            "num_samples": len(self.t_dataset),
            "T_orig_acc": self.orig_t_acc,
            "F_orig_acc": self.orig_f_acc,
            "T_tran_acc": 0,
            "F_tran_acc": 0,
            "T_changed": 0,
            "F_changed": 0
        }
        
        return results
            
    def evaluate_transform(self, pipe):
               
        # pass data through the model
        self.tran_t_preds = extract_probs(pipe(KeyDataset(self.tran_t_dataset, "text")))
        self.tran_f_preds = extract_probs(pipe(KeyDataset(self.tran_f_dataset, "text")))
        
        # compute accuracy
        self.tran_t_acc = compute_accuracy(
            self.tran_t_preds, np.array(self.tran_t_dataset['label']))
        self.tran_f_acc = compute_accuracy(
            self.tran_f_preds, np.array(self.tran_f_dataset['label']))
        
        results = {
            "transform": self.transform.__class__.__name__,
            "featurizer": self.featurizer.__name__,
            "num_samples": len(self.t_dataset),
            "T_orig_acc": self.orig_t_acc,
            "F_orig_acc": self.orig_f_acc,
            "T_tran_acc": self.tran_t_acc,
            "F_tran_acc": self.tran_f_acc,
            "T_changed": self.t_changed,
            "F_changed": self.f_changed
        }
        
        return results
    
    def get_prediction_details(self):
        # Feature DataFrame: T
        t_df = pd.DataFrame(
            [
                f.t_dataset["text"], 
                f.t_dataset["label"],
                f.orig_t_preds,
                f.tran_t_dataset["text"],
                f.tran_t_dataset["label"],
                f.tran_t_preds,
            ]).T
        t_df.columns = ['orig_t_text', 'orig_t_label', 'orig_t_preds', 
                        'tran_t_text', 'tran_t_label', 'tran_t_preds']
        
        t_df['t_pred_diff'] = t_df.apply(lambda row: np.array(row['orig_t_preds']) - np.array(row['tran_t_preds']), axis=1)

        # Feature DataFrame: F
        f_df = pd.DataFrame(
            [
                f.f_dataset["text"], 
                f.f_dataset["label"],
                f.orig_f_preds,
                f.tran_f_dataset["text"],
                f.tran_f_dataset["label"],
                f.tran_f_preds,
            ]).T
        f_df.columns = ['orig_f_text', 'orig_f_label', 'orig_f_preds', 
                        'tran_f_text', 'tran_f_label', 'tran_f_preds']

        f_df['f_pred_diff'] = f_df.apply(lambda row: np.array(row['orig_f_preds']) - np.array(row['tran_f_preds']), axis=1)
        
        return t_df, f_df

In [167]:
featurizers = [    
    contains_imperative,contains_exlamation,contains_negation,
    contains_conjunctions,contains_interrogative_clause,contains_question,
    contains_coreferences,contains_number,contains_accompanier,
    contains_age,contains_beneficiary,contains_concession,
    contains_condition,contains_consist_of,contains_degree,
    contains_destination,contains_direction,contains_domain,
    contains_duration,contains_example,contains_extent,
    contains_frequency,contains_instrument,contains_location,
    contains_manner,contains_medium,contains_mod,
    contains_mode,contains_name,contains_ord,
    contains_part,contains_path,contains_polarity,
    contains_polite,contains_poss,contains_purpose,
    contains_quant,contains_range,contains_scale,
    contains_source,contains_subevent,contains_time,
    contains_topic,contains_unit,contains_wiki
]

In [168]:
f = AMRBooleanFeature(dataset, amrs)

feature_results = {}
for featurizer in tqdm(featurizers):
    
    # feature info
    feature_name = featurizer.__name__
    print(f"Featurizer: {feature_name}")

    # create feature datasets
    f.extract_data_containing_feature(featurizer, 100)
    if not f.t_dataset:
        continue
    print(f"Dataset Size: {len(f.t_dataset)}")
    print(f"T Feature: {f.t_dataset[0]}")
    print(f"F Feature: {f.f_dataset[0]}")
    
    # evaluation
    results = []
    # evaluate original
    results.append(f.evaluate_original(pipe))
    # evaluate transformations
    for t in ts:
        f.generate_transformed_datasets(t)
        result = f.evaluate_transform(pipe)
        # print(result)
        t_df, f_df = f.get_prediction_details()
        result['t_df'] = t_df
        result['f_df'] = f_df
        results.append(result)
        
    df = pd.DataFrame(results)
    df['T_diff'] = df['T_orig_acc'] - df['T_tran_acc']
    df['F_diff'] = df['F_orig_acc'] - df['F_tran_acc']
        
    feature_results[feature_name] = df

  0%|          | 0/45 [00:00<?, ?it/s]

Featurizer: contains_imperative
Dataset Size: 100
T Feature: {'text': 'hide new secretions from the parental units ', 'label': [1.0, 0.0]}
F Feature: {'text': 'randolph ', 'label': [0.0, 1.0]}




Featurizer: contains_exlamation
Dataset Size: 100
T Feature: {'text': 'no lika da ', 'label': [1.0, 0.0]}
F Feature: {'text': "a little too ponderous to work as shallow entertainment , not remotely incisive enough to qualify as drama , monsoon wedding serves mostly to whet one 's appetite for the bollywood films . ", 'label': [1.0, 0.0]}




Featurizer: contains_negation
Dataset Size: 100
T Feature: {'text': 'contains no wit , only labored gags ', 'label': [1.0, 0.0]}
F Feature: {'text': 'right stuff ', 'label': [0.0, 1.0]}




Featurizer: contains_conjunctions
Dataset Size: 100
T Feature: {'text': 'contains no wit , only labored gags ', 'label': [1.0, 0.0]}
F Feature: {'text': 'warm water under a red bridge is a celebration of feminine energy , a tribute to the power of women to heal . ', 'label': [0.0, 1.0]}




Featurizer: contains_interrogative_clause
Dataset Size: 78
T Feature: {'text': 'the problem with the film is whether these ambitions , laudable in themselves , justify a theatrical simulation of the death camp of auschwitz ii-birkenau . ', 'label': [1.0, 0.0]}
F Feature: {'text': 'immature and unappealing ', 'label': [1.0, 0.0]}




Featurizer: contains_question
Dataset Size: 100
T Feature: {'text': "... a sour little movie at its core ; an exploration of the emptiness that underlay the relentless gaiety of the 1920 's ... the film 's ending has a `` what was it all for ? '' ", 'label': [1.0, 0.0]}
F Feature: {'text': 'of ` ethnic cleansing ', 'label': [1.0, 0.0]}




Featurizer: contains_coreferences
Dataset Size: 100
T Feature: {'text': "proves once again he has n't lost his touch , bringing off a superb performance in an admittedly middling film . ", 'label': [0.0, 1.0]}
F Feature: {'text': "has always been part of for the most part wilde 's droll whimsy helps `` being earnest '' overcome its weaknesses and parker 's creative interference ... ", 'label': [0.0, 1.0]}




Featurizer: contains_number
Dataset Size: 100
T Feature: {'text': "a depressed fifteen-year-old 's suicidal poetry ", 'label': [1.0, 0.0]}
F Feature: {'text': "it 's supposed to be a romantic comedy - it suffers from too much norma rae and not enough pretty woman . ", 'label': [1.0, 0.0]}




Featurizer: contains_accompanier
Dataset Size: 100
T Feature: {'text': "build some robots , haul 'em to the theater with you for the late show , and put on your own mystery science theatre 3000 tribute to what is almost certainly going to go down as the worst -- and only -- killer website movie of this or any other year ", 'label': [1.0, 0.0]}
F Feature: {'text': 'black hawk down with more heart ', 'label': [0.0, 1.0]}




Featurizer: contains_age
Dataset Size: 100
T Feature: {'text': "a depressed fifteen-year-old 's suicidal poetry ", 'label': [1.0, 0.0]}
F Feature: {'text': 'builds gradually until you feel fully embraced by this gentle comedy . ', 'label': [0.0, 1.0]}




Featurizer: contains_beneficiary
Dataset Size: 100
T Feature: {'text': 'pays earnest homage to turntablists ', 'label': [0.0, 1.0]}
F Feature: {'text': 'an unsophisticated sci-fi drama that takes itself all too seriously . ', 'label': [1.0, 0.0]}




Featurizer: contains_concession
Dataset Size: 100
T Feature: {'text': 'some movies suck you in despite their flaws , ', 'label': [0.0, 1.0]}
F Feature: {'text': 'only open new wounds ', 'label': [1.0, 0.0]}




Featurizer: contains_condition
Dataset Size: 100
T Feature: {'text': 'if anything , see it for karen black , who camps up a storm as a fringe feminist conspiracy theorist named dirty dick . ', 'label': [0.0, 1.0]}
F Feature: {'text': 'virtually nothing to show ', 'label': [1.0, 0.0]}




Featurizer: contains_consist_of
Dataset Size: 100
T Feature: {'text': 'enriched by an imaginatively mixed cast of antic spirits ', 'label': [0.0, 1.0]}
F Feature: {'text': 'the characters , cast in impossibly contrived situations , are totally estranged from reality . ', 'label': [1.0, 0.0]}




Featurizer: contains_degree
Dataset Size: 100
T Feature: {'text': 'that loves its characters and communicates something rather beautiful about human nature ', 'label': [0.0, 1.0]}
F Feature: {'text': ', home movie will leave you wanting more , not to mention leaving you with some laughs and a smile on your face . ', 'label': [0.0, 1.0]}




Featurizer: contains_destination
Dataset Size: 100
T Feature: {'text': 'can open the door to liberation . ', 'label': [0.0, 1.0]}
F Feature: {'text': "the little girls understand , and mccracken knows that 's all that matters . ", 'label': [0.0, 1.0]}




Featurizer: contains_direction
Dataset Size: 100
T Feature: {'text': 'fashioning an engrossing entertainment out ', 'label': [0.0, 1.0]}
F Feature: {'text': "the film runs on a little longer than it needs to -- muccino either does n't notice when his story ends or just ca n't tear himself away from the characters -- ", 'label': [1.0, 0.0]}




Featurizer: contains_domain
Dataset Size: 100
T Feature: {'text': 'the plot is nothing but boilerplate clichés from start to finish , ', 'label': [1.0, 0.0]}
F Feature: {'text': 'thank ', 'label': [0.0, 1.0]}




Featurizer: contains_duration
Dataset Size: 100
T Feature: {'text': 'remains utterly satisfied to remain the same throughout ', 'label': [1.0, 0.0]}
F Feature: {'text': 'plumbs uncharted depths of stupidity , incoherence and sub-sophomoric sexual banter . ', 'label': [1.0, 0.0]}




Featurizer: contains_example
Dataset Size: 100
T Feature: {'text': 'demonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small , personal film with an emotional wallop . ', 'label': [0.0, 1.0]}
F Feature: {'text': ", it 's because there 's no discernible feeling beneath the chest hair ", 'label': [1.0, 0.0]}




Featurizer: contains_extent
Dataset Size: 92
T Feature: {'text': 'wide-awake all the way through ', 'label': [0.0, 1.0]}
F Feature: {'text': "one of the year 's best films ", 'label': [0.0, 1.0]}




Featurizer: contains_frequency
Dataset Size: 100
T Feature: {'text': 'will find little of interest in this film , which is often preachy and poorly acted ', 'label': [1.0, 0.0]}
F Feature: {'text': 'luridly graphic and ', 'label': [1.0, 0.0]}




Featurizer: contains_instrument
Dataset Size: 100
T Feature: {'text': "delivers what it promises : a look at the `` wild ride '' that ensues when brash young men set out to conquer the online world with laptops , cell phones and sketchy business plans ", 'label': [0.0, 1.0]}
F Feature: {'text': 'in welcome perspective ', 'label': [0.0, 1.0]}




Featurizer: contains_location
Dataset Size: 100
T Feature: {'text': "the part where nothing 's happening , ", 'label': [1.0, 0.0]}
F Feature: {'text': 'a journey spanning nearly three decades of bittersweet camaraderie and history , in which we feel that we truly know what makes holly and marina tick ', 'label': [0.0, 1.0]}




Featurizer: contains_manner
Dataset Size: 100
T Feature: {'text': 'demonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small , personal film with an emotional wallop . ', 'label': [0.0, 1.0]}
F Feature: {'text': '... the efforts of its star , kline , to lend some dignity to a dumb story are for naught . ', 'label': [1.0, 0.0]}




Featurizer: contains_medium
Dataset Size: 100
T Feature: {'text': "build some robots , haul 'em to the theater with you for the late show , and put on your own mystery science theatre 3000 tribute to what is almost certainly going to go down as the worst -- and only -- killer website movie of this or any other year ", 'label': [1.0, 0.0]}
F Feature: {'text': 'prepare ', 'label': [0.0, 1.0]}




Featurizer: contains_mod
Dataset Size: 96
T Feature: {'text': 'contains no wit , only labored gags ', 'label': [1.0, 0.0]}
F Feature: {'text': 'far less sophisticated and ', 'label': [1.0, 0.0]}




Featurizer: contains_mode
Only 0 inputs contain the feature, skipping for insufficient data...
Featurizer: contains_name
Dataset Size: 100
T Feature: {'text': 'on the worst revenge-of-the-nerds clichés the filmmakers could dredge up ', 'label': [1.0, 0.0]}
F Feature: {'text': 'hip hop beat ', 'label': [0.0, 1.0]}




Featurizer: contains_ord
Dataset Size: 100
T Feature: {'text': 'covers this territory with wit and originality , suggesting that with his fourth feature ', 'label': [0.0, 1.0]}
F Feature: {'text': 'more than ably ', 'label': [0.0, 1.0]}




Featurizer: contains_part
Dataset Size: 100
T Feature: {'text': "swimming is above all about a young woman 's face , and by casting an actress whose face projects that woman 's doubts and yearnings , it succeeds . ", 'label': [0.0, 1.0]}
F Feature: {'text': 'heart as important as humor ', 'label': [0.0, 1.0]}




Featurizer: contains_path
Dataset Size: 100
T Feature: {'text': 'he appears miserable throughout as he swaggers through his scenes ', 'label': [1.0, 0.0]}
F Feature: {'text': "it 's funny and human and really pretty damned wonderful , all at once . ", 'label': [0.0, 1.0]}




Featurizer: contains_polarity
Dataset Size: 100
T Feature: {'text': 'have i seen a film so willing to champion the fallibility of the human heart ', 'label': [0.0, 1.0]}
F Feature: {'text': 'measured against practically any like-themed film other than its oscar-sweeping franchise predecessor the silence of the lambs , red dragon rates as an exceptional thriller . ', 'label': [0.0, 1.0]}




Featurizer: contains_polite
Only 0 inputs contain the feature, skipping for insufficient data...
Featurizer: contains_poss
Dataset Size: 100
T Feature: {'text': 'that loves its characters and communicates something rather beautiful about human nature ', 'label': [0.0, 1.0]}
F Feature: {'text': 'skip this dreck , ', 'label': [1.0, 0.0]}




Featurizer: contains_purpose
Dataset Size: 100
T Feature: {'text': 'very good viewing alternative ', 'label': [0.0, 1.0]}
F Feature: {'text': "'s no surprise that as a director washington demands and receives excellent performances , ", 'label': [0.0, 1.0]}




Featurizer: contains_quant
Dataset Size: 100
T Feature: {'text': 'lend some dignity to a dumb story ', 'label': [1.0, 0.0]}
F Feature: {'text': 'a markedly inactive film ', 'label': [1.0, 0.0]}




Featurizer: contains_range
Dataset Size: 7
T Feature: {'text': "the first bond movie in ages that is n't fake fun ", 'label': [0.0, 1.0]}
F Feature: {'text': 'like the english patient and the unbearable lightness of being ', 'label': [0.0, 1.0]}




Featurizer: contains_scale
Only 1 inputs contain the feature, skipping for insufficient data...
Featurizer: contains_source
Dataset Size: 100
T Feature: {'text': 'hate to tear your eyes away from the images long enough to read the subtitles ', 'label': [0.0, 1.0]}
F Feature: {'text': 'seems timely and important ', 'label': [0.0, 1.0]}




Featurizer: contains_subevent
Dataset Size: 89
T Feature: {'text': 'provide the funniest moments in this oddly sweet comedy about jokester highway patrolmen ', 'label': [0.0, 1.0]}
F Feature: {'text': 'though overall an overwhelmingly positive portrayal ', 'label': [0.0, 1.0]}




Featurizer: contains_time
Dataset Size: 100
T Feature: {'text': "for those moviegoers who complain that ` they do n't make movies like they used to anymore ", 'label': [1.0, 0.0]}
F Feature: {'text': 'things will turn out okay ', 'label': [0.0, 1.0]}




Featurizer: contains_topic
Dataset Size: 100
T Feature: {'text': 'that loves its characters and communicates something rather beautiful about human nature ', 'label': [0.0, 1.0]}
F Feature: {'text': 'human nature is a goofball movie , in the way that malkovich was , but it tries too hard ', 'label': [1.0, 0.0]}




Featurizer: contains_unit
Dataset Size: 100
T Feature: {'text': "a depressed fifteen-year-old 's suicidal poetry ", 'label': [1.0, 0.0]}
F Feature: {'text': 'an admittedly middling film ', 'label': [0.0, 1.0]}




Featurizer: contains_wiki
Only 0 inputs contain the feature, skipping for insufficient data...


In [181]:
# save dictionary as pickle file
pickle_out = open('analysis_results.pkl', 'wb')
pickle.dump(feature_results, pickle_out)
pickle_out.close()

In [None]:
# create new dictionary from pickle file
pickle_in = open('analysis_results.pkl', 'rb')
new_dict = pickle.load(pickle_in)

In [178]:
feature_results['contains_negation']

Unnamed: 0,transform,featurizer,num_samples,T_orig_acc,F_orig_acc,T_tran_acc,F_tran_acc,T_changed,F_changed,t_df,f_df,T_diff,F_diff
0,original,contains_negation,100,0.98,0.99,0.0,0.0,0.0,0.0,,,0.98,0.99
1,ExpandContractions,contains_negation,100,0.98,0.99,0.98,0.99,0.0,0.0,orig...,orig...,0.0,0.0
2,ContractContractions,contains_negation,100,0.98,0.99,0.97,0.99,0.1,0.03,orig...,orig...,0.01,0.0
3,AddPositiveEmoji,contains_negation,100,0.98,0.99,0.98,0.98,1.0,1.0,orig...,orig...,0.0,0.01
4,AddNegativeEmoji,contains_negation,100,0.98,0.99,0.98,0.98,1.0,1.0,orig...,orig...,0.0,0.01
5,AddNeutralEmoji,contains_negation,100,0.98,0.99,0.98,0.98,1.0,1.0,orig...,orig...,0.0,0.01
6,Demojify,contains_negation,100,0.98,0.99,0.98,0.99,0.0,0.0,orig...,orig...,0.0,0.0
7,RemovePositiveEmoji,contains_negation,100,0.98,0.99,0.98,0.99,0.0,0.0,orig...,orig...,0.0,0.0
8,RemoveNegativeEmoji,contains_negation,100,0.98,0.99,0.98,0.99,0.0,0.0,orig...,orig...,0.0,0.0
9,RemoveNeutralEmoji,contains_negation,100,0.98,0.99,0.98,0.99,0.0,0.0,orig...,orig...,0.0,0.0


#### Observations

Note: Significant differences are any greater than 10%

1. Relative to a no-transform baseline, the presence of a question in a text leads to more stable behavior in sibyl transforms.
   - Twelve (12) transforms induce significant accuracy changes for texts not containing a question.
   - Only six (6) transforms induce significant accuracy changes for texts containing a question.  
2. Four (4) transforms exhibit significant differences in impact for different `contains_question` features.
3. Six (6) transforms did not apply to the sampled dataset.

In [193]:
# significant impact 
df[df['T_diff'] > 0.1]

Unnamed: 0,transform,featurizer,T_orig_acc,F_orig_acc,T_tran_acc,F_tran_acc,T_changed,F_changed,t_df,f_df,T_diff,F_diff
0,original,contains_question,1.0,0.994624,0.0,0.0,0.0,0.0,,,1.0,0.994624
12,InsertPositivePhrase,contains_question,1.0,0.994624,0.586022,0.849462,1.0,1.0,ori...,ori...,0.413978,0.145161
28,ChangeAntonym,contains_question,1.0,0.994624,0.870968,0.650538,0.951613,0.876344,ori...,ori...,0.129032,0.344086
32,HomoglyphSwap,contains_question,1.0,0.994624,0.83871,0.510753,1.0,1.0,ori...,ori...,0.16129,0.483871
34,TextMix,contains_question,1.0,0.994624,0.897849,0.833333,1.0,1.0,ori...,ori...,0.102151,0.16129
35,SentMix,contains_question,1.0,0.994624,0.897849,0.822581,1.0,1.0,ori...,ori...,0.102151,0.172043
36,WordMix,contains_question,1.0,0.994624,0.811828,0.790323,1.0,1.0,ori...,ori...,0.188172,0.204301


In [194]:
df[df['F_diff'] > 0.1]

Unnamed: 0,transform,featurizer,T_orig_acc,F_orig_acc,T_tran_acc,F_tran_acc,T_changed,F_changed,t_df,f_df,T_diff,F_diff
0,original,contains_question,1.0,0.994624,0.0,0.0,0.0,0.0,,,1.0,0.994624
12,InsertPositivePhrase,contains_question,1.0,0.994624,0.586022,0.849462,1.0,1.0,ori...,ori...,0.413978,0.145161
13,InsertNegativePhrase,contains_question,1.0,0.994624,0.935484,0.741935,1.0,1.0,ori...,ori...,0.064516,0.252688
17,AddNegativeLink,contains_question,1.0,0.994624,0.951613,0.88172,1.0,1.0,ori...,ori...,0.048387,0.112903
19,AddNegation,contains_question,1.0,0.994624,0.973118,0.870968,0.435484,0.322581,ori...,ori...,0.026882,0.123656
27,ChangeSynonym,contains_question,1.0,0.994624,0.908602,0.83871,0.951613,0.908602,ori...,ori...,0.091398,0.155914
28,ChangeAntonym,contains_question,1.0,0.994624,0.870968,0.650538,0.951613,0.876344,ori...,ori...,0.129032,0.344086
29,ChangeHyponym,contains_question,1.0,0.994624,0.924731,0.849462,0.956989,0.844086,ori...,ori...,0.075269,0.145161
30,ChangeHypernym,contains_question,1.0,0.994624,0.930108,0.876344,0.956989,0.887097,ori...,ori...,0.069892,0.11828
32,HomoglyphSwap,contains_question,1.0,0.994624,0.83871,0.510753,1.0,1.0,ori...,ori...,0.16129,0.483871


In [195]:
# significant impact difference by feature
df[abs(df['T_diff'] - df['F_diff']) > 0.1]

Unnamed: 0,transform,featurizer,T_orig_acc,F_orig_acc,T_tran_acc,F_tran_acc,T_changed,F_changed,t_df,f_df,T_diff,F_diff
12,InsertPositivePhrase,contains_question,1.0,0.994624,0.586022,0.849462,1.0,1.0,ori...,ori...,0.413978,0.145161
13,InsertNegativePhrase,contains_question,1.0,0.994624,0.935484,0.741935,1.0,1.0,ori...,ori...,0.064516,0.252688
28,ChangeAntonym,contains_question,1.0,0.994624,0.870968,0.650538,0.951613,0.876344,ori...,ori...,0.129032,0.344086
32,HomoglyphSwap,contains_question,1.0,0.994624,0.83871,0.510753,1.0,1.0,ori...,ori...,0.16129,0.483871


In [128]:
# inapplicable transforms
df[(df['T_changed'] == 0) & (df['F_changed'] == 0)] 

Unnamed: 0,transform,featurizer,T_orig_acc,F_orig_acc,T_tran_acc,F_tran_acc,T_changed,F_changed,T_diff,F_diff
0,original,contains_question,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
1,ExpandContractions,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
6,Demojify,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
7,RemovePositiveEmoji,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
8,RemoveNegativeEmoji,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
9,RemoveNeutralEmoji,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
18,ImportLinkText,contains_question,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0


In [198]:
# InsertPositivePhrase Details
df.iloc[12].t_df.iloc[0].orig_t_text

"... a sour little movie at its core ; an exploration of the emptiness that underlay the relentless gaiety of the 1920 's ... the film 's ending has a `` what was it all for ? '' "

In [202]:
df.iloc[12].t_df.iloc[0].orig_t_label

[1.0, 0.0]

In [199]:
df.iloc[12].t_df.iloc[0].tran_t_text

"... a sour little movie at its core ; an exploration of the emptiness that underlay the relentless gaiety of the 1920 's ... the film 's ending has a `` what was it all for ? ''  That being said, I loved it."

In [200]:
df.iloc[12].t_df.iloc[0].tran_t_label

[0.8640776699029126, 0.13592233009708743]

In [204]:
df.iloc[12].t_df.iloc[0].orig_t_preds

array([9.99622107e-01, 3.77893448e-04])

In [203]:
df.iloc[12].t_df.iloc[0].tran_t_preds

array([0.96844983, 0.03155017])