In [None]:
import seaborn as sns
import json
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import re
import pathlib 

from data import get_source_triggers, split_by_intent

# One hex colour per plotted series; position i pairs with entry i of
# ``intents_and_functions`` below.
colors = [
    '#7f3b08', '#b35806', '#e08214', '#fdb863', '#8e0152', '#d73027',
    '#4575b4', '#b2abd2', '#8073ac', '#542788', '#2d004b',
]
# Earlier, name-based version of the list, kept for reference (presumably the
# numeric ids below stand for these intent names, position by position):
# intents_and_functions = ['play_radio', "FindManager", 'email_query', "Tomorrow", 'email_querycontact', "PlaceHasFeature", 'general_quirky', "DoNotConfirm", 'traffic', "FenceAttendee", "total"]
intents_and_functions = [
    50, "FindManager",
    15, "Tomorrow",
    16, "PlaceHasFeature",
    27, "DoNotConfirm",
    66, "FenceAttendee",
    "total",
]
# Series key -> colour, used as the seaborn ``palette`` in the plotting cell.
color_mapping = dict(zip(intents_and_functions, colors))


In [32]:
from dataflow.core.lispress import parse_lispress, render_compact
from dataflow.leaderboard.evaluate import evaluate_prediction_exact_match 
from dataflow.core.turn_prediction import TurnPrediction, TurnAnswer
from dataflow.core.dialogue import TurnId, ProgramExecutionOracle

def get_accuracy_intent(pred_file, intent, triggers):
    """Score intent predictions on examples that contain a trigger word.

    An example is scored only when its gold label is *not* ``intent`` while
    its whitespace-tokenised input still contains at least one of
    ``triggers``. All other examples are ignored.

    Args:
        pred_file: Path to a JSON file holding a list of dicts with the keys
            ``'true'`` (gold label), ``'input'`` (source string) and
            ``'pred'`` (sequence of scores; its argmax is taken as the
            predicted label).
        intent: Label whose own examples are excluded from scoring.
        triggers: Tokens to look for in the tokenised input.

    Returns:
        ``(acc, correct_examples, incorrect_examples, total)`` where ``acc``
        is the fraction of scored examples predicted correctly. Returns
        ``(np.nan, [], [], 0)`` when ``pred_file`` does not exist or when no
        example qualifies.
    """
    try:
        with open(pred_file) as f1:
            pred_data = json.load(f1)
    except FileNotFoundError:
        # Missing runs are reported and surfaced as NaN rather than aborting
        # the whole sweep.
        print(f"file {pred_file} doesn't exist")
        return np.nan, [], [], 0

    correct_examples = []
    incorrect_examples = []

    for example in pred_data:
        true_label = example['true']
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        source = re.split(r"\s+", example['input'])
        has_trigger = any(t in source for t in triggers)
        if true_label == intent or not has_trigger:
            continue

        max_pred = np.argmax(example['pred'])
        if max_pred == true_label:
            correct_examples.append(example)
        else:
            incorrect_examples.append(example)

    total = len(correct_examples) + len(incorrect_examples)
    if total == 0:
        return np.nan, [], [], 0
    acc = len(correct_examples) / total
    return acc, correct_examples, incorrect_examples, total




def get_accuracy_calflow(pred_tgt, true_src, true_tgt, fxn, triggers):
    """Exact-match accuracy on CalFlow examples that contain a trigger word.

    An example is scored only when ``fxn`` is *not* one of the
    whitespace-separated tokens of its gold program while its source
    utterance contains at least one of ``triggers``.

    Args:
        pred_tgt: Predicted programs, one string per example.
        true_src: Source utterances, aligned with ``pred_tgt``.
        true_tgt: Gold programs, aligned with ``pred_tgt``.
        fxn: Function name; examples whose gold program contains it as a
            token are skipped.
        triggers: Tokens to look for in the tokenised source utterance.

    Returns:
        ``(acc, correct_examples, incorrect_examples, total)``. Each example
        is a ``(TurnPrediction, src, tgt)`` tuple with stripped strings.
        Returns ``(np.nan, [], [], 0)`` when no example qualifies.

    Raises:
        AssertionError: if the three input sequences differ in length.
    """
    assert len(true_src) == len(true_tgt) == len(pred_tgt), \
        "pred_tgt, true_src and true_tgt must be aligned line by line"

    correct_examples = []
    incorrect_examples = []
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    whitespace = re.compile(r"\s+")

    for pred, src, tgt in zip(pred_tgt, true_src, true_tgt):
        pred, src, tgt = pred.strip(), src.strip(), tgt.strip()
        try:
            pred_str = render_compact(parse_lispress(pred))
        except Exception:
            # A malformed prediction counts as wrong rather than aborting.
            # Catching Exception (not a bare except) lets Ctrl-C through.
            pred_str = "(ERROR)"
        # The gold program is expected to parse; a failure here propagates.
        true_str = render_compact(parse_lispress(tgt))
        turn_pred = TurnPrediction(TurnId("test", 0), src, pred_str)
        turn_true = TurnAnswer(TurnId("test", 0), src, true_str, ProgramExecutionOracle(False, True))

        # Only the strict match is used; the second returned value is ignored.
        is_correct, _ = evaluate_prediction_exact_match(turn_pred, turn_true)

        # NOTE(review): this is a plain whitespace split, so `fxn` matches only
        # when it stands alone as a token (no attached parenthesis) -- confirm
        # the .tgt files are tokenised that way.
        has_fxn = fxn in whitespace.split(tgt)
        src_tok = whitespace.split(src)
        has_trigger = any(t in src_tok for t in triggers)
        if has_fxn or not has_trigger:
            continue

        # Stored tuples carry the TurnPrediction object, as before.
        if is_correct:
            correct_examples.append((turn_pred, src, tgt))
        else:
            incorrect_examples.append((turn_pred, src, tgt))

    total = len(correct_examples) + len(incorrect_examples)
    if total == 0:
        return np.nan, [], [], 0
    acc = len(correct_examples) / total
    return acc, correct_examples, incorrect_examples, total

In [None]:

def collect_intent(paths_and_types, splits = [750, 1500, 3000, 7500, 15000, 18000], fxn_splits = [15, 30, 75], intents=[50, 66, 16, 27, 15], seeds=[12, 31, 64], 
                    train_data_path = "/home/estengel/incremental-function-learning/intent/data/nlu_eval_data/"):
    """Collect trigger-word accuracies for every intent run in a sweep.

    For each (path, intent, seed, split, fxn_split) combination this rebuilds
    the training split, derives its source triggers, and scores the
    predictions stored at
    ``<path>/<intent>/<seed>_seed/<split>_<fxn_split>/test_predictions.json``.

    Args:
        paths_and_types: Iterable of ``(results_dir, type_name)`` pairs.
        splits: Training-set sizes to sweep over.
        fxn_splits: Example counts for the held-out intent to sweep over.
        intents: Intent ids to sweep over.
        seeds: Random seeds to sweep over.
        train_data_path: Directory passed to ``split_by_intent``.

    Returns:
        DataFrame with one row per combination and the columns ``fxn``,
        ``type``, ``train`` (split as a string), ``examples``, ``seed``,
        ``acc`` and ``total``. Missing prediction files yield ``acc`` = NaN.
    """
    columns = ["fxn", "type", "train", "examples", "seed", "acc", "total"]
    # Rows are accumulated in a list and turned into a frame once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    records = []

    for path, type_name in paths_and_types:
        path = pathlib.Path(path)
        for intent in intents:
            for seed in seeds:
                for split in splits:
                    for fxn_split in fxn_splits:
                        pred_path = path.joinpath(str(intent), f"{seed}_seed", f"{split}_{fxn_split}", "test_predictions.json")
                        # Triggers are derived from the training split used for this run.
                        train_data, __, __ = split_by_intent(train_data_path, intent, split, fxn_split)
                        triggers = get_source_triggers(train_data, intent)
                        acc, correct, incorrect, total = get_accuracy_intent(pred_path, intent, triggers)
                        records.append({"fxn": intent, "train": str(split), "examples": fxn_split, "type": type_name, "seed": seed, "acc": acc, "total": total})

    return pd.DataFrame(records, columns=columns)


# Sweep the intent-classification results for both model variants.
df = collect_intent(
    [
        ("/brtx/603-nvme1/estengel/intent_fixed_test/intent/", "baseline"),
        ("/brtx/603-nvme1/estengel/intent_fixed_test/intent_no_source/", "no_source"),
    ]
)



In [33]:

# Hand-picked source-side trigger tokens for each CalFlow function; read by
# ``collect_calflow`` to decide which test utterances to score.
trigger_lookup = {
    "FindManager": ["boss", "manager", "supervisor"],
    "Tomorrow": ["tomorrow"],
    "DoNotConfirm": ["cancel", "n't", "no"],
    "PlaceHasFeature": ["takeout", "casual", "waiter"],
    "FenceAttendee": ["mom", "meet"],
}

def collect_calflow(paths_and_types, splits = [5000, 10000, 20000, 50000, 100000, "max"], fxn_splits = [100], fxns=["FindManager", "Tomorrow", "DoNotConfirm", "FenceAttendee", "PlaceHasFeature"], seeds=[12, 31, 64], 
                    train_data_path = "/brtx/601-nvme1/estengel/resources/data/smcalflow.agent.data/"):
    """Collect trigger-word exact-match accuracies for every CalFlow run.

    Gold sources and targets are read once from ``test_valid.src`` and
    ``test_valid.tgt`` under ``train_data_path``. For each
    (path, fxn, seed, split, fxn_split) combination the prediction file is
    looked up under two naming schemes and scored with
    ``get_accuracy_calflow`` using the triggers in ``trigger_lookup[fxn]``.

    Args:
        paths_and_types: Iterable of ``(results_dir, type_name)`` pairs.
        splits: Training-set sizes (or ``"max"``) to sweep over.
        fxn_splits: Example counts for the held-out function to sweep over.
        fxns: Function names to sweep over; each must be a key of
            ``trigger_lookup``.
        seeds: Random seeds to sweep over.
        train_data_path: Directory holding the gold ``test_valid`` files.

    Returns:
        DataFrame with one row per prediction file found and the columns
        ``fxn``, ``type``, ``train`` (split as a string), ``examples``,
        ``seed``, ``acc`` and ``total``. Combinations with no prediction file
        are reported on stdout and skipped.
    """
    columns = ["fxn", "type", "train", "examples", "seed", "acc", "total"]
    # Rows are accumulated in a list and turned into a frame once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    records = []

    train_data_path = pathlib.Path(train_data_path)

    true_tgt_path = train_data_path.joinpath("test_valid.tgt")
    true_src_path = train_data_path.joinpath("test_valid.src")
    with open(true_src_path) as tsf, open(true_tgt_path) as ttf:
        true_src = tsf.readlines()
        true_tgt = ttf.readlines()

    for path, type_name in paths_and_types:
        path = pathlib.Path(path)
        for fxn in fxns:
            for seed in seeds:
                for split in splits:
                    for fxn_split in fxn_splits:
                        # Two file-naming schemes exist; try the "<...>_<seed>_seed" one first.
                        pred_tgt_path = path.joinpath(f"{fxn}_{split}_{fxn_split}_{seed}_seed_test_valid.tgt")
                        if not pred_tgt_path.exists():
                            pred_tgt_path = path.joinpath(f"{fxn}_{seed}_{split}_{fxn_split}_test_valid.tgt")
                            if not pred_tgt_path.exists():
                                print(f"Missing {pred_tgt_path}")
                                continue
                        with open(pred_tgt_path) as ptf:
                            pred_tgt = ptf.readlines()
                        triggers = trigger_lookup[fxn]
                        acc, correct, incorrect, total = get_accuracy_calflow(pred_tgt, true_src, true_tgt, fxn, triggers)

                        records.append({"fxn": fxn, "train": str(split), "examples": fxn_split, "type": type_name, "seed": seed, "acc": acc, "total": total})

    return pd.DataFrame(records, columns=columns)


# Sweep the CalFlow results for both model variants over all five functions.
calflow_df = collect_calflow(
    [
        ("/home/estengel/papers/incremental_function_learning/results/transformer", "baseline"),
        ("/home/estengel/papers/incremental_function_learning/results/no_source", "no_source"),
    ],
    fxns=["FenceAttendee", "FindManager", "DoNotConfirm", "PlaceHasFeature", "Tomorrow"],
)



Missing /home/estengel/papers/incremental_function_learning/results/transformer/FenceAttendee_12_5000_100_test_valid.tgt
(let (x0 (DateAtTimeWithDefaults (NextDOW (Tuesday)) (NumberAM 9L))) (Yield (CreateCommitEventWrapper (CreatePreflightEventWrapper (& (& (Event.subject_? (?= "Work")) (Event.start_? (?= x0))) (Event.end_? (?= (TimeAfterDateTime x0 (NumberPM 5L)))))))))
(let (x0 (DateAtTimeWithDefaults (NextDOW (Tuesday)) (NumberAM 9L))) (Yield (CreateCommitEventWrapper (CreatePreflightEventWrapper (& (& (Event.subject_? (?= " Work ")) (Event.start_? (?= x0))) (Event.end_? (?= (TimeAfterDateTime x0 (NumberPM 5L)))))))))
True


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:

# sub_df = sub_df[sub_df['fxn'] == 66]
# sns.lineplot(data=sub_df, x="train", y='acc', hue='fxn', style="type", palette=color_mapping, err_style=None)

In [None]:

# Dump the CalFlow results before plotting.
print(calflow_df)

# Two side-by-side panels: intent classification (left), CalFlow (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(4, 4))

# Left panel: keep only the intent runs trained with 30 examples.
intent_df = df.loc[df['examples'] == 30]
sns.barplot(
    data=intent_df,
    x="type",
    y="acc",
    hue="fxn",
    palette=color_mapping,
    ax=ax[0],
)
sns.despine()

# Right panel: all CalFlow runs; the baseline rows are printed for inspection.
print(calflow_df.loc[calflow_df['type'] == 'baseline'])
sns.barplot(
    data=calflow_df,
    x="type",
    y="acc",
    hue="fxn",
    palette=color_mapping,
    ax=ax[1],
)
sns.despine()

plt.savefig("/home/estengel/papers/incremental_function_learning/figures/for_paper/difficult_examples.pdf")

In [24]:

from source_lookup import get_probs, tokenize

# Inspect, for one intent, which source triggers are selected at each
# training-set size and the value stored for each in `prob_intent_given_word`.
intent = 66
splits = [750, 1500, 3000, 7500, 15000, 18000]
fxn_split=75
train_path = "/home/estengel/incremental-function-learning/intent/data/nlu_eval_data/" 
# Earlier approach (disabled): load the full training file directly.
#with open("/home/estengel/incremental-function-learning/intent/data/nlu_eval_data/train.json") as f1:
#    train_data = json.load(f1)

for split in splits:
    # Rebuild the training split for this size; only the first returned item is used.
    train_data, __, __ = split_by_intent(train_path, intent, split, fxn_split)
    train_data = tokenize(train_data)

    # NOTE(review): judging by the names, these are P(intent | word) and
    # P(word | intent) tables -- confirm against source_lookup.get_probs.
    prob_intent_given_word, prob_word_given_intent = get_probs(train_data, exclude_function=True)

    triggers = get_source_triggers(train_data, intent)
    print(split, triggers)
    for trig in triggers:
        # One tab-indented line per trigger: its entry for the chosen intent.
        print(f"\t{trig}: {prob_intent_given_word[trig][intent]}") 


There are 17 instances of 66 in dev and 46 in test
750 ['there', 'now', 'traffic']
	there: 0.47058823529411764
	now: 0.6
	traffic: 1.0
There are 17 instances of 66 in dev and 46 in test
1500 ['current', 'there', 'traffic']
	current: 0.34782608695652173
	there: 0.4482758620689655
	traffic: 0.9824561403508771
There are 17 instances of 66 in dev and 46 in test
3000 ['how', 'there', 'traffic']
	how: 0.14018691588785046
	there: 0.22580645161290322
	traffic: 0.9827586206896551
There are 17 instances of 66 in dev and 46 in test
7500 ['current', 'there', 'traffic']
	current: 0.08247422680412371
	there: 0.08955223880597014
	traffic: 0.9692307692307692
There are 17 instances of 66 in dev and 46 in test
15000 ['now', 'there', 'traffic']
	now: 0.049107142857142856
	there: 0.07234042553191489
	traffic: 0.9344262295081968
There are 17 instances of 66 in dev and 46 in test
18000 ['now', 'there', 'traffic']
	now: 0.03214285714285714
	there: 0.050359712230215826
	traffic: 0.9384615384615385


In [None]:
# Spot-check the entry for the token 'fm' and the current `intent`. This reads
# `prob_intent_given_word` as left by the last iteration of the loop in the
# previous cell, i.e. the largest split.
# NOTE(review): may fail if 'fm' is absent from the table -- confirm.
prob_intent_given_word['fm'][intent]
