# Author: ddukic

In [1]:
import numpy as np
import wandb
import sys

# Put the parent directory on the module search path so that modules living
# one level up can be imported from this notebook.
sys.path.append("../")

# Shared Weights & Biases API client; fetch_run_summary() uses it to look up
# runs.
api = wandb.Api()


def fetch_run_summary(run_url):
    """Return the summary of the W&B run that ``run_url`` points to.

    Only the last segment of the URL path is used; it is appended to the
    fixed ``ddukic/generative-ie-paper/`` prefix and looked up through the
    module-level ``api`` client.

    Args:
        run_url: URL of a run whose last path segment is the run id, e.g.
            ``https://wandb.ai/<entity>/<project>/runs/<run_id>``. A bare
            run id is accepted as well.

    Returns:
        The ``summary`` attribute of the object returned by ``api.run``.
    """
    from urllib.parse import urlsplit

    # Work on the path component only: a query string such as
    # "?workspace=..." or a trailing slash copied from the browser would
    # otherwise leak into (or blank out) the run id.
    run_key = urlsplit(run_url).path.rstrip("/").rsplit("/", 1)[-1]
    return api.run("ddukic/generative-ie-paper/" + run_key).summary

In [2]:
def get_data(url):
    """Map every 4-bit configuration tuple to its average F1 from a run.

    The run summary is fetched with ``fetch_run_summary``. Every summary
    entry whose key contains ``"unlock_config"`` contributes its
    ``"f1_avg"`` value. The values are ordered by the integer that follows
    the last underscore of the key and then paired, in that order, with the
    tuples produced by ``product(range(2), repeat=4)``, i.e.
    ``(0, 0, 0, 0)`` up to ``(1, 1, 1, 1)``.

    Args:
        url: URL of the run, passed unchanged to ``fetch_run_summary``.

    Returns:
        A dict from configuration tuple to average F1. ``zip`` stops at the
        shorter input, so surplus configurations or scores are dropped.
    """
    from itertools import product

    def config_index(entry):
        # entry is a (key, score) pair; the key ends in "_<integer>".
        return int(entry[0].split("_")[-1])

    f1_by_key = {}
    for key, value in fetch_run_summary(url).items():
        if "unlock_config" in key:
            f1_by_key[key] = value["f1_avg"]

    ordered_scores = [
        score for _, score in sorted(f1_by_key.items(), key=config_index)
    ]
    configs = list(product(range(2), repeat=4))

    return dict(zip(configs, ordered_scores))

In [17]:
def get_correct_combos(current_config, include_0000=False):
    """List the configurations that ``current_config`` is compared against.

    Let ``p`` be the index of the right-most 1 in ``current_config``. A
    4-bit tuple qualifies when it

    * has a 1 at position ``p``,
    * has at least one 1 to the right of ``p``,
    * equals ``current_config`` on every position left of ``p``, and
    * differs from ``current_config``.

    Args:
        current_config: Tuple of four 0/1 flags.
        include_0000: When true and ``current_config`` is ``(0, 0, 0, 0)``,
            return every other configuration instead of an empty list.

    Returns:
        The qualifying tuples in ``product(range(2), repeat=4)`` order. The
        list is empty when ``current_config`` contains no 1 (unless
        ``include_0000`` applies).
    """
    from itertools import product

    all_configs = list(product(range(2), repeat=4))
    one_positions = [idx for idx in range(4) if current_config[idx] == 1]

    # Special case: the all-zero configuration may be paired with everything.
    if current_config == (0, 0, 0, 0) and include_0000:
        return [cfg for cfg in all_configs if cfg != current_config]

    if not one_positions:
        return []

    pivot = one_positions[-1]
    prefix = current_config[:pivot]

    def extends_current(cfg):
        # Keeps the pivot bit and the prefix, and sets something further right.
        return (
            cfg[pivot] == 1
            and 1 in cfg[pivot + 1:]
            and cfg != current_config
            and cfg[:pivot] == prefix
        )

    return [cfg for cfg in all_configs if extends_current(cfg)]

In [18]:
def test_differences(url, alpha=0.01):
    """Run a one-sided paired t-test over configuration pairs of one run.

    For every configuration returned by ``get_data(url)``, the
    configurations from ``get_correct_combos`` are looked up and one
    ``(before, after)`` score pair is recorded per combination. The pairs
    are then compared with ``scipy.stats.ttest_rel`` using
    ``alternative="less"``, i.e. the alternative hypothesis is that the
    "before" scores are on average lower than the "after" scores.

    Each configuration and its combinations are printed, followed by the
    test verdict.

    Args:
        url: URL of the run, passed to ``get_data``.
        alpha: Significance level; the null hypothesis is rejected when the
            p-value is below it. Defaults to 0.01.
    """
    from scipy.stats import ttest_rel

    data = get_data(url)

    before = []
    after = []

    for config, score in data.items():
        combos = get_correct_combos(config)

        print(config, combos)

        # No emptiness guard is needed: iterating an empty list is a no-op.
        # (The previous ``combos is not []`` identity check was always true.)
        for combo in combos:
            before.append(score)
            after.append(data[combo])

    # Alternative hypothesis: the mean of the paired differences
    # (before - after) is negative.
    _, p_value = ttest_rel(before, after, alternative="less")

    if p_value < alpha:
        print("Reject null hypothesis")
    else:
        print("Failed to reject")

In [19]:
# (validation run, test run) URLs per task, in the order they are reported.
runs_by_task = {
    "NER": (
        "https://wandb.ai/ddukic/generative-ie-paper/runs/c14fwgh8",
        "https://wandb.ai/ddukic/generative-ie-paper/runs/kjf652rs",
    ),
    "ABSA": (
        "https://wandb.ai/ddukic/generative-ie-paper/runs/4ny7ljyz",
        "https://wandb.ai/ddukic/generative-ie-paper/runs/lplcw1aa",
    ),
    "ACE05": (
        "https://wandb.ai/ddukic/generative-ie-paper/runs/qrwv2h9x",
        "https://wandb.ai/ddukic/generative-ie-paper/runs/293m18vj",
    ),
    "Chunking": (
        "https://wandb.ai/ddukic/generative-ie-paper/runs/oi5njdj6",
        "https://wandb.ai/ddukic/generative-ie-paper/runs/nhuaxqcp",
    ),
}

# Run the significance test on the validation run, then on the test run,
# printing a header before each.
for task_name, (url_valid, url_test) in runs_by_task.items():
    print(f"{task_name} valid")

    test_differences(url_valid)

    print(f"{task_name} test")

    test_differences(url_test)

NER valid
(0, 0, 0, 0) []
(0, 0, 0, 1) []
(0, 0, 1, 0) [(0, 0, 1, 1)]
(0, 0, 1, 1) []
(0, 1, 0, 0) [(0, 1, 0, 1), (0, 1, 1, 0), (0, 1, 1, 1)]
(0, 1, 0, 1) []
(0, 1, 1, 0) [(0, 1, 1, 1)]
(0, 1, 1, 1) []
(1, 0, 0, 0) [(1, 0, 0, 1), (1, 0, 1, 0), (1, 0, 1, 1), (1, 1, 0, 0), (1, 1, 0, 1), (1, 1, 1, 0), (1, 1, 1, 1)]
(1, 0, 0, 1) []
(1, 0, 1, 0) [(1, 0, 1, 1)]
(1, 0, 1, 1) []
(1, 1, 0, 0) [(1, 1, 0, 1), (1, 1, 1, 0), (1, 1, 1, 1)]
(1, 1, 0, 1) []
(1, 1, 1, 0) [(1, 1, 1, 1)]
(1, 1, 1, 1) []
Reject null hypothesis
NER test
(0, 0, 0, 0) []
(0, 0, 0, 1) []
(0, 0, 1, 0) [(0, 0, 1, 1)]
(0, 0, 1, 1) []
(0, 1, 0, 0) [(0, 1, 0, 1), (0, 1, 1, 0), (0, 1, 1, 1)]
(0, 1, 0, 1) []
(0, 1, 1, 0) [(0, 1, 1, 1)]
(0, 1, 1, 1) []
(1, 0, 0, 0) [(1, 0, 0, 1), (1, 0, 1, 0), (1, 0, 1, 1), (1, 1, 0, 0), (1, 1, 0, 1), (1, 1, 1, 0), (1, 1, 1, 1)]
(1, 0, 0, 1) []
(1, 0, 1, 0) [(1, 0, 1, 1)]
(1, 0, 1, 1) []
(1, 1, 0, 0) [(1, 1, 0, 1), (1, 1, 1, 0), (1, 1, 1, 1)]
(1, 1, 0, 1) []
(1, 1, 1, 0) [(1, 1, 1, 1)]
(1, 1, 1, 1) [