In [61]:
import matplotlib.pyplot as plt
import pandas as pd
from experiments import generate
from tqdm import tqdm

def biggest_jump(data, col):
    """Return the largest absolute change between consecutive values of ``data[col]``.

    The walk starts from a baseline of 1, so the distance between 1 and the
    first value counts as a step as well.

    Returns:
        The largest absolute step seen (0 when the column is empty), or -1 as
        soon as a value greater than 1 is encountered.
    """
    previous = 1
    largest = 0
    for value in data[col]:
        # A value above 1 invalidates the whole series: report the -1 sentinel.
        if value > 1:
            return -1
        step = abs(previous - value)
        if step > largest:
            largest = step
        previous = value
    return largest

def jump_to_zero(data, col, theta=0):
    """Return the position of the first drop larger than ``theta`` in ``data[col]``.

    A drop is ``previous - current`` between consecutive values. The first
    value is compared with itself, so it only counts as a drop when ``theta``
    is negative.

    Args:
        data: mapping / frame-like object indexed by column name.
        col: name of the column to scan.
        theta: a step must fall by strictly more than this to count.

    Returns:
        The 0-based position of the first qualifying drop, or -1 when there is
        none, when a value greater than 1 is met first, or when the column is
        empty.
    """
    previous = None
    for position, value in enumerate(data[col]):
        # A value above 1 invalidates the whole series: report the -1 sentinel.
        if value > 1:
            return -1
        if position == 0:
            # Seed the comparison from the first *iterated* value instead of
            # ``data[col][0]``: that lookup is label-based on a pandas Series
            # (fails when the index does not contain 0) and raises on an
            # empty column.
            previous = value
        if previous - value > theta:
            return position
        previous = value
    return -1

def run(gr, n=25, k=13):
    """Repeat ``generate.simple_test`` and collect the per-run results.

    The number of repetitions is read from the module-level name ``reps``,
    which must be defined before this function is called.

    For every run, the first element of the result (presumably a pandas
    DataFrame -- confirm against ``generate.simple_test``) has its ``'inc'``
    column replaced by its cumulative sum divided by ``n``. The replacement is
    done in place, so the same modified object is also reachable through the
    returned raw results.

    Returns:
        ``(frames, outcomes)``: the list of per-run first elements and the
        list of complete ``simple_test`` results, in run order.
    """
    frames, outcomes = [], []
    for _ in tqdm(range(reps)):
        outcome = generate.simple_test(n=n, k=k, gr=gr, verbose=False)
        outcomes.append(outcome)
        frame = outcome[0]
        # Turn the per-step increments into a running total scaled by n.
        frame['inc'] = frame['inc'].cumsum() / n
        frames.append(frame)
    return frames, outcomes


def not_zero(x):
    """Return the elements of ``x`` that are not equal to 0, as a list."""
    return [a for a in x if a != 0]


def positive(x):
    """Return the elements of ``x`` that are >= 0 (zero is kept), as a list."""
    return [a for a in x if a >= 0]


def avg_not_zero(x):
    """Return the mean of the non-zero elements of ``x`` (0 when there are none)."""
    kept = not_zero(x)
    # The original passed two arguments to len() (misplaced parenthesis) and
    # therefore always raised TypeError; the guard belongs to max().
    return sum(kept) / max(len(kept), 1)


def avg_positive(x):
    """Return the mean of the elements of ``x`` that are >= 0 (0 when there are none)."""
    kept = positive(x)
    return sum(kept) / max(len(kept), 1)

def process(dfs, col, theta=0):
    """Print summary statistics of column ``col`` over all runs in ``dfs``.

    For every element of ``dfs`` this computes ``biggest_jump`` and
    ``jump_to_zero`` (with threshold ``theta``) and prints one line with:

    1. ``col``
    2. the mean biggest jump over the runs whose biggest jump is > 0
    3. the mean drop position over the runs that have a drop (position >= 0)
    4. the number of runs that have a drop
    5. the number of "missed" runs, i.e. runs whose biggest jump is <= 0
       (this includes the -1 sentinel returned by ``biggest_jump``)

    Nothing is returned.
    """
    biggest, drop = [], []
    missed = 0
    for x in dfs:
        j = biggest_jump(x, col)
        if j > 0:
            biggest.append(j)
        else:
            missed += 1
        drop.append(jump_to_zero(x, col, theta))
    # Guard the mean the same way avg_positive does: when every run was
    # missed (or dfs is empty) report 0 instead of raising ZeroDivisionError.
    avg_biggest = sum(biggest) / max(len(biggest), 1)
    print(col, avg_biggest, avg_positive(drop), len(positive(drop)), missed)

def analyse(dfs, theta=0):
    """Print the ``process`` summary line for each of the three score columns.

    ``theta`` is forwarded unchanged to ``process``. Nothing is returned.
    """
    for column in "tbleu1-base,tbleu0-base,smatch-base".split(","):
        process(dfs, column, theta)

In [62]:
# Number of repetitions per experiment; read as a global by run().
reps = 100

In [63]:
gr = generate.random_graph_generator()
# The experiment is slow, so reuse a result already present in the kernel;
# on a fresh kernel run it instead of failing with NameError on `normal`.
if "normal" not in globals():
    normal, normal_res = run(gr)
analyse(normal, theta=0.2)

tbleu1-base 0.19250077705800922 4.580645161290323 31 0
tbleu0-base 0.10853161169518648 4.0 3 0
smatch-base 0.13664932596894416 7.0 1 67


In [64]:
gr = generate.random_graph_generator(strategy=generate.STRATEGY["ADD"])
# The experiment is slow, so reuse a result already present in the kernel;
# on a fresh kernel run it instead of failing with NameError on `add`.
if "add" not in globals():
    add, add_res = run(gr)
# NOTE: analyse() only prints and has no return statement, so add_analysis is None.
add_analysis = analyse(add)

tbleu1-base 0.1006795451411029 1.0101010101010102 99 0
tbleu0-base 0.10061028297633175 1.0101010101010102 99 0
smatch-base 0.11919899561929537 1.1764705882352942 34 70


In [65]:
gr = generate.random_graph_generator(strategy=generate.STRATEGY["CADD"])
# The experiment is slow, so reuse a result already present in the kernel;
# on a fresh kernel run it instead of failing with NameError on `cadd`.
if "cadd" not in globals():
    cadd, cadd_res = run(gr)
# NOTE: analyse() only prints and has no return statement, so cadd_analysis is None.
cadd_analysis = analyse(cadd)

tbleu1-base 0.09780432519386534 1.0 100 0
tbleu0-base 0.09780432519386534 1.0 100 0
smatch-base 0.11985722645502492 1.12 25 78


In [66]:
gr = generate.random_graph_generator(strategy=generate.STRATEGY["RELABEL"])
# The experiment is slow, so reuse a result already present in the kernel;
# on a fresh kernel run it instead of failing with NameError on `relabel`.
if "relabel" not in globals():
    relabel, relabel_res = run(gr)
# NOTE: analyse() only prints and has no return statement, so relabel_analysis is None.
relabel_analysis = analyse(relabel, theta=0.2)

tbleu1-base 0.26842965694025106 3.9827586206896552 58 0
tbleu0-base 0.15857692655681788 8.384615384615385 13 0
smatch-base 0.1634615384615385 9.0 1 96
