In [39]:
# pip install altair vega_datasets pandas
!cd ~/SemanticDebugger/
!pwd
!which python

/private/home/yuchenlin/SemanticDebugger/semanticdebugger/debug_algs
/usr/bin/python


In [40]:
import numpy as np 
import pandas as pd
import json 
import os
import altair as alt 



In [41]:
# Load the online-debugging result JSON.
# The path is relative to the notebook's working directory.
path = "../../logs/nq_dev_online_debug_result.json"
assert os.path.exists(path), f"Result file not found: {path}"
# Use a context manager so the file handle is closed as soon as parsing ends
# (the bare `json.load(open(path))` form left the handle open).
with open(path) as result_file:
    online_debug_result = json.load(result_file)


## Forgetting Curve on Passes

In [42]:
# Show which result groups are available in the loaded JSON.
print(online_debug_result.keys())

# Forgetting curve on the sampled passes: one row per timecode, holding the
# EM and QA-F1 scores read from the first element of each (pair) entry.
# The second element of each pair is not needed here.
pass_forgetting_data = pd.DataFrame(
    [
        {"timecode": step, "em": scores["EM"], "f1": scores["QA-F1"]}
        for step, (scores, _details) in enumerate(online_debug_result["res_on_passes"])
    ]
)
print(pass_forgetting_data)

dict_keys(['res_on_bugs', 'res_on_passes', 'em_prefixed_bugs', 'f1_prefixed_bugs', 'em_fixed_bugs', 'f1_fixed_bugs', 'forgotten_passes'])
    timecode        em        f1
0          0  1.000000  1.000000
1          1  0.937500  0.977865
2          2  0.984375  0.996094
3          3  1.000000  1.000000
4          4  0.984375  0.996094
5          5  0.953125  0.984189
6          6  0.906250  0.965723
7          7  0.937500  0.977628
8          8  0.906250  0.968814
9          9  0.859375  0.941284
10        10  0.812500  0.904230
11        11  0.781250  0.878389
12        12  0.718750  0.831862
13        13  0.656250  0.803950
14        14  0.625000  0.781629
15        15  0.687500  0.817942
16        16  0.734375  0.846588
17        17  0.750000  0.854009
18        18  0.734375  0.842923
19        19  0.718750  0.839277
20        20  0.781250  0.885110
21        21  0.843750  0.914203
22        22  0.937500  0.983337
23        23  0.875000  0.937020
24        24  0.828125  0.917923
25  

In [43]:

# Axis encodings for the forgetting-curve plot: ordinal timecode on x,
# one quantitative y-axis per metric with a fixed domain.
x = alt.X("timecode:O", title="Timecode")
y_em = alt.Y("em:Q", title="EM", scale=alt.Scale(domain=[0.5, 1.0]))
y_f1 = alt.Y("f1:Q", title="F1", scale=alt.Scale(domain=[0.6, 1.0]))

# Both curves use the same data and mark; only the y-channel and color differ.
forgetting_base = alt.Chart(pass_forgetting_data).mark_line(
    interpolate="natural",
    point=True,
)
em_line = forgetting_base.encode(
    x=x,
    y=y_em,
    opacity=alt.value(0.8),
    color=alt.value("red"),
)
f1_line = forgetting_base.encode(
    x=x,
    y=y_f1,
    opacity=alt.value(0.8),
    color=alt.value("blue"),
)

# Only the EM curve is layered into the displayed figure; f1_line is built
# above but is not part of the layer.
(
    alt.layer(em_line)
    .resolve_scale(y="independent")
    .properties(width=1000)
    .configure_axis(labelFontSize=18, titleFontSize=16)
)



## Error-Fixing Rate


In [45]:
 
# Show which result groups are available in the loaded JSON.
print(online_debug_result.keys())

# Build one record per timecode summarizing the scores on that step's bugs
# before/after the update, plus how many of them were fixed.
error_fixing_data = []
bsz = 20  # batch size used as the fix-rate base — TODO confirm it matches the run config

per_step_results = zip(
    online_debug_result["res_on_bugs"],
    online_debug_result["em_fixed_bugs"],
    online_debug_result["f1_fixed_bugs"],
    online_debug_result["em_prefixed_bugs"],
    online_debug_result["f1_prefixed_bugs"],
)
for timecode, ((before, after), em_fixed, f1_fixed, em_prefixed, f1_prefixed) in enumerate(per_step_results):
    em_before = before["EM"]
    em_after = after["EM"]
    f1_before = before["QA-F1"]
    f1_after = after["QA-F1"]
    em_prefixed_num = len(em_prefixed)
    em_fixed_num = len(em_fixed)

    # Fix rate = fixed / (batch size - prefixed).
    # (presumably "prefixed" bugs were already correct before the update — verify)
    # When no bugs remain (prefixed count >= bsz) the rate is undefined; record
    # NaN instead of raising ZeroDivisionError or reporting a negative rate.
    em_remaining = bsz - len(em_prefixed)
    f1_remaining = bsz - len(f1_prefixed)
    em_fix_rate = len(em_fixed) / em_remaining if em_remaining > 0 else float("nan")
    f1_fix_rate = len(f1_fixed) / f1_remaining if f1_remaining > 0 else float("nan")

    em_improve = em_after - em_before
    f1_improve = f1_after - f1_before
    d = dict(
        timecode=timecode,
        em_before=em_before,
        em_after=em_after,
        f1_before=f1_before,
        f1_after=f1_after,
        em_fix_rate=em_fix_rate,
        f1_fix_rate=f1_fix_rate,
        em_improve=em_improve,
        f1_improve=f1_improve,
        em_prefixed_num=em_prefixed_num,
        em_fixed_num=em_fixed_num,
    )
    error_fixing_data.append(d)

# Keep a plain list-of-dicts copy for cells that reshape the records,
# then expose the same data as a DataFrame for plotting.
error_fixing_data_list = error_fixing_data[:]
error_fixing_data = pd.DataFrame(error_fixing_data)
print(error_fixing_data)


SyntaxError: invalid syntax (<ipython-input-45-b20db5c1974e>, line 5)

In [30]:
# Axis encodings for the per-timecode improvement plot (after minus before).
x = alt.X("timecode:O", title="Timecode")
y_em = alt.Y(
    "em_improve:Q",
    title="EM Improvement",
    scale=alt.Scale(domain=[-0.1, 0.30]),
)
y_f1 = alt.Y(
    "f1_improve:Q",
    title="F1 Improvement",
    scale=alt.Scale(domain=[0.0, 0.50]),
)

# EM curve: color is conditional on the sign of the improvement
# (black when em_improve > 0, red otherwise).
em_sign_color = alt.condition(
    alt.datum.em_improve > 0,
    alt.value("black"),
    alt.value("red"),
)
em_line = (
    alt.Chart(error_fixing_data)
    .mark_line(interpolate="natural", point=True)
    .encode(x=x, y=y_em, opacity=alt.value(0.8), color=em_sign_color)
)

# F1 curve as a step line; built here but not included in the layer below.
f1_line = (
    alt.Chart(error_fixing_data)
    .mark_line(interpolate="step-after")
    .encode(x=x, y=y_f1, opacity=alt.value(0.8), color=alt.value("blue"))
)

(
    alt.layer(em_line)
    .resolve_scale(y="independent")
    .properties(width=1000)
    .configure_axis(labelFontSize=18, titleFontSize=16)
)



In [8]:

# Reshape the per-timecode records into long format: two rows per timecode,
# first the "before" EM and then the "after" EM, tagged by a `status` column.
reformatted_data = pd.DataFrame(
    [
        {"timecode": record["timecode"], "status": status, "em": record[em_key]}
        for record in error_fixing_data_list
        for status, em_key in (("before", "em_before"), ("after", "em_after"))
    ]
)

# Fixed color mapping for the two statuses.
scale = alt.Scale(domain=["before", "after"], range=["red", "green"])
color = alt.Color("status:N", scale=scale)

# Semi-transparent, unstacked bars so the before/after bars of a timecode overlap.
em_axis = alt.Y(
    "em:Q",
    stack=None,
    title="EM",
    scale=alt.Scale(domain=[0.0, 0.4]),
)
fig = alt.Chart(reformatted_data).mark_bar(opacity=0.3).encode(
    x="timecode:O",
    y=em_axis,
    color=color,
)

(
    fig.properties(width=1000)
    .configure_axis(labelFontSize=18, titleFontSize=16)
    .configure_legend(titleFontSize=0, labelFontSize=20, orient="top-right")
)


In [32]:
# Axis encodings for the fix-rate plot; both rates share the 0–0.5 domain.
x = alt.X("timecode:O", title="Timecode")
y_em = alt.Y(
    "em_fix_rate:Q",
    title="Fix Rate (EM)",
    scale=alt.Scale(domain=[0.0, 0.50]),
)
# y_f1 is defined for an F1 variant of this plot, which is not drawn here.
y_f1 = alt.Y(
    "f1_fix_rate:Q",
    title="Fix Rate (F1)",
    scale=alt.Scale(domain=[0.0, 0.50]),
)

# EM fix rate per timecode, drawn as a green step line with point markers.
em_line = (
    alt.Chart(error_fixing_data)
    .mark_line(point=True, interpolate="step")
    .encode(x=x, y=y_em, opacity=alt.value(0.8), color=alt.value("green"))
)

(
    alt.layer(em_line)
    .resolve_scale(y="independent")
    .properties(width=1000)
    .configure_axis(labelFontSize=18, titleFontSize=16)
)


In [38]:
# Number of bugs fixed (by EM) at each timecode.
x = alt.X("timecode", type="ordinal", title="Timecode")
# `em_fixed_num` is an integer count, not a rate. The previous hard-coded
# y-domain of [0.0, 0.50] (carried over from the fix-rate plot) pushed every
# non-zero count outside the axis, so the domain is now left to be derived
# from the data.
y_em = alt.Y("em_fixed_num", type="quantitative", title="Fixed Bug")
em_line = (
    alt.Chart(error_fixing_data)
    .mark_line(point=True, interpolate="step")
    .encode(x=x, y=y_em, opacity=alt.value(0.8), color=alt.value("green"))
)

alt.layer(em_line).resolve_scale(
    y="independent"
).properties(width=1000).configure_axis(
    labelFontSize=18,
    titleFontSize=16
)
