In [1]:
from tqdm import tqdm
import json
import numpy as np
import copy

import spot_utils
import synthTL

In [2]:
# Lookup tables built by synthTL from the worker spreadsheet; translate_dict is
# indexed below by the raw natural-language spec text.
translate_dict = synthTL.create_translate_dict('rawcontext_decomposition-worker.xlsx')
decompose_dict = synthTL.create_decompose_dict('rawcontext_decomposition-worker.xlsx')
cur_nl_spec_fname = "specs/amba_worker.txt"
# Read the spec through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(cur_nl_spec_fname,"r") as spec_file:
    test_str = spec_file.read()
# Reference formula associated with the full spec text.
formula_DUT = translate_dict[test_str]
cur_graph = synthTL.Node(test_str)
# synthTL keeps the DUT variable set as module-level state; set it once here.
synthTL.cur_DUT_variables = spot_utils.get_variables(formula_DUT)

In [3]:
def load_llm_translation(exp_name):
    """Load the five JSON artifacts stored for experiment ``exp_name``.

    Each file name is ``exp_name`` followed by a fixed suffix.

    Args:
        exp_name: Path prefix shared by all artifact files.

    Returns:
        A 5-tuple with the parsed contents of, in this order,
        ``_translations.json``, ``_decompositions.json``,
        ``_alltranslations.json``, ``_alldecompositions.json`` and
        ``_correctednodeids.json``.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If any of the files is not valid JSON.
    """
    def _read_json(suffix):
        # The context manager closes the handle even when parsing fails.
        with open(exp_name + suffix) as json_file:
            return json.load(json_file)

    suffixes = (
        "_translations.json",
        "_decompositions.json",
        "_alltranslations.json",
        "_alldecompositions.json",
        "_correctednodeids.json",
    )
    return tuple(_read_json(suffix) for suffix in suffixes)

In [4]:
def create_test_graph_list(nl_spec_fname,num_graph=1,root_only=False,mode='cache'):
    """Build ``num_graph`` graphs rooted at the spec text in ``nl_spec_fname``.

    Args:
        nl_spec_fname: Path of the natural-language spec file.
        num_graph: Number of graphs to create.
        root_only: If true, each graph is just the root ``synthTL.Node``;
            otherwise it is decomposed and translated with ``synthTL``.
        mode: ``'cache'`` or ``'LLM'``; forwarded to ``synthTL.dfs_decompose``
            and ``synthTL.dfs_translate``.

    Returns:
        ``(res_list, formula_DUT)`` where ``res_list`` holds the graphs and
        ``formula_DUT`` is ``translate_dict`` looked up by the spec text.
    """
    assert mode in ['cache','LLM']
    # The spec text and its reference formula do not depend on the loop
    # variable, so read the file once (and close it) instead of re-opening it
    # per graph. This also keeps formula_DUT defined when num_graph == 0.
    with open(nl_spec_fname,"r") as spec_file:
        test_str = spec_file.read()
    formula_DUT = translate_dict[test_str]
    res_list = []
    for _ in tqdm(range(num_graph)):
        cur_graph = synthTL.Node(test_str)
        if not root_only:
            synthTL.dfs_decompose(cur_graph,mode=mode,decompose_dict=decompose_dict)
            synthTL.dfs_translate(cur_graph,mode=mode,t_type='template',translate_dict=translate_dict)
        res_list.append(cur_graph)
    return res_list, formula_DUT


In [5]:
def construct_all_node_perturb(cur_graph,unary_op_str):
    """Create one perturbed copy of ``cur_graph`` per node.

    For every node returned by ``synthTL.get_all_descendants(cur_graph)`` the
    graph is copied, the matching node in the copy is located through its
    unique id, and that node's translation is wrapped as
    ``unary_op_str(<translation>)``.

    Returns:
        ``(new_graph_list, all_culprit_list)``: the perturbed graph copies and,
        aligned with them, single-element lists holding the perturbed node.
    """
    perturbed_graphs = []
    culprits_per_graph = []
    for original_node in synthTL.get_all_descendants(cur_graph):
        graph_copy = synthTL.copy_graph(cur_graph)
        node_id = synthTL.get_unique_node_id(original_node)
        target = synthTL.find_descendant(graph_copy, node_id)
        wrapped = unary_op_str + "(" + synthTL.get_node_translation(target) + ")"
        # Nodes with a non-empty dcmp_dict store the perturbation in their
        # template translation; the others store it in the translation itself.
        attr_name = "template_translation" if len(target.dcmp_dict) > 0 else "translation"
        setattr(target, attr_name, wrapped)
        # NOTE(review): presumably re-propagates translations through the copy
        # without issuing new translation requests -- confirm in synthTL.
        synthTL.dfs_translate(graph_copy, mode='NoRun', t_type='template')
        perturbed_graphs.append(graph_copy)
        culprits_per_graph.append([target])
    return perturbed_graphs, culprits_per_graph

In [6]:
# Build a single reference graph with the defaults (root_only=False, mode='cache'),
# i.e. decomposed and translated from the cached tables, plus the DUT formula.
cur_test_list,formula_DUT = create_test_graph_list(cur_nl_spec_fname,num_graph=1)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 636.85it/s]


In [7]:
def save_culprit_data(exp_name,cur_graph,all_culprit_list,all_possible_culprit_list):
    """Write the node ids of an experiment to ``<exp_name>_culpritdata.json``.

    The JSON object has three keys: ``all_nodes`` (ids of every node returned
    by ``synthTL.get_all_descendants(cur_graph)``), ``all_culprit_list`` and
    ``all_possible_culprit_list`` (the given lists of node lists, with every
    node replaced by its unique id).
    """
    node_id = synthTL.get_unique_node_id

    def ids_per_example(node_groups):
        # Convert a list of node lists into a list of id lists.
        return [[node_id(node) for node in group] for group in node_groups]

    payload = {
        "all_nodes": [node_id(node) for node in synthTL.get_all_descendants(cur_graph)],
        "all_culprit_list": ids_per_example(all_culprit_list),
        "all_possible_culprit_list": ids_per_example(all_possible_culprit_list),
    }
    with open(exp_name+'_culpritdata.json', 'w') as out_file:
        json.dump(payload, out_file)

def load_culprit_data(exp_name):
    """Read ``<exp_name>_culpritdata.json`` back from disk.

    Returns:
        ``(node_list, all_culprit_list, all_possible_culprit_list)``: the values
        stored under the ``all_nodes``, ``all_culprit_list`` and
        ``all_possible_culprit_list`` keys of the JSON object.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the three keys is missing.
    """
    # Use a context manager so the file handle is closed right after parsing.
    with open(exp_name+'_culpritdata.json','r') as in_file:
        data = json.load(in_file)
    node_list = data["all_nodes"]
    all_culprit_list = data["all_culprit_list"]
    all_possible_culprit_list = data["all_possible_culprit_list"]
    return node_list, all_culprit_list, all_possible_culprit_list

#### Incorrect spec: perturbed specifications are checked against the original DUT formula

In [8]:
# Culprit-search variant used by the cells below: "allculprit" keeps the raw
# result of synthTL.get_culprit_batch, while "allculpritfilter" additionally
# passes each result through synthTL.filter_if_dependancy_holds.
mode = "allculprit"
#mode = "allculpritfilter"
# Unary operator string that construct_all_node_perturb wraps around a node's
# translation to create the perturbed graphs.
#unary_op_str="G"
unary_op_str="X"
#unary_op_str="!"

In [9]:
# One perturbed copy of the reference graph per node, together with the node
# that was perturbed in each copy.
perturb_list,all_culprit_list = construct_all_node_perturb(cur_test_list[0],unary_op_str=unary_op_str)
np.random.seed(0)
num_ex = 10
# Sampling without replacement raises ValueError when more items are requested
# than exist, so clamp the sample size to the number of perturbed graphs.
select_idx = np.random.choice(len(perturb_list),size=min(num_ex,len(perturb_list)),replace=False)
perturb_list = [perturb_list[idx] for idx in select_idx]
all_culprit_list = [all_culprit_list[idx] for idx in select_idx]

hold_set = synthTL.batch_model_check([g.translation for g in perturb_list],formula_DUT)
# Decide once per example whether it is buggy (its translation is not in
# hold_set) and apply the same mask to both aligned lists.
is_buggy = [g.translation not in hold_set for g in perturb_list]
buggy_list = [g for g, buggy in zip(perturb_list, is_buggy) if buggy]
all_culprit_list = [culprits for culprits, buggy in zip(all_culprit_list, is_buggy) if buggy]

In [10]:
%%time
# Fail fast on an unknown mode; otherwise all_possible_culprit_list would stay
# undefined (or keep a stale value from an earlier run of the notebook).
if mode not in ("allculprit", "allculpritfilter"):
    raise ValueError("unknown mode: " + repr(mode))
# Both modes start from the same batched culprit search over the buggy graphs.
cur_possible_culprit_list = synthTL.get_culprit_batch(buggy_list,formula_DUT,depth=2)
if mode == "allculprit":
    all_possible_culprit_list = cur_possible_culprit_list
else:
    # "allculpritfilter": pass each example's candidates through
    # filter_if_dependancy_holds, using the candidates themselves as fail_nodes.
    all_possible_culprit_list = []
    for i in tqdm(range(len(cur_possible_culprit_list))):
        cur_list = synthTL.filter_if_dependancy_holds(buggy_list[i],cur_possible_culprit_list[i],formula_DUT,fail_nodes=cur_possible_culprit_list[i])
        all_possible_culprit_list.append(cur_list)

CPU times: user 41 s, sys: 120 ms, total: 41.1 s
Wall time: 41 s


In [11]:
# Persist the results of the incorrect-spec experiment and read them back.
exp_name = "tmp_buggySpec"
save_culprit_data(exp_name=exp_name,cur_graph=cur_test_list[0],all_culprit_list=all_culprit_list,all_possible_culprit_list=all_possible_culprit_list)
# NOTE: this rebinds all_culprit_list and all_possible_culprit_list to the
# JSON-decoded node ids written by save_culprit_data, replacing the lists that
# were passed in above; the statistics cells below operate on these ids.
node_list, all_culprit_list, all_possible_culprit_list = load_culprit_data(exp_name=exp_name)

In [12]:
# Per-example statistics: the fraction of all graph nodes marked as possible
# culprits, and the share of true culprits among the marked ones.
num_nodes = len(synthTL.get_all_descendants(cur_test_list[0]))
res_list, total_sum, total_recall = [], 0, 0
print("fraction marked as possible culprit:")
for i, possible_ids in enumerate(all_possible_culprit_list):
    true_ids = all_culprit_list[i]
    correct_culprits = set(possible_ids) & set(true_ids)
    marked_fraction = len(possible_ids) / num_nodes
    print(marked_fraction)  # fraction of nodes marked as culprit
    res_list.append(marked_fraction)
    total_sum += marked_fraction
    total_recall += len(correct_culprits) / len(true_ids)

fraction marked as possible culprit:
0.4339622641509434
0.49056603773584906
0.49056603773584906
0.49056603773584906
1.0
1.0


In [13]:
# Summary over all buggy examples: example count, node count, and the averages
# of the per-example values accumulated in the previous cell.
num_examples = len(all_possible_culprit_list)
print(num_examples)
print(num_nodes)
if num_examples:
    print("avg. marked as possible culprit:",total_sum/num_examples)
    print("avg. true culprit recall:",total_recall/num_examples)
else:
    # No buggy example was found, so the averages are undefined; report NaN
    # instead of failing with ZeroDivisionError.
    print("avg. marked as possible culprit:",float("nan"))
    print("avg. true culprit recall:",float("nan"))

6
53
avg. marked as possible culprit: 0.6509433962264151
avg. true culprit recall: 1.0


#### Incorrect DUT: the original specification graph is checked against perturbed formulas used as the DUT

In [14]:
# Culprit-search variant for the incorrect-DUT experiment below:
# "allculprit" keeps the raw result of synthTL.get_culprit_batch, while
# "allculpritfilter" additionally applies synthTL.filter_if_dependancy_holds.
#mode = "allculprit"
mode = "allculpritfilter"
# Unary operator string wrapped around a node's translation by
# construct_all_node_perturb.
#unary_op_str="G"
unary_op_str="X"
#unary_op_str="!"

In [15]:
# One perturbed copy of the reference graph per node; here the perturbed
# translations are later used as the (incorrect) DUT formulas.
perturb_list,perturb_culprit_list = construct_all_node_perturb(cur_test_list[0],unary_op_str=unary_op_str)
num_ex = 10
np.random.seed(0)
# Sampling without replacement raises ValueError when more items are requested
# than exist, so clamp the sample size to the number of perturbed graphs.
select_idx = np.random.choice(len(perturb_list),size=min(num_ex,len(perturb_list)),replace=False)
perturb_list = [perturb_list[idx] for idx in select_idx]
perturb_culprit_list = [perturb_culprit_list[idx] for idx in select_idx]
buggy_list = []
all_culprit_list = []
# Keep only the perturbations for which the containment check between the
# reference translation and the perturbed translation fails.
for i in tqdm(range(len(perturb_list))):
    if not spot_utils.check_formula_contains_formula(cur_test_list[0].translation,perturb_list[i].translation,use_contains_split=True):
        buggy_list.append(perturb_list[i])
        all_culprit_list.append(perturb_culprit_list[i])

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.80s/it]


In [16]:
# Fail fast on an unknown mode; otherwise the loop below would silently leave
# all_possible_culprit_list empty.
if mode not in ("allculprit", "allculpritfilter"):
    raise ValueError("unknown mode: " + repr(mode))
all_possible_culprit_list = []
for g in tqdm(buggy_list):
    # Search the reference graph(s) for culprits, using the perturbed
    # translation of g as the DUT formula.
    cur_batch = synthTL.get_culprit_batch(cur_test_list,g.translation,depth=2)
    if mode == "allculprit":
        all_possible_culprit_list += cur_batch
    else:
        # "allculpritfilter": filter the candidates of the first (reference)
        # graph, using the candidates themselves as fail_nodes.
        cur_possible_culprit_list = cur_batch[0]
        cur_list = synthTL.filter_if_dependancy_holds(cur_test_list[0],cur_possible_culprit_list,g.translation,fail_nodes=cur_possible_culprit_list)
        all_possible_culprit_list.append(cur_list)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [01:06<00:00,  8.29s/it]


In [17]:
# Persist the results of the incorrect-DUT experiment as
# tmp_buggyDUT_culpritdata.json (node ids only, see save_culprit_data).
exp_name = "tmp_buggyDUT"
save_culprit_data(exp_name=exp_name,cur_graph=cur_test_list[0],all_culprit_list=all_culprit_list,all_possible_culprit_list=all_possible_culprit_list)

In [18]:
# Reload the saved data; this rebinds all_culprit_list and
# all_possible_culprit_list to the JSON-decoded node ids from the file.
node_list, all_culprit_list, all_possible_culprit_list = load_culprit_data(exp_name=exp_name)

In [19]:
# Per-example statistics for the incorrect-DUT experiment: the fraction of all
# graph nodes marked as possible culprits, and the share of true culprits
# among the marked ones.
num_nodes = len(synthTL.get_all_descendants(cur_test_list[0]))
total_sum, total_recall = 0, 0
for i, possible_ids in enumerate(all_possible_culprit_list):
    true_ids = all_culprit_list[i]
    correct_culprits = set(possible_ids) & set(true_ids)
    marked_fraction = len(possible_ids) / num_nodes
    print(marked_fraction)  # fraction of nodes marked as culprit
    total_sum += marked_fraction
    total_recall += len(correct_culprits) / len(true_ids)

0.05660377358490566
0.09433962264150944
0.09433962264150944
0.09433962264150944
0.09433962264150944
0.05660377358490566
0.018867924528301886
0.05660377358490566


In [20]:
# Summary over all buggy DUT examples: example count, node count, and the
# averages of the per-example values accumulated in the previous cell.
num_examples = len(all_possible_culprit_list)
print(num_examples)
print(num_nodes)
if num_examples:
    print("avg. marked as possible culprit:",total_sum/num_examples)
    print("avg. true culprit recall:",total_recall/num_examples)
else:
    # No buggy example was found, so the averages are undefined; report NaN
    # instead of failing with ZeroDivisionError.
    print("avg. marked as possible culprit:",float("nan"))
    print("avg. true culprit recall:",float("nan"))

8
53
avg. marked as possible culprit: 0.07075471698113207
avg. true culprit recall: 1.0
