In [1]:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from evaluator.CodeBLEU import calc_code_bleu
import numpy as np
import os
from evaluator.bleu import _bleu
from utils import read_examples, check
from fuzzywuzzy import fuzz
%matplotlib inline
import matplotlib.pyplot as plt
from transformers import AutoTokenizer
import ast
import astor

# Tokenizer is loaded here; it is not referenced by any other visible cell.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-small")
# Name of the results sub-directory that is being evaluated.
loc = "unix_left4_dist"
# NOTE(review): the meaning of the second argument (-1) is defined by
# utils.read_examples -- presumably "no limit"; confirm there.
eval_examples = read_examples("/home/ubuntu/callargs/pkl_data/test.pkl", -1)

# Checkpoint tag that is part of the result file names.
wh = "best-ppl"
a = f"/home/ubuntu/results/{loc}/test_{wh}.output"
b = f"/home/ubuntu/results/{loc}/test_{wh}.gold"
# Read one stripped line per entry; the context managers close the files
# instead of leaving the handles open for the lifetime of the kernel.
with open(a) as pred_file:
    pred_nls = [line.strip() for line in pred_file]
with open(b) as gold_file:
    gold = [line.strip() for line in gold_file]

def sim_acc(pred_nls, gold, eval_examples):
    """Score predictions against references, one pair at a time.

    Each prediction and each reference is stripped and passed through
    ``check``. A pair counts as an exact match when the two results are
    equal; its similarity is ``fuzz.ratio`` of the same two results.

    Args:
        pred_nls: iterable of predicted strings.
        gold: iterable of reference strings, aligned with ``pred_nls``.
        eval_examples: iterable aligned with the other two. Its items are not
            inspected; it only takes part in the ``zip`` and therefore caps
            the number of scored pairs at its own length.

    Returns:
        ``(accs, sims)``: two lists with one entry per scored pair -- the
        equality result and the ``fuzz.ratio`` score.
    """
    accs = []
    sims = []
    for pred, ref, _ in zip(pred_nls, gold, eval_examples):
        # Run ``check`` once per side and reuse the result for both metrics
        # (assumes ``check`` is deterministic, as the equality test already does).
        pred_checked = check(pred.strip())
        ref_checked = check(ref.strip())
        accs.append(pred_checked == ref_checked)
        sims.append(fuzz.ratio(pred_checked, ref_checked))
    return accs, sims
# Exact-match rate and mean fuzz ratio for the predictions currently in `pred_nls`.
accs, sims = sim_acc(pred_nls=pred_nls, gold=gold, eval_examples=eval_examples)
print(*(np.mean(scores) for scores in (accs, sims)))



0.46325761434246715 73.28587144748904


In [None]:
# Score the predictions stored under the gpt_dist_level4_completion_4000_use
# results directory against the same `gold` / `eval_examples` as above.
a = "/home/ubuntu/results/gpt_dist_level4_completion_4000_use/predictions_call.txt"
# The context manager closes the file once the lines have been read.
with open(a) as pred_file:
    pred_nls = [line.strip() for line in pred_file]
accs, sims = sim_acc(pred_nls, gold, eval_examples)
print(np.mean(accs), np.mean(sims))

In [2]:
def analyze(pred_nls, gold, eval_examples):
    """Print an accuracy / similarity breakdown of predictions vs. references.

    Printed, in order:
      1. overall exact-match percentage and mean ``fuzz.ratio``;
      2. the same two numbers for every distinct ``ex.type``;
      3. the fraction of pairs whose comma-separated piece count matches;
      4. one line per reference piece count (1, 2, 3, 4, and 5-or-more):
         bucket label, bucket size, accuracy %, mean similarity and
         piece-count agreement;
      5. a table of accuracy % with one row per piece-count bucket and one
         column per type code 0, 1, 2;
      6. the first mismatching example whose type is 1, for manual inspection.

    Args:
        pred_nls: list of predicted strings.
        gold: list of reference strings, aligned with ``pred_nls``.
        eval_examples: list of example objects aligned with the other two.
            ``.type`` is read from every example; ``.source`` and
            ``.signature`` are read only for the example printed in step 6.

    Returns:
        None. Everything is written to stdout.
    """
    tys = sorted({ex.type for ex in eval_examples})
    accs = []
    sims = []
    # Per type: [exact-match flags, similarity scores].
    detailed = {ty: [[], []] for ty in tys}
    for pred, ref, ex in zip(pred_nls, gold, eval_examples):
        # Run ``check`` once per side and reuse it for both metrics
        # (assumes ``check`` is deterministic, as the equality test already does).
        pred_checked = check(pred.strip())
        ref_checked = check(ref.strip())
        res = pred_checked == ref_checked
        sim = fuzz.ratio(pred_checked, ref_checked)
        accs.append(res)
        sims.append(sim)
        detailed[ex.type][0].append(res)
        detailed[ex.type][1].append(sim)
    # 0, in this project, 1, from other dependencies 2, stdlib of python
    print(np.mean(accs)*100, np.mean(sims))
    for ty in tys:
        acc = round(np.mean(detailed[ty][0]) * 100, 2)
        edit = round(np.mean(detailed[ty][1]), 2)
        print(f"For type {ty}, acc: {acc}, edit: {edit}")

    # Number of comma-separated pieces in each reference / prediction string.
    length = np.array([len(ref.split(',')) for ref in gold])
    ex_types = np.array([ex.type for ex in eval_examples])
    pred_length = np.array([len(pred.split(',')) for pred in pred_nls])
    nums = pred_length == length
    print(np.mean(nums))
    accs = np.array(accs)
    sims = np.array(sims)

    # Boolean masks are built once and shared by the two breakdowns below.
    # Buckets 1-4 select that exact piece count; bucket 5 means "5 or more".
    length_masks = [(n, (length == n) if n < 5 else (length >= 5)) for n in range(1, 6)]
    type_masks = [ex_types == ty for ty in range(3)]

    for n, part in length_masks:
        print(n, part.sum(), np.mean(accs[part])*100, np.mean(sims[part]), np.mean(nums[part]))
    print()

    # Rows: piece-count bucket; columns: type code 0, 1, 2.
    for _, len_mask in length_masks:
        for type_mask in type_masks:
            part = len_mask & type_mask
            print(round(np.mean(accs[part])*100, 2), end="\t")
        print()

    # Show the first wrong prediction among type-1 examples, then stop.
    for i in range(len(length)):
        if accs[i]==0 and ex_types[i]==1:
            print(i)
            print(eval_examples[i].source[0])
            print()
            print(pred_nls[i])
            print(gold[i])
            print()
            print(eval_examples[i].signature[0]["label"])
            print(eval_examples[i].signature[0]["documentation"]["value"])
            print()
            break
# Print the full breakdown for the predictions currently held in `pred_nls`.
analyze(pred_nls=pred_nls, gold=gold, eval_examples=eval_examples)

46.325761434246715 73.28587144748904
For type 0, acc: 44.48, edit: 72.1
For type 1, acc: 33.46, edit: 65.14
For type 2, acc: 51.47, edit: 76.56
0.7770398288682897
1 29635 61.636578370170405 79.67929812721444 0.9531297452336764
2 12715 28.1399921352733 67.70475815965395 0.6267400707825403
3 3766 15.719596388741369 59.33988316516198 0.3505045140732873
4 1522 14.5203679369251 57.05584756898817 0.27463863337713534
5 1447 5.6668970283344855 44.75604699378024 0.1299239806496199

62.64	51.03	61.96	
29.27	24.95	27.46	
15.31	10.15	19.88	
16.49	8.0	8.37	
6.13	5.3	2.13	
310
def _raise_worker_exceptions(self):
    """Raises exceptions encountered on worker threads"""
    if self._worker_exception is not None:
        reraise

self._worker_exception, self._worker_exception)
*self._worker_exception)

def reraise(tp: Optional[Type[BaseException]], value: Optional[BaseException], tb: Optional[types.TracebackType]=...) -> NoReturn




In [None]:
# Load two further prediction files and score each against the same references.
# The context managers close the files once the lines have been read.
with open("/home/ubuntu/results/t5_full4_single.output") as single_file:
    pred_nls1 = [line.strip() for line in single_file]
with open("/home/ubuntu/results/t5_full4_refer.output") as refer_file:
    pred_nls2 = [line.strip() for line in refer_file]
accs1, sims1 = sim_acc(pred_nls1, gold, eval_examples)
accs2, sims2 = sim_acc(pred_nls2, gold, eval_examples)


In [None]:
def is_class(z):
    """Report whether any signature of ``z`` is labelled as a class, instance or lambda.

    A signature qualifies when its ``"label"`` starts with ``"class"`` or
    ``"instance"``, or contains ``"<lambda>"`` anywhere.

    Args:
        z: object with a ``signature`` iterable of dicts carrying a ``"label"`` string.

    Returns:
        True if at least one signature qualifies, otherwise False.
    """
    prefixes = ("class", "instance")
    return any(
        sig["label"].startswith(prefixes) or "<lambda>" in sig["label"]
        for sig in z.signature
    )

def match(s, node):
    """Bind the arguments of a call node to the parameters of one signature.

    ``s["label"]`` is turned into a function definition by appending
    ``":\\n    pass"`` and parsing it. A leading ``self`` parameter is
    dropped. Every parameter starts out bound to the source text of its
    default value (``None`` when it has none); the arguments of ``node`` then
    overwrite those entries.

    Keys of the returned mapping:
      * parameter name -- source text of the bound argument or default, or
        ``None`` when nothing was supplied;
      * ``"*"`` -- list with the source of every ``*expr`` argument (also
        present, possibly empty, when the signature declares ``*args``);
      * ``"**"`` -- set with the source of every ``**expr`` argument (also
        present, possibly empty, when the signature declares ``**kwargs``);
      * an ``int`` position -- a positional argument beyond the declared
        positional parameters.

    The binding is rejected when
      * a key is neither ``"*"``/``"**"`` nor a declared parameter, or
      * a keyword-only parameter is unbound and no ``**expr`` was passed, or
      * a positional-only parameter is unbound and no ``*expr`` was passed, or
      * any other parameter is unbound and neither ``*expr`` nor ``**expr``
        was passed.

    NOTE(review): surplus positional arguments are stored under their integer
    position even when the signature declares ``*args``, so such calls are
    always rejected -- confirm that this is intended.

    Args:
        s: mapping with a ``"label"`` string holding a ``def ...`` header.
        node: ``ast.Call`` whose ``args`` / ``keywords`` are bound.

    Returns:
        ``(binding, ok)``. ``binding`` is the mapping described above and is
        returned even when ``ok`` is False. ``(None, False)`` is returned
        when the label cannot be parsed into a function definition.
    """
    try:
        tree = ast.parse(s["label"] + ":\n" + "    pass")
    except Exception:
        # Unparseable label (or no usable "label" entry): nothing to bind to.
        return None, False
    if not tree.body or not isinstance(tree.body[0], (ast.FunctionDef, ast.AsyncFunctionDef)):
        # Parsed, but to something without a parameter list (e.g. a class header).
        return None, False
    sig = tree.body[0].args

    def _src(expr):
        """Source text of an AST expression, without surrounding whitespace."""
        return astor.to_source(expr).strip()

    posonlyargs = sig.posonlyargs
    if len(posonlyargs) > 0 and posonlyargs[0].arg == "self":
        posonlyargs = posonlyargs[1:]
    args = sig.args
    if len(args) > 0 and args[0].arg == "self":
        args = args[1:]

    dic = {}        # parameter name -> default source / None, plus "*" and "**"
    loc = {}        # positional index -> parameter name
    posonly = set()
    kwonly = set()

    # ``defaults`` belongs to the *tail* of posonlyargs + args taken together,
    # so the two lists are indexed as one sequence when aligning defaults.
    positional = list(posonlyargs) + list(args)
    first_default = len(positional) - len(sig.defaults)
    for idx, param in enumerate(positional):
        loc[idx] = param.arg
        if idx < len(posonlyargs):
            posonly.add(param.arg)
        if idx >= first_default:
            dic[param.arg] = _src(sig.defaults[idx - first_default])
        else:
            dic[param.arg] = None

    if sig.vararg is not None:
        dic["*"] = []
    for param, default in zip(sig.kwonlyargs, sig.kw_defaults):
        kwonly.add(param.arg)
        dic[param.arg] = _src(default) if default is not None else None
    if sig.kwarg is not None:
        dic["**"] = set()

    res = dic.copy()
    for idx, arg in enumerate(node.args):
        if isinstance(arg, ast.Starred):
            if res.get("*") is None:
                res["*"] = []
            res["*"].append(_src(arg.value))
        else:
            # Falls back to the raw index when there is no parameter at idx.
            res[loc.get(idx, idx)] = _src(arg)
    for kw in node.keywords:
        if kw.arg is None:
            # ``**expr`` in the call.
            if res.get("**") is None:
                res["**"] = set()
            res["**"].add(_src(kw.value))
        else:
            res[kw.arg] = _src(kw.value)

    # An unpacked argument may supply any otherwise unbound parameter.
    has_star = len(res.get("*", [])) > 0
    has_dstar = len(res.get("**", set())) > 0
    for key, value in res.items():
        if key in kwonly:
            if value is None and not has_dstar:
                return res, False
        elif key in posonly:
            if value is None and not has_star:
                return res, False
        elif key not in ("*", "**") and key not in dic:
            return res, False
        elif value is None and not has_dstar and not has_star:
            return res, False

    return res, True
        
def exists(z, b):
    """Find the first signature of ``z`` that the call node ``b`` binds to.

    Signatures are tried in order through ``match``; the search stops at the
    first one for which ``match`` reports success.

    Args:
        z: object with a ``signature`` iterable.
        b: call node handed to ``match`` unchanged.

    Returns:
        ``(binding, signature)`` for the first successful signature, or
        ``None`` when no signature matches.
    """
    for candidate in z.signature:
        binding, ok = match(candidate, b)
        if not ok:
            continue
        return binding, candidate
    return None

def details(pred_nls, gold, eval_examples):
    """Print argument-level statistics of predictions vs. references.

    Every string is parsed as the tail of a call by prefixing it with
    ``"f("``. An example is analysed only when its reference parses,
    ``is_class`` is false for it, and ``exists`` finds a signature the
    reference call binds to. For those examples the prediction is bound to
    the same signature with ``match`` and compared key by key with the
    reference binding.

    Printed, in order:
      1. ``partial acc, sim:`` -- exact-match rate and mean ``fuzz.ratio``
         over the analysed examples;
      2. ``syntax correct for gold:`` -- one minus the number of predictions
         that do not parse as a call divided by ``len(gold)``, followed by
         the number of references that do not parse (the label text is kept
         as it was; note that the rate is computed from the predictions);
      3. number of analysed examples, then the fractions of them whose
         prediction binds to the signature, whose binding equals the
         reference binding on every key, and the mean per-example fraction
         of equal keys;
      4. total number of reference binding keys and the fraction of them
         reproduced by the prediction.

    Ratios whose denominator is zero are printed as ``nan``.

    Args:
        pred_nls: list of predicted strings.
        gold: list of reference strings, aligned with ``pred_nls``.
        eval_examples: list of example objects aligned with the other two.

    Returns:
        List of ``(pred_binding, gold_binding, signature, example, pred, gold)``
        tuples for predictions that bind to the signature and agree with the
        reference on some keys but not all.

    Raises:
        ValueError: if ``match`` raises while binding a prediction (the call
            node and the prediction text are printed first).
    """
    def _parse_call(tail, require_call):
        """Parse ``"f(" + tail``; return the expression node or None on failure."""
        try:
            parsed = ast.parse("f(" + tail).body[0].value
        except Exception:
            return None
        if require_call and not isinstance(parsed, ast.Call):
            return None
        return parsed

    def _ratio(num, den):
        """``num / den``, or nan when there is nothing to divide by."""
        return num / den if den else float("nan")

    # Predictions must parse to a call; references only need to parse.
    syn_incorrect = sum(_parse_call(x, True) is None for x in pred_nls)
    gold_incorrect = sum(_parse_call(y, False) is None for y in gold)

    tot = 0        # analysed examples
    match_imp = 0  # predictions that bind to the signature
    tota = 0       # reference binding keys over all analysed examples
    ma = 0         # reference binding keys reproduced by the prediction
    mia = 0        # sum of per-example fractions of reproduced keys
    em = 0         # examples with every key reproduced
    study = []
    paccs = []
    psims = []
    for x, y, z in zip(pred_nls, gold, eval_examples):
        b = _parse_call(y, False)
        if b is None or is_class(z):
            continue

        found = exists(z, b)
        if found is None:
            continue

        res, s = found
        tota += len(res)
        tot += 1

        pred_checked = check(x.strip())
        gold_checked = check(y.strip())
        paccs.append(pred_checked == gold_checked)
        psims.append(fuzz.ratio(pred_checked, gold_checked))

        a = _parse_call(x, True)
        if a is None:
            # Unparseable prediction: counted in ``tot`` but scores nothing.
            continue

        try:
            res_p, ff = match(s, a)
        except Exception as exc:
            print(ast.dump(a))
            print(x)
            raise ValueError from exc
        match_imp += ff
        if ff == 1 and len(res) != len(res_p):
            # Diagnostic: prediction binds, but to a different set of keys.
            print(x)
            print(y)
            print(s["label"])
            print(res_p)
            print(res)
            print()

        cur = True
        ss = 0
        for k in res:
            if k == "*":
                hit = res[k] == res_p.get('*', [])
            elif k == "**":
                hit = res[k] == res_p.get('**', set())
            else:
                hit = res[k] == res_p.get(k)
            ss += hit
            cur = cur and hit
        em += cur
        ma += ss
        # A reference binding without keys (zero-parameter signature) is
        # trivially reproduced; dividing by its length would raise.
        mia += ss / len(res) if res else 1.0
        if not cur and ff == 1 and ss > 0:
            study.append((res_p, res, s, z, x, y))

    nan = float("nan")
    print("partial acc, sim:", np.mean(paccs) if paccs else nan, np.mean(psims) if psims else nan)
    print("syntax correct for gold:", 1 - _ratio(syn_incorrect, len(gold)), gold_incorrect)
    print(tot, _ratio(match_imp, tot), _ratio(em, tot), _ratio(mia, tot))
    print(tota, _ratio(ma, tota))
    return study
# Number of reference lines, followed by the argument-level report.
# `lis` keeps the partially matching cases returned by details().
print(len(gold))
lis = details(pred_nls=pred_nls, gold=gold, eval_examples=eval_examples)

In [None]:
# Inspect one partially matching case returned by details().
# Tuple layout: (pred_binding, gold_binding, signature, example, pred, gold).
i = 786
entry = lis[i]
example = entry[3]
print(entry[0])  # binding of the prediction
print(entry[1])  # binding of the reference
print(entry[4])  # prediction text
print(entry[5])  # reference text
print(entry[2])  # signature that was matched
print(example.source[0])
# Disabled alternative view: example.usages[1][0]
print(example.usages[1][-1])