In [2]:
import json
import os
import regex as re
import tqdm
import random
from pathlib import Path
from scipy.stats import pearsonr

from git_utils import GitRepo
from generate_snippets import generate_snippets
from codegleu.dataflow_match import try_remove_comments_and_docstrings

import codebleu
import codegleu.codegleu

# Input files: results.json provides collections of instance IDs under the
# "resolved" and "applied" keys; scored_instances.jsonl holds one JSON object
# per line, each carrying an "instance_id".
results_loc = "./data/results.json"
scored_loc = "./data/scored_instances.jsonl"

with open(scored_loc, "r") as fp:
    # Iterate the file lazily and skip blank lines so that a stray empty line
    # (e.g. a trailing newline) does not make json.loads raise.
    scored = [json.loads(line) for line in fp if line.strip()]
with open(results_loc, "r") as fp:
    results = json.load(fp)

# Build the ID sets once: membership tests against them are O(1) instead of
# rescanning the ID collections for every scored instance.
resolved_ids = set(results["resolved"])
applied_ids = set(results["applied"])

# Partition the scored instances. An instance listed under "resolved" goes to
# `resolved`; one that is only listed under "applied" goes to `notresolved`;
# anything listed under neither key is dropped.
resolved, notresolved = [], []
for instance in scored:
    if instance["instance_id"] in resolved_ids:
        resolved.append(instance)
    elif instance["instance_id"] in applied_ids:
        notresolved.append(instance)

# Draw a random sample of at most 250 instances that keeps the
# resolved / not-resolved proportions of the full population.
total = len(resolved) + len(notresolved)
if total == 0:
    # Without this guard the proportion computation below fails with an
    # uninformative ZeroDivisionError.
    raise ValueError("no resolved or applied instances available to sample from")
targetsize = min(250, total)

# Round the resolved share half-up and give the remainder to the not-resolved
# group. Rounding both shares independently can overshoot by one when both
# land exactly on .5 (e.g. 1 resolved + 499 not resolved -> 1 + 250 = 251).
n_resolved = int(targetsize * len(resolved) / total + 0.5)
n_notresolved = targetsize - n_resolved

resolved = random.sample(resolved, n_resolved)
notresolved = random.sample(notresolved, n_notresolved)

def _metric_values(instances, metric):
    """Return the `metric` score of every instance, in input order.

    "bleu" is stored directly under its key; the other metrics are stored as
    a dict whose overall score sits under the metric's own name.
    """
    if metric == "bleu":
        return [entry["bleu"] for entry in instances]
    return [entry[metric][metric] for entry in instances]


def _mean(values):
    """Return the arithmetic mean of `values`."""
    return sum(values) / len(values)


# Per-group score lists, extracted once and reused for means and correlation.
_res_bleu_vals = _metric_values(resolved, "bleu")
_res_codebleu_vals = _metric_values(resolved, "codebleu")
_res_codegleu_vals = _metric_values(resolved, "codegleu")
_nres_bleu_vals = _metric_values(notresolved, "bleu")
_nres_codebleu_vals = _metric_values(notresolved, "codebleu")
_nres_codegleu_vals = _metric_values(notresolved, "codegleu")

# Mean score per metric for the resolved group.
res_bleu = _mean(_res_bleu_vals)
res_codebleu = _mean(_res_codebleu_vals)
res_codegleu = _mean(_res_codegleu_vals)

# Mean score per metric for the not-resolved group.
nres_bleu = _mean(_nres_bleu_vals)
nres_codebleu = _mean(_nres_codebleu_vals)
nres_codegleu = _mean(_nres_codegleu_vals)

# Binary outcome vector (1 = resolved, 0 = not resolved), ordered to line up
# with the concatenated score lists passed to pearsonr below.
resornot = [1] * len(resolved) + [0] * len(notresolved)
bleu_pearson = pearsonr(resornot, _res_bleu_vals + _nres_bleu_vals)
codebleu_pearson = pearsonr(resornot, _res_codebleu_vals + _nres_codebleu_vals)
codegleu_pearson = pearsonr(resornot, _res_codegleu_vals + _nres_codegleu_vals)

# Report: group means, then element [0] and element [1] of each pearsonr
# result (printed as the correlation and its P value respectively).
print(f"Resolved Instance Averages:     BLEU: {res_bleu} CodeBLEU: {res_codebleu} CodeGLEU: {res_codegleu}")
print(f"Non-Resolved Instance Averages: BLEU: {nres_bleu} CodeBLEU: {nres_codebleu} CodeGLEU: {nres_codegleu}")
print(f"Pearson Correlation:            BLEU: {bleu_pearson[0]} CodeBLEU: {codebleu_pearson[0]} CodeGLEU: {codegleu_pearson[0]}")
print(f"Pearson Correlation P:          BLEU: {bleu_pearson[1]} CodeBLEU: {codebleu_pearson[1]} CodeGLEU: {codegleu_pearson[1]}")

# Persist the sampled instances as JSON Lines (resolved first, then not
# resolved). The file is opened in "w" mode so that re-running this cell
# replaces the previous sample; the former "+a" mode appended to it, letting
# the file grow past the intended sample size and mix different random draws.
with open("./data/250_scored_instances.jsonl", "w") as fp:
    for i in resolved + notresolved:
        fp.write(json.dumps(i) + "\n")

Resolved Instance Averages:     BLEU: 0.9867756678012728 CodeBLEU: 0.9693382349199752 CodeGLEU: 0.9523196822765444
Non-Resolved Instance Averages: BLEU: 0.9561008753943101 CodeBLEU: 0.9089246345352121 CodeGLEU: 0.8705410641063742
Pearson Correlation:            BLEU: 0.20583082475340775 CodeBLEU: 0.296436467836879 CodeGLEU: 0.27920835494206975
Pearson Correlation P:          BLEU: 0.001063068925772764 CodeBLEU: 1.8307927783384553e-06 CodeGLEU: 7.3945986192177185e-06
