In [1]:
import pandas as pd
import requests
import json
import numpy as np
import copy

r = requests.get("http://gersteincodegenprod.s3.amazonaws.com/results-default.json")

data = json.loads(r.text)

In [2]:
backup = copy.deepcopy(data)

In [3]:
data = copy.deepcopy(backup)
data = [x for x in data if "starcoder-" in x["test_case_id"]]
# data = [x for x in data if "Python" in x["test_case_id"]]
# data = [x for x in data if "-NoCommentPrompts-" in x["test_case_id"]]
print("Number of test cases:", len(data))

Number of test cases: 19696


In [4]:
data[0]

{'test_case_id': 'starcoder-v1-Java-Prompts-36e5e6a5cf9d5649542d15cb6caf3ba675c43fc7cc5d286fdf552f0798127c77-19',
 'res': False,
 'finished': True,
 'body': {'test_case_repo': 'lilbillybiscuit/323tester',
  'file': '36e5e6a5cf9d5649542d15cb6caf3ba675c43fc7cc5d286fdf552f0798127c77.java',
  'filePath': 'none',
  'lineStart': 0,
  'lineEnd': 0,
  'test_case_id': 'starcoder-v1-Java-Prompts-36e5e6a5cf9d5649542d15cb6caf3ba675c43fc7cc5d286fdf552f0798127c77-19',
  'methodBody': 'none',
  'num_tests': 50,
  'random_stringadasd': 'm9st4Pb2t5FhqeY1E+/FfHB/vsAIYWKaOZpUwnlgbbw='}}

In [5]:
prompt_types = ["-Prompts-", "-NoCommentPrompts-", "-AnnotationPrompts-", "-ReorderedPrompts-", "-SmallPrompts"]
models = ["gpt", "codegen-6B-mono", "codegen2-7B", "starcoder-", "starcoderplus", "instructcodet5p", "santacoder", "incoder-6B"]
languages = ["Java", "Python"]

In [6]:
def test_model(model_name, prompt_type, language):
    data = copy.deepcopy(backup)
    data = [x for x in data if model_name in x["test_case_id"]]
    data = [x for x in data if prompt_type in x["test_case_id"]]
    data = [x for x in data if language in x["test_case_id"]]
    
    print(model_name, prompt_type, language, "Number of test cases:", len(data))

for model in models:
    for prompt_type in prompt_types:
        for language in languages:
            test_model(model, prompt_type, language)

gpt -Prompts- Java Number of test cases: 998
gpt -Prompts- Python Number of test cases: 3140
gpt -NoCommentPrompts- Java Number of test cases: 1000
gpt -NoCommentPrompts- Python Number of test cases: 3140
gpt -AnnotationPrompts- Java Number of test cases: 1000
gpt -AnnotationPrompts- Python Number of test cases: 3140
gpt -ReorderedPrompts- Java Number of test cases: 0
gpt -ReorderedPrompts- Python Number of test cases: 3140
gpt -SmallPrompts Java Number of test cases: 996
gpt -SmallPrompts Python Number of test cases: 3140
codegen-6B-mono -Prompts- Java Number of test cases: 1000
codegen-6B-mono -Prompts- Python Number of test cases: 3140
codegen-6B-mono -NoCommentPrompts- Java Number of test cases: 1000
codegen-6B-mono -NoCommentPrompts- Python Number of test cases: 3140
codegen-6B-mono -AnnotationPrompts- Java Number of test cases: 1000
codegen-6B-mono -AnnotationPrompts- Python Number of test cases: 3140
codegen-6B-mono -ReorderedPrompts- Java Number of test cases: 0
codegen-6B-mono

In [7]:
# get all objects that have "incoder" in the "test_case_id"
def get_passk(model_name, prompt_type, language):
    data = copy.deepcopy(backup)
    data = [x for x in data if model_name in x["test_case_id"]]
    data = [x for x in data if prompt_type in x["test_case_id"]]
    data = [x for x in data if language in x["test_case_id"]]

    # group
    arr = {}

    for x in data:
        id = x["test_case_id"].split("-")[-2]
        if id not in arr:
            arr[id] = []
        arr[id].append(x)

    results = {}
    for key in arr:
        # count the number of correct, and total elements
        correct = 0
        total = 0
        for x in arr[key]:
            if x["res"] == True:
                correct += 1
            total += 1
        results[key] = {"correct": correct, "total": total}

    # print("Number of functions: ", len(results))

    ks=[1,5,10,20]

    def pass_at_k(n, c, k):
        if n - c < k: return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    k_results = []
    for k in ks:
        # print("k", k)
        temp = []
        for key in results:
            correct = results[key]["correct"]
            total = results[key]["total"]
            k = min(k, total)
            temp.append(pass_at_k(total, correct, k))
        k_results.append(np.mean(temp))
    return k_results



In [8]:
def generate_latex_table(models, prompt_types, languages):
    latex_table = ""
    prompt_conversion = {
        "-Prompts-": "Summary at top",
        "-NoCommentPrompts-": "Uncommented",
        "-AnnotationPrompts-": "Summary Only",
        "-ReorderedPrompts-": "Summary at bottom",
        "-SmallPrompts": "Necessary Only",
    }

    model_conversion = {
        "gpt": "GPT-3",
        "codegen-6B-mono": "Codegen",
        "codegen2-7B": "Codegen2",
        "starcoder-": "StarCoder",
        "starcoderplus": "StarCoder+",
        "instructcodet5p": "InstructCodeT5+",
        "santacoder": "SantaCoder",
        "incoder-6B": "InCoder",
    }

    for model in models:
        first_row = True
        for prompt_type in prompt_types:
            row = ""
            if first_row:
                row += f"\\multirow{{{len(prompt_types)}}}{{*}}{{{model_conversion[model]}}}"
                first_row = False
            row += f" & \\textit{{{prompt_conversion[prompt_type]}}}"
            for language in languages:
                passk_values = get_passk(model, prompt_type, language)
                for value in passk_values:
                    row += f" & {value:.3%}"
            row += " \\\\"
            new_addition = row + "\n"
            latex_table += new_addition
            print(new_addition, end="")
        latex_table += "\\midrule\n"
        print("\\midrule\n", end="")

    latex_table = latex_table.replace("nan", "\\textbackslash")
    latex_table = latex_table.replace("%", "\\%")
    return latex_table
models = ["incoder-6B", "santacoder", "starcoder-", "starcoderplus", "instructcodet5p", "codegen-6B-mono", "codegen2-7B", "gpt"]
# models = ["gpt", "codegen-6B-mono", "codegen2-7B", "starcoder-", "starcoderplus", "instructcodet5p", "santacoder", "incoder-6B"]
prompt_types = ["-Prompts-", "-NoCommentPrompts-", "-AnnotationPrompts-", "-ReorderedPrompts-", "-SmallPrompts"]
languages = ["Java", "Python"]

latex_table = generate_latex_table(models, prompt_types, languages)
print("="*80)
print(latex_table)
with open("latex_table.txt", "w") as f:
    f.write(latex_table)

\multirow{5}{*}{InCoder} & \textit{Summary at top} & 0.000% & 0.000% & 0.000% & 0.000% & 0.828% & 2.016% & 3.006% & 4.459% \\
 & \textit{Uncommented} & 0.000% & 0.000% & 0.000% & 0.000% & 0.032% & 0.159% & 0.318% & 0.637% \\
 & \textit{Summary Only} & 0.000% & 0.000% & 0.000% & 0.000% & 1.688% & 5.320% & 8.332% & 12.006% \\


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


 & \textit{Summary at bottom} & nan% & nan% & nan% & nan% & 0.610% & 2.587% & 4.303% & 6.274% \\
 & \textit{Necessary Only} & 0.000% & 0.000% & 0.000% & 0.000% & 0.032% & 0.159% & 0.318% & 0.637% \\
\midrule
\multirow{5}{*}{SantaCoder} & \textit{Summary at top} & 0.000% & 0.000% & 0.000% & 0.000% & 0.637% & 1.338% & 1.844% & 2.548% \\
 & \textit{Uncommented} & 0.000% & 0.000% & 0.000% & 0.000% & 0.287% & 0.764% & 0.955% & 1.274% \\
 & \textit{Summary Only} & 0.000% & 0.000% & 0.000% & 0.000% & 2.965% & 9.848% & 14.227% & 18.181% \\
 & \textit{Summary at bottom} & nan% & nan% & nan% & nan% & 0.510% & 1.949% & 3.013% & 4.459% \\
 & \textit{Necessary Only} & 0.000% & 0.000% & 0.000% & 0.000% & 0.032% & 0.159% & 0.318% & 0.637% \\
\midrule
\multirow{5}{*}{StarCoder} & \textit{Summary at top} & 0.000% & 0.000% & 0.000% & 0.000% & 3.694% & 13.197% & 19.359% & 24.554% \\
 & \textit{Uncommented} & 0.000% & 0.000% & 0.000% & 0.000% & 0.318% & 1.062% & 1.591% & 2.548% \\
 & \textit{Summary Only}

In [9]:
models = ["incoder-6B", "santacoder", "starcoder-", "starcoderplus", "instructcodet5p-16b", "codegen-6B-mono", "codegen2-7B", "gpt"]
prompt_types = ["-Prompts-", "-NoCommentPrompts-", "-AnnotationPrompts-", "-ReorderedPrompts-", "-SmallPrompts-"]
languages = ["Java", "Python"]
results = {}
for model in models:
    result_prompt = {}
    for prompt_type in prompt_types:
        result_language = {}
        for language in languages:
            result_language[language] = get_passk(model, prompt_type, language)
        result_prompt[prompt_type] = result_language
    results[model] = result_prompt


In [31]:
# print(results)
with open("results.json", "w") as f:
    json.dump(results, f)


In [58]:
# get all objects that have "incoder" in the "test_case_id"
def get_passk2(model_name, prompt_type, language):
    data = copy.deepcopy(backup)
    data = [x for x in data if model_name in x["test_case_id"]]
    data = [x for x in data if prompt_type in x["test_case_id"]]
    data = [x for x in data if language in x["test_case_id"]]

    # group
    arr = {}
    for x in data:
        # get entire string before the last -
        id = x["test_case_id"].rsplit("-", 1)[0]
        if id not in arr:
            arr[id] = []
        arr[id].append(x)

    results = {}
    for key in arr:
        # count the number of correct, and total elements
        correct = 0
        total = 0
        for x in arr[key]:
            if x["res"] == True:
                correct += 1
            total += 1
        results[key] = {"correct": correct, "total": total}


    # print("Number of functions: ", len(results))

    ks=[1,5,10,20]

    def pass_at_k(n, c, k):
        if n - c < k: return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    k_results = {}
    for key in results:
        temp_results = {}
        for k in ks:
            correct = results[key]["correct"]
            total = results[key]["total"]
            k = min(k, total)
            temp_results[str(k)] = pass_at_k(total, correct, k)
        # print(key, temp_results)
        k_results[key] = temp_results
    return k_results


In [None]:
get_passk2("incoder-6B", "-Prompts-", "Java")

In [72]:
result_individual = {}
for model in models:
    result_prompt = {}
    for prompt_type in prompt_types:
        result_language = {}
        for language in languages:
            print(model, prompt_type, language)
            result_language[language] = get_passk2(model, prompt_type, language)
        result_prompt[prompt_type] = result_language
    result_individual[model] = result_prompt

incoder-6B -Prompts- Java
incoder-6B -Prompts- Python
incoder-6B -NoCommentPrompts- Java
incoder-6B -NoCommentPrompts- Python
incoder-6B -AnnotationPrompts- Java
incoder-6B -AnnotationPrompts- Python
incoder-6B -ReorderedPrompts- Java
incoder-6B -ReorderedPrompts- Python
incoder-6B -SmallPrompts- Java
incoder-6B -SmallPrompts- Python
santacoder -Prompts- Java
santacoder -Prompts- Python
santacoder -NoCommentPrompts- Java
santacoder -NoCommentPrompts- Python
santacoder -AnnotationPrompts- Java
santacoder -AnnotationPrompts- Python
santacoder -ReorderedPrompts- Java
santacoder -ReorderedPrompts- Python
santacoder -SmallPrompts- Java
santacoder -SmallPrompts- Python
starcoder- -Prompts- Java
starcoder- -Prompts- Python
starcoder- -NoCommentPrompts- Java
starcoder- -NoCommentPrompts- Python
starcoder- -AnnotationPrompts- Java
starcoder- -AnnotationPrompts- Python
starcoder- -ReorderedPrompts- Java
starcoder- -ReorderedPrompts- Python
starcoder- -SmallPrompts- Java
starcoder- -SmallPrompts-

In [74]:
# convert "starcoder-" to "starcoder"
result_individual["starcoder"] = result_individual["starcoder-"]
del result_individual["starcoder-"]

In [75]:
with open("results_individual.json", "w") as f:
    json.dump(result_individual, f)

In [78]:
result_individual["incoder-6B"]["-SmallPrompts-"]["Python"]["incoder-6B-v1-Python-SmallPrompts-63e7ba3e0826dbfb3673b619c5caa94f1aba97ba1309e73812909e689653f907"]

{'1': 0.0, '5': 0.0, '10': 0.0, '20': 0.0}

In [79]:
with open("results_individual.json", "r") as f:
    result_individual2 = json.load(f)

result_individual2["incoder-6B"]["-SmallPrompts-"]["Python"]["incoder-6B-v1-Python-SmallPrompts-63e7ba3e0826dbfb3673b619c5caa94f1aba97ba1309e73812909e689653f907"]

{'1': 0.0, '5': 0.0, '10': 0.0, '20': 0.0}

In [70]:
from transformers import AutoTokenizer
import tiktoken
models = ["incoder-6B", "santacoder", "starcoder-", "starcoderplus", "instructcodet5p-16b", "codegen-6B-mono", "codegen2-7B", "gpt"]
prompt_types = ["-Prompts-", "-NoCommentPrompts-", "-AnnotationPrompts-", "-ReorderedPrompts-", "-SmallPrompts-"]
languages = ["Java", "Python"]
author_model_pair = [
    "facebook/incoder-6B",
    "bigcode/santacoder",
    "bigcode/starcoder",
    "bigcode/starcoderplus",
    "bigcode/instructcodet5p-16b",
    "Salesforce/codegen-6B-mono",
    "Salesforce/codegen2-7B",
]

def get_tokenizer(model_pair):
    return AutoTokenizer.from_pretrained(model_pair)





ModuleNotFoundError: No module named 'transformers'