# Experiments

In [1]:
import logging
import os

import argparse
import pandas as pd
import resource
import subprocess
import time
import torch
import traceback
import uuid
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Optional

GENERATED_ROOT_PATH = os.path.join("..", "input", "bugnet")
GENERATED_PAIRS_PATH = os.path.join(GENERATED_ROOT_PATH, "generated_pairs.csv")
CODEGEN_PAIRS_PATH = os.path.join(GENERATED_ROOT_PATH, "codegen_results.csv")

In [2]:
def make_prompt_simple(source: str, language: str) -> str:
    if language == "C++":
        comment = "//"
    elif language == "Python":
        comment = "#"
    else:
        raise NotImplementedError(f"{language} not implemented yet")

    lines = source.splitlines()
    lines.append(f"{comment} Propose code to fix the bug")
    lines.append("")

    return "\n".join(lines)

def make_prompt_multishot(pairs_df: pd.DataFrame, source: str, count: int = 5) -> str:
    pairs_df = pairs_df.iloc[:count]

    result = ""
    for _, row in pairs_df.iterrows():
        result = result + row["original_src"] + "\n\n" + row["changed_src"] + "\n\n"

    result = result + source + "\n"

    return result

In [3]:
# Load the CodeGen model (small - finetuned on Python)
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-350M-mono")
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-350M-mono").to(
    device
)

In [4]:
# Load the submission pairs
submission_pairs_df = pd.read_csv(GENERATED_PAIRS_PATH, keep_default_na=False)
pairs_df = submission_pairs_df.groupby("language").head(100)
pairs_df.head()

Unnamed: 0,problem_id,language,original_status,original_src,changed_src,change,i1,i2,j1,j2,error,stderr
0,p00001,C++,Runtime Error,#include <algorithm>\n#include <cstdio>\n\nusi...,#include <algorithm>\n#include <cstdio>\n\nusi...,replace,11,12,11,12,-11,
1,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <bitset>\n#incl...,#include <algorithm>\n#include <bitset>\n#incl...,replace,35,38,35,36,TLE,
2,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <iostream>\n#in...,#include <algorithm>\n#include <iostream>\n#in...,replace,12,13,12,13,TLE,
3,p00001,C++,Runtime Error,#include <algorithm>\n#include <iostream>\nusi...,#include <algorithm>\n#include <iostream>\nusi...,replace,9,10,9,10,-11,
4,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <iostream>\n\nu...,#include <algorithm>\n#include <iostream>\n\nu...,replace,9,14,9,10,TLE,


## Simple Prompt

In [5]:
# Create a prompt using simple function
pair_id, row = next(pairs_df.iterrows())
prompt = make_prompt_simple(row["original_src"], row["language"])
prompt

'#include <algorithm>\n#include <cstdio>\n\nusing namespace std;\n\nint main(void) {\n  int i, array[10];\n\n  for (i = 0; i < 10; i++)\n    scanf("%d", &array[i]);\n  sort(array, array + 10);\n  for (i = 9; i >= 7; i++)\n    printf("%d\\n", array[i]);\n\n  return 0;\n}\n// Propose code to fix the bug\n'

In [6]:
# Tokenize the inputs and check if the tokens are correct
inputs = tokenizer(prompt, return_tensors="pt").to(device)
print(inputs["input_ids"].shape)

tokenizer.decode(inputs["input_ids"][0])

torch.Size([1, 118])


'#include <algorithm>\n#include <cstdio>\n\nusing namespace std;\n\nint main(void) {\n  int i, array[10];\n\n  for (i = 0; i < 10; i++)\n    scanf("%d", &array[i]);\n  sort(array, array + 10);\n  for (i = 9; i >= 7; i++)\n    printf("%d\\n", array[i]);\n\n  return 0;\n}\n// Propose code to fix the bug\n'

In [7]:
# Generate a prediction using CodeGen
sample = model.generate(**inputs, max_length=512, pad_token_id=tokenizer.eos_token_id)
result = tokenizer.decode(sample[0])
predicted = result.removeprefix(prompt)
predicted

'\nint main(void) {\n  int i, array[10];\n\n  for (i = 0; i < 10; i++)\n    scanf("%d", &array[i]);\n  sort(array, array + 10);\n  for (i = 9; i >= 7; i++)\n    printf("%d\\n", array[i]);\n\n  return 0;\n}\n\n"""\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algorithm>\n#include <cstdio>\n\n#include <string>\n\n#include <algori

## Multishot Prompt

In [8]:
# Create a prompt using simple function
pair_id, row = next(pairs_df.iterrows())
prompt = make_prompt_multishot(pairs_df, row["original_src"], count=3)
prompt[:100]

'#include <algorithm>\n#include <cstdio>\n\nusing namespace std;\n\nint main(void) {\n  int i, array[10];\n\n'

In [9]:
# Tokenize the inputs and check if the tokens are correct
inputs = tokenizer(prompt, return_tensors="pt").to(device)
print(inputs["input_ids"].shape)

tokenizer.decode(inputs["input_ids"][0]) == prompt

torch.Size([1, 1119])


True

In [10]:
# Generate a prediction using CodeGen
sample = model.generate(**inputs, max_length=2048, pad_token_id=tokenizer.eos_token_id)
result = tokenizer.decode(sample[0])
predicted = result.removeprefix(prompt)
predicted

'\n#include <algorithm>\n#include <cctype>\n#include <climits>\n#include <cmath>\n#include <complex>\n#include <cstdio>\n#include <ctime>\n#include <deque>\n#include <functional>\n#include <iomanip>\n#include <iostream>\n#include <list>\n#include <map>\n#include <numeric>\n#include <queue>\n#include <set>\n#include <sstream>\n#include <stack>\n#include <string>\n\ntypedef long long ll;\n\nusing namespace std;\n\nint main(void) {\n  int i, array[10];\n\n  for (i = 0; i < 10; i++) {\n    cin >> i;\n  }\n\n  sort(array, array + 10, greater<int>());\n\n  cout << i << endl << array[0] << endl << array[1] << endl << array[2] << endl << array[3] << endl << array[4] << endl << array[5] << endl << array[6] << endl << array[7] << endl << array[8] << endl << array[9] << endl << endl << array[10];\n  return 0;\n}\n\n#include <algorithm>\n#include <cctype>\n#include <climits>\n#include <cmath>\n#include <complex>\n#include <cstdio>\n#include <ctime>\n#include <deque>\n#include <functional>\n#includ