# OpenAI-GPT predicted solutions

In [46]:
import html
import os
import pickle
from typing import List, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML
from nltk.tokenize import word_tokenize

from main import make_prompt_simple

# Directory that holds the generated bug pairs and the per-model result files.
GENERATED_ROOT_PATH = os.path.join("..", "input", "bugnet")
GENERATED_PAIRS_PATH = os.path.join(GENERATED_ROOT_PATH, "generated_pairs.csv")

# Model identifier: selects the result files below and the tokenizer loaded later on.
MODEL = "gpt2"

# Result files for MODEL: the CSV is read into `df`, the pickle into `attentions`.
CSV_PATH = os.path.join(GENERATED_ROOT_PATH, f"{MODEL}_description_results.csv")
PKL_PATH = os.path.join(GENERATED_ROOT_PATH, f"{MODEL}_description_results.pkl")

In [47]:
# keep_default_na=False keeps empty cells (e.g. an empty stderr) as "" instead of
# NaN, so the string operations further down always receive strings.
df = pd.read_csv(CSV_PATH, keep_default_na=False)

# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load result files that were produced by a trusted run.
with open(PKL_PATH, "rb") as f:
    attentions = pickle.load(f)

df.head()

Unnamed: 0,problem_id,language,original_status,original_src,changed_src,change,i1,i2,j1,j2,error,stderr,predicted
0,p00001,C++,Runtime Error,#include <algorithm>\n#include <cstdio>\n\nusi...,#include <algorithm>\n#include <cstdio>\n\nusi...,replace,11,12,11,12,-11,,is the best? caused cause when the game gameb...
1,p00001,Python,Runtime Error,num = [int(input()) for i in range(10)]\nnum.s...,num = [int(input()) for i in range(10)]\nnum.s...,replace,2,3,2,3,TypeError: 'type' object is not subscriptable,"Traceback (most recent call last):\n File ""/h...",is the best? caused cause when the game gameb...


In [48]:
# Row-wise exact match: a prediction counts as correct only when it equals the
# true error message of the *same* row. `Series.isin` would instead test
# membership in the set of all predictions, crediting a row whenever any other
# row happened to predict its error.
df["correct_exact"] = df["error"] == df["predicted"]

correct = df["correct_exact"].sum()
print(f"The exact accuracy of the openai model is {correct / len(df)}")

The exact accuracy of the openai model is 0.0


In [49]:
def check_if_correct(row: pd.Series) -> bool:
    """Return True if any token of the true error occurs in the prediction.

    The row's ``error`` is split with ``word_tokenize`` and each token is
    looked up in ``predicted`` with the ``in`` operator.

    NOTE(review): for string predictions ``in`` is a substring test, so very
    short tokens (punctuation, "is", ...) can match inside unrelated text.
    """
    error = row["error"]
    prediction = row["predicted"]

    for token in word_tokenize(error):
        if token in prediction:
            return True
    return False

# Flag every row whose prediction shares at least one token with the true error.
df["correct_partial"] = df.apply(check_if_correct, axis="columns")

# Fraction of rows flagged as partially correct.
correct = df["correct_partial"].sum()
print(f"The partial accuracy of the openai model is {correct / len(df)}")

The partial accuracy of the openai model is 0.5


In [20]:
def get_bug_type(row: pd.Series) -> str:
    """Classify the changed lines of a row as "input", "output" or "algorithm".

    The lines ``j1:j2`` of ``changed_src`` are joined and searched for
    language-specific I/O markers; input markers take precedence over output
    markers, and anything else is labelled "algorithm".

    Raises:
        NotImplementedError: if ``language`` is neither "Python" nor "C++".
    """
    changed_lines = row["changed_src"].splitlines()
    line = "\n".join(changed_lines[row["j1"] : row["j2"]])
    language = row["language"]

    # Substrings whose presence marks the change as input- or output-related.
    if language == "Python":
        input_markers, output_markers = ("input",), ("print",)
    elif language == "C++":
        input_markers, output_markers = ("cin", "scanf"), ("cout", "printf")
    else:
        raise NotImplementedError(f"{language} not implemented yet")

    if any(marker in line for marker in input_markers):
        return "input"
    if any(marker in line for marker in output_markers):
        return "output"
    return "algorithm"

# Label every row with the coarse category ("input"/"output"/"algorithm") of its changed lines.
df["type"] = df.apply(get_bug_type, axis="columns")

In [21]:
# Per (language, bug type): number of exact matches, number of rows, and their ratio.
lang_df = (
    df.groupby(["language", "type"])["correct_exact"]
    .agg(["sum", "count"])
    .assign(accuracy=lambda counts: counts["sum"] / counts["count"])
)

print("Exact match accuracy")
lang_df

Exact match accuracy


Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count,accuracy
language,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C++,algorithm,0,1,0.0
Python,algorithm,0,1,0.0


In [22]:
# Per (language, bug type): number of partial matches, number of rows, and their ratio.
lang_df = (
    df.groupby(["language", "type"])["correct_partial"]
    .agg(["sum", "count"])
    .assign(accuracy=lambda counts: counts["sum"] / counts["count"])
)

print("Partial accuracy")
lang_df

Partial accuracy


Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count,accuracy
language,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C++,algorithm,0,1,0.0
Python,algorithm,1,1,1.0


In [23]:
def is_in_interval(i: int, interval: Tuple[int, int]) -> bool:
    """Return True when ``i`` lies in the closed interval ``[i1, i2]``."""
    i1, i2 = interval
    return i1 <= i <= i2


def is_in_intervals(i: int, intervals: List[Tuple[int, int]]) -> bool:
    """Return True when ``i`` lies in at least one of the closed ``intervals``."""
    return any(is_in_interval(i, interval) for interval in intervals)


def _render_char(char: str, highlighted: bool, color: str) -> str:
    """Wrap a single source character in a coloured ``<span>``.

    Spaces and newlines are replaced by visible light-grey markers; every
    other character is HTML-escaped so that ``<``, ``>`` and ``&`` from the
    source are displayed literally instead of being parsed as markup.
    """
    norm_color = "black"
    if char == " ":
        char = "•"
        norm_color = "lightgrey"
    elif char == "\n":
        char = "↵\n"
        norm_color = "lightgrey"
    else:
        char = html.escape(char)
    return f'<span style="color:{color if highlighted else norm_color};">{char}</span>'


def color_source_lines(source_code: str, intervals: List[Tuple[int, int]], color: str='red') -> str:
    """Render ``source_code`` as ``<pre>`` HTML, highlighting whole lines.

    Args:
        source_code: Text to render.
        intervals: Closed ``(first, last)`` ranges of 0-based line indices to
            draw in ``color``.
        color: CSS colour used for highlighted lines.

    Returns:
        An HTML string with one ``<span>`` per character inside ``<pre>``.
    """
    spans = []
    for i, line_str in enumerate(source_code.splitlines(keepends=True)):
        # The highlight decision is per line, so evaluate it once per line.
        highlighted = is_in_intervals(i, intervals)
        spans.extend(_render_char(char, highlighted, color) for char in line_str)

    return "<pre>" + "".join(spans) + "</pre>"


def color_source_chars(source_code: str, intervals: List[Tuple[int, int]], color: str='red') -> str:
    """Render ``source_code`` as ``<pre>`` HTML, highlighting character ranges.

    Args:
        source_code: Text to render.
        intervals: Closed ``(first, last)`` ranges of 0-based character
            indices to draw in ``color``.
        color: CSS colour used for highlighted characters.

    Returns:
        An HTML string with one ``<span>`` per character inside ``<pre>``.
    """
    spans = [
        _render_char(char, is_in_intervals(i, intervals), color)
        for i, char in enumerate(source_code)
    ]
    return "<pre>" + "".join(spans) + "</pre>"

In [24]:
def find_intervals(words: List[str], text: str) -> List[Tuple[int, int]]:
    """Locate every non-overlapping occurrence of each word in ``text``.

    Args:
        words: Substrings to search for; empty strings are ignored.
        text: Text to search in.

    Returns:
        Closed ``(first, last)`` character-index intervals, grouped by word in
        the order of ``words`` and, per word, in order of appearance.
    """
    intervals = []
    for word in words:
        if not word:
            # An empty needle is "found" at the search position itself, so the
            # position never advances and the loop would never terminate.
            continue

        start_index = 0
        while True:
            index = text.find(word, start_index)
            if index == -1:
                break

            # Resume after this match so occurrences never overlap.
            start_index = index + len(word)
            intervals.append((index, start_index - 1))

    return intervals

In [25]:
# (i1, i2) is a half-open line range (the same convention used when slicing
# changed_src[j1:j2] for the bug type), whereas color_source_lines treats its
# intervals as inclusive on both ends. Subtract one from the end so the line
# following the change is not highlighted as well.
df["original_src_html"] = df.apply(
    lambda row: color_source_lines(row["original_src"], [(row["i1"], row["i2"] - 1)], color="red"), 
    axis="columns",
)

# Render each prediction, highlighting every occurrence of a token taken from
# the true error message.
df["src_html"] = df.apply(
    lambda row: color_source_chars(
        row["predicted"],
        find_intervals(word_tokenize(row["error"]), row["predicted"]),
        color="red",
    ),
    axis="columns",
)

In [26]:
# Show up to ten rows whose prediction shares a token with the true error.
correct_df = df[df["correct_partial"]]
for i in range(min(len(correct_df), 10)):
    display(HTML(f"<h2>Example {i}</h2>"))

    display(HTML("<h6>Original Source Code</h6>"))
    display(HTML(correct_df["original_src_html"].iloc[i]))

    display(HTML("<h6>True Error Message</h6>"))
    # The error is plain text, not markup: escape it so messages containing
    # tag-like text (e.g. "<class 'int'>") are shown literally.
    display(HTML(html.escape(str(correct_df["error"].iloc[i]))))

    display(HTML("<h6>OpenAI Prediction</h6>"))
    display(HTML(correct_df["src_html"].iloc[i]))

In [57]:
from bertviz import head_view
from transformers import AutoTokenizer

# Tokenizer for MODEL; `show` uses it to turn a prompt back into token strings.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

def show(pairs_df: pd.DataFrame, attentions: List[List], index: int):
    """Render the bertviz head view for the example at position ``index``.

    The prompt is rebuilt from the row's ``original_src`` via
    ``make_prompt_simple`` and tokenized with the module-level ``tokenizer``
    to obtain the token labels. Only the first entry of the stored attention
    for that example (``attention[:1]``) is passed to ``head_view``.
    """
    source = pairs_df.iloc[index]["original_src"]
    prompt = make_prompt_simple(source)

    # Encode the prompt to ids, then map the ids back to their token strings.
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    token_labels = tokenizer.convert_ids_to_tokens(input_ids[0])

    example_attention = attentions[index]
    return head_view(example_attention[:1], token_labels)

# show(df, attentions, 0)  # Left disabled: rendering the head view is very slow.