# OpenAI-GPT predicted solutions

In [1]:
import html
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML
from nltk.tokenize import word_tokenize

# Input directory, expressed relative to the current working directory.
GENERATED_ROOT_PATH = os.path.join("..", "input", "bugnet")
# Path to generated_pairs.csv inside the input directory.
GENERATED_PAIRS_PATH = os.path.join(GENERATED_ROOT_PATH, "generated_pairs.csv")
# Path to the OpenAI-GPT results CSV that the cells below load and evaluate.
OPENAI_GPT_PATH = os.path.join(GENERATED_ROOT_PATH, "openai-gpt_description_results.csv")

In [2]:
# keep_default_na=False stops pandas from turning empty cells (and strings such
# as "NA") into NaN, so those cells are loaded as plain strings instead.
df = pd.read_csv(OPENAI_GPT_PATH, keep_default_na=False)

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,problem_id,language,original_status,original_src,changed_src,change,i1,i2,j1,j2,error,stderr,predicted
0,p00001,C++,Runtime Error,#include <algorithm>\n#include <cstdio>\n\nusi...,#include <algorithm>\n#include <cstdio>\n\nusi...,replace,11,12,11,12,-11,,What is the bug that can happen in the given c...
1,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <bitset>\n#incl...,#include <algorithm>\n#include <bitset>\n#incl...,replace,35,38,35,36,TLE,,What is the bug that can happen in the given c...
2,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <iostream>\n#in...,#include <algorithm>\n#include <iostream>\n#in...,replace,12,13,12,13,TLE,,What is the bug that can happen in the given c...
3,p00001,C++,Runtime Error,#include <algorithm>\n#include <iostream>\nusi...,#include <algorithm>\n#include <iostream>\nusi...,replace,9,10,9,10,-11,,What is the bug that can happen in the given c...
4,p00001,C++,Time Limit Exceeded,#include <algorithm>\n#include <iostream>\n\nu...,#include <algorithm>\n#include <iostream>\n\nu...,replace,9,14,9,10,TLE,,What is the bug that can happen in the given c...


In [3]:
# Exact match: a row is correct only when the prediction for THAT row equals its
# true error label. `Series.isin` would instead test membership in the whole
# `predicted` column, crediting a row whenever any other row's prediction
# happened to equal its label.
df["correct_exact"] = df["error"] == df["predicted"]

correct = df["correct_exact"].sum()
print(f"The exact accuracy of the openai model is {correct / len(df)}")

The exact accuracy of the openai model is 0.0


In [4]:
def check_if_correct(row: pd.Series) -> bool:
    """Return True when any token of the true error occurs in the prediction.

    The row's ``error`` value is split with ``word_tokenize``; the row counts as
    a partial match as soon as one of the resulting tokens is found inside the
    row's ``predicted`` text. The check is a case-sensitive substring test, and
    an error that yields no tokens never matches.

    NOTE(review): because this is a substring test, very short tokens (for
    example punctuation produced by the tokenizer) can match incidentally --
    confirm this is the intended notion of "partial" correctness.
    """
    prediction_text = row["predicted"]

    for token in word_tokenize(row["error"]):
        if token in prediction_text:
            return True
    return False

# Flag every row whose prediction shares at least one token with its true error.
df["correct_partial"] = df.apply(check_if_correct, axis=1)

# Share of rows flagged as partially correct.
correct = df["correct_partial"].sum()
print(f"The partial accuracy of the openai model is {correct / len(df)}")

The partial accuracy of the openai model is 0.5


In [5]:
def get_bug_type(row: pd.Series) -> str:
    """Label the changed lines of a row as "input", "output" or "algorithm".

    The lines ``j1:j2`` of ``changed_src`` are joined and searched for
    language-specific markers. Input markers are checked before output markers;
    when neither kind is present the change is labelled "algorithm".

    Raises:
        NotImplementedError: if ``language`` is neither "Python" nor "C++".
    """
    changed_lines = row["changed_src"].splitlines()
    snippet = "\n".join(changed_lines[row["j1"] : row["j2"]])
    language = row["language"]

    # Substrings whose presence marks the snippet as I/O related, per language.
    if language == "Python":
        input_markers, output_markers = ("input",), ("print",)
    elif language == "C++":
        input_markers, output_markers = ("cin", "scanf"), ("cout", "printf")
    else:
        raise NotImplementedError(f"{language} not implemented yet")

    if any(marker in snippet for marker in input_markers):
        return "input"
    if any(marker in snippet for marker in output_markers):
        return "output"
    return "algorithm"

# Classify the changed lines of every row as "input", "output" or "algorithm".
df["type"] = df.apply(get_bug_type, axis=1)

In [6]:
# For each (language, bug type) group: number of exact hits, group size, and
# the resulting hit rate.
lang_df = (
    df.groupby(["language", "type"])["correct_exact"]
    .agg(["sum", "count"])
    .assign(accuracy=lambda stats: stats["sum"] / stats["count"])
)

print("Exact match accuracy")
lang_df

Exact match accuracy


Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count,accuracy
language,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C++,algorithm,0,3,0.0
C++,input,0,1,0.0
C++,output,0,1,0.0
Python,algorithm,0,2,0.0
Python,input,0,3,0.0


In [7]:
# For each (language, bug type) group: number of partial hits, group size, and
# the resulting hit rate.
lang_df = (
    df.groupby(["language", "type"])["correct_partial"]
    .agg(["sum", "count"])
    .assign(accuracy=lambda stats: stats["sum"] / stats["count"])
)

print("Partial accuracy")
lang_df

Partial accuracy


Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count,accuracy
language,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C++,algorithm,0,3,0.0
C++,input,0,1,0.0
C++,output,0,1,0.0
Python,algorithm,2,2,1.0
Python,input,3,3,1.0


In [8]:
def color_source(source_code: str, i1: int, i2: int, color: str='red') -> str:
    """Render source code as an HTML ``<pre>`` block with some lines colored.

    Every character is wrapped in its own ``<span>``. Spaces are shown as "•"
    and newlines as "↵" (both light grey) so whitespace changes are visible;
    all other characters are black. Characters on highlighted lines use
    ``color`` instead.

    Args:
        source_code: Text to render.
        i1: Zero-based index of the first highlighted line.
        i2: Zero-based index one past the last highlighted line. The range is
            half-open, ``[i1, i2)``, matching how ``get_bug_type`` slices
            ``j1:j2``; an empty range (for example ``-1, -1``) highlights
            nothing.
        color: CSS color used for highlighted lines.

    Returns:
        The HTML markup as a string.
    """
    # Collect the spans in a list and join once, instead of growing a string
    # with += for every character.
    pieces = []
    for i, line_str in enumerate(source_code.splitlines(keepends=True)):
        highlighted = i1 <= i < i2
        for char in line_str:
            if char == ' ':
                shown, norm_color = "•", 'lightgrey'
            elif char == '\n':
                shown, norm_color = "↵\n", 'lightgrey'
            else:
                # Escape "<", ">" and "&" so source text is displayed literally
                # rather than being parsed as markup.
                shown, norm_color = html.escape(char, quote=False), 'black'
            span_color = color if highlighted else norm_color
            pieces.append(f'<span style="color:{span_color};">{shown}</span>')

    return "<pre>" + "".join(pieces) + "</pre>"

In [9]:
# Original submission, with the span given by i1/i2 highlighted in red.
df["original_src_html"] = df.apply(
    lambda r: color_source(r["original_src"], r["i1"], r["i2"], color="red"),
    axis=1,
)

# Changed submission, with the span given by j1/j2 highlighted in green.
df["changed_src_html"] = df.apply(
    lambda r: color_source(r["changed_src"], r["j1"], r["j2"], color="green"),
    axis=1,
)

# Model prediction rendered with the same whitespace markers; the bounds
# -1, -1 match no line index, so nothing is highlighted.
df["src_html"] = df.apply(
    lambda r: color_source(r["predicted"], -1, -1, color="green"),
    axis=1,
)

In [12]:
# Upper bound on how many partially-correct examples are rendered.
MAX_EXAMPLES = 10

correct_df = df[df["correct_partial"]]
for i in range(min(len(correct_df), MAX_EXAMPLES)):
    display(HTML(f"<h2>Example {i}</h2>"))

    display(HTML("<h6>Original Source Code</h6>"))
    display(HTML(correct_df["original_src_html"].iloc[i]))

    display(HTML("<h6>True Error Message</h6>"))
    # The error label is plain text, not markup: escape it so characters such
    # as "<" or "&" are shown literally instead of being parsed as HTML.
    display(HTML(html.escape(str(correct_df["error"].iloc[i]))))

    display(HTML("<h6>OpenAI Prediction</h6>"))
    display(HTML(correct_df["src_html"].iloc[i]))