## Load Exception Handling data and form df

In [1]:
import os
import pandas as pd
import pickle
import json

# Build one DataFrame from the five eh-{i} pickle pairs.
# SECURITY: pickle.load can execute arbitrary code; only run this against
# eh_data/ files that come from a trusted source.
frames = []

# iterate through all data; the loop index i is recorded as num_try_blocks
for i in range(1, 6):
    # id -> Java source text
    with open(f"eh_data/eh-{i}/id_code_map-{i}", 'rb') as f:
        java_code = pickle.load(f)

    java_code = pd.DataFrame.from_dict(java_code, orient='index', columns=['original_code'])
    java_code.index.name = 'id'

    # id -> (statement_vector, exception_types)
    with open(f"eh_data/eh-{i}/id_meta_map-{i}", 'rb') as f:
        metadata = pickle.load(f)

    metadata = pd.DataFrame.from_dict(metadata, orient='index', columns=['statement_vector', 'exception_types'])
    # turn the stored value's repr into a compact list-of-lists string by
    # dropping whitespace and the numpy "array(... ,dtype=object)" wrapper text
    metadata['exception_types'] = metadata['exception_types'].apply(lambda x: str(x).replace(' ', '').replace('\n','').replace('array(', '').replace(',dtype=object)', ''))
    metadata.index.name = 'id'

    # merge code and metadata on the shared id index
    merged = pd.merge(java_code, metadata, on='id')
    merged['num_try_blocks'] = i

    frames.append(merged)

# concatenate once instead of growing df inside the loop (avoids quadratic
# copying and concatenating onto an empty, all-object frame)
df = pd.concat(frames, axis=0)
df.index.name = 'id'

print(df.shape)
df.head()


(190, 4)


Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1


## Add line numbers to code

In [2]:
# add line numbers to code and store count
def number_code(row):
    """Attach a line-numbered copy of the snippet and its line count to ``row``.

    Reads ``row['original_code']``, strips leading/trailing whitespace and splits
    it on newline characters. Writes two entries back into ``row``:

    - ``numbered_code``: every line prefixed with "<n>. " (n starts at 1) and
      terminated by a newline
    - ``num_lines``: how many lines the stripped snippet has

    Returns the same ``row`` object.
    """
    code_lines = row['original_code'].strip().split('\n')

    row['numbered_code'] = ''.join(
        f"{line_no}. {text}\n" for line_no, text in enumerate(code_lines, start=1)
    )
    row['num_lines'] = len(code_lines)

    return row

# run number_code on every row, which adds the numbered_code and num_lines columns
df = df.apply(number_code, axis=1)

# print the numbered version of the first snippet as a quick sanity check
print(df.iloc[0]['numbered_code'])
df.head()

1. ApiException exception =
2. ApiExceptionFactory.createException(
3. new Exception(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false);
4. mockService.addException(exception);
5. QueueName name = QueueName.of("[PROJECT]", "[LOCATION]", "[QUEUE]");
6. client.resumeQueue(name);
7. Assert.fail("No exception raised");



Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10


## Construct and store prompts


In [3]:
# Build one prompt per row by vectorized string concatenation:
#   fixed instructions + the row's numbered_code + constraints that embed the
#   row's num_lines (cast to str) + a trailing "RESULTS" marker.
# The instructions ask for three Python objects (fixed_snippet, statement_vector,
# exception_types) in the response.
df['prompt'] = "INSTRUCTIONS\nYou are a code reviewer that reviews and patches Java code snippets that may throw one or more exceptions. Analyze the code snippet and fix it by wrapping vulnerable lines into one or more try blocks; do not make any other edits.\n\nYou will capture your results in various Python objects:\n- Store the resultant code snippet as a multi-line string in a Python string: fixed_snippet = '''<fixed code snipped>'''\n- Produce a Python list that acts as a vector where each vector element represents each line in the original code snippet. Each vector element can take the integer label 0, 1, or 2 where 0 is a line not inside a try block, 1 is the first line inside a try block, and 2 is a non-first line inside a try block: statement_vector = [<label>, <label>, ..., <label>]\n- Store the exception type caught by each catch block in a Python list of lists, where each list represents the catch block(s) for one try block as a single try block may have 1 or more catch blocks. Each list will contain the exception type(s) handled for one try block:\nexception_types = [[<exception type>, <exception type>, ..., <exception type>], \n\t...,\n\t[<exception type>, <exception type>, ..., <exception type>]]\n---\nJAVA CODE SNIPPET\n" + df['numbered_code']+ "\n---\nDo not use nested try blocks. The length of statement_vector must equal " + df['num_lines'].astype(str) + ", the number of lines in the original snippet (not the fixed snippet). Count the number of nested lists in exception_types. Count the number of try blocks you inserted. The number of nested lists must equal the number of try blocks inserted. Output your results including fixed_snippet, statement_vector, and exception_types in a single Python code block.\n---\nRESULTS\n"

# print the first prompt in full to eyeball the final layout
print(df.iloc[0]['prompt'])
df.head()

INSTRUCTIONS
You are a code reviewer that reviews and patches Java code snippets that may throw one or more exceptions. Analyze the code snippet and fix it by wrapping vulnerable lines into one or more try blocks; do not make any other edits.

You will capture your results in various Python objects:
- Store the resultant code snippet as a multi-line string in a Python string: fixed_snippet = '''<fixed code snipped>'''
- Produce a Python list that acts as a vector where each vector element represents each line in the original code snippet. Each vector element can take the integer label 0, 1, or 2 where 0 is a line not inside a try block, 1 is the first line inside a try block, and 2 is a non-first line inside a try block: statement_vector = [<label>, <label>, ..., <label>]
- Store the exception type caught by each catch block in a Python list of lists, where each list represents the catch block(s) for one try block as a single try block may have 1 or more catch blocks. Each list will co

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7,INSTRUCTIONS\nYou are a code reviewer that rev...
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8,INSTRUCTIONS\nYou are a code reviewer that rev...
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31,INSTRUCTIONS\nYou are a code reviewer that rev...
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12,INSTRUCTIONS\nYou are a code reviewer that rev...
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10,INSTRUCTIONS\nYou are a code reviewer that rev...


## Generate and store responses

In [None]:
# import openai
# from tqdm.auto import tqdm

# openai.api_key = os.environ['OPENAI_API_KEY']  # key redacted: never commit secrets to the notebook
# tqdm.pandas()

# def gpt_call(row):
#     print(f"Generating response for snippet {row.name}...")
#     try:
#         response = openai.ChatCompletion.create(
#             model='gpt-3.5-turbo-0613',
#             messages=[{
#                 'role': 'system', 'content': row['prompt']
#             }]
#         )
#         response_message = response['choices'][0]['message']['content']
#         print(response_message)
#         print('Done.')
#         return response_message
    
#     except openai.OpenAIError as e:
#         print("Unsuccessful, skipping.")
#         print(response)
#         return -1

# df['response'] = df.progress_apply(gpt_call, axis=1)

In [13]:
import openai
from tqdm.auto import tqdm
import re
import ast
import time

# Read the API key from the environment instead of committing it to the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed in the
# notebook and should be revoked.
# Raises KeyError if OPENAI_API_KEY is not set, so a missing key fails fast here
# rather than on the first API call.
openai.api_key = os.environ['OPENAI_API_KEY']

# register DataFrame.progress_apply (used below to run gpt_call with a progress bar)
tqdm.pandas()

def _parse_response(response_message):
    """Pull fixed_snippet, statement_vector and exception_types out of a response.

    Returns a dict with the keys 'fixed_snippet', 'statement_vector_predict' and
    'exception_types_predict', or None when any of the three assignments is
    missing from the text or its list literal cannot be evaluated.
    """
    fixed_snippet = re.findall(r"fixed_snippet = ('''[\s\S]*''')", response_message)
    statement_vector = re.findall(r'statement_vector = (\[[0-9, ]*\])', response_message)
    # NOTE(review): '.' does not match newlines here, so an exception_types list
    # that the model spreads over several lines is treated as unparsable.
    exception_types = re.findall(r'exception_types = (\[\[.*\]\])', response_message)

    if not (fixed_snippet and statement_vector and exception_types):
        return None

    try:
        return {
            'fixed_snippet': fixed_snippet[0],
            'statement_vector_predict': ast.literal_eval(statement_vector[0].strip()),
            'exception_types_predict': ast.literal_eval(exception_types[0].strip()),
        }
    except (ValueError, TypeError, SyntaxError):
        # the captured text is not a valid Python literal
        return None


def gpt_call(row, max_attempts=100, retry_delay=10):
    """Send ``row['prompt']`` to the chat model and store the parsed results on ``row``.

    The call is repeated until a response yields all three parsable objects or
    ``max_attempts`` is exhausted.

    Parameters
    ----------
    row : mapping with a ``name`` attribute (e.g. a DataFrame row)
        Must contain 'prompt'. ``row.name`` is only used for progress output.
    max_attempts : int, default 100
        Maximum number of API calls made for this row.
    retry_delay : float, default 10
        Seconds to sleep after an attempt that raised an exception.

    Returns
    -------
    The same ``row`` with 'fixed_snippet', 'statement_vector_predict',
    'exception_types_predict', 'response' and 'status' set. On success status is 1;
    after ``max_attempts`` failures status is 0 and the other four are -1.
    The ground-truth 'exception_types' entry is never modified.
    """
    print(f"Generating response for snippet {row.name}...")

    for _ in range(max_attempts):
        try:
            # make api call and get response
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0613',
                messages=[{
                    'role': 'system', 'content': row['prompt']
                }]
            )
            response_message = response['choices'][0]['message']['content']

            # extract results, retry immediately if unparsable
            parsed = _parse_response(response_message)
            if parsed is None:
                print("Unsuccessful, retrying now:")
                print(response_message)
                continue

            row['fixed_snippet'] = parsed['fixed_snippet']
            row['statement_vector_predict'] = parsed['statement_vector_predict']
            row['exception_types_predict'] = parsed['exception_types_predict']

            print('Done.')
            row['response'] = response_message
            row['status'] = 1
            return row

        except Exception as e:
            # deliberate best-effort: any API/transport error is retried after a
            # pause, but the cause is reported instead of being hidden
            print("Unsuccessful, retrying now.")
            print(f"{type(e).__name__}: {e}")
            time.sleep(retry_delay)
            continue

    print('Max attempts reached, skipping.')
    # mark every prediction field with the -1 sentinel; the ground-truth
    # 'exception_types' value is left untouched
    row['fixed_snippet'] = -1
    row['statement_vector_predict'] = -1
    row['exception_types_predict'] = -1
    row['response'] = -1
    row['status'] = 0
    return row

In [14]:
# call gpt_call once per row with a tqdm progress bar; each returned row carries
# the prediction columns (fixed_snippet, statement_vector_predict,
# exception_types_predict, response, status)
df = df.progress_apply(gpt_call, axis=1)
df.head()

  0%|          | 0/190 [00:00<?, ?it/s]

Generating response for snippet d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14...
Done.
Generating response for snippet 62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11...
Done.
Generating response for snippet 750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2...
Done.
Generating response for snippet 9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3...
Done.
Generating response for snippet 2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1...
Done.
Generating response for snippet e2ff6074-bce4-4985-b24e-6ff37cfb63ba_ReferenceTypeWrapper-8...
Done.
Generating response for snippet 3be52b74-5e84-46d7-82a2-cb1678e39234_EnterpriseKnowledgeGraphServiceClientTest-13...
Done.
Generating response for snippet cb1ce5d0-6e5a-43f9-98c7-511bcbf7295a_PlayerManager-1...
Done.
Generating response for snippet 70c73fed-b595-40b8-a52e-bb1fdb45be64_IQPEPHandler-1...
Done.
Generating response for snippet 9843a01c-38f3-46f6-965e-b7ad4687be21_Artif

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\ntry {\n ApiException exception =\n ...,"[1, 1, 1, 1, 1, 1, 2]",[[ApiException]],fixed_snippet = '''\ntry {\n ApiException e...,1
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\ntry {\n StatusRuntimeException excepti...,"[1, 1, 0, 0, 0, 0, 0, 0]",[[Exception]],fixed_snippet = '''\ntry {\n StatusRuntimeE...,1
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\n1. initializeMatchers();\n2. try {\n3. ...,"[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",[[Exception]],fixed_snippet = '''\n1. initializeMatchers();\...,1
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12,INSTRUCTIONS\nYou are a code reviewer that rev...,'''int debugPort;\ntry {\n debugPort = Inte...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]",[[NumberFormatException]],fixed_snippet = '''int debugPort;\ntry {\n ...,1
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10,INSTRUCTIONS\nYou are a code reviewer that rev...,'''JSONArray array = new JSONArray();\n ...,"[0, 1, 1, 1, 1, 1, 1, 2, 2, 2]",[[Exception]],fixed_snippet = '''JSONArray array = new JSONA...,1


In [16]:
# persist prompts, responses and parsed predictions to eh_combined.csv
df.to_csv('eh_combined.csv')

## Check responses

In [92]:
# load dataset from eh_combined.csv, using the 'id' column as the index
# NOTE: list-valued columns are read back as text; evaluate() below parses them
# with ast.literal_eval
df = pd.read_csv('eh_combined.csv', index_col='id')
df.head()

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\ntry {\n ApiException exception =\n ...,"[1, 1, 1, 1, 1, 1, 2]",[['ApiException']],fixed_snippet = '''\ntry {\n ApiException e...,1
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\ntry {\n StatusRuntimeException excepti...,"[1, 1, 0, 0, 0, 0, 0, 0]",[['Exception']],fixed_snippet = '''\ntry {\n StatusRuntimeE...,1
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31,INSTRUCTIONS\nYou are a code reviewer that rev...,'''\n1. initializeMatchers();\n2. try {\n3. ...,"[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",[['Exception']],fixed_snippet = '''\n1. initializeMatchers();\...,1
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12,INSTRUCTIONS\nYou are a code reviewer that rev...,'''int debugPort;\ntry {\n debugPort = Inte...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]",[['NumberFormatException']],fixed_snippet = '''int debugPort;\ntry {\n ...,1
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10,INSTRUCTIONS\nYou are a code reviewer that rev...,'''JSONArray array = new JSONArray();\n ...,"[0, 1, 1, 1, 1, 1, 1, 2, 2, 2]",[['Exception']],fixed_snippet = '''JSONArray array = new JSONA...,1


In [93]:
# summary of df; include='all' reports non-numeric columns (count/unique/top/freq)
# alongside the numeric statistics
df.describe(include='all')

Unnamed: 0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status
count,190,190,190,190.0,190,190.0,190,190,190,190,190,190.0
unique,188,178,118,,188,,188,189,187,95,190,
top,ObjectOutputStream out;\nByteArrayOutputStream...,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],,1. ObjectOutputStream out;\n2. ByteArrayOutput...,,INSTRUCTIONS\nYou are a code reviewer that rev...,'''double[] ar = new double[2];\ntry {\n Ar...,"[1, 0, 1, 0, 1, 0, 1, 0]",[['Exception']],fixed_snippet = '''\ntry {\n ApiException e...,
freq,2,4,7,,2,,2,2,2,53,1,
mean,,,,2.736842,,22.136842,,,,,,1.0
std,,,,1.294764,,14.061927,,,,,,0.0
min,,,,1.0,,2.0,,,,,,1.0
25%,,,,2.0,,10.0,,,,,,1.0
50%,,,,3.0,,18.0,,,,,,1.0
75%,,,,4.0,,32.0,,,,,,1.0


In [94]:
# count cells per column that hold the -1 failure sentinel (bad response)
# After the CSV round-trip a -1 stored in a text column is read back as the
# string '-1', so both spellings have to be matched.
df.isin([-1, '-1']).sum()

original_code               0
statement_vector            0
exception_types             0
num_try_blocks              0
numbered_code               0
num_lines                   0
prompt                      0
fixed_snippet               0
statement_vector_predict    0
exception_types_predict     0
response                    0
status                      0
dtype: int64

In [95]:
# count missing values (None/NaN) per column; a non-zero count would indicate a
# bad or incomplete response
df.isnull().sum()

original_code               0
statement_vector            0
exception_types             0
num_try_blocks              0
numbered_code               0
num_lines                   0
prompt                      0
fixed_snippet               0
statement_vector_predict    0
exception_types_predict     0
response                    0
status                      0
dtype: int64

In [None]:
# import re
# import ast

# # extract results from response text
# def extract_results(row):
#     print(row.name + ':')
#     fixed_snippet = re.findall(r"fixed_snippet = ('''[\s\S]*''')", row['response'])
#     if fixed_snippet:
#         try:
#             row['fixed_snippet'] = fixed_snippet[0]
#             print(fixed_snippet[0])
#         except:
#             row['fixed_snippet'] = -1
#     else:
#         row['fixed_snippet'] = None

#     statement_vector = re.findall(r'statement_vector = (\[[0-9, ]*\])', row['response'])
#     if statement_vector:
#         try:
#             row['statement_vector_predict'] = ast.literal_eval(statement_vector[0].strip())
#             print(statement_vector[0])
#         except:
#             row['statement_vector_predict'] = -1
#     else:
#         row['statement_vector_predict'] = None

#     exception_types = re.findall(r'exception_types = (\[\[.*\]\])', row['response'])
#     if exception_types:
#         try:
#             row['exception_types_predict'] = ast.literal_eval(exception_types[0].strip())
#             print(exception_types[0])
#         except:
#             row['exception_types_predict'] = -1
#     else:
#         row['exception_types_predict'] = None
        
#     return row

# df = df.apply(extract_results, axis=1)

# df.head()

In [96]:
# list rows whose predicted exception types are missing or carry the -1 failure
# sentinel; such rows cannot be scored by evaluate()
# (a -1 in this text column comes back from the CSV as the string '-1')
df[(df['exception_types_predict'].isnull()) | (df['exception_types_predict'].isin([-1, '-1']))]

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


## Evaluate predictions

In [97]:
def evaluate(row):
    """Score one row's predictions against the ground truth and attach the metrics.

    The four inputs ('statement_vector', 'statement_vector_predict',
    'exception_types', 'exception_types_predict') are passed through ``str()``
    and ``ast.literal_eval``, so both list objects and their text form work.

    Entries added to ``row`` (in this order):
      exact_statement_vector            - True when both label vectors are equal
      xstate_vector                     - per-position match after mapping label 1 to 2
      xstate_precision / xstate_recall  - matches / predicted length, matches / actual length
      xstate_recall_frac / xstate_precision_frac - the same ratios as "a/b" text
      xtype_intersection                - set of exception types present in both
      xtype_precision / xtype_recall    - |intersection| / predicted count, / actual count
      xtype_precision_frac / xtype_recall_frac   - the same ratios as "a/b" text

    A zero denominator sets BOTH members of the affected precision/recall pair to -1.

    NOTE(review): positions are paired with zip, so when the vectors differ in
    length only the common prefix is compared.
    NOTE(review): the xtype numerator is a set size while the denominators are
    flattened list lengths, so repeated exception types lower both ratios.

    Returns the same ``row``.
    """
    gold_labels = ast.literal_eval(str(row['statement_vector']))
    guess_labels = ast.literal_eval(str(row['statement_vector_predict']))

    # exact match
    row['exact_statement_vector'] = gold_labels == guess_labels

    def _collapse(label):
        # "first line in try" (1) and "later line in try" (2) both become 2
        return 2 if label == 1 else label

    # normalize statement vectors for partial match -> xstate vector
    actual_xstate = [_collapse(label) for label in gold_labels]
    predicted_xstate = [_collapse(label) for label in guess_labels]
    xstate_vector = [truth == guess for truth, guess in zip(actual_xstate, predicted_xstate)]
    row['xstate_vector'] = xstate_vector
    state_hits = xstate_vector.count(True)

    # xstate precision = # correct / # predicted; recall = # correct / # actual
    try:
        row['xstate_precision'] = state_hits / len(predicted_xstate)
        row['xstate_recall'] = state_hits / len(actual_xstate)
    except ZeroDivisionError:
        row['xstate_precision'] = -1
        row['xstate_recall'] = -1

    row['xstate_recall_frac'] = f"{state_hits}/{len(actual_xstate)}"
    row['xstate_precision_frac'] = f"{state_hits}/{len(predicted_xstate)}"

    # flatten 2d lists -> get xtype vector
    actual_xtype = []
    for group in ast.literal_eval(str(row['exception_types'])):
        actual_xtype.extend(group)
    predicted_xtype = []
    for group in ast.literal_eval(str(row['exception_types_predict'])):
        predicted_xtype.extend(group)

    xtype_intersection = set(actual_xtype) & set(predicted_xtype)
    row['xtype_intersection'] = xtype_intersection
    type_hits = len(xtype_intersection)

    # xtype precision = # correct / # predicted; recall = # correct / # actual
    try:
        row['xtype_precision'] = type_hits / len(predicted_xtype)
        row['xtype_recall'] = type_hits / len(actual_xtype)
    except ZeroDivisionError:
        row['xtype_precision'] = -1
        row['xtype_recall'] = -1

    row['xtype_precision_frac'] = f"{type_hits}/{len(predicted_xtype)}"
    row['xtype_recall_frac'] = f"{type_hits}/{len(actual_xtype)}"

    return row

In [98]:
# score every row; evaluate() adds the exact-match, xstate and xtype metric columns
df = df.apply(evaluate, axis=1)
# show all columns so the newly added metrics are visible in the output
pd.set_option('display.max_columns', None)
# NOTE(review): sample() is called without random_state, so the displayed rows
# change from run to run
df.sample(n=5)

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status,exact_statement_vector,xstate_vector,xstate_precision,xstate_recall,xstate_recall_frac,xstate_precision_frac,xtype_intersection,xtype_precision,xtype_recall,xtype_precision_frac,xtype_recall_frac
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
00fe3f6c-e207-4d7e-ae80-213cb1ff8d50_ModelAttributeServiceTest-1,final ModelAttributeService modelAttributeServ...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[['NoSuchMethodException'],['NoSuchMethodExcep...",3,1. final ModelAttributeService modelAttributeS...,21,INSTRUCTIONS\nYou are a code reviewer that rev...,'''final ModelAttributeService modelAttributeS...,"[1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, ...","[['Exception'], ['Exception']]",fixed_snippet = '''final ModelAttributeService...,1,False,"[False, False, True, False, False, True, True,...",0.619048,0.619048,13/21,13/21,{},0.0,0.0,0/2,0/3
6ed6566e-9498-46b0-8399-8abe1fccbbb4_TestOffsetDateTimeField-1,OffsetDateTimeField field = new OffsetDateTime...,"[0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 2]","[['IllegalArgumentException'],['IllegalArgumen...",3,1. OffsetDateTimeField field = new OffsetDateT...,12,INSTRUCTIONS\nYou are a code reviewer that rev...,'''OffsetDateTimeField field = null;\ntry {\n ...,"[0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0]","[['Exception'], ['Exception'], ['Exception'], ...",fixed_snippet = '''OffsetDateTimeField field =...,1,False,"[True, False, True, True, True, False, True, F...",0.5,0.5,6/12,6/12,{},0.0,0.0,0/4,0/3
5efd29d6-dd86-45e4-99b3-90989c5d45a7_DefaultControlTest-5,int testNo = 0;\nResourceBundle rb = null;\nte...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[['IllegalArgumentException'],['IllegalArgumen...",4,1. int testNo = 0;\n2. ResourceBundle rb = nul...,35,INSTRUCTIONS\nYou are a code reviewer that rev...,'''int testNo = 0;\nResourceBundle rb = null;\...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[['Exception']],fixed_snippet = '''int testNo = 0;\nResourceBu...,1,False,"[False, False, False, False, False, False, Fal...",0.388889,0.4,14/35,14/36,{},0.0,0.0,0/1,0/4
12bdd80c-ccef-4e7e-9dfe-f416383aa85a_Zombies-1,"if (! new File(""/usr/bin/perl"").canExecute() |...","[0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 0, 0, 0, ...","[['IOException'],['IOException'],['IOException']]",3,"1. if (! new File(""/usr/bin/perl"").canExecute(...",18,INSTRUCTIONS\nYou are a code reviewer that rev...,"'''if (! new File(""/usr/bin/perl"").canExecute(...","[0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 0, 0, 0, ...","[['IOException'], ['IOException'], ['IOExcepti...","fixed_snippet = '''if (! new File(""/usr/bin/pe...",1,True,"[True, True, True, True, True, True, True, Tru...",1.0,1.0,18/18,18/18,{IOException},0.25,0.333333,1/4,1/3
116b5c9b-ea6b-45c1-8bac-859f271bdd02_CommonProfilerTestCase-6,if (!isStatus(STATUS_MEASURED)) {\nSystem.err....,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, ...","[['InterruptedException'],['InterruptedExcepti...",3,1. if (!isStatus(STATUS_MEASURED)) {\n2. Syste...,28,INSTRUCTIONS\nYou are a code reviewer that rev...,'''try {\n if (!isStatus(STATUS_MEASURED)) ...,"[0, 1, 1, 1, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, ...","[['Exception'], ['Exception'], ['Exception'], ...",fixed_snippet = '''try {\n if (!isStatus(ST...,1,False,"[True, False, False, False, True, False, False...",0.586207,0.607143,17/28,17/29,{},0.0,0.0,0/15,0/3


In [99]:
# summary stats for all columns, including the metric columns added by evaluate()
# NOTE(review): evaluate() writes -1 for undefined precision/recall, and those
# sentinel values are included in the mean/min reported here
df.describe(include='all')

Unnamed: 0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status,exact_statement_vector,xstate_vector,xstate_precision,xstate_recall,xstate_recall_frac,xstate_precision_frac,xtype_intersection,xtype_precision,xtype_recall,xtype_precision_frac,xtype_recall_frac
count,190,190,190,190.0,190,190.0,190,190,190,190,190,190.0,190,190,190.0,190.0,190,190,190,190.0,190.0,190,190
unique,188,178,118,,188,,188,189,187,95,190,,2,185,,,148,149,27,,,34,18
top,ObjectOutputStream out;\nByteArrayOutputStream...,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],,1. ObjectOutputStream out;\n2. ByteArrayOutput...,,INSTRUCTIONS\nYou are a code reviewer that rev...,'''double[] ar = new double[2];\ntry {\n Ar...,"[1, 0, 1, 0, 1, 0, 1, 0]",[['Exception']],fixed_snippet = '''\ntry {\n ApiException e...,,False,"[True, False, True, False, True, False, True, ...",,,3/7,3/7,{},,,0/1,0/1
freq,2,4,7,,2,,2,2,2,53,1,,189,2,,,3,3,131,,,66,38
mean,,,,2.736842,,22.136842,,,,,,1.0,,,0.538944,0.543978,,,,0.13815,0.113158,,
std,,,,1.294764,,14.061927,,,,,,0.0,,,0.204416,0.2061,,,,0.352426,0.304801,,
min,,,,1.0,,2.0,,,,,,1.0,,,0.0,0.0,,,,-1.0,-1.0,,
25%,,,,2.0,,10.0,,,,,,1.0,,,0.4,0.404236,,,,0.0,0.0,,
50%,,,,3.0,,18.0,,,,,,1.0,,,0.52607,0.533333,,,,0.0,0.0,,
75%,,,,4.0,,32.0,,,,,,1.0,,,0.681427,0.691645,,,,0.25,0.25,,


In [100]:
# check for zero division: count the -1 sentinel per column; in the metric columns
# evaluate() writes -1 when a precision/recall denominator is zero
df.isin([-1]).sum()

original_code               0
statement_vector            0
exception_types             0
num_try_blocks              0
numbered_code               0
num_lines                   0
prompt                      0
fixed_snippet               0
statement_vector_predict    0
exception_types_predict     0
response                    0
status                      0
exact_statement_vector      0
xstate_vector               0
xstate_precision            0
xstate_recall               0
xstate_recall_frac          0
xstate_precision_frac       0
xtype_intersection          0
xtype_precision             5
xtype_recall                5
xtype_precision_frac        0
xtype_recall_frac           0
dtype: int64

In [101]:
# show those with zero division -> caused by empty exception_types_predict lists, actual exception type is generic exception
# select the rows whose xtype_recall holds the -1 sentinel
zero_div = df[df['xtype_recall'].isin([-1])]
# print the raw model response of the fifth such row (position 4)
print(zero_div.iloc[4]['response'])

fixed_snippet = '''
try {
	if (connection == null) {
		return null;
	}
	output.setStatus(HttpResponseStatus.OK);
	output.headers().set(HttpHeaders.Names.CONTENT_TYPE, "text/plain; charset=UTF-8");
	output.headers().set(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
	String sql;
	switch (uri) {
		case "likesong":
			sql = "UPDATE " + MediaTableAudiotracks.TABLE_NAME + " SET LIKESONG = true WHERE " + MediaTableAudiotracks.TABLE_COL_MBID_TRACK + " =?";
			ps.setString(1, content);
			ps.executeUpdate();
			break;
		case "likealbum":
			sql = "MERGE INTO " + MediaTableMusicBrainzReleaseLike.TABLE_NAME + " KEY (MBID_RELEASE) values (?)";
			ps.setString(1, content);
			ps.executeUpdate();
			break;
		case "dislikesong":
			sql = "UPDATE " + MediaTableAudiotracks.TABLE_NAME + " SET LIKESONG = false WHERE " + MediaTableAudiotracks.TABLE_COL_MBID_TRACK + " =?";
			ps.setString(1, content);
			ps.executeUpdate();
			break;
		case "dislikealbum":
			sql = "DELETE FROM " + MediaTab

In [102]:
# Number of missing (null/NaN) values in each column.
df.isna().sum()

original_code               0
statement_vector            0
exception_types             0
num_try_blocks              0
numbered_code               0
num_lines                   0
prompt                      0
fixed_snippet               0
statement_vector_predict    0
exception_types_predict     0
response                    0
status                      0
exact_statement_vector      0
xstate_vector               0
xstate_precision            0
xstate_recall               0
xstate_recall_frac          0
xstate_precision_frac       0
xtype_intersection          0
xtype_precision             0
xtype_recall                0
xtype_precision_frac        0
xtype_recall_frac           0
dtype: int64

In [123]:
# Inspect the snippets whose original_code has very few lines (num_lines <= 3):
# print how many there are, then display the code columns for those rows.
short_snippets = df.loc[
    df['num_lines'] <= 3,
    ['original_code', 'numbered_code', 'num_lines'],
]
print(len(short_snippets))
short_snippets

3


Unnamed: 0_level_0,original_code,numbered_code,num_lines
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
d2f1fd18-a6b2-45f2-8cda-acc7d7f44d0c_CharCollectionRetainAllTester-3,collection.retainAll(target.toRetain);\nString...,1. collection.retainAll(target.toRetain);\n2. ...,3
c023a43e-e922-444c-b6b3-442f4c180abb_Stub-3,ClassLoader loader = Thread.currentThread().ge...,1. ClassLoader loader = Thread.currentThread()...,3
80b9c1fb-4da7-4c8e-9277-bbf635c9d13f_PrivateInvokeTest-1,"return defc.getDeclaredMethod(name, ptypes);\n...","1. return defc.getDeclaredMethod(name, ptypes)...",2


In [118]:
# List every row whose num_lines is at most 1.
df.loc[df['num_lines'].le(1)]

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt,fixed_snippet,statement_vector_predict,exception_types_predict,response,status,exact_statement_vector,xstate_vector,xstate_precision,xstate_recall,xstate_recall_frac,xstate_precision_frac,xtype_intersection,xtype_precision,xtype_recall,xtype_precision_frac,xtype_recall_frac,original_code_line_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1


In [106]:
# Show only the original_code column, kept as a one-column DataFrame for rich display.
df['original_code'].to_frame()

Unnamed: 0_level_0,original_code
id,Unnamed: 1_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...
...,...
96df86ab-cc6e-4f27-a943-479099a47a12_Caa-6,if ((regs = getRegistersFromRTM2())!= null) {\...
02adf5ac-8067-4d74-a58d-55a27f7acd89_Basic-4,"Class<?> clazz = Class.forName(""p.internal.Not..."
45aba3bc-8a26-4f99-a6a1-193b903e413b_MethodMatcherFactoryTest-1,"MethodMatcherFactory.methodMatchers(""org.sonar..."
0bfcdbe7-e849-4e85-a5d2-df263ac0d166_HeapPage-4,int len = BufferPool.PAGE_SIZE;\nByteArrayOutp...


In [104]:
# Persist the combined evaluation frame as CSV; the id index is written as the first column.
df.to_csv(path_or_buf='eh_combined_eval.csv', index=True)