## Load Exception Handling data and form df

In [1]:
import os
import pandas as pd
import pickle
import json

# Column order of the combined frame.
columns = ['original_code', 'statement_vector', 'exception_types', 'num_try_blocks']

# Collect one merged frame per dataset split and concatenate once at the end:
# calling pd.concat inside the loop copies the accumulated frame every time, and
# concatenating onto an empty frame is deprecated in recent pandas releases.
frames = []

# iterate through all data (splits eh-1 .. eh-5)
for i in range(1, 6):
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only run this cell on trusted eh_data files.
    with open(f"eh_data/eh-{i}/id_code_map-{i}", 'rb') as f:
        code_by_id = pickle.load(f)

    # one row per snippet id, holding the raw Java source
    java_code = pd.DataFrame.from_dict(code_by_id, orient='index', columns=['original_code'])
    java_code.index.name = 'id'

    with open(f"eh_data/eh-{i}/id_meta_map-{i}", 'rb') as f:
        meta_by_id = pickle.load(f)

    metadata = pd.DataFrame.from_dict(meta_by_id, orient='index', columns=['statement_vector', 'exception_types'])
    # Normalise the exception types to a compact list-literal string by removing
    # spaces, newlines and the "array(... ,dtype=object)" wrapper text.
    metadata['exception_types'] = metadata['exception_types'].apply(
        lambda x: str(x).replace(' ', '').replace('\n','').replace('array(', '').replace(',dtype=object)', '')
    )
    metadata.index.name = 'id'

    # merge code and metadata on the shared 'id' index
    merged = pd.merge(java_code, metadata, on='id')
    # record which split (1-5) each row was loaded from
    merged['num_try_blocks'] = i

    frames.append(merged)

# single concatenation of all splits, with the columns in their documented order
df = pd.concat(frames, axis=0)[columns]
df.index.name = 'id'

print(df.shape)
df.head()


(190, 4)


Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1


## Add line numbers to code

In [2]:
# add line numbers to code and store count
def number_code(row):
    """Attach a line-numbered copy of the snippet and its line count to ``row``.

    Reads ``row['original_code']``, strips surrounding whitespace and splits it
    on newlines. Writes ``row['numbered_code']`` (each line prefixed with its
    1-based number followed by ``". "`` and terminated by a newline) and
    ``row['num_lines']`` (the number of lines), then returns the same ``row``.
    """
    code_lines = row['original_code'].strip().split('\n')

    # enumerate from 1 so the prefixes match human line numbering
    row['numbered_code'] = ''.join(
        f"{line_no}. {text}\n" for line_no, text in enumerate(code_lines, start=1)
    )
    row['num_lines'] = len(code_lines)

    return row

# Number every snippet row by row (adds 'numbered_code' and 'num_lines').
df = df.apply(number_code, axis='columns')

# Show the numbered form of the first snippet, then preview the frame.
print(df['numbered_code'].iloc[0])
df.head()

1. ApiException exception =
2. ApiExceptionFactory.createException(
3. new Exception(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false);
4. mockService.addException(exception);
5. QueueName name = QueueName.of("[PROJECT]", "[LOCATION]", "[QUEUE]");
6. client.resumeQueue(name);
7. Assert.fail("No exception raised");



Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10


## Construct and store prompts


In [3]:
# Build one prompt per row by string concatenation of three parts:
#   1. fixed instructions describing the three Python objects the model must return
#      (fixed_snippet, statement_vector, exception_types),
#   2. the row's line-numbered Java snippet (df['numbered_code']),
#   3. closing constraints that embed the row's line count (df['num_lines'] cast to str)
#      followed by a "RESULTS" header.
# NOTE(review): the placeholder '<fixed code snipped>' looks like a typo for 'snippet';
# it is left unchanged here because this text is sent verbatim to the model.
df['prompt'] = "INSTRUCTIONS\nYou are a code reviewer that reviews and patches Java code snippets that may throw one or more exceptions. Analyze the code snippet and fix it by wrapping vulnerable lines into one or more try blocks; do not make any other edits.\n\nYou will capture your results in various Python objects:\n- Store the resultant code snippet as a multi-line string in a Python string: fixed_snippet = '''<fixed code snipped>'''\n- Produce a Python list that acts as a vector where each vector element represents each line in the original code snippet. Each vector element can take the integer label 0, 1, or 2 where 0 is a line not inside a try block, 1 is the first line inside a try block, and 2 is a non-first line inside a try block: statement_vector = [<label>, <label>, ..., <label>]\n- Store the exception type caught by each catch block in a Python list of lists, where each list represents the catch block(s) for one try block as a single try block may have 1 or more catch blocks. Each list will contain the exception type(s) handled for one try block:\nexception_types = [[<exception type>, <exception type>, ..., <exception type>], \n\t...,\n\t[<exception type>, <exception type>, ..., <exception type>]]\n---\nJAVA CODE SNIPPET\n" + df['numbered_code']+ "\n---\nDo not use nested try blocks. The length of statement_vector must equal " + df['num_lines'].astype(str) + ", the number of lines in the original snippet (not the fixed snippet). Count the number of nested lists in exception_types. Count the number of try blocks you inserted. The number of nested lists must equal the number of try blocks inserted. Output your results including fixed_snippet, statement_vector, and exception_types in a single Python code block.\n---\nRESULTS\n"

# Print the full prompt generated for the first row, then preview the frame.
print(df.iloc[0]['prompt'])
df.head()

INSTRUCTIONS
You are a code reviewer that reviews and patches Java code snippets that may throw one or more exceptions. Analyze the code snippet and fix it by wrapping vulnerable lines into one or more try blocks; do not make any other edits.

You will capture your results in various Python objects:
- Store the resultant code snippet as a multi-line string in a Python string: fixed_snippet = '''<fixed code snipped>'''
- Produce a Python list that acts as a vector where each vector element represents each line in the original code snippet. Each vector element can take the integer label 0, 1, or 2 where 0 is a line not inside a try block, 1 is the first line inside a try block, and 2 is a non-first line inside a try block: statement_vector = [<label>, <label>, ..., <label>]
- Store the exception type caught by each catch block in a Python list of lists, where each list represents the catch block(s) for one try block as a single try block may have 1 or more catch blocks. Each list will co

Unnamed: 0_level_0,original_code,statement_vector,exception_types,num_try_blocks,numbered_code,num_lines,prompt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14,ApiException exception =\nApiExceptionFactory....,"[0, 0, 0, 0, 1, 2, 2]",[['InvalidArgumentException']],1,1. ApiException exception =\n2. ApiExceptionFa...,7,INSTRUCTIONS\nYou are a code reviewer that rev...
62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11,StatusRuntimeException exception = new StatusR...,"[0, 0, 1, 2, 2, 2, 2, 2]",[['ExecutionException']],1,1. StatusRuntimeException exception = new Stat...,8,INSTRUCTIONS\nYou are a code reviewer that rev...
750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2,initializeMatchers();\nString useragentString ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",[['RuntimeException']],1,1. initializeMatchers();\n2. String useragentS...,31,INSTRUCTIONS\nYou are a code reviewer that rev...
9ac700f2-eb72-49f7-ac6b-b8efa096d6c1_GlassfishInstance-3,int debugPort;\ndebugPort = Integer.parseInt(\...,"[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0]",[['NumberFormatException']],1,1. int debugPort;\n2. debugPort = Integer.pars...,12,INSTRUCTIONS\nYou are a code reviewer that rev...
2464d5a3-e3aa-4a92-8683-4f9893f8b206_WebViewMapFragment-1,JSONArray array = new JSONArray();\nfor (LatLn...,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2]",[['JSONException']],1,1. JSONArray array = new JSONArray();\n2. for ...,10,INSTRUCTIONS\nYou are a code reviewer that rev...


## Generate and store responses

In [None]:
# import openai
# from tqdm.auto import tqdm

# openai.api_key = os.environ['OPENAI_API_KEY']  # key redacted: never commit credentials, even in comments
# tqdm.pandas()

# def gpt_call(row):
#     print(f"Generating response for snippet {row.name}...")
#     try:
#         response = openai.ChatCompletion.create(
#             model='gpt-3.5-turbo-0613',
#             messages=[{
#                 'role': 'system', 'content': row['prompt']
#             }]
#         )
#         response_message = response['choices'][0]['message']['content']
#         print(response_message)
#         print('Done.')
#         return response_message
    
#     except openai.OpenAIError as e:
#         print("Unsuccessful, skipping.")
#         print(response)
#         return -1

# df['response'] = df.progress_apply(gpt_call, axis=1)

In [7]:
import openai
from tqdm.auto import tqdm
import re
import ast

# Read the OpenAI key from the environment instead of committing it to the notebook;
# a missing variable fails fast with KeyError before any request is made.
# NOTE(review): the key that was previously hard-coded here has been exposed and should be revoked.
openai.api_key = os.environ['OPENAI_API_KEY']
# Register tqdm with pandas so DataFrame.progress_apply is available below.
tqdm.pandas()

def _parse_response(response_message):
    """Extract ``(fixed_snippet, statement_vector, exception_types)`` from a model reply.

    ``fixed_snippet`` is returned as the raw matched text (triple quotes included);
    the other two are evaluated as Python literals. Returns ``None`` when the reply
    is not text, when any of the three assignments is missing, or when a matched
    literal cannot be evaluated.
    """
    if not isinstance(response_message, str):
        return None

    fixed_snippet = re.findall(r"fixed_snippet = ('''[\s\S]*''')", response_message)
    statement_vector = re.findall(r'statement_vector = (\[[0-9, ]*\])', response_message)
    exception_types = re.findall(r'exception_types = (\[\[.*\]\])', response_message)
    if not (fixed_snippet and statement_vector and exception_types):
        return None

    try:
        return (
            fixed_snippet[0],
            ast.literal_eval(statement_vector[0].strip()),
            ast.literal_eval(exception_types[0].strip()),
        )
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # the exceptions ast.literal_eval raises on malformed or oversized input
        return None


def gpt_call(row, max_attempts=100):
    """Ask the chat model to patch one snippet and store the parsed results on ``row``.

    Sends ``row['prompt']`` as a single system message and retries, up to
    ``max_attempts`` times, whenever the API raises ``openai.OpenAIError`` or the
    reply cannot be parsed.

    On success sets ``fixed_snippet``, ``statement_vector_predict``,
    ``exception_types_predict``, ``response`` and ``status = 1``.
    After ``max_attempts`` failures sets those four result fields to ``-1`` and
    ``status = 0``. The ground-truth ``exception_types`` field is never modified.

    Args:
        row: mapping-like row with a ``name`` attribute and a ``'prompt'`` entry.
        max_attempts: maximum number of API calls for this row (default 100).

    Returns:
        The same ``row`` object with the fields above added.
    """
    print(f"Generating response for snippet {row.name}...")

    for _ in range(max_attempts):
        try:
            # make api call and get response
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0613',
                messages=[{
                    'role': 'system', 'content': row['prompt']
                }]
            )
            response_message = response['choices'][0]['message']['content']
        except openai.OpenAIError as e:
            # Report the error itself: no response object exists when the call failed.
            print("Unsuccessful, retrying now.")
            print(e)
            continue

        # extract results, retry if unparsable
        parsed = _parse_response(response_message)
        if parsed is None:
            print("Unsuccessful, retrying now:")
            print(response_message)
            continue

        # Only write to the row once all three objects parsed, so a failed
        # attempt never leaves partial results behind.
        row['fixed_snippet'], row['statement_vector_predict'], row['exception_types_predict'] = parsed

        print('Done.')
        row['response'] = response_message
        row['status'] = 1
        return row

    print('Max attempts reached, skipping.')
    row['response'] = -1
    row['fixed_snippet'] = -1
    row['statement_vector_predict'] = -1
    # write the sentinel to the prediction column, not the ground-truth 'exception_types'
    row['exception_types_predict'] = -1
    row['status'] = 0
    return row

In [8]:
# Call gpt_call once per row (axis=1), with a tqdm progress bar, and keep the
# returned rows, which carry the parsed model output and a status flag.
df = df.progress_apply(gpt_call, axis=1)
df.head()

  0%|          | 0/190 [00:00<?, ?it/s]

Generating response for snippet d2c60ca4-ed25-45d0-89d5-4fbcba8ee6d5_CloudTasksClientHttpJsonTest-14...
Done.
Generating response for snippet 62e25ed1-2776-4606-b4bb-0cd605002639_DataFusionClientTest-11...
Done.
Generating response for snippet 750e00dd-5763-4fc9-8182-79ed290e26b6_UserAgentStringMatchMaker-2...
Unsuccessful, retrying now:
fixed_snippet = '''initializeMatchers();
String useragentString = userAgent.getUserAgentString();
try {
    if (useragentString!= null && useragentString.length() > userAgentMaxLength) {
        setAsHacker(userAgent, 100);
        userAgent.setForced(HACKER_ATTACK_VECTOR, "Buffer overflow", 100);
        return hardCodedPostProcessing(userAgent);
    }
} catch (Exception e) {
    // Handle exception
}
synchronized (this) {
    reset();
    try {
        if (userAgent.isDebug()) {
            for (Matcher matcher : allMatchers) {
                matcher.setVerboseTemporarily(true);
            }
        }
    } catch (Exception e) {
        // Handle e

In [None]:
# Persist the frame (prompts, responses and parsed predictions) to eh_combined.csv.
df.to_csv('eh_combined.csv')

## Check responses

In [None]:
# Reload the saved results, using the 'id' column as the index.
# NOTE(review): list-valued columns presumably come back as their string form after
# the CSV round trip — confirm before comparing them to Python lists.
df = pd.read_csv('eh_combined.csv', index_col='id')
df.head()

In [None]:
# check for bad responses: gpt_call stores the sentinel -1 when every attempt failed.
# Compare as text so the check works both in memory and after the CSV round trip,
# where the sentinel in this text column is read back as the string '-1'.
df[df['response'].astype(str) == '-1']

In [None]:
# import re
# import ast

# # extract results from response text
# def extract_results(row):
#     print(row.name + ':')
#     fixed_snippet = re.findall(r"fixed_snippet = ('''[\s\S]*''')", row['response'])
#     if fixed_snippet:
#         try:
#             row['fixed_snippet'] = fixed_snippet[0]
#             print(fixed_snippet[0])
#         except:
#             row['fixed_snippet'] = -1
#     else:
#         row['fixed_snippet'] = None

#     statement_vector = re.findall(r'statement_vector = (\[[0-9, ]*\])', row['response'])
#     if statement_vector:
#         try:
#             row['statement_vector_predict'] = ast.literal_eval(statement_vector[0].strip())
#             print(statement_vector[0])
#         except:
#             row['statement_vector_predict'] = -1
#     else:
#         row['statement_vector_predict'] = None

#     exception_types = re.findall(r'exception_types = (\[\[.*\]\])', row['response'])
#     if exception_types:
#         try:
#             row['exception_types_predict'] = ast.literal_eval(exception_types[0].strip())
#             print(exception_types[0])
#         except:
#             row['exception_types_predict'] = -1
#     else:
#         row['exception_types_predict'] = None
        
#     return row

# df = df.apply(extract_results, axis=1)

# df.head()

In [None]:
# check for erroneous fixed_snippet: missing values or the -1 failure sentinel.
# The sentinel is compared as text so it is also found after reloading from CSV,
# where it is read back as the string '-1'.
df[(df['fixed_snippet'].isnull()) | (df['fixed_snippet'].astype(str) == '-1')]

In [None]:
# check for erroneous statement_vector_predict: missing values or the -1 failure sentinel.
# The sentinel is compared as text so it is also found after reloading from CSV,
# where it is read back as the string '-1'.
df[(df['statement_vector_predict'].isnull()) | (df['statement_vector_predict'].astype(str) == '-1')].head()

In [None]:
# check for erroneous exception_types_predict: missing values or the -1 failure sentinel.
# The sentinel is compared as text so it is also found after reloading from CSV,
# where it is read back as the string '-1'.
df[(df['exception_types_predict'].isnull()) | (df['exception_types_predict'].astype(str) == '-1')]

## Evaluate predictions

In [None]:
# NOTE(review): no cell visible in this notebook creates an 'in' column, so this lookup
# presumably raises KeyError — looks like leftover scratch work; confirm and remove.
df['in']

In [None]:
# Scratch check that list.count(True) returns the number of True entries (3 for this list).
[True, True, False, True].count(True)

In [None]:
def _as_list(value):
    """Return ``value`` as a list, or ``None`` if it is not a list/tuple or a literal of one.

    Accepts an actual list/tuple or its string form (as produced by a CSV round
    trip). Failure sentinels such as ``-1`` or NaN yield ``None``.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    try:
        parsed = ast.literal_eval(str(value))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return None
    return list(parsed) if isinstance(parsed, (list, tuple)) else None


def _ratio(numerator, denominator):
    """Divide, returning 0.0 instead of raising when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def evaluate(row):
    """Score one row's predictions against its ground truth and store the metrics on ``row``.

    Reads ``statement_vector`` / ``statement_vector_predict`` and
    ``exception_types`` / ``exception_types_predict`` (lists or their string
    form). A value that cannot be read as a list (for example the ``-1`` failure
    sentinel or NaN) is treated as an empty list, so failed rows score 0 instead
    of raising.

    Writes ``exact_statement_vector``, ``xstate_vector``, ``xstate_precision``,
    ``xstate_precision_frac``, ``xstate_recall``, ``xstate_recall_frac``,
    ``xtype_intersection``, ``xtype_precision``, ``xtype_precision_frac``,
    ``xtype_recall`` and ``xtype_recall_frac``, then returns the same ``row``.
    """
    actual_vector = _as_list(row['statement_vector'])
    predicted_vector = _as_list(row['statement_vector_predict'])

    # exact match (an unreadable vector on either side never counts as a match)
    row['exact_statement_vector'] = (
        actual_vector is not None
        and predicted_vector is not None
        and actual_vector == predicted_vector
    )
    actual_vector = actual_vector or []
    predicted_vector = predicted_vector or []

    # normalize statement vectors for partial match -> xstate vector
    # (label 1 "first line in try" is folded into 2 "inside try")
    actual_xstate = [2 if label == 1 else label for label in actual_vector]
    predicted_xstate = [2 if label == 1 else label for label in predicted_vector]
    # zip stops at the shorter vector, so extra lines on either side are never counted as correct
    xstate_vector = [x == y for x, y in zip(actual_xstate, predicted_xstate)]
    row['xstate_vector'] = xstate_vector
    xstate_correct = xstate_vector.count(True)

    # xstate precision = # correct / # predicted; recall = # correct / # actual
    row['xstate_precision'] = _ratio(xstate_correct, len(predicted_xstate))
    row['xstate_precision_frac'] = str(xstate_correct) + '/' + str(len(predicted_xstate))
    row['xstate_recall'] = _ratio(xstate_correct, len(actual_xstate))
    row['xstate_recall_frac'] = str(xstate_correct) + '/' + str(len(actual_xstate))

    # flatten 2d lists -> get xtype vector
    actual_xtype = [element for sublist in (_as_list(row['exception_types']) or []) for element in sublist]
    predicted_xtype = [element for sublist in (_as_list(row['exception_types_predict']) or []) for element in sublist]
    xtype_intersection = set(actual_xtype).intersection(set(predicted_xtype))
    row['xtype_intersection'] = xtype_intersection

    # xtype precision = # correct / # predicted; recall = # correct / # actual
    row['xtype_precision'] = _ratio(len(xtype_intersection), len(predicted_xtype))
    row['xtype_precision_frac'] = str(len(xtype_intersection)) + '/' + str(len(predicted_xtype))
    row['xtype_recall'] = _ratio(len(xtype_intersection), len(actual_xtype))
    row['xtype_recall_frac'] = str(len(xtype_intersection)) + '/' + str(len(actual_xtype))

    return row

# Score every row, lift the column display limit so all metric columns are
# visible, and show five randomly chosen rows.
df = df.apply(evaluate, axis='columns')
pd.options.display.max_columns = None
df.sample(5)