This notebook provides the code to run the compilability test for a given test file. Each generated code snippet is compiled and then executed; it counts as valid only if the Python interpreter raises no error, whether a syntax error at compile time or an exception at run time.

In [1]:
import csv

# Loading Test File

In [2]:
# specify the path to the CSV file containing the test data
test_file = 'path/to/your/test_data.csv'  # Update this path to your CSV file with the test set predictions

# specify the setting: 'K-Fold' or 'OOD'
setting = 'OOD' # 'K-Fold' or 'OOD'

if not test_file:
    raise ValueError("Please specify the path to the test CSV file.")
if setting not in ['K-Fold', 'OOD']:
    raise ValueError("Setting must be either 'K-Fold' or 'OOD'.")

# Read the CSV file into a list with one dict per usable row.
# Expected column layout:
#   K-Fold: text, ground truth code, generated code  (3 columns)
#   OOD:    text, generated code                     (2 columns)
data = []
# newline='' lets the csv module handle line endings itself, so quoted fields
# that contain newlines (multi-line code) are parsed as a single cell.
with open(test_file, 'r', newline='') as f:
    reader = csv.reader(f)
    # Skip the first row (assumed to be the header). The default keeps an
    # empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if len(row) < 2:
            print(f"Skipping row with insufficient data: {row}")
            continue
        if setting == 'K-Fold' and len(row) < 3:
            print(f"Skipping row with insufficient data for K-Fold: {row}")
            continue
        if setting == 'K-Fold':
            # Append (not rebind): `data` must stay a list of row dicts so the
            # later cells can index it by position.
            data.append({'text': row[0], 'ground truth code': row[1], 'generated code': row[2]})
        elif setting == 'OOD':
            data.append({'text': row[0], 'generated code': row[1]})

# Compilability Test

In [3]:
from class_structure import *

# Check whether the generated code is a valid python code
def is_valid_python_code(code):
    """Return True if ``code`` compiles and executes without raising.

    Args:
        code: Python source to check.

    Returns:
        True when both ``compile`` and ``exec`` finish without an exception;
        False when either raises any ``Exception`` (syntax errors included).
    """
    # Run the snippet in ONE namespace used as both globals and locals.
    # With the no-argument form of exec() inside a function, top-level names
    # defined by the snippet (functions, imports) are stored in a separate
    # locals mapping and are invisible to the snippet's own functions, which
    # makes perfectly valid code fail with NameError. Copying the module
    # globals keeps the names already available in this module visible to the
    # snippet while preventing one snippet's definitions from leaking into the
    # next one.
    namespace = dict(globals())
    try:
        code_object = compile(code, '<string>', 'exec')
        # NOTE(security): this executes generated code in-process with full
        # interpreter privileges. Only run it on trusted input or inside a
        # sandboxed environment.
        exec(code_object, namespace)
        return True
    except Exception:
        # SyntaxError is a subclass of Exception, so one clause covers both
        # compile-time and run-time failures.
        return False
    
def preprocess_code(code):
    """Strip Markdown code-fence markers from around a generated snippet.

    An opening fence (three backticks, optionally followed by ``python``) and
    a closing fence are removed, along with the whitespace next to them.
    Whitespace before the opening fence or after the closing fence does not
    prevent the fence from being recognised. Input without any fence is
    returned unchanged.

    Args:
        code: The raw generated code string.

    Returns:
        The code with surrounding fence markers removed.
    """
    fence = "```"
    python_fence = fence + "python"

    # Look for the opening fence on a whitespace-trimmed view so that a
    # leading newline or space in the cell does not hide it.
    trimmed = code.strip()
    if trimmed.startswith(python_fence):
        code = trimmed[len(python_fence):].strip()
    elif trimmed.startswith(fence):
        code = trimmed[len(fence):].strip()

    # Remove a closing fence even when trailing whitespace follows it.
    right_trimmed = code.rstrip()
    if right_trimmed.endswith(fence):
        code = right_trimmed[:-len(fence)].strip()

    return code

# Call the function for each element in data to check whether the generated code is a valid python code
def check_valid_python_code(data):
    """Count how many entries in ``data`` hold generated code that runs cleanly.

    Each entry is read through its 'generated code' key. Entries with empty
    code are reported and skipped; every other entry is cleaned with
    ``preprocess_code`` and checked with ``is_valid_python_code``, and a
    one-line verdict is printed per segment (segments are numbered from 1).

    Args:
        data: Sequence of dicts, each with a 'generated code' entry.

    Returns:
        The number of segments whose code was judged valid.
    """
    valid_total = 0
    for position, entry in enumerate(data, start=1):
        snippet = entry['generated code']
        if not snippet:
            print(f"No generated code found for segment {position}")
            continue

        # Drop any Markdown fence markers before checking the snippet
        cleaned = preprocess_code(snippet)

        if is_valid_python_code(cleaned):
            print(f"Generated code for segment {position} is a valid python code")
            valid_total += 1
        else:
            print(f"Generated code for segment {position} is not a valid python code")
        # Blank separator after every verdict, valid or not
        print("\n")
    return valid_total

# Run the validity check over every loaded segment and report the totals
valid_code = check_valid_python_code(data)
total_segments = len(data)

print(f"Number of valid code segments: {valid_code}")
print(f"Total segments checked: {total_segments}")
# Guard the ratio: when no segments were loaded (empty file, or every row was
# skipped) the division would raise ZeroDivisionError.
if total_segments:
    print(f"Percentage of valid code segments: {valid_code / total_segments * 100:.2f}%")
else:
    print("Percentage of valid code segments: N/A (no segments were loaded)")


Generated code for segment 1 is a valid python code


Generated code for segment 2 is a valid python code


Generated code for segment 3 is a valid python code


Generated code for segment 4 is a valid python code


Generated code for segment 5 is a valid python code


Generated code for segment 6 is a valid python code


Generated code for segment 7 is a valid python code


Generated code for segment 8 is a valid python code


Generated code for segment 9 is a valid python code


Number of valid code segments: 9
Total segments checked: 9
Percentage of valid code segments: 100.00%
