In [1]:
import logging
import os
import random
import shutil
import requests
import re
import pandas as pd
from typing import List, Tuple
import evaluation_framework as eval



INFO:evaluation_framework:Finding Java files in ../kestra
INFO:evaluation_framework:Splitting Java files into train and test sets
INFO:evaluation_framework:Copying train and test files to output directory


In [21]:
def _find_java_files(directory: str) -> List[str]:
    """
    Search for Java (.java) files within the specified directory and its subdirectories.

    :param directory: The directory to start the search from.
    :return: A list of paths to Java files found.
    """
    java_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.java'):
                java_files.append(os.path.join(root, file))
    return java_files

In [26]:
def _split_train_test(java_files: List[str], train_ratio=0.8, seed=0) -> Tuple[List[str], List[str]]:
    """
    Randomly split a list of Java files into training and test sets.

    :param java_files: A list of file paths to Java (.java) files to be split.
    :param train_ratio: The ratio of files to include in the training set (default is 0.8).
    :param seed: Seed for random number generation to ensure reproducibility (default is 0).
    :return: A tuple containing two lists: the training files and the test files.
    """

    random.seed(seed)
    random.shuffle(java_files)
    
    split_index = int(train_ratio * len(java_files))
    
    train_files = java_files[:split_index]
    test_files = java_files[split_index:]

    return train_files, test_files

In [27]:
def copy_files_to_directory(files: List[str], source_directory: str, destination_directory: str, new_directory_name: str):
    """
    Copy a list of files to a destination directory, preserving their relative structure (TODO should it be relative or in one directory).

    :param files: A list of file paths to be copied.
    :param source_directory: The base directory where the source files are located.
    :param destination_directory: The base directory where the copied files will be placed.
    :param new_directory_name: (Optional) The name of the subdirectory within the destination directory to copy the files to.
    """

    if destination_directory is None:
        destination_directory = os.getcwd()

    destination_directory = os.path.join(destination_directory, new_directory_name)
    os.makedirs(destination_directory, exist_ok=True)

    for source_file in files:
        file_name = os.path.relpath(source_file, source_directory)
        destination_path = os.path.join(destination_directory, file_name)
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copy(source_file, destination_path)

In [28]:
#Need to edit so that it also clears whatever was in test or train folders before adding more files. 
def split_train_test(code_base_directory: str, train_ratio=0.8, output_directory=None, seed=0):
    """
    Given a directory (TODO repository, zip, url)
    Scan it for .java files (TODO a specified file type)
    Randomly split the .java files into train and test (make modular for a different splitting method)
    Generate a directory for the train and test files respectively (TODO zip)

    :param code_base_directory: The directory containing .java files.
    :param train_ratio: The ratio of files to include in the training set (default is 0.8).
    :param output_directory: The base directory where train and test directories will be created (default is None).
    :param seed: random seed for splitting train and test (default is 0)
    """
    logger = logging.getLogger(__name__)
    
    logger.info("Finding Java files in %s", code_base_directory)
    java_files = _find_java_files(code_base_directory)

    logger.info("Splitting Java files into train and test sets")
    train_files, test_files = _split_train_test(java_files, train_ratio, seed)
    

    logger.info("Copying train and test files to output directory")
    copy_files_to_directory(train_files, code_base_directory, output_directory, 'train')
    copy_files_to_directory(test_files, code_base_directory, output_directory, 'test')

In [10]:
def prompts_from_js_file(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    
    non_space_lines_indices = [index for index, line in enumerate(lines) if line.strip() != '']

    #Randomly select one non-space line and remove it.
    #Should edit so that it does not remove the first or last line. 
    #Should edit so that it does not remove anything above the first docstring. 
    # This part should be its own helper function
    random.seed(0)
    line_to_remove_index = random.choice(non_space_lines_indices)
    last_line_to_remove_index = line_to_remove_index + 4
    removed_lines = ''.join(lines[line_to_remove_index:last_line_to_remove_index + 1])
    removed_line = lines.pop(line_to_remove_index)

    #edit so that we only keep 200 lines above and below.
    if len(lines[:line_to_remove_index]) <= 200:
        prefix1a = ''.join(lines[:line_to_remove_index])
    else:
        prefix1a = ''.join(lines[line_to_remove_index-200:line_to_remove_index])

    if len(lines[line_to_remove_index:]) <= 200:
        suffix1a = ''.join(lines[line_to_remove_index+1:])
    else:
        suffix1a = ''.join(lines[line_to_remove_index+1:line_to_remove_index+200])

    if len(lines[line_to_remove_index:]) <= 200:
        suffix1b = ''.join(lines[last_line_to_remove_index+1:])
    else:
        suffix1b = ''.join(lines[last_line_to_remove_index+1:last_line_to_remove_index+200])

    #infill prompt formatting for code llama
    prompt1a = f'<PRE> {prefix1a} <SUF>{suffix1a} <MID>'
    prompt1b = f'<PRE> {prefix1a} <SUF>{suffix1b} <MID>'
    
    #Need to concatenate in order to find the regex pattern. 
    # This part should be its own helper function
    # I don't fully understand the logic of this part
    java_code = "".join(lines)

    pattern1 = re.compile(r"@Override\s+public\s+void\s+defineProperties\(PropertyBuilder propertyBuilder\)\s*\{([^}]*)\}", re.DOTALL)
    pattern2 = re.compile(r"@Override\s+void\s+defineAdditionalProperties\(PropertyBuilder propertyBuilder\)\s*\{([^}]*)\}", re.DOTALL)

    match1 = pattern1.search(java_code)
    match2 = pattern2.search(java_code)

    startdp, enddp, startdap, enddap = None, None, None, None
    prefix2, label, suffix2 = None, None, None
    labelstart, labelend = None, None 
    prompt2a, prompt2b = None, None
    
    #Determine start and end line indices for the two match types. 
    if match1:
        startdp = java_code.count('\n', 0, match1.start(1)) + 1
        enddp = startdp + match1.group(1).count('\n') + 1
    if match2:
        startdap = java_code.count('\n', 0, match2.start(1)) + 1
        enddap = startdap + match2.group(1).count('\n') + 1

    #edit so we only keep 200 lines above and below
    if startdp is not None or startdap is not None:
        if len(lines[0:startdp]) <= 200:
            prefix2 = "".join(lines[0:startdp])
        else:
            prefix2 = "".join(lines[startdp-200:startdp])

        if len(lines[enddap:]) <= 200:
            suffix2 = "".join(lines[enddap+1:])
        else:
            suffix2 = "".join(lines[enddap+1:enddap+200])

        label = "".join(lines[startdp:enddap+1])

    #Handle situations based on the existence of defineProperties and/or defineAdditionalProperties
    if startdp is not None and startdap is not None:
        labelstart = int(startdp)
        labelend = int(enddap)
    elif startdp is not None:
        labelstart = int(startdp)
        labelend = int(enddp)
    elif startdap is not None:
        labelstart = int(startdap)
        labelend = int(enddap)
    
    if prefix2 is not None:
        prompt2a = f'<PRE> {prefix2} <SUF>{suffix2} <MID>'
        prompt2b = f'<s>[INST] Add lines of code to the following code snippet that will define the necessary properties: {prefix2} [/INST]'

    # Adding 1 since line numbers usually start from 1
    # Is this only creating 2 prompts per file? (both types a and b). Because we will need to create many prompts per file.
    # The user should be able to set an input number of test examples or we should calculate a heuristic based on the number of lines
    # are you only getting rid of entire lines? what about parts of lines?
    row = [file_path,
           prompt1a, 
           removed_line, 
           line_to_remove_index + 1, 
           prompt1b, 
           removed_lines, 
           last_line_to_remove_index + 1, 
           prompt2a,
           prompt2b, 
           label, 
           labelend, 
           labelstart] 

    return row

In [4]:
def create_eval_df(folder_path):
    
    test_set_list = []

    for root, dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            row = prompts_from_js_file(file_path)
            test_set_list.append(row)

    test_set_df = pd.DataFrame(test_set_list, columns = ['file_path',
                                                        'prompt1a',
                                                        'prompt1a_label',
                                                        'first_line_removed',
                                                        'prompt1b',
                                                        'prompt1b_label',
                                                        'last_line_removed',
                                                        'prompt2a',
                                                        'prompt2b',
                                                        'label2',
                                                        'labelstart',
                                                        'labelend'])
    pd.set_option('max_colwidth', 500)
    return test_set_df
    


In [11]:
eval_df = create_eval_df('/Users/ChaliniA/Documents/capstone-code-generation/evaluation_framework/test')
eval_df

Unnamed: 0,file_path,prompt1a,prompt1a_label,first_line_removed,prompt1b,prompt1b_label,last_line_removed,prompt2a,prompt2b,label2,labelstart,labelend
0,/Users/ChaliniA/Documents/capstone-code-generation/evaluation_framework/test/SpreadsheetsBrowser.java,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.services.drive.model.FileList;\nimport com.google.api.services.sheets.v4.model.Sheet;\nimport com....",import com.snaplogic.snap.api.capabilities.Outputs;\n,31,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.services.drive.model.FileList;\nimport com.google.api.services.sheets.v4.model.Sheet;\nimport com....",import com.snaplogic.snap.api.capabilities.Outputs;\nimport com.snaplogic.snap.api.capabilities.Version;\nimport com.snaplogic.snap.api.capabilities.ViewType;\nimport com.snaplogic.snap.schema.api.Schema;\nimport com.snaplogic.snap.schema.api.SchemaProvider;\n,35,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.services.drive.model.FileList;\nimport com.google.api.services.sheets.v4.model.Sheet;\nimport com....","<s>[INST] Add lines of code to the following code snippet that will define the necessary properties: /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.ser...","/*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.services.drive.model.FileList;\nimport com.google.api.services.sheets.v4.model.Sheet;\nimport com.google...",99.0,96.0
1,/Users/ChaliniA/Documents/capstone-code-generation/evaluation_framework/test/Messages.java,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\n/**\n * Messages is the container for all the externalized messages used in this snap pack.\n *\n * @author smudassir\n */\n@SuppressWarnings(""nls"")\npublic class Message...","static final String SUBSCRIBE_WEBHOOK_DESCRIPTION = ""Specify the target URL to allow Google to send notifications"";\n",155,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\n/**\n * Messages is the container for all the externalized messages used in this snap pack.\n *\n * @author smudassir\n */\n@SuppressWarnings(""nls"")\npublic class Message...","static final String SUBSCRIBE_WEBHOOK_DESCRIPTION = ""Specify the target URL to allow Google to send notifications"";\n static final String SUBSCRIBE_EXPIRATION_PROP = ""expiration"";\n static final String SUBSCRIBE_EXPIRATION_LABEL = ""Expiration (in Minutes)"";\n static final String SUBSCRIBE_EXPIRATION_DESCRIPTION = ""Specify the time in minutes after which the subscription expires"";\n static final String SUBSCRIBE_CHANNEL_ID_PROP = ""channelId"";\n",159,,,,,
2,/Users/ChaliniA/Documents/capstone-code-generation/evaluation_framework/test/Constants.java,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\n/**\n * Constants is the container for all the constant values used in this snap pack.\n *\n * @author smudassir\n */\npublic class Constants {\n static final String S...","static final String DRIVE_SERVICE_NAME = ""SnapLogicDriveIntegration"";\n",23,"<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc. All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * ""SnapLogic"" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\n/**\n * Constants is the container for all the constant values used in this snap pack.\n *\n * @author smudassir\n */\npublic class Constants {\n static final String S...","static final String DRIVE_SERVICE_NAME = ""SnapLogicDriveIntegration"";\n static final String AUTH_ENDPOINT_URL = ""https://accounts.google.com/o/oauth2/auth"";\n static final String TOKEN_ENDPOINT_URL = ""https://oauth2.googleapis.com/token"";\n static final String GOOGLE_SPREADSHEET_MIME_TYPE = ""application/vnd.google-apps.spreadsheet"";\n static final String DEFAULT_OAUTH2_CLIENT_ID =\n",27,,,,,


In [98]:
def get_responses_from_api(df):
    
    # This will be changed for our refact trained model. 
    url = f"https://api-inference.huggingface.co/models/codellama/CodeLlama-7b-hf"
    token="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"
    headers = {
        "Authorization": f"Bearer {token}"
    }
    
    prompt_columns = [col for col in df.columns if re.match(r'^prompt\d+[a-z]?$', col)]

    for column_name in prompt_columns: 
        responses = []
        for prompt in df[column_name]:
            # It is important to later learn what other parameter can be passed with the JSON. 
            # Specifically, if we can generate multiple responces per POST, that will help with calculating pass@k. 
            data = {
                "inputs": prompt,
            }

            response = requests.post(url, headers=headers, json=data)

            if response.status_code == 200:
                generated_text = response.json()[0].get('generated_text', '')
                responses.append(generated_text)
            else:
                print(f"Error {response.status_code}: {response.text}")
                responses.append(None)  

        column_name = column_name + '_response'
        df[column_name] = responses
        
    return df

In [78]:
get_responses_from_api(eval_df)

['<PRE> /*\n * SnapLogic - Data Integration\n *\n * Copyright (C) 2015, SnapLogic, Inc.  All rights reserved.\n *\n * This program is licensed under the terms of\n * the SnapLogic Commercial Subscription agreement.\n *\n * "SnapLogic" is a trademark of SnapLogic, Inc.\n */\n\npackage com.snaplogic.snaps.google.spreadsheet;\n\nimport com.google.api.services.drive.model.File;\nimport com.google.api.services.drive.model.FileList;\nimport com.google.api.services.sheets.v4.model.Sheet;\nimport com.google.api.services.sheets.v4.model.SheetProperties;\nimport com.google.common.collect.ImmutableSet;\nimport com.snaplogic.common.SnapType;\nimport com.snaplogic.common.properties.SnapProperty;\nimport com.snaplogic.common.properties.builders.PropertyBuilder;\nimport com.snaplogic.snap.api.Document;\nimport com.snaplogic.snap.api.ExpressionProperty;\nimport com.snaplogic.snap.api.PropertyValues;\nimport com.snaplogic.snap.api.SnapCategory;\nimport com.snaplogic.snap.api.SnapDataException;\nimport 

### The current prompts are too large for the hg hosted codellama7b. Will consider prompt max for our implementation and whether we will need to reduce prompt size. Fow now, will practice with simple prompts from an example df. 

In [80]:
#Later replace this test df with the actual df produced from parsing the java files. 
data = {
    'prompt1': ['''def remove_non_ascii(s: str) -> str:
    """ <FILL>
    return result
''',
'<PRE> def remove_non_ascii(s: str) -> str:\n""" <SUF>return result <MID>',
'''class InterfaceManagerFactory(AbstractManagerFactory):
    def __init__(<FILL>
def main():
    factory = InterfaceManagerFactory(start=datetime.now())
    managers = []
    for i in range(10):
        managers.append(factory.build(id=i))
''',
'<PRE> class InterfaceManagerFactory(AbstractManagerFactory):\ndef __init__( <SUF>def main():\nfactory = InterfaceManagerFactory(start=datetime.now())\nmanagers = []\nfor i in range(10):\nmanagers.append(factory.build(id=i)) <MID>',
'''def remove_non_ascii(s: str) -> str:
    """ Remove non-ASCII characters from a string.
    <FILL>
    return result''',
'<PRE> def remove_non_ascii(s: str) -> str:\n""" Remove non-ASCII characters from a string. <SUF>return result <MID>'],
    'prompt2': ['Another day,', 'In a faraway land', 'Hello, world!', 'Once upon a time', 'My favorite thing to do is', 'I love runnning because']
}
example_df = pd.DataFrame(data)
example_df

Unnamed: 0,prompt1,prompt2
0,"def remove_non_ascii(s: str) -> str:\n """""" <FILL>\n return result\n","Another day,"
1,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" <SUF>return result <MID>",In a faraway land
2,class InterfaceManagerFactory(AbstractManagerFactory):\n def __init__(<FILL>\ndef main():\n factory = InterfaceManagerFactory(start=datetime.now())\n managers = []\n for i in range(10):\n managers.append(factory.build(id=i))\n,"Hello, world!"
3,<PRE> class InterfaceManagerFactory(AbstractManagerFactory):\ndef __init__( <SUF>def main():\nfactory = InterfaceManagerFactory(start=datetime.now())\nmanagers = []\nfor i in range(10):\nmanagers.append(factory.build(id=i)) <MID>,Once upon a time
4,"def remove_non_ascii(s: str) -> str:\n """""" Remove non-ASCII characters from a string.\n <FILL>\n return result",My favorite thing to do is
5,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" Remove non-ASCII characters from a string. <SUF>return result <MID>",I love runnning because


In [99]:
new_df = get_responses_from_api(example_df)
new_df

Unnamed: 0,prompt1,prompt2,prompt1_response,prompt2_response
0,"def remove_non_ascii(s: str) -> str:\n """""" <FILL>\n return result\n","Another day,","def remove_non_ascii(s: str) -> str:\n """""" <FILL>\n return result\n """"""\n return s\n\n\ndef remove_non_ascii(s: str","Another day,another $100,000 in the bank.\nI'm not sure what"
1,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" <SUF>return result <MID>",In a faraway land,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" <SUF>return result <MID>\nRemove non-ASCII characters from a string.\n\nArgs:\n s (str","In a faraway land, a young man named Kai is sent to the city of Kai-Tok to learn"
2,class InterfaceManagerFactory(AbstractManagerFactory):\n def __init__(<FILL>\ndef main():\n factory = InterfaceManagerFactory(start=datetime.now())\n managers = []\n for i in range(10):\n managers.append(factory.build(id=i))\n,"Hello, world!",class InterfaceManagerFactory(AbstractManagerFactory):\n def __init__(<FILL>\ndef main():\n factory = InterfaceManagerFactory(start=datetime.now())\n managers = []\n for i in range(10):\n managers.append(factory.build(id=i))\n for m in managers:\n m.start()\n for m in managers,"Hello, world!\n\nThis is a test of the emergency broadcast system.\n\nThis is a test of"
3,<PRE> class InterfaceManagerFactory(AbstractManagerFactory):\ndef __init__( <SUF>def main():\nfactory = InterfaceManagerFactory(start=datetime.now())\nmanagers = []\nfor i in range(10):\nmanagers.append(factory.build(id=i)) <MID>,Once upon a time,"<PRE> class InterfaceManagerFactory(AbstractManagerFactory):\ndef __init__( <SUF>def main():\nfactory = InterfaceManagerFactory(start=datetime.now())\nmanagers = []\nfor i in range(10):\nmanagers.append(factory.build(id=i)) <MID>self, start=None):\nself.start = start\ndef build(self, id):","Once upon a time, there was a man who was a very good man.\nHe was a very good man,"
4,"def remove_non_ascii(s: str) -> str:\n """""" Remove non-ASCII characters from a string.\n <FILL>\n return result",My favorite thing to do is,"def remove_non_ascii(s: str) -> str:\n """""" Remove non-ASCII characters from a string.\n <FILL>\n return result\n """"""\n result = """"\n for c in s:\n if ord(",My favorite thing to do isto go to the beach. I love the beach. I love the ocean. I love the water
5,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" Remove non-ASCII characters from a string. <SUF>return result <MID>",I love runnning because,"<PRE> def remove_non_ascii(s: str) -> str:\n"""""" Remove non-ASCII characters from a string. <SUF>return result <MID>\n\n Args:\n s (str): The string to remove non-ASCII",I love runnning becauseit is a great way to get exercise and it is a great way to get out of the house


In [102]:
#For ALL prompt types, response needs to be combined with prefix and sufix (if there is one) so that we can do unit test on the snap. 
#Are unit tests going to be found and matched via name? Is it going to be at the snap level or the lines level? 
#Do we want to parse out a list of methods/classes from the training set and see how many of them we detect in the responses? 
#Does the response need to get evaluated against the actual label? How would we do this comparation? 

def rearrange_text(text):
    pattern = r'<PRE>(.*?)<SUF>(.*?)<MID>(.*)'
    match = re.search(pattern, text)
    if match:
        return match.group(1) + match.group(3) + match.group(2)
    return text

df['processed_column'] = df['column_name'].apply(rearrange_text)

def evaluate_responses(df):
    
    #skeleton for the infilling prompt case
    
    #might need to edit previous code so that infill responses have nameing convention that seperates them from completion responses
    for row in df[file_path,infill_prompt_response]:
        
        #How do we match unit test to prompt. Do we use the filename from the file_path column? 
        #unit_test_path = 

        processed_response = rearrange_text(infill_prompt_response)

        #run unit test on processed response 
        #collect result
        

SyntaxError: incomplete input (2005396673.py, line 1)