In [119]:
import os
import random
import re
from glob import glob

import dotenv
from langchain.chat_models import ChatOpenAI
from tqdm import tqdm

# Load settings from a local .env file (if present) into the environment.
dotenv.load_dotenv()

# The API key is read from the environment rather than hard-coded here.
open_ai_key = os.environ.get('OPENAI_API_KEY')
llm = ChatOpenAI(model_name='gpt-3.5-turbo', openai_api_key=open_ai_key)

# Root directory for the generated candidates (one sub-directory per program).
# TODO: switch to a pathlib.Path
generated_code_dir = 'generated_code/gpt35'


In [129]:
def filter_comment(code):
    """Remove triple-double-quoted blocks from ``code`` and strip the result.

    Each block is matched non-greedily, so source text sitting between two
    separate blocks is preserved. A greedy match would swallow everything
    from the first opening delimiter to the last closing one.

    Args:
        code: Source text to clean.

    Returns:
        ``code`` without its triple-double-quoted blocks and without leading
        or trailing whitespace.
    """
    # DOTALL lets a block span several lines; `.*?` stops at the nearest
    # closing delimiter instead of the last one in the text.
    without_blocks = re.sub(r'""".*?"""', '', code, flags=re.DOTALL)
    return without_blocks.strip()


def generate_fixed_code(code):
    """Ask the module-level ``llm`` for a repaired version of ``code``.

    The prompt consists of an instruction line, the code itself, and a
    trailing ``The fixed code:`` cue, each on its own line.

    Args:
        code: Source text to embed in the prompt.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    # NOTE(review): the instruction says "above" although the code is placed
    # below it; kept verbatim because rewording would change the model input.
    instruction = 'Please fix the code above:\n'
    cue = 'The fixed code:'
    request = instruction + f'{code}\n' + cue

    # timeout=20 is forwarded unchanged — presumably seconds; confirm against
    # the client library.
    return llm.invoke(request, timeout=20).content



In [85]:
all_code = []

all_code_file = glob('datasets/QuixBugs/python_programs/*.py')
# Keep only the programs themselves: drop the test drivers (*test.py) and any
# path containing 'node' (presumably a shared helper module — confirm).
# `endswith` replaces the old regex, whose unescaped '.' and missing end
# anchor could also match names that merely contain "test?py".
all_code_file = sorted(
    path for path in all_code_file
    if not path.endswith('test.py') and 'node' not in path
)

# Pick one program at random to eyeball the effect of filter_comment.
index = random.randint(0, len(all_code_file) - 1)
code_file_name = all_code_file[index]

with open(code_file_name, 'r') as f:
    code = f.read()

code = filter_comment(code)
print(code)


def knapsack(capacity, items):
    from collections import defaultdict
    memo = defaultdict(int)

    for i in range(1, len(items) + 1):
        weight, value = items[i - 1]

        for j in range(1, capacity + 1):
            memo[i, j] = memo[i - 1, j]

            if weight < j:
                memo[i, j] = max(
                    memo[i, j],
                    value + memo[i - 1, j - weight]
                )

    return memo[len(items), capacity]


In [130]:
# Resume support: every program already processed has a sub-directory named
# after it under generated_code_dir. Create the root first so a fresh run
# does not fail on listdir.
os.makedirs(generated_code_dir, exist_ok=True)
dir_done = sorted(os.listdir(generated_code_dir))

# Program names (file name without ".py"), parallel to all_code_file.
function_names = [
    os.path.splitext(os.path.basename(path))[0] for path in all_code_file
]

# Restart at the last program that has an output directory (it is generated
# again, which also covers a run interrupted part-way through it). Start from
# the beginning when nothing has been generated yet.
last_index = function_names.index(dir_done[-1]) if dir_done else 0

for i in tqdm(range(last_index, len(all_code_file))):
    code_file_name = all_code_file[i]
    function_name = function_names[i]

    with open(code_file_name, 'r') as f:
        code = f.read()
    code = filter_comment(code)

    output_dir = f'{generated_code_dir}/{function_name}'
    os.makedirs(output_dir, exist_ok=True)

    # Store the exact text sent to the model next to the generated candidates.
    with open(f'{output_dir}/{function_name}_input.py', 'w') as f:
        f.write(code)

    # Four candidate fixes per program, written as <name>_1.py .. <name>_4.py.
    for j in range(4):
        fixed_code = generate_fixed_code(code)

        generated_file_path = f'{output_dir}/{function_name}_{j + 1}.py'

        with open(generated_file_path, 'w') as f:
            f.write(fixed_code)


100%|██████████| 36/36 [08:11<00:00, 13.64s/it]
