# Setup

In [12]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [13]:
import os
import sys
sys.path.append(".bin")

import logging
logging.basicConfig(level=logging.INFO)

# --- Input / output locations used throughout the notebook -----------------
# Raw contest data chunks and the reduced subset derived from them.
CODE_CONTEST_DATA_PATH = "data/code_contest_data/"
FILTERED_DIR = "data/filtered_code_contest_data"
# Model-generated patched solutions.
PROMPTED_DIR = "data/patched_solutions"
# Evaluation results for the baseline and for the patched solutions.
BASE_EVAL_RESULTS_PATH = "data/eval_results"
PATCHED_EVAL_RESULTS_PATH = "data/patched_eval_results"
# Path handed to the OpenAI handler when it is configured.
OPENAI_CONFIG_PATH = ".env.secret"

# Make sure every data directory exists; exist_ok keeps re-runs harmless.
for _data_dir in (
    BASE_EVAL_RESULTS_PATH,
    FILTERED_DIR,
    PROMPTED_DIR,
    PATCHED_EVAL_RESULTS_PATH,
    CODE_CONTEST_DATA_PATH,
):
    os.makedirs(_data_dir, exist_ok=True)


# Configure the OpenAI handler with the path held in OPENAI_CONFIG_PATH.
# NOTE(review): presumably the handler reads the API key from that file —
# confirm in llm_handler.openai_handler. The key itself is never in the notebook.
from llm_handler.openai_handler import OpenAIHandler as openai_handler
openai_handler.set_openai_api_key(OPENAI_CONFIG_PATH)

# Creating Contest Problem Set

In [None]:
# The config call deliberately sits between the two dask imports.
# NOTE(review): dask is understood to read the query-planning option when
# dask.dataframe is first imported — confirm before reordering these lines.
import dask
dask.config.set({'dataframe.query-planning': True})
import dask.dataframe as dd

from domain.problems_d import ContestProblemSetD

# Step 1: lazily open every parquet shard of the contest dataset.
raw_contest_frame = dd.read_parquet("code_contests/data/*.parquet")

# Step 2: hand each partition to ContestProblemSetD.compressed_from_df.
compressed_partitions = raw_contest_frame.map_partitions(
    lambda partition: ContestProblemSetD.compressed_from_df(partition),
    meta={},
)

# Step 3: run the graph. Despite its name, `df` is consumed by the next cell
# as an iterable whose elements are written to binary files one by one.
df = compressed_partitions.compute()

In [None]:
# Persist each compressed chunk produced above as its own binary file.
# CODE_CONTEST_DATA_PATH already ends with "/", so formatting it with another
# "/" produced paths like "data/code_contest_data//chunk_0.bin"; os.path.join
# (os is imported in the setup cell) builds the path without the doubled separator.
for chunk_index, compressed_chunk in enumerate(df):
    chunk_path = os.path.join(CODE_CONTEST_DATA_PATH, f"chunk_{chunk_index}.bin")
    with open(chunk_path, "wb") as chunk_file:
        chunk_file.write(compressed_chunk)

# Filtering Down Problem Set

In [14]:
from typing import List

from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import ContestProblemSetD

# Materialise everything the DAO over CODE_CONTEST_DATA_PATH yields, in order.
reader = CompressedDomainFileDAO(CODE_CONTEST_DATA_PATH, ContestProblemSetD)
problem_sets: List[ContestProblemSetD] = list(reader.read())

In [20]:
import dataclasses

from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import ContestProblemSetD

# Caps applied while shrinking the dataset. These used to be the literal 5
# repeated at every slice; naming them makes the subset size tunable in one place.
MAX_PROBLEMS_PER_SET = 5
MAX_SOLUTIONS_PER_PROBLEM = 5
MAX_INCORRECT_SOLUTIONS_PER_PROBLEM = 5
MAX_TESTS_PER_KIND = 5  # applied separately to public and to private tests

compressed_dao = CompressedDomainFileDAO(FILTERED_DIR, ContestProblemSetD)
# NOTE(review): presumably discards previously written filtered data so the
# write below starts clean — confirm against CompressedDomainFileDAO.
compressed_dao.clear_cache()

filtered_problem_sets = []
for problem_set in problem_sets:
    filtered_problems = [
        dataclasses.replace(
            problem,
            solutions=problem.solutions[:MAX_SOLUTIONS_PER_PROBLEM],
            # The capped private tests are appended to the capped public tests,
            # so `public_tests` can hold up to 2 * MAX_TESTS_PER_KIND entries.
            public_tests=(
                problem.public_tests[:MAX_TESTS_PER_KIND]
                + problem.private_tests[:MAX_TESTS_PER_KIND]
            ),
            incorrect_solutions=problem.incorrect_solutions[
                :MAX_INCORRECT_SOLUTIONS_PER_PROBLEM
            ],
        )
        for problem in problem_set.problems[:MAX_PROBLEMS_PER_SET]
    ]
    filtered_problem_sets.append(
        dataclasses.replace(problem_set, problems=filtered_problems)
    )

# Summary counts over the reduced dataset. The "public tests" figure includes
# the private tests merged in above.
all_filtered_problems = [
    problem
    for filtered_problem_set in filtered_problem_sets
    for problem in filtered_problem_set.problems
]
num_inc_sol = sum(len(problem.incorrect_solutions) for problem in all_filtered_problems)
num_pub_tests = sum(len(problem.public_tests) for problem in all_filtered_problems)
print(f"Filtered {len(filtered_problem_sets)} problem sets, {num_inc_sol} incorrect solutions, and {num_pub_tests} public tests")

compressed_dao.write(filtered_problem_sets)
       

Filtered 41 problem sets, 946 incorrect solutions, and 1070 public tests


# Generating Patched Solutions

In [22]:
from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import ContestProblemSetD

# Read the filtered problem sets back through a DAO over FILTERED_DIR. These
# are the inputs to patch generation (the name says "patched", but nothing
# has been patched at this point).
reader = CompressedDomainFileDAO(FILTERED_DIR, ContestProblemSetD)
patched_problem_sets = [*reader.read()]

In [23]:

# Sanity check: recompute the summary counts from the reloaded problem sets.
# Flatten once, then sum each per-problem length.
loaded_problems = [
    problem
    for problem_set in patched_problem_sets
    for problem in problem_set.problems
]
num_inc_sol = sum(len(problem.incorrect_solutions) for problem in loaded_problems)
num_pub_tests = sum(len(problem.public_tests) for problem in loaded_problems)
print(f"Filtered {len(patched_problem_sets)} problem sets, {num_inc_sol} incorrect solutions, and {num_pub_tests} public tests")


Filtered 41 problem sets, 946 incorrect solutions, and 1070 public tests


In [46]:
import logging
logging.basicConfig(level=logging.INFO)

from code_patching.prompts import PROMPTS
from code_patching.solution_generator import generate_prompted_dataset
from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import PatchedSolutionSetD
import proto.patched_solutions_pb2 as ps_pb2

# --- Tunables for the generation run ---------------------------------------
DRY_RUN = False
GEN_SOLUTIONS_BATCH_SIZE = 100
GEN_SOLUTIONS_MAX_WORKERS = 100

# Model types passed to the generator as `model_types`.
MODELS = [
    ps_pb2.MODEL_TYPE_GPT_4_TURBO,
    ps_pb2.MODEL_TYPE_GPT_3_5_TURBO,
]

# DAO over PROMPTED_DIR, passed to the generator as `domain_reader`.
prompted_dao = CompressedDomainFileDAO(PROMPTED_DIR, PatchedSolutionSetD)

In [47]:
import logging

# Quieten logging for the duration of the generation run.
# logging.basicConfig() does nothing once the root logger has handlers, and the
# setup cell has already configured it, so the original
# basicConfig(level=WARNING) call never took effect. Set the level on the root
# logger directly, and restore the previous level afterwards so later cells
# keep their existing verbosity.
root_logger = logging.getLogger()
previous_log_level = root_logger.level
root_logger.setLevel(logging.WARNING)
try:
    # list(...) drains the generator so all work finishes inside this cell.
    generated_solution_sets = list(
        generate_prompted_dataset(
            contest_problems=patched_problem_sets,
            model_types=MODELS,
            prompts=PROMPTS,
            max_workers=GEN_SOLUTIONS_MAX_WORKERS,
            result_batch_size=GEN_SOLUTIONS_BATCH_SIZE,
            domain_reader=prompted_dao,
            dry_run=DRY_RUN))
finally:
    root_logger.setLevel(previous_log_level)

Solutions: 0it [00:00, ?it/s]


# Running Patched Evaluation

In [49]:
import logging
logging.basicConfig(level=logging.INFO)
from collections import defaultdict

from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import PatchedSolutionSetD, ContestProblemSetD

# problem id -> test cases, gathered from the filtered problem sets.
problem_test_cases = defaultdict(list)
filtered_problems = CompressedDomainFileDAO(FILTERED_DIR, ContestProblemSetD)
contest_problems = (
    contest_problem
    for filtered_set in filtered_problems.read()
    for contest_problem in filtered_set.problems
)
for contest_problem in contest_problems:
    problem_test_cases[contest_problem.proto_id].extend(contest_problem.public_tests)

# problem id -> patched solutions, gathered from the generated solution sets.
problem_patched_solutions = defaultdict(list)
prompted_dao = CompressedDomainFileDAO(PROMPTED_DIR, PatchedSolutionSetD)
candidate_solutions = (
    candidate
    for solution_set in prompted_dao.read()
    for candidate in solution_set.solutions
)
for candidate in candidate_solutions:
    problem_patched_solutions[candidate.problem_id].append(candidate)

# Both mappings must cover exactly the same problem ids; anything present on
# only one side is reported and aborts the cell.
diff = set(problem_test_cases.keys()) ^ set(problem_patched_solutions.keys())
if diff:
    raise ValueError(f"Problem ids do not match: {diff}")





In [50]:

import logging
logging.basicConfig(level=logging.INFO)

from code_patching.solution_evaluator import eval_patched_solutions
from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import TestResultSetD

# DAO over PATCHED_EVAL_RESULTS_PATH, handed to the evaluator as its writer.
test_result_dao = CompressedDomainFileDAO(PATCHED_EVAL_RESULTS_PATH, TestResultSetD)

# Arguments for evaluating the patched solutions against their test cases.
patched_eval_kwargs = dict(
    problem_tests=problem_test_cases,
    patched_solutions=problem_patched_solutions,
    domain_writer=test_result_dao,
    process_batch_size=10,
    batch_size=1000,
)

# Unpacking drains the evaluator so every result is collected in this cell.
test_result_sets = [*eval_patched_solutions(**patched_eval_kwargs)]


Test Evals:  34%|███▎      | 3320/9898 [01:23<00:50, 129.80it/s]

KeyboardInterrupt: 

# Evaluating Correct Solutions for the Baseline

In [None]:
import logging
logging.basicConfig(level=logging.INFO)
from collections import defaultdict

from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import ContestProblemSetD, PatchedSolutionD
import proto.patched_solutions_pb2 as ps_pb2


# Baseline inputs: the known solutions of each filtered problem are wrapped as
# PatchedSolutionD records so they can go through the same evaluator as the
# model-generated patches.
base_problem_test_cases = defaultdict(list)
base_problem_solutions = defaultdict(list)
filtered_problems = CompressedDomainFileDAO(FILTERED_DIR, ContestProblemSetD)
for problem_set in filtered_problems.read():
    for problem in problem_set.problems:
        problem_key = problem.proto_id
        base_problem_test_cases[problem_key].extend(problem.public_tests)
        for reference_solution in problem.solutions:
            # Tagged with a fixed prompt id and an unspecified model, with an
            # empty response payload.
            wrapped_solution = PatchedSolutionD(
                problem_id=problem_key,
                patched_solution=reference_solution.solution,
                solution_id=reference_solution.proto_id,
                prompt_id="base_solution",
                model=ps_pb2.MODEL_TYPE_UNSPECIFIED,
                patched_response={},
            )
            base_problem_solutions[problem_key].append(wrapped_solution)

In [None]:
import logging
logging.basicConfig(level=logging.INFO)

from code_patching.solution_evaluator import eval_patched_solutions
from domain.domain_dao import CompressedDomainFileDAO
from domain.problems_d import TestResultSetD

# DAO over BASE_EVAL_RESULTS_PATH, handed to the evaluator as its writer.
# Note that `test_result_dao` and `test_result_sets` are rebound here,
# replacing the values from the patched evaluation run.
test_result_dao = CompressedDomainFileDAO(BASE_EVAL_RESULTS_PATH, TestResultSetD)

# Arguments for evaluating the baseline solutions against the same tests.
base_eval_kwargs = dict(
    problem_tests=base_problem_test_cases,
    patched_solutions=base_problem_solutions,
    domain_writer=test_result_dao,
    process_batch_size=10,
    batch_size=1000,
)

# Unpacking drains the evaluator so every result is collected in this cell.
test_result_sets = [*eval_patched_solutions(**base_eval_kwargs)]