In [4]:
import subprocess
import tempfile
import os
import pandas as pd
import time
import glob
import concurrent.futures
from typing import Tuple


# Dataset of submission pairs; load_dataset() reads it as tab-separated text.
DEFAULT_DATASET = "~/Downloads/data/improvement_pairs_additional_metadata.csv"
# Root folders with one sub-directory per problem. run_tcs() appends the
# problem directory ("p" + zero-padded id) by plain string concatenation, so
# both values must keep their trailing "/". "~" is expanded inside run_tcs().
PUBLIC_TEST_CASES_FOLDER = "~/Downloads/codenet/public_test_cases/"
HIDDEN_TEST_CASES_FOLDER = "~/Downloads/codenet2/generated_test_cases/"
# Per-test-case limit handed to subprocess.run(timeout=...), i.e. in seconds.
MAX_TIMEOUT = 5

In [33]:
def run_python_code_with_file_input(
    code: str, input_file_path: str
) -> Tuple[str, float, str]:
    """Run ``code`` as a standalone Python script with a file wired to stdin.

    The source is written to ``code.py`` inside a temporary directory and
    executed with ``python3``; the directory is removed when the call ends.

    Args:
        code: Python source text to execute.
        input_file_path: Path of the file whose contents become the script's
            standard input.

    Returns:
        A ``(verdict, runtime, stdout)`` tuple:

        * ``("Accepted", elapsed_seconds, captured_stdout)`` when the process
          exits with return code 0;
        * ``("Runtime Error", -1, "")`` when it exits with a non-zero code;
        * ``("Time Limit Exceeded", MAX_TIMEOUT, "")`` when it runs longer
          than ``MAX_TIMEOUT`` seconds.

    Raises:
        OSError: If ``input_file_path`` cannot be opened.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        python_file_path = os.path.join(temp_dir, "code.py")

        # The interpreter decodes source files as UTF-8 by default, so write
        # the script as UTF-8 explicitly instead of relying on the locale's
        # preferred encoding (which breaks non-ASCII submissions elsewhere).
        with open(python_file_path, "w", encoding="utf-8") as python_file:
            python_file.write(code)

        try:
            with open(input_file_path, "r") as input_file:
                # perf_counter is monotonic, so the measured duration cannot
                # be skewed by system clock adjustments.
                start_time = time.perf_counter()
                run_process = subprocess.run(
                    ["python3", python_file_path],
                    stdin=input_file,
                    capture_output=True,
                    text=True,
                    timeout=MAX_TIMEOUT,
                )
                elapsed = time.perf_counter() - start_time
        except subprocess.TimeoutExpired:
            return "Time Limit Exceeded", MAX_TIMEOUT, ""

        if run_process.returncode != 0:
            # Any non-zero exit status is reported as a runtime error; the
            # captured stderr is intentionally not part of the return value.
            return "Runtime Error", -1, ""
        return "Accepted", elapsed, run_process.stdout

def eval_output(output: str, expected_output_file: str) -> bool:
    """Tell whether ``output`` matches the text stored in a reference file.

    Leading and trailing whitespace is stripped from both sides before the
    comparison; everything in between must match exactly.

    Args:
        output: Text produced by the program under test.
        expected_output_file: Path of the file holding the expected text.

    Returns:
        ``True`` when the stripped texts are equal, ``False`` otherwise.
    """
    with open(expected_output_file, "r") as handle:
        wanted = handle.read().strip()
    produced = output.strip()
    return produced == wanted

def run_single_test_case(code: str, input_file: str) -> Tuple[str, float, str]:
    """Run ``code`` on one input file and judge it against the paired output.

    The expected-output file is looked up next to the input file, under the
    same file name with ``"input"`` replaced by ``"output"``.

    Args:
        code: Python source text to execute.
        input_file: Path of the test-case input file.

    Returns:
        A ``(verdict, runtime, input_file)`` tuple. ``verdict`` is
        ``"Accepted"``, ``"Wrong Answer"``, or whatever non-accepted verdict
        ``run_python_code_with_file_input`` reported; ``runtime`` is passed
        through from that call unchanged.
    """
    # Only the file name is rewritten. Replacing on the whole path would also
    # alter any directory component that happens to contain "input".
    folder, file_name = os.path.split(input_file)
    expected_output_file = os.path.join(
        folder, file_name.replace("input", "output")
    )

    verdict, runtime, actual_output = run_python_code_with_file_input(code, input_file)
    if verdict != "Accepted":
        return verdict, runtime, input_file
    if not eval_output(actual_output, expected_output_file):
        return "Wrong Answer", runtime, input_file
    return "Accepted", runtime, input_file

def run_tcs(code: str, problem_id: int) -> Tuple[str, float]:
    """Run ``code`` against every public and hidden test case of a problem.

    Test inputs are the ``input.*.txt`` files found in the problem's
    sub-directory (``p`` followed by the zero-padded 5-digit id) of
    ``PUBLIC_TEST_CASES_FOLDER`` and ``HIDDEN_TEST_CASES_FOLDER``. All cases
    are executed through a thread pool before the verdicts are inspected.

    Args:
        code: Python source text to evaluate.
        problem_id: Numeric problem id, e.g. ``3352`` for ``p03352``.

    Returns:
        A ``(verdict, runtime)`` tuple:

        * ``("Accepted", mean_runtime_seconds)`` when every case passes;
        * ``(verdict, 2)`` when the first failing case timed out, and
          ``(verdict, -1)`` for any other failing verdict;
        * ``("No Test Cases", -1)`` when no input file was found, instead of
          failing with a division by zero.
    """
    problem_dir = f"p{problem_id:05d}"
    folders = [
        f"{PUBLIC_TEST_CASES_FOLDER}{problem_dir}",
        f"{HIDDEN_TEST_CASES_FOLDER}{problem_dir}",
    ]
    start_time = time.time()

    input_files = []
    for folder in folders:
        pattern = os.path.join(os.path.expanduser(folder), "input.*.txt")
        # glob gives no ordering guarantee; sorting makes the reported
        # "first failing test case" the same on every run.
        input_files.extend(sorted(glob.glob(pattern)))

    if not input_files:
        # Nothing to run: report it explicitly rather than dividing by zero.
        print(f"No test cases found for problem {problem_id} in {folders}")
        return "No Test Cases", -1

    # NOTE(review): cases run concurrently, so each measured runtime may
    # include contention from the other workers -- confirm this is acceptable.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(
            executor.map(lambda path: run_single_test_case(code, path), input_files)
        )

    execution_time = 0
    for verdict, runtime, input_file in results:
        if verdict != "Accepted":
            print(f"Failed on test case {input_file}")
            # NOTE(review): a timeout is reported as 2, not MAX_TIMEOUT --
            # presumably the judge's original limit in seconds; confirm.
            return verdict, 2 if verdict == "Time Limit Exceeded" else -1
        execution_time += runtime

    end_time = time.time()
    print(f"time to run all test cases: {end_time - start_time:.2f} seconds")
    return "Accepted", execution_time / len(input_files)

In [5]:
def load_dataset(dataset=DEFAULT_DATASET):
    """Read the tab-separated file at ``dataset`` into a pandas DataFrame.

    Args:
        dataset: Path of the file to read; defaults to ``DEFAULT_DATASET``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with a tab separator.
    """
    return pd.read_csv(dataset, sep="\t")


# Loaded once at module level; the cells below index into this frame.
df = load_dataset()

In [35]:
if __name__ == "__main__":
    # Python-only subset of the dataset; the next cell displays it.
    python_df = df[df["language"] == "Python"]
    # Take the code_v0 entry at index label 17 and judge it against the
    # test cases of problem 3352.
    sample_code = df.at[17, "code_v0"]
    print(run_tcs(sample_code, 3352))

all testcases ['~/Downloads/codenet/public_test_cases/p03352', '~/Downloads/codenet2/generated_test_cases/p03352']
Failed on test case /Users/zhuodannychen/Downloads/codenet/public_test_cases/p03352/input.3.txt
('Time Limit Exceeded', 2)


In [8]:
# Display the first 50 rows of python_df (created in the cell above).
python_df.head(50)

Unnamed: 0,user_id,problem_id,language,submission_id_v0,submission_id_v1,cpu_time_v0,cpu_time_v1,memory_v0,memory_v1,status_v0,status_v1,improvement_frac,code_v0,code_v1,code_v0_loc,code_v1_loc
17,u000040786,p03352,Python,s655497536,s488933938,2206.0,28.0,8992.0,9160.0,Time Limit Exceeded,Accepted,98.73,"x=int(input())\n\nc=1\n\nfor b in range(1,x):\...","x=int(input())\n\nc=1\n\nfor b in range(1,x):\...",7.0,9.0
20,u013408661,p03352,Python,s605669799,s413110219,2109.0,17.0,119576.0,3060.0,Time Limit Exceeded,Accepted,99.19,num=[]\n\nfor i in range(32):\n\n stack=i*i\n...,"num=[1]\n\nfor i in range(2,32):\n\n stack=i*...",13.0,13.0
21,u015418072,p03352,Python,s258169947,s180991297,18.0,17.0,2940.0,2940.0,Accepted,Accepted,5.56,import sys\n\nimport math\n\n\n\ndef input():\...,import sys\n\nimport math\n\n\n\ndef input():\...,12.0,12.0
22,u015993380,p03352,Python,s133750393,s781843229,2106.0,164.0,40752.0,38256.0,Time Limit Exceeded,Accepted,92.21,x = int(input())\n\nvis = [False for _ in rang...,x = int(input())\n\nvis = [False for _ in rang...,12.0,13.0
26,u020373088,p03352,Python,s068946442,s791061381,18.0,17.0,2940.0,2940.0,Accepted,Accepted,5.56,x = int(input())\n\ny = []\n\nfor i in range(1...,x = int(input())\n\ny = []\n\nfor i in range(1...,11.0,7.0
31,u038216098,p03352,Python,s711290106,s787230341,2206.0,28.0,9044.0,9144.0,Time Limit Exceeded,Accepted,98.73,"X=int(input())\n\nres=[]\n\nfor b in range(1,1...","X=int(input())\n\nres=0\n\nfor b in range(1,10...",7.0,9.0
32,u046187684,p03352,Python,s285848029,s595372974,2106.0,179.0,42516.0,38512.0,Time Limit Exceeded,Accepted,91.5,from itertools import product\n\n\n\n\n\ndef s...,"from math import floor, log, sqrt\n\n\n\n\n\nd...",11.0,11.0
33,u046187684,p03352,Python,s595372974,s868533635,179.0,173.0,38512.0,38256.0,Accepted,Accepted,3.35,"from math import floor, log, sqrt\n\n\n\n\n\nd...","from math import ceil, floor, log, sqrt\n\n\n\...",11.0,15.0
34,u046187684,p03352,Python,s868533635,s422164018,173.0,18.0,38256.0,3060.0,Accepted,Accepted,89.6,"from math import ceil, floor, log, sqrt\n\n\n\...","from math import ceil, floor, log, sqrt\n\n\n\...",15.0,15.0
35,u049354454,p03352,Python,s498478248,s219265597,2104.0,17.0,3064.0,3060.0,Time Limit Exceeded,Accepted,99.19,# coding: utf-8\n\n# Your code here!\n\n\n\nIN...,# coding: utf-8\n\n# Your code here!\n\n\n\nfr...,28.0,14.0
