In [4]:
import subprocess
import tempfile
import os
import pandas as pd
import time
import glob
import concurrent.futures
from typing import Tuple


# Default path handed to load_dataset(); that function reads the file with sep="\t".
DEFAULT_DATASET = "../datasets/improvement_pairs_additional_metadata.csv"
# Base folders searched by run_tcs() for "input.*.txt" files. run_tcs() appends a
# per-problem subfolder of the form p<zero-padded 5-digit problem id> to each.
# NOTE(review): public cases live under "codenet" but hidden ones under "codenet2" -- confirm this is intended.
PUBLIC_TEST_CASES_FOLDER = "../datasets/codenet/public_test_cases/"
HIDDEN_TEST_CASES_FOLDER = "../datasets/codenet2/generated_test_cases/"
# Value passed as `timeout` to subprocess.run (seconds) for each execution of a compiled program.
MAX_TIMEOUT = 2

In [8]:
def run_cpp_code_with_file_input(code: str, input_file_path: str) -> Tuple[str, float, str]:
    """Compile a C++ source string with g++ and run it once on one input file.

    The source is written to a temporary directory, compiled with
    ``g++ <file> -o <exe>`` (no extra flags), and the resulting executable is
    run with its stdin redirected from ``input_file_path``. The temporary
    directory is removed when the function returns.

    Args:
        code: Full C++ source text to compile.
        input_file_path: Path of the file fed to the program's stdin.

    Returns:
        A ``(verdict, runtime, stdout)`` triple:

        * ``("Compilation Error", -1, "")`` if g++ exits with a non-zero code.
        * ``("Runtime Error", -1, "")`` if the program exits with a non-zero code.
        * ``("Time Limit Exceeded", MAX_TIMEOUT, "")`` if the run exceeds
          ``MAX_TIMEOUT`` seconds.
        * ``("Accepted", elapsed_seconds, stdout_text)`` otherwise. "Accepted"
          here only means the program ran successfully; the output is not
          checked by this function.

    Raises:
        OSError: If the input file cannot be opened or g++ cannot be launched
            (these are not caught here).
    """
    # Create a temporary directory to hold the C++ file and executable
    with tempfile.TemporaryDirectory() as temp_dir:
        cpp_file_path = os.path.join(temp_dir, "code.cpp")
        executable_path = os.path.join(temp_dir, "code")

        # Write the C++ code to a file
        with open(cpp_file_path, "w") as cpp_file:
            cpp_file.write(code)

        # Compile the C++ code
        compile_process = subprocess.run(
            ["g++", cpp_file_path, "-o", executable_path],
            capture_output=True,
            text=True,
        )
        if compile_process.returncode != 0:
            # Compilation failed
            return "Compilation Error", -1, ""

        # Run the compiled executable with input redirected from the input file
        try:
            with open(input_file_path, 'r') as input_file:
                # Time only the child process, using a monotonic clock, so the
                # measurement excludes opening/closing the input file and is
                # immune to wall-clock adjustments.
                start_time = time.perf_counter()
                run_process = subprocess.run(
                    executable_path,
                    stdin=input_file,
                    capture_output=True,
                    text=True,
                    timeout=MAX_TIMEOUT,
                )
                elapsed = time.perf_counter() - start_time
        except subprocess.TimeoutExpired:
            return "Time Limit Exceeded", MAX_TIMEOUT, ""

        if run_process.returncode != 0:
            # Runtime error
            return "Runtime Error", -1, ""
        return "Accepted", elapsed, run_process.stdout

def eval_output(output: str, expected_output_file: str) -> bool:
    """Tell whether ``output`` matches the contents of ``expected_output_file``.

    Both texts are compared after stripping leading and trailing whitespace;
    whitespace inside the text is significant.

    Args:
        output: Text produced by the program under test.
        expected_output_file: Path of the file holding the reference output.

    Returns:
        True if the stripped texts are equal, False otherwise.
    """
    with open(expected_output_file, 'r') as handle:
        reference = handle.read().strip()
    candidate = output.strip()
    return candidate == reference
    
def run_single_test_case(code: str, input_file: str) -> Tuple[str, float, str]:
    """Compile and run ``code`` on one input file and check its output.

    The expected-output file is looked up next to the input file, under the
    same file name with the first ``input`` replaced by ``output``
    (e.g. ``input.1.txt`` -> ``output.1.txt``).

    Args:
        code: C++ source text to evaluate.
        input_file: Path of the test-case input file.

    Returns:
        A ``(verdict, runtime, input_file)`` triple. ``verdict`` is whatever
        ``run_cpp_code_with_file_input`` reported if it was not "Accepted",
        "Wrong Answer" if the program ran but ``eval_output`` rejected its
        output, and "Accepted" otherwise. ``runtime`` is passed through from
        ``run_cpp_code_with_file_input`` unchanged.
    """
    # Rewrite only the file name: a plain str.replace on the whole path would
    # also rewrite any directory whose name happens to contain "input".
    folder, file_name = os.path.split(input_file)
    expected_output_file = os.path.join(folder, file_name.replace('input', 'output', 1))

    verdict, runtime, actual_output = run_cpp_code_with_file_input(code, input_file)
    if verdict != "Accepted":
        return verdict, runtime, input_file
    if not eval_output(actual_output, expected_output_file):
        return "Wrong Answer", runtime, input_file
    return "Accepted", runtime, input_file
    
def run_tcs(code: str, problem_id: int) -> Tuple[str, float]:
    """Run ``code`` against every public and hidden test case of a problem.

    Test cases are the ``input.*.txt`` files found in the per-problem
    subfolder ``p<problem_id zero-padded to 5 digits>`` of
    ``PUBLIC_TEST_CASES_FOLDER`` and ``HIDDEN_TEST_CASES_FOLDER``. They are
    executed concurrently in a thread pool via ``run_single_test_case``.

    Args:
        code: C++ source text to evaluate.
        problem_id: Numeric problem id (e.g. ``849`` for folder ``p00849``).

    Returns:
        A ``(verdict, time)`` pair:

        * ``("Accepted", mean_runtime_seconds)`` if every test case passes.
        * ``(verdict, MAX_TIMEOUT)`` if the first failing case is
          "Time Limit Exceeded", or ``(verdict, -1)`` for any other failure.
        * ``("No Test Cases", -1)`` if no input files were found; previously
          this situation raised ``ZeroDivisionError``.
    """
    sample_output_folder = f"{PUBLIC_TEST_CASES_FOLDER}p{problem_id:05d}"
    hidden_output_folder = f"{HIDDEN_TEST_CASES_FOLDER}p{problem_id:05d}"
    start_time = time.perf_counter()

    # glob returns files in arbitrary order; sorting makes the "first failing
    # test case" that gets reported deterministic across runs.
    test_cases = [
        (code, input_file)
        for folder in (sample_output_folder, hidden_output_folder)
        for input_file in sorted(glob.glob(os.path.join(folder, "input.*.txt")))
    ]

    if not test_cases:
        # Nothing to run: report it explicitly instead of dividing by zero below.
        print(f"No test cases found for problem {problem_id}")
        return "No Test Cases", -1

    # NOTE: run_single_test_case goes through run_cpp_code_with_file_input,
    # which compiles the source on every call, so the code is compiled once per
    # test case.
    # Leaving the with-block waits for every submitted test case to finish, so
    # iterating `results` afterwards does not block.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = executor.map(lambda p: run_single_test_case(*p), test_cases)

    execution_time = 0
    for verdict, runtime, input_file in results:
        if verdict != "Accepted":
            print(f'Failed on test case {input_file}')
            return verdict, MAX_TIMEOUT if verdict == "Time Limit Exceeded" else -1
        execution_time += runtime

    end_time = time.perf_counter()
    print(f"time to run all test cases: {end_time - start_time:.2f} seconds")
    return "Accepted", execution_time / len(test_cases)

In [6]:
def load_dataset(dataset=DEFAULT_DATASET):
    """Read the tab-separated file at ``dataset`` into a pandas DataFrame.

    Args:
        dataset: Path of the file to read; defaults to ``DEFAULT_DATASET``.

    Returns:
        The DataFrame produced by ``pd.read_csv(dataset, sep="\\t")``.
    """
    return pd.read_csv(dataset, sep="\t")


# Module-level frame holding the default dataset.
df = load_dataset()

In [12]:
if __name__ == "__main__":
    # Smoke run: evaluate the 'code_v0' source stored at row label 3 of the
    # dataset against the test cases of problem 849 and show the result.
    sample_code = df.at[3, 'code_v0']
    sample_result = run_tcs(sample_code, 849)
    print(sample_result)

Failed on test case ../datasets/codenet/public_test_cases/p00849/input.1.txt
('Compilation Error', -1)


In [18]:
# Preview up to the first 50 rows of the loaded dataset.
df.head(50)

Unnamed: 0,user_id,problem_id,language,submission_id_v0,submission_id_v1,cpu_time_v0,cpu_time_v1,memory_v0,memory_v1,status_v0,status_v1,improvement_frac,code_v0,code_v1,code_v0_loc,code_v1_loc
0,u116766943,p00849,C++,s624998539,s546809015,9370.0,9250.0,1232.0,1228.0,Time Limit Exceeded,Time Limit Exceeded,1.28,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",122.0,127.0
1,u116766943,p00849,C++,s546809015,s829771916,9250.0,8780.0,1228.0,1228.0,Time Limit Exceeded,Time Limit Exceeded,5.08,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",127.0,125.0
2,u116766943,p00849,C++,s812631681,s154463767,8700.0,8030.0,1232.0,1232.0,Time Limit Exceeded,Time Limit Exceeded,7.7,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",144.0,144.0
3,u116766943,p00849,C++,s363969251,s061168313,8500.0,920.0,1252.0,12164.0,Time Limit Exceeded,Accepted,89.18,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",145.0,214.0
4,u116766943,p00849,C++,s061168313,s755775167,920.0,420.0,12164.0,9780.0,Accepted,Accepted,54.35,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",214.0,178.0
5,u116766943,p00849,C++,s755775167,s221324014,420.0,410.0,9780.0,9780.0,Accepted,Accepted,2.38,"#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...","#include<bits/stdc++.h>\n\n\n\n#define REP(i,s...",178.0,179.0
6,u157643087,p00849,C++,s939749011,s752272632,9990.0,8420.0,872.0,872.0,Time Limit Exceeded,Time Limit Exceeded,15.72,#include<iostream>\n\n#include<algorithm>\n\nu...,#include<iostream>\n\n#include<algorithm>\n\nu...,273.0,259.0
7,u157643087,p00849,C++,s752272632,s870368250,8420.0,7350.0,872.0,868.0,Time Limit Exceeded,Accepted,12.71,#include<iostream>\n\n#include<algorithm>\n\nu...,#include<iostream>\n\n#include<algorithm>\n\nu...,259.0,259.0
8,u157643087,p00849,C++,s870368250,s545062583,7350.0,2930.0,868.0,868.0,Accepted,Accepted,60.14,#include<iostream>\n\n#include<algorithm>\n\nu...,#include<iostream>\n\n#include<algorithm>\n\nu...,259.0,211.0
9,u157643087,p00849,C++,s476552923,s019272801,2930.0,2850.0,872.0,872.0,Accepted,Accepted,2.73,#include<iostream>\n\n#include<algorithm>\n\nu...,#include<iostream>\n\n#include<algorithm>\n\nu...,211.0,198.0
