In [91]:
import pandas as pd
import subprocess


In [92]:
def get_elements_by_pattern(elements: list[str], beginning, end) -> list[str]:
    orig_query_line = list(map(
        lambda s: s[s.index(beginning) + len(beginning) : s.index(end, s.index(beginning))],
        filter(
            lambda s: beginning in s and end in s and s.index(beginning) < s.index(end, s.index(beginning)),
            elements
        )
    ))
    
    if len(orig_query_line) < 1:
        return elements[-1].split('[ANALYSIS] ast: ', 1)[1]
    # make sure exactly one output of this type can be found
    return orig_query_line



def save_analysis(output):
    analysis = pd.DataFrame()
    results = list(filter(lambda x: "[ANALYSIS]" in x, output.split("\n")))
    analysis["query number"] = get_elements_by_pattern(results, '[ANALYSIS] QUERY PATH: "/workspaces/reducer/queries/query', '/original_test.sql"[END ANALYSIS]')

    analysis["original"] = get_elements_by_pattern(results, '[ANALYSIS] ORIGINAL QUERY: "', '"[END ANALYSIS]')
    analysis["delta debugged"] = get_elements_by_pattern(results, '[ANALYSIS] AFTER DELTA DEBUGGING: ', '[END ANALYSIS]')
    #analysis["reduced"] = []

    analysis["#statements original"] = analysis["original"].apply(lambda x: x.count(";"))
    analysis["#statements delta debugged"] = analysis["delta debugged"].apply(lambda x: x.count(";"))
    analysis["#tokens original"] = analysis["original"].apply(lambda s: len(s.split(" ")))

    analysis["DIFF orig-delta"] = analysis["#statements original"] - analysis["#statements delta debugged"]
    return analysis
    

In [93]:
def build_and_prepare():
    cmd = """
    export CARGO_TARGET_DIR=/target
    export RUST_LOG=info
    cargo build
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, executable="/bin/bash")
    print(result)

def run_multiple_queries(start=1, stop=21, step=1):
    outputs = []
    for i in range(start, stop, step):
        outputs.append(run_single_query(i))
    return outputs

def run_single_query(query_number):
    cmd = f"""
    cd ../..
    export CARGO_TARGET_DIR=/target
    export RUST_LOG=warn
    cargo build
    cargo run --package reducer --bin reducer -- --query queries/query{query_number}/original_test.sql --test src/resources/native.sh
 
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    output = result.stdout + result.stderr  # Capture both stdout and stderr if needed
    return output

In [94]:
build_and_prepare()
output = run_multiple_queries()



In [95]:
results = pd.DataFrame()
if isinstance(output, str):
    results = save_analysis(output)
else:
    for out in output:
        results = pd.concat([results, save_analysis(out)], ignore_index=True)
results

Unnamed: 0,query number,original,delta debugged,#statements original,#statements delta debugged,#tokens original,DIFF orig-delta
0,1,CREATE TABLE F (p BOOLEAN NOT NULL NULL NOT NU...,CREATE TABLE F (p BOOLEAN NOT NULL NULL NOT NU...,2,2,73,0
1,2,CREATE TABLE IF NOT EXISTS t_DX44 (c_LGUf NUME...,CREATE TABLE IF NOT EXISTS t_DX44 (c_LGUf NUME...,18,5,249,13
2,3,"CREATE TABLE table_0 (table_0_c0 TEXT, table_...","Err(ParserError(""Expected: ',' or ')' after co...",117,0,915,117
3,4,CREATE TABLE t0 ( col0 text DEFAULT 'default...,"CREATE TABLE t1 (col0 TEXT, col1 INT, col2 TEX...",42,9,355,33
4,5,"CREATE TABLE biq (ype , ucn , ynu );\nCREATE...","CREATE TABLE biq (ype, ucn, ynu); INSERT OR RE...",5,3,59,2
5,6,"CREATE TABLE t0 (\n c0 BOOLEAN,\n c1 INT...","CREATE TABLE t0 (c0 BOOLEAN, c1 INTEGER, c2 BO...",16,7,3384,9
6,7,"CREATE TABLE t0 (c0, c1);\nCREATE TABLE t1 (c0...","Err(ParserError(""Expected: joined table, found...",32,0,332,32
7,8,"CREATE TABLE tbl_wqiwo (icol_dhwup INTEGER, rc...","Err(ParserError(""Expected: AS, found: GLOB at ...",41,0,950,41
8,9,CREATE TABLE t_b8TmXJYj7 (c_NiVEveUN3c NUMERIC...,"Err(ParserError(""Expected: SELECT, VALUES, or ...",30,0,756,30
9,10,CREATE TABLE IF NOT EXISTS t_N96 (c_HoR4r6 REA...,CREATE TABLE IF NOT EXISTS t_N96 (c_HoR4r6 REA...,8,8,146,0


In [96]:
results["delta debugged"]

0     CREATE TABLE F (p BOOLEAN NOT NULL NULL NOT NU...
1     CREATE TABLE IF NOT EXISTS t_DX44 (c_LGUf NUME...
2     Err(ParserError("Expected: ',' or ')' after co...
3     CREATE TABLE t1 (col0 TEXT, col1 INT, col2 TEX...
4     CREATE TABLE biq (ype, ucn, ynu); INSERT OR RE...
5     CREATE TABLE t0 (c0 BOOLEAN, c1 INTEGER, c2 BO...
6     Err(ParserError("Expected: joined table, found...
7     Err(ParserError("Expected: AS, found: GLOB at ...
8     Err(ParserError("Expected: SELECT, VALUES, or ...
9     CREATE TABLE IF NOT EXISTS t_N96 (c_HoR4r6 REA...
10    CREATE TABLE V (l BOOLEAN UNIQUE UNIQUE, q BOO...
11    Err(ParserError("Expected: ',' or ')' after co...
12    CREATE TABLE T1 (c1 JSON, c2 INT, c3 REAL); CR...
13    Err(ParserError("Expected: ',' or ')' after co...
14    CREATE TABLE T1 (c1 REAL, c2 JSON, c3 REAL NOT...
15    Err(ParserError("Expected: ), found: ON at Lin...
16    Err(ParserError("Expected: an SQL statement, f...
17    CREATE TABLE t0 (c0 TEXT, c1 TEXT, c2 BOOL