In [46]:
import pandas as pd
import subprocess


In [47]:
def get_elements_by_pattern(elements: list[str], beginning, end) -> list[str]:
    orig_query_line = list(map(
        lambda s: s[s.index(beginning) + len(beginning) : s.index(end, s.index(beginning))],
        filter(
            lambda s: beginning in s and end in s and s.index(beginning) < s.index(end, s.index(beginning)),
            elements
        )
    ))
    
    if len(orig_query_line) < 1:
        return ["nonono"]
    # make sure exactly one output of this type can be found
    return orig_query_line

def save_analysis(output):
    analysis = pd.DataFrame()
    results = list(filter(lambda x: "[ANALYSIS]" in x, output.split("\n")))
    analysis["query number"] = get_elements_by_pattern(results, '[ANALYSIS] QUERY PATH: "/workspaces/reducer/queries/query', '/original_test.sql"[END ANALYSIS]')

    analysis["original"] = get_elements_by_pattern(results, '[ANALYSIS] ORIGINAL QUERY: "', '"[END ANALYSIS]')
    analysis["delta debugged"] = get_elements_by_pattern(results, '[ANALYSIS] AFTER DELTA DEBUGGING: ', '[END ANALYSIS]')
    #analysis["reduced"] = []

    analysis["#statements original"] = analysis["original"].apply(lambda x: x.count(";"))
    analysis["#statements delta debugged"] = analysis["delta debugged"].apply(lambda x: x.count(";"))
    analysis["#tokens original"] = analysis["original"].apply(lambda s: len(s.split(" ")))

    analysis["DIFF orig-delta"] = analysis["#statements original"] - analysis["#statements delta debugged"]
    return analysis
    

In [48]:
def build_and_prepare():
    cmd = """
    export CARGO_TARGET_DIR=/target
    export RUST_LOG=info
    cargo build
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, executable="/bin/bash")
    print(result)

def run_multiple_queries(start=1, stop=21, step=1):
    outputs = []
    for i in range(start, stop, step):
        outputs.append(run_single_query(i))
    return outputs

def run_single_query(query_number):
    cmd = f"""
    cd ../..
    export CARGO_TARGET_DIR=/target
    export RUST_LOG=warn
    cargo build
    cargo run --package reducer --bin reducer -- --query queries/query{query_number}/original_test.sql --test src/resources/native.sh
 
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    output = result.stdout + result.stderr  # Capture both stdout and stderr if needed
    return output

In [53]:
build_and_prepare()
output = run_multiple_queries()



In [54]:
results = pd.DataFrame()
if isinstance(output, str):
    results = save_analysis(output)
else:
    for out in output:
        results = pd.concat([results, save_analysis(out)], ignore_index=True)
results

Unnamed: 0,query number,original,delta debugged,#statements original,#statements delta debugged,#tokens original,DIFF orig-delta
0,1,CREATE TABLE F (p BOOLEAN NOT NULL NULL NOT NU...,CREATE TABLE F (p BOOLEAN NOT NULL NULL NOT NU...,2,2,77,0
1,2,CREATE TABLE IF NOT EXISTS t_DX44 (c_LGUf NUME...,CREATE TABLE IF NOT EXISTS t_DX44 (c_LGUf NUME...,18,5,283,13
2,3,"CREATE TABLE table_0 (table_0_c0 TEXT, table_...",nonono,117,0,1116,117
3,4,CREATE TABLE t0 ( col0 text DEFAULT 'default...,"CREATE TABLE t1 (col0 TEXT, col1 INT, col2 TEX...",42,9,377,33
4,5,"CREATE TABLE biq (ype , ucn , ynu ); CREATE...","CREATE TABLE biq (ype, ucn, ynu); INSERT OR RE...",5,3,67,2
5,6,"CREATE TABLE t0 ( c0 BOOLEAN, c1 INT...","CREATE TABLE t0 (c0 BOOLEAN, c1 INTEGER, c2 BO...",16,7,4058,9
6,7,"CREATE TABLE t0 (c0, c1); CREATE TABLE t1 (c0...",nonono,32,0,394,32
7,8,"CREATE TABLE tbl_wqiwo (icol_dhwup INTEGER, rc...",nonono,41,0,1026,41
8,9,CREATE TABLE t_b8TmXJYj7 (c_NiVEveUN3c NUMERIC...,nonono,30,0,912,30
9,10,CREATE TABLE IF NOT EXISTS t_N96 (c_HoR4r6 REA...,CREATE TABLE IF NOT EXISTS t_N96 (c_HoR4r6 REA...,8,6,160,2


In [None]:
results[original]