In [137]:
# Setup evaluation suite

yusung_set = [
    "bullet",
    "factorial",
    "ffmpeg",
    "fractals",
    "funky-kart",
    "game-of-life",
    "gotemplate",
    "hnset-bench",
    "jqkungfu",
    "lichess",
    "mandelbrot",
    "ogv",
    "onnxjs",
    "pacalc",
    "playnox",
    "roslyn",
    "rustpython",
    "sandspiel",
    "sqlgui",
    "sqlpractice",
    "takahirox",
    "tic-tac-toe",
    "timestretch",
    "vaporboy",
    "video",
    "waforth",
    "wasmsh",
    "wheel",
]

jakob_set = [
    "boa",
    "commanderkeen",
    "ffmpeg",
    "fib",
    "figma-startpage",
    "funky-kart",
    "game-of-life",
    "guiicons",
    "handy-tools",
    "jsc",
    "kittygame",
    "pathfinding",
    "riconpacker",
    "rtexviewer",
    "sqlgui",
    "video",
    "multiplyInt",
]

union = list(set(yusung_set) | set(jakob_set))
# print intersection of yusung_set and jakob_set
print('intersection: ', list(set(yusung_set) & set(jakob_set)))
# print only in yusung_set
print('only in yusung_set: ', list(set(yusung_set) - set(jakob_set)))
# print only in jakob_set
print('only in jakob_set: ', list(set(jakob_set) - set(yusung_set)))
print('yusung_set: ', len(yusung_set))
print('jakob_set: ', len(jakob_set))
print('union: ', len(union))

intersection:  ['funky-kart', 'ffmpeg', 'game-of-life', 'sqlgui', 'video']
only in yusung_set:  ['waforth', 'sqlpractice', 'bullet', 'vaporboy', 'tic-tac-toe', 'pacalc', 'rustpython', 'onnxjs', 'sandspiel', 'ogv', 'wasmsh', 'lichess', 'timestretch', 'hnset-bench', 'factorial', 'fractals', 'wheel', 'gotemplate', 'playnox', 'mandelbrot', 'roslyn', 'jqkungfu', 'takahirox']
only in jakob_set:  ['boa', 'handy-tools', 'kittygame', 'riconpacker', 'figma-startpage', 'commanderkeen', 'pathfinding', 'multiplyInt', 'guiicons', 'rtexviewer', 'fib', 'jsc']
yusung_set:  28
jakob_set:  17
union:  40


In [138]:
import socket
import subprocess
import os
import concurrent.futures

def find_consecutive_free_ports(start_port, end_port, consecutive_ports):
    for port in range(start_port, end_port):
        for i in range(consecutive_ports):
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                result = sock.connect_ex(('localhost', port + i))
                if result == 0:
                    break
                if i == consecutive_ports - 1:
                    return port
    return None

# Setup artifacts
r3_path = os.getenv('WASMR3_PATH', '~/wasm-r3')
testset = yusung_set
start_port = find_consecutive_free_ports(8080, 65535, len(testset))
timeout = 120 # seconds
custom = True
# parallel takes around 2 minutes, while serial takes around 23 minutes
# parallel for quick experiments and sequential for final results
parallel = False


def run_command(testname, i):
    customcommand = '-c' if custom else ''
    port = start_port + i
    command = f". ~/.bashrc && timeout {timeout}s npm test -- {customcommand} -t {testname} -p {port}"
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    isNormal = 'o' if result.returncode == 0  else ''
    if not isNormal:
        print(result.args)
        print(result.stderr)
    return [testname, isNormal]

if parallel:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_command, testname, i) for i, testname in enumerate(testset)]
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
else:
    results = [run_command(testname, i) for i, testname in enumerate(testset)]

results.sort(key=lambda x: x[0])
successful_results = [row for row in results if row[1] == 'o']

. ~/.bashrc && timeout 120s npm test -- -c -t ffmpeg -p 8114

. ~/.bashrc && timeout 120s npm test -- -c -t gotemplate -p 8118

. ~/.bashrc && timeout 120s npm test -- -c -t jqkungfu -p 8120

. ~/.bashrc && timeout 120s npm test -- -c -t lichess -p 8121

. ~/.bashrc && timeout 120s npm test -- -c -t onnxjs -p 8124

. ~/.bashrc && timeout 120s npm test -- -c -t playnox -p 8126

. ~/.bashrc && timeout 120s npm test -- -c -t roslyn -p 8127

. ~/.bashrc && timeout 120s npm test -- -c -t rustpython -p 8128

. ~/.bashrc && timeout 120s npm test -- -c -t sqlpractice -p 8131
thread 'main' panicked at crates/replay_gen/src/main.rs:96:39:
called `Result::unwrap()` on an `Err` value: Os { code: 2, kind: NotFound, message: "No such file or directory" }
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

. ~/.bashrc && timeout 120s npm test -- -c -t takahirox -p 8132

. ~/.bashrc && timeout 120s npm test -- -c -t timestretch -p 8134

. ~/.bashrc && timeout 120s npm test -

In [139]:
import subprocess

timeout = 120 # seconds
engine_kind = ['wizard-int']
opt_kind = ['noopt', 'split', 'merge', 'custom', 'benchmark'] # custom and benchmark are technically not replay opt though
replay_metrics = {testname: {'wizard-int': {opt: {} for opt in opt_kind}} for testname, _ in successful_results}
# parallel takes around 2 minutes, while serial takes around 5.5 minutes
# parallel for quick experiments and sequential for final results
parallel = False


def get_replay_wasm(testname, opt):
    regex = ''
    match opt:
        case 'noopt':
            regex = 'merge|split|custom|benchmark'
        case 'split':
            regex = 'noopt|merge|custom|benchmark'
        case 'merge':
            regex = 'noopt|split|custom|benchmark'
        case 'custom':
            regex = 'noopt|split|merge|benchmark'
        case 'benchmark':
            regex = 'noopt|split|merge|custom'
        case _:
            exit('invalid op')
    find_command = f"find {r3_path}/tests/online/{testname} -name replay.wasm | grep -vE '{regex}'"
    find_result = subprocess.run(find_command, shell=True, capture_output=True, text=True)
    replay_path = find_result.stdout.strip()
    return replay_path

def run_wizard(testname, engine_kind):
    global replay_metrics
    for opt in opt_kind:
        replay_path = get_replay_wasm(testname, opt)
        command = f". ~/.bashrc && timeout {timeout}s wizeng.x86-64-linux --metrics --monitors=profile {replay_path}"
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        isNormal = 'o' if result.returncode == 0  else ''
        if not isNormal:
            print(result.args)
            print(result.stderr)
            replay_metrics[testname][engine_kind][opt] = 'N/A'
        else:
            monitor, profile = result.stdout.split("pregen:time_us")
            profile = 'pregen:time_us' + profile
            # Make replay_metrics after "pregen:time_us" a key of some object
            replay_metrics[testname][engine_kind][opt] |= {line.rsplit(":", 1)[0].strip(): line.rsplit(":", 1)[1].strip().replace("μs", "").strip() for line in profile.split("\n") if line}

if parallel:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_command, testname, i) for i, testname in enumerate(testset)]
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
else:
    for testname, _ in successful_results:
        for kind in engine_kind:
            run_wizard(testname, kind)


. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile 

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/funky-kart/noopt/dbad40-d1f0-4268-8002-6379008e2bca/replay.wasm

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile 



In [144]:
from tabulate import tabulate
print('RQ1: Applicability')

rq1_results = results
print(tabulate(rq1_results, tablefmt="latex"))

RQ1: Applicability
\begin{tabular}{ll}
\hline
 bullet       & o \\
 factorial    & o \\
 ffmpeg       &   \\
 fractals     & o \\
 funky-kart   & o \\
 game-of-life & o \\
 gotemplate   &   \\
 hnset-bench  & o \\
 jqkungfu     &   \\
 lichess      &   \\
 mandelbrot   & o \\
 ogv          & o \\
 onnxjs       &   \\
 pacalc       & o \\
 playnox      &   \\
 roslyn       &   \\
 rustpython   &   \\
 sandspiel    & o \\
 sqlgui       & o \\
 sqlpractice  &   \\
 takahirox    &   \\
 tic-tac-toe  & o \\
 timestretch  &   \\
 vaporboy     &   \\
 video        & o \\
 waforth      &   \\
 wasmsh       & o \\
 wheel        &   \\
\hline
\end{tabular}


In [145]:
print('RQ2-1: Performance-Record')

print('TODO')

print('RQ2-2: Performance-Replay')

def get_metric(testname, metric):
    return replay_metrics[testname]['wizard-int']['custom'][metric]
rq22_results = [['Test name', 'replay proportion', 'pregen time', 'load time', 'validate time', 'spc time', 'start time', 'main time']] + [[testname, 'TODO', get_metric(testname, 'pregen:time_us'), get_metric(testname, 'load:time_us'), get_metric(testname, 'validate:time_us'), get_metric(testname, 'spc:time_us'), get_metric(testname, 'start:time_us'), get_metric(testname, 'main:time_us')] for testname in replay_metrics if replay_metrics[testname]['wizard-int']['custom'] != 'N/A']
print(tabulate(rq22_results, tablefmt="latex"))

RQ2-1: Performance-Record
TODO
RQ2-2: Performance-Replay
\begin{tabular}{llllllll}
\hline
 Test name    & replay proportion & pregen time & load time & validate time & spc time & start time & main time \\
 bullet       & TODO              & 8           & 11671     & 10378         & 0        & 0          & 5771733   \\
 factorial    & TODO              & 6           & 768       & 662           & 0        & 0          & 68        \\
 fractals     & TODO              & 5           & 39        & 14            & 0        & 0          & 1272      \\
 funky-kart   & TODO              & 7           & 13417     & 9265          & 0        & 0          & 5136387   \\
 game-of-life & TODO              & 6           & 52        & 29            & 0        & 0          & 182       \\
 hnset-bench  & TODO              & 5           & 35        & 14            & 0        & 0          & 4         \\
 mandelbrot   & TODO              & 7           & 1518      & 1215          & 0        & 0          & 303

In [146]:
print('RQ3: Trace Reduction')

print('TODO')

RQ3: Trace Reduction
TODO


In [147]:
print('RQ4: Replay Optimization')

def get_metric(testname, opt, time):
    metric = replay_metrics[testname]['wizard-int'][opt]
    if metric == 'N/A':
        return 0
    else:
        return metric[time]

print('RQ4-1: Load time')
time = 'load:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in replay_metrics]
print(tabulate(rq4_results, tablefmt="latex"))

print('RQ4-2: Validate time')
time = 'validate:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in replay_metrics]
print(tabulate(rq4_results, tablefmt="latex"))

print('RQ4-3: Main time')
time = 'main:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in replay_metrics]
print(tabulate(rq4_results, tablefmt="latex"))

RQ4: Replay Optimization
RQ4-1: Load time
\begin{tabular}{lllll}
\hline
 Test name    & noopt time & split time & merge time & fullopt time \\
 bullet       & 12070      & 12207      & 11648      & 11671        \\
 factorial    & 772        & 771        & 770        & 768          \\
 fractals     & 36         & 37         & 37         & 39           \\
 funky-kart   & 0          & 124773     & 13733      & 13417        \\
 game-of-life & 56         & 53         & 52         & 52           \\
 hnset-bench  & 36         & 38         & 36         & 35           \\
 mandelbrot   & 14082      & 14063      & 1509       & 1518         \\
 ogv          & 30         & 28         & 26         & 25           \\
 pacalc       & 4783       & 4877       & 4664       & 4596         \\
 sandspiel    & 18639      & 18605      & 18692      & 18572        \\
 sqlgui       & 13394      & 13369      & 13345      & 13317        \\
 tic-tac-toe  & 561        & 557        & 565        & 559          \\
 vide