In [3]:
import os, json, re, subprocess


def assert_cpus_disabled(start, end):
    """Assert that every CPU in the inclusive range [start, end] is offline.

    Reads the online-CPU list from ``/sys/devices/system/cpu/online`` and
    checks each CPU id in the range against it.

    Args:
        start: first CPU id to check (inclusive).
        end: last CPU id to check (inclusive).

    Raises:
        AssertionError: if any CPU in the range is listed as online.
    """
    with open('/sys/devices/system/cpu/online', 'r') as f:
        online_spec = f.read().strip()

    # The file holds a comma-separated list of ids and inclusive ranges
    # (e.g. "0-15,32"). Expand it into a set of ints so membership is tested
    # numerically; a substring test would claim CPU 3 is absent from "0-15".
    online_cpus = set()
    for chunk in online_spec.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        first, _, last = chunk.partition('-')
        online_cpus.update(range(int(first), int(last or first) + 1))

    for cpu in range(start, end + 1):
        if cpu in online_cpus:
            raise AssertionError(f"CPU {cpu} is enabled")
def check_cpu_governor(start, end):
    """Assert that CPUs start..end (inclusive) use the 'performance' governor.

    A CPU whose ``scaling_governor`` file is missing is reported with a
    printed message and otherwise ignored.

    Raises:
        AssertionError: if a CPU's governor is anything but 'performance'.
    """
    for cpu in range(start, end + 1):
        governor_file = f"/sys/devices/system/cpu/cpu{cpu}/cpufreq/scaling_governor"
        # Guard clause: nothing to verify when the sysfs entry is absent.
        if not os.path.exists(governor_file):
            print(f"CPU {cpu} does not exist or does not have a scaling governor")
            continue
        with open(governor_file, 'r') as handle:
            current = handle.read().strip()
        assert current == 'performance', f"CPU {cpu} governor is not set to performance"
def assert_cover_all(expected_dirs):
    """Assert that ``<r3_path>/tests/online`` contains exactly ``expected_dirs``.

    Only sub-directories are considered. On a mismatch the missing and the
    unexpected directory names are printed before the assertion fails.

    Raises:
        AssertionError: if the directory names differ from ``expected_dirs``.
    """
    online_root = os.path.join(r3_path, 'tests/online')
    found = {
        entry
        for entry in os.listdir(online_root)
        if os.path.isdir(os.path.join(online_root, entry))
    }
    wanted = set(expected_dirs)
    if found != wanted:
        missing_dirs = wanted - found
        extra_dirs = found - wanted
        print(f"Assertion failed: Missing directories: {missing_dirs}, Extra directories: {extra_dirs}")
    assert found == wanted
def extract_samples_and_mean(output):
    """Parse the sample count and mean out of a histogram report.

    Looks for the first ``recorded <N> samples, mean = <X>`` fragment in
    ``output``.

    Returns:
        ``[N, X]`` with N as an int and X as a float.

    Raises:
        ValueError: if ``output`` has no such fragment, or the mean is not a
            valid float.
    """
    match = re.search(r"recorded (\d+) samples, mean = ([\d\.]+)", output)
    if match is None:
        # Fail with context instead of an AttributeError on None.
        raise ValueError("no 'recorded N samples, mean = X' line found in output")
    samples = int(match.group(1))
    mean = float(match.group(2))
    return [samples, mean]
def extract_cycle_counts(output):
    """Return every ``cpu_core/cpu-cycles/`` counter found in ``output``.

    Each counter is the comma-grouped number preceding the event name; the
    thousands separators are removed and the values are returned as ints in
    order of appearance.
    """
    counts = []
    for hit in re.finditer(r"(\d+(?:,\d+)*)\s+cpu_core/cpu-cycles/", output):
        digits = hit.group(1).replace(',', '')
        counts.append(int(digits))
    return counts
def extract_summarize(output):
    """Parse the numeric columns of the last CSV line of ``output``.

    The first column (the benchmark path) is dropped; every remaining column
    is converted to an int. Columns written in float or scientific notation
    (e.g. ``2.47543e+09``) are truncated to an int.

    Returns:
        A list of ints, one per column after the first. Empty when the last
        line has no further columns.

    Raises:
        ValueError: if a column is not numeric.
    """
    last_line = output.strip().split('\n')[-1]
    values = []
    for field in last_line.split(',')[1:]:
        try:
            # Plain integers are parsed directly so values above 2**53 keep
            # full precision rather than being rounded through a float.
            values.append(int(field))
        except ValueError:
            values.append(int(float(field)))
    return values
def trace_match(metrics, testname):
    """Return the ``trace_match`` entry stored in ``testname``'s summary."""
    summary = metrics[testname]['summary']
    return summary['trace_match']

# Self-check for extract_cycle_counts: two cpu-cycles counters embedded in
# surrounding output must come back as plain ints, in order of appearance.
test_input = """
DevTools listening on ws://127.0.0.1:9966/devtools/browser/f72191be-fbd6-4fbd-b21f-2703612f1f13
 Performance counter stats for 'CPU(s) 0-15':
    40,125,664,880      cpu_core/cpu-cycles/                                                  
       6.157604349 seconds time elapsed
 Performance counter stats for 'CPU(s) 0-15':
     2,702,581,574      cpu_core/cpu-cycles/                                                  
       0.267278301 seconds time elapsed
"""
assert extract_cycle_counts(test_input) == [40125664880, 2702581574]
# Self-check for extract_samples_and_mean: the histogram line is buried in
# unrelated test-runner output and must yield [sample count, mean].
test_input_2 = """
================
Run online tests
================
WARNING: You need a working internet connection
WARNING: Tests depend on third party websites. If those websites changed since this testsuite was created, it might not work
fib  -Histogram: V8.ExecuteMicroSeconds recorded 581 samples, mean = 9993.9 (flags = 0x41)

581 9993.9
nvm
"""
assert extract_samples_and_mean(test_input_2) == [581, 9993.9]
# Self-checks for extract_summarize: only the last CSV line is parsed and its
# first column (the benchmark path) is dropped.
# Case 1: every numeric column is a plain integer.
test_input_3 = """
benchmark,instr:static_total,instr:static_replay,instrs:dynamic_total,instrs:dynamic_replay,ticks:total,ticks:replay
/home/don/wasm-r3/tests/online/hydro/benchmark/bin_0/replay.wasm,344760,191,27138,59,228486,8934
"""
# Case 2: some columns use scientific notation and must still come out as ints.
test_input_4 = """
benchmark,instr:static_total,instr:static_replay,instrs:dynamic_total,instrs:dynamic_replay,ticks:total,ticks:replay
/home/don/wasm-r3/tests/online/multiplyDouble/benchmark/bin_0/replay.wasm,256244,238157,2.47543e+09,2100082177,9918500160,8.88911e+09
"""
assert(extract_summarize(test_input_3) == [344760, 191, 27138, 59, 228486, 8934])
assert(extract_summarize(test_input_4) == [256244, 238157, 2475430000, 2100082177, 9918500160, 8889110000])

# Machine sanity checks; run ~/cpu.sh first.
# NOTE(review): presumably ~/cpu.sh is what sets the governor and takes CPUs
# 16-31 offline -- confirm, the script is not part of this file.
# CPUs 0-15 must use the 'performance' governor and CPUs 16-31 must be
# offline; either call raises AssertionError otherwise.
check_cpu_governor(0, 15)
assert_cpus_disabled(16, 31)


# For each optimisation variant, an extended-regex alternation of every OTHER
# variant name. Paths matching the pattern are discarded, which leaves only
# the files belonging to the requested variant.
_OTHER_VARIANTS = {
    'noopt': 'merge|split|custom|benchmark',
    'split': 'noopt|merge|custom|benchmark',
    'merge': 'noopt|split|custom|benchmark',
    'benchmark': 'noopt|split|merge|custom',
}


def _find_test_artifact(testname, opt, filename):
    """Locate ``filename`` for one variant of one online test.

    Runs ``find <r3_path>/tests/online/<testname> -name <filename>`` and drops
    every result line that matches the pattern of the other variants (the
    equivalent of piping through ``grep -vE``).

    Returns:
        The remaining path(s) as one stripped string: empty when nothing was
        found, newline-separated when several files match.

    Raises:
        ValueError: if ``opt`` is not a known variant.
    """
    try:
        exclude = _OTHER_VARIANTS[opt]
    except KeyError:
        # exit() is meant for interactive shells and behaves differently
        # under IPython; raise a regular exception instead.
        raise ValueError(f"invalid opt: {opt!r}") from None
    # Argument list rather than a shell string: no quoting or injection
    # problems if the path contains spaces or shell metacharacters.
    find_result = subprocess.run(
        ['find', f"{r3_path}/tests/online/{testname}", '-name', filename],
        capture_output=True,
        text=True,
    )
    # NOTE: as with the former grep, the pattern is matched against the whole
    # path, including r3_path and testname.
    kept = [
        line
        for line in find_result.stdout.split('\n')
        if not re.search(exclude, line)
    ]
    return '\n'.join(kept).strip()


def get_replay_wasm(testname, opt):
    """Return the path of ``replay.wasm`` for ``testname`` built with ``opt``.

    ``opt`` is one of 'noopt', 'split', 'merge' or 'benchmark'; anything else
    raises ValueError. Returns '' when no file is found.
    """
    return _find_test_artifact(testname, opt, 'replay.wasm')


def get_pure_js(testname, opt):
    """Return the path of ``pure.js`` for ``testname`` built with ``opt``.

    ``opt`` is one of 'noopt', 'split', 'merge' or 'benchmark'; anything else
    raises ValueError. Returns '' when no file is found.
    """
    return _find_test_artifact(testname, opt, 'pure.js')


def get_glue_js(testname, opt):
    """Return the path of ``replay.js`` for ``testname`` built with ``opt``.

    ``opt`` is one of 'noopt', 'split', 'merge' or 'benchmark'; anything else
    raises ValueError. Returns '' when no file is found.
    """
    return _find_test_artifact(testname, opt, 'replay.js')

# Setup evaluation suite

# Benchmarks that make up the evaluation; each name is a directory under
# <r3_path>/tests/online (checked by assert_cover_all below).
eval_set = ['fractals', 'parquet', 'ogv', 'factorial', 'gotemplate', 'sandspiel', 'hydro', 'hnset-bench', 'boa', 'livesplit', 'ffmpeg', 'takahirox', 'pathfinding', 'bullet', 'rustpython', 'timestretch', 'riconpacker', 'rguistyler', 'wheel', 'game-of-life', 'jsc', 'multiplyInt', 'fib', 'guiicons', 'funky-kart', 'playnox', 'jqkungfu', 'figma-startpage', 'sqlpractice', 'mandelbrot', 'pacalc', 'waforth', 'roslyn', 'lichess', 'rtexpacker', 'image-convolute', 'commanderkeen', 'onnxjs', 'rguilayout', 'rfxgen', 'rtexviewer', 'multiplyDouble', 'sqlgui']

# Benchmarks that stay in eval_set but are never executed: run_wasmr3 reports
# them as a failed trace match without running anything.
skip_set = [
    'ogv' # the record run is abnormal, but the test framework does not filter it out because it still produces output.
]

# These are excluded as they don't appear in either Made with WebAssembly(https://madewithwebassembly.com/) or Awesome-Wasm(https://github.com/mbasso/awesome-wasm)
excluded_set = [
    "handy-tools",
    'heatmap',
    "kittygame",
    'visual6502remix',
    'noisereduction',
    'skeletal',
    'uarm',
    'virtualkc',
]

# Report the sizes of both sets ('union' is the size of eval_set).
print('union: ', len(eval_set))
print('exclude: ', len(excluded_set))
# Every directory under tests/online must be in exactly eval_set + excluded_set,
# so a newly added or removed test cannot go unnoticed.
assert_cover_all(eval_set + excluded_set)

# Benchmarks the following cells actually operate on.
testset = ['sqlpractice']
# Empty per-benchmark result skeleton; later cells fill in the three sections.
metrics = {testname: { 'summary': {}, 'record_metrics': {}, 'replay_metrics': {}} for testname in testset }
# NOTE(review): presumably uncommented to reset metrics.json to the empty skeleton -- confirm.
# with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

union:  43
exclude:  8


In [5]:
import subprocess 
import json

# Start from the metrics persisted on disk; this replaces the in-memory
# `metrics` dict built by the setup cell.
with open('metrics.json', 'r') as f: metrics = json.load(f)

# Trace difference experiment
timeout = 120  # seconds; interpolated into the `timeout {timeout}s` prefix of the npm command in run_wasmr3

def run_wasmr3(testname):
    """Run the npm test for ``testname`` and report whether it succeeded.

    Tests listed in ``skip_set`` are not executed and count as failed. The
    command is run through the shell after sourcing ``~/.bashrc`` and is
    bounded by the module-level ``timeout`` (seconds). The command line of a
    failed run is printed.

    Returns:
        ``[testname, ok]`` where ``ok`` is True only for exit status 0.
    """
    if testname in skip_set:
        return [testname, False]

    command = f". ~/.bashrc && timeout {timeout}s npm test -- -t {testname}"
    completed = subprocess.run(command, shell=True, capture_output=True, text=True)
    succeeded = completed.returncode == 0
    if not succeeded:
        print(completed.args)
    return [testname, succeeded]

# Run the record/replay test for every selected benchmark, one after another.
results = [run_wasmr3(testname) for testname in testset]
for testname, isNormal in results:
    # True only when the npm test exited with status 0 (skipped tests are False).
    metrics[testname]['summary']['trace_match'] = isNormal
    if isNormal:
        # Merge the stats.json of the benchmark variant into the summary.
        with open(f"{r3_path}/tests/online/{testname}/benchmark/bin_0/stats.json", 'r') as f: stats = json.load(f)
        metrics[testname]['summary'] |= stats

# Persist the results before the lookup checks below, so a failing assert
# does not lose them.
with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)
# Sanity-check the artifact lookup helpers against the expected on-disk layout
# of the 'game-of-life' benchmark.
assert get_replay_wasm('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/replay.wasm"
assert get_glue_js('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/replay.js"
assert get_pure_js('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/pure.js"

KeyboardInterrupt: 

In [None]:
import subprocess, csv, json

# Start from the metrics persisted on disk by the previous experiment.
with open('metrics.json', 'r') as f: metrics = json.load(f)

# Replay characteristic experiment
timeout = 180 # seconds
# Engine and variant labels under which results are stored in
# metrics[...]['replay_metrics'][engine][opt]; opt also selects which
# replay.wasm is profiled.
wizard_engine_kind = ['wizeng-int']
wizard_opt_kind = ['benchmark']

# NOTE (original author): the 'int' label is misleading -- the data is
# actually collected in JIT mode, not interpreter mode.
def run_icount(testname, engine, opt):
    """Collect per-function instruction counts for a replay binary.

    Runs ``run-icount.bash`` on the ``replay.wasm`` of ``testname``/``opt``
    with ``DATA_FILE`` pointing at ``<testname>-icount.csv``, then reads that
    CSV back.

    Args:
        testname: benchmark name.
        engine: unused; the command always passes ``wizeng.x86-64-linux``.
        opt: variant used to select the replay binary.

    Returns:
        ``{function: {'static': ..., 'dynamic': ...}}`` with the raw CSV
        strings, or ``{}`` if running the script or reading the CSV failed.
    """
    data_path = f"/home/don/wasm-r3/tests/data/{testname}-icount.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && DATA_FILE={data_path} /home/don/wasm-r3-paper/oopsla/data/run-icount.bash {replay_path} wizeng.x86-64-linux'
    try:
        # NOTE(review): the exit status is not checked, so a CSV left over
        # from an earlier run would be read after a failed run -- confirm
        # whether the script's exit code can be trusted before adding check=True.
        subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        # newline='' as recommended for files handed to the csv module.
        with open(data_path, 'r', newline='') as f:
            return {
                row['Function']: {'static': row['static'], 'dynamic': row['dynamic']}
                for row in csv.DictReader(f)
            }
    except Exception as e:
        print("Failed to run:")
        print(cmd)
        # Show the reason as well, as run_wizard does.
        print(e)
        return {}
    
def run_fprofile(testname, engine, opt):
    """Collect the per-function profile for a replay binary.

    Runs ``run-fprofile.bash`` on the ``replay.wasm`` of ``testname``/``opt``
    with ``DATA_FILE`` pointing at ``<testname>-fprofile.csv``, then reads
    that CSV back. Rows whose ``Function`` starts with ``r3`` are per-function
    entries; every other row is treated as a ``key: value`` summary line
    stored in the ``Function`` column.

    Args:
        testname: benchmark name.
        engine: unused; the command always passes ``wizeng.x86-64-linux``.
        opt: variant used to select the replay binary.

    Returns:
        ``(functions, summary)``: ``functions`` maps each r3 function to its
        ``count``/``cycles``/``percent`` strings, ``summary`` maps each
        summary key to its value. ``({}, {})`` on any failure.
    """
    data_path = f"/home/don/wasm-r3/tests/data/{testname}-fprofile.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && DATA_FILE={data_path} /home/don/wasm-r3-paper/oopsla/data/run-fprofile.bash {replay_path} wizeng.x86-64-linux'
    try:
        # NOTE(review): the exit status is not checked, so a CSV left over
        # from an earlier run would be read after a failed run -- confirm.
        subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        # newline='' as recommended for files handed to the csv module.
        with open(data_path, 'r', newline='') as f:
            output_dict = {}
            summary_dict = {}
            for row in csv.DictReader(f):
                name = row['Function']
                if name.startswith('r3'):
                    output_dict[name] = {'count': row['count'], 'cycles': row['cycles'], 'percent': row['percent']}
                else:
                    # A summary row without ':' raises here and discards the
                    # whole result, as before.
                    key, value = name.rsplit(':', 1)
                    summary_dict[key.strip()] = value.strip()
            return output_dict, summary_dict
    except Exception as e:
        print("Failed to run:")
        print(cmd)
        # Show the reason as well, as run_wizard does.
        print(e)
        return {}, {}
    
def run_summarize(testname, engine, opt):
    """Summarise the icount and fprofile CSVs of one replay binary.

    Runs ``summarize.bash`` with ``ICOUNT_FILE`` and ``TICKS_FILE`` pointing
    at the CSVs written by run_icount and run_fprofile, and parses the last
    line of its stdout with extract_summarize.

    Args:
        testname: benchmark name.
        engine: unused.
        opt: variant used to select the replay binary.

    Returns:
        A dict with the six summary counters, or ``{}`` if the script failed
        or its output did not have exactly six numeric columns.
    """
    icount_path = f"/home/don/wasm-r3/tests/data/{testname}-icount.csv"
    ticks_path = f"/home/don/wasm-r3/tests/data/{testname}-fprofile.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && ICOUNT_FILE={icount_path} TICKS_FILE={ticks_path} /home/don/wasm-r3-paper/oopsla/data/summarize.bash {replay_path}'
    # Result keys, in the column order of the summary line.
    keys = (
        'instr_static_total',
        'instr_static_replay',
        'instrs_dynamic_total',
        'instr_dynamic_replay',
        'ticks_total',
        'ticks_replay',
    )
    try:
        result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        values = extract_summarize(result.stdout)
        if len(values) != len(keys):
            # Same failure mode as the former six-way tuple unpacking.
            raise ValueError(f"expected {len(keys)} summary columns, got {len(values)}")
        return dict(zip(keys, values))
    except Exception as e:
        print("Failed to run:")
        print(cmd)
        # Show the reason as well, as run_wizard does.
        print(e)
        return {}
    
# Collect icount / fprofile / summary data for every benchmark whose trace
# matched, for each configured engine label and variant.
results = []
for testname in testset:
    if trace_match(metrics, testname): 
        for engine in wizard_engine_kind:
            for opt in wizard_opt_kind:
                # Create the nested result slots; a missing or empty entry is
                # replaced by a fresh dict.
                if not metrics[testname]['replay_metrics'].get(engine): metrics[testname]['replay_metrics'][engine] = {}
                if not metrics[testname]['replay_metrics'][engine].get(opt): metrics[testname]['replay_metrics'][engine][opt] = {}
                # Portable replacement for the IPython-only `!mkdir -p data`.
                os.makedirs('data', exist_ok=True)
                metrics[testname]['replay_metrics'][engine][opt]['icount'] = run_icount(testname, engine, opt) 
                # summary_dict is currently not stored anywhere.
                output_dict, summary_dict = run_fprofile(testname, engine, opt) 
                metrics[testname]['replay_metrics'][engine][opt]['fprofile'] = output_dict
                metrics[testname]['summary'] |= run_summarize(testname, engine, opt)

with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

In [6]:
import subprocess, json, concurrent.futures, os

# Start from the metrics persisted on disk by the earlier experiments.
with open('metrics.json', 'r') as f:  metrics = json.load(f)
def trace_match(metrics, testname):
    """Return the ``trace_match`` entry stored in ``testname``'s summary."""
    summary = metrics[testname]['summary']
    return summary['trace_match']
timeout = 180 # seconds; used as the `timeout {timeout}s` prefix of the command in run_wizard
# Engine labels known to the evaluation; the driver below only submits
# 'wizeng-int' runs.
engine_kind = ['sm', 'sm-base', 'sm-opt', 'v8', 'v8-liftoff', 'v8-turbofan', 'jsc', 'jsc-int','jsc-bbq','jsc-omg', 'wizeng','wizeng-int','wizeng-jit','wizeng-dyn','wasmtime','wasmer','wasmer-base']
# Replay variants; each one selects a different replay.wasm via get_replay_wasm.
opt_kind = ['noopt', 'split', 'merge', 'benchmark']

# Root of the wasm-r3 checkout; override with the WASMR3_PATH environment variable.
r3_path = os.getenv('WASMR3_PATH', '/home/don/wasm-r3')

def get_replay_wasm(testname, opt):
    """Return the path of ``replay.wasm`` for ``testname`` built with ``opt``.

    Runs ``find <r3_path>/tests/online/<testname> -name replay.wasm`` and
    drops every result line that matches the name of another variant (the
    equivalent of piping through ``grep -vE``).

    Args:
        testname: benchmark directory name under ``tests/online``.
        opt: one of 'noopt', 'split', 'merge' or 'benchmark'.

    Returns:
        The remaining path(s) as one stripped string: empty when nothing was
        found, newline-separated when several files match.

    Raises:
        ValueError: if ``opt`` is not a known variant.
    """
    # For each variant, an alternation of every OTHER variant name.
    other_variants = {
        'noopt': 'merge|split|custom|benchmark',
        'split': 'noopt|merge|custom|benchmark',
        'merge': 'noopt|split|custom|benchmark',
        'benchmark': 'noopt|split|merge|custom',
    }
    if opt not in other_variants:
        # exit() is meant for interactive shells and behaves differently
        # under IPython; raise a regular exception instead.
        raise ValueError(f"invalid opt: {opt!r}")
    exclude = other_variants[opt]
    # Argument list rather than a shell string: no quoting or injection
    # problems if the path contains spaces or shell metacharacters.
    find_result = subprocess.run(
        ['find', f"{r3_path}/tests/online/{testname}", '-name', 'replay.wasm'],
        capture_output=True,
        text=True,
    )
    # NOTE: as with the former grep, the pattern is matched against the whole
    # path, including r3_path and testname.
    kept = [
        line
        for line in find_result.stdout.split('\n')
        if not re.search(exclude, line)
    ]
    return '\n'.join(kept).strip()

def run_wizard(testname, engine, opt, i):
    """Run one replay under ``wizeng.x86-64-linux --metrics`` and parse the metrics.

    Everything in the engine's stdout from the ``pregen:time_us`` marker
    onwards is read as ``name: value`` lines; the ``μs`` unit is stripped from
    the values.

    Args:
        testname: benchmark name.
        engine: label returned with the result (the command does not use it).
        opt: variant used to select the replay binary.
        i: repetition index, returned unchanged.

    Returns:
        ``[testname, engine, opt, i, metrics_dict]`` on success. On any
        failure the error is printed and None is returned.
    """
    try:
        replay_path = get_replay_wasm(testname, opt)
        command = f". ~/.bashrc && timeout {timeout}s  wizeng.x86-64-linux --metrics {replay_path}"
        completed = subprocess.run(command, shell=True, capture_output=True, text=True)
        if completed.returncode != 0:
            raise Exception(completed.args)
        # Exactly one marker is expected; the unpacking fails otherwise.
        _, tail = completed.stdout.split("pregen:time_us")
        profile = 'pregen:time_us' + tail
        parsed = {}
        for line in profile.split("\n"):
            if not line:
                continue
            fields = line.rsplit(":", 1)
            name = fields[0].strip()
            parsed[name] = fields[1].strip().replace("μs", "").strip()
        return [testname, engine, opt, i, parsed]
    except Exception as e:
        print(f"Failed to run {testname} with {opt}, engine: {engine}")
        print(e)
        return None

# Iterate over every benchmark present in metrics.json (dict keys).
testset = metrics
# Ten repetitions per (benchmark, variant), only for benchmarks whose trace
# matched. NOTE(review): the runs execute concurrently on a thread pool, which
# may affect the timings they report -- confirm this is intended.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(run_wizard, testname, 'wizeng-int', opt, i) for testname in testset if trace_match(metrics, testname) for opt in opt_kind for i in range(10)]
    results = [future.result() for future in concurrent.futures.as_completed(futures)]

for result in results:
    # run_wizard returns None for a failed run; nothing to store.
    if result is None: continue
    testname, engine, opt, i, metric = result
    try: 
        replay_metrics = metrics[testname]['replay_metrics']
        # Use .get so a missing engine/opt entry is created instead of raising
        # KeyError, which previously discarded every result on fresh metrics.
        if not replay_metrics.get(engine): replay_metrics[engine] = {}
        # Anything that is not already a list of runs is replaced.
        if not isinstance(replay_metrics[engine].get(opt), list): replay_metrics[engine][opt] = []
        replay_metrics[engine][opt].append(metric)
    except Exception as e:
        print(f"Failed to store {testname} with {opt}, engine: {engine}")
        print(e)
with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86-64-linux --metrics /home/don/wasm-r3/tests/online/funky-kart/noopt/bin_0/replay.wasm
Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86-64-linux --metrics /home/don/wasm-r3/tests/online/funky-kart/noopt/bin_0/replay.wasm
Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86-64-linux --metrics /home/don/wasm-r3/tests/online/funky-kart/noopt/bin_0/replay.wasm
Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86-64-linux --metrics /home/don/wasm-r3/tests/online/funky-kart/noopt/bin_0/replay.wasm
Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86-64-linux --metrics /home/don/wasm-r3/tests/online/funky-kart/noopt/bin_0/replay.wasm
Failed to run funky-kart with noopt, engine: wizeng-int
. ~/.bashrc && timeout 180s  wizeng.x86