In [6]:
import os, json, re, subprocess

r3_path = os.getenv('WASMR3_PATH', '/home/don/wasm-r3')

def assert_cpus_disabled(start, end, online_path='/sys/devices/system/cpu/online'):
    """Assert that every CPU in ``start..end`` (inclusive) is offline.

    The online-CPU file holds a comma-separated list of ids and inclusive
    ranges (for example ``0-15`` or ``0-3,8,10-11``).  The list is expanded
    into individual ids before checking: a substring test on the raw text
    misses CPUs hidden inside a range (``"7"`` does not occur in ``"0-15"``)
    and misreports ids that are merely digits of another number (``"1"``
    occurs in ``"10-11"``).

    Args:
        start: First CPU id to check.
        end: Last CPU id to check (inclusive).
        online_path: File holding the online-CPU list.  Defaults to the
            sysfs location the original code read.

    Raises:
        AssertionError: If any CPU in the range is listed as online.
    """
    with open(online_path, 'r') as f:
        online_spec = f.read().strip()

    online_cpus = set()
    for chunk in online_spec.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        # "a-b" is an inclusive range; a bare "a" is a single CPU.
        first, _, last = chunk.partition('-')
        online_cpus.update(range(int(first), int(last or first) + 1))

    for cpu in range(start, end + 1):
        assert cpu not in online_cpus, f"CPU {cpu} is enabled"
def check_cpu_governor(start, end):
    """Check that CPUs ``start..end`` (inclusive) use the ``performance`` governor.

    A CPU without a ``scaling_governor`` entry under sysfs is reported on
    stdout and skipped.

    Raises:
        AssertionError: If a governor file exists and holds another value.
    """
    for cpu in range(start, end + 1):
        governor_file = f"/sys/devices/system/cpu/cpu{cpu}/cpufreq/scaling_governor"
        if not os.path.exists(governor_file):
            print(f"CPU {cpu} does not exist or does not have a scaling governor")
            continue
        with open(governor_file, 'r') as handle:
            current = handle.read().strip()
            assert current == 'performance', f"CPU {cpu} governor is not set to performance"
def assert_cover_all(expected_dirs):
    """Assert that ``expected_dirs`` names exactly the directories in ``tests/online``.

    The sub-directories of ``<r3_path>/tests/online`` are compared, as a set,
    against ``expected_dirs``.  On a mismatch the missing and unexpected names
    are printed before the ``AssertionError`` is re-raised.
    """
    suite_root = os.path.join(r3_path, 'tests/online')
    found_dirs = list(filter(
        lambda entry: os.path.isdir(os.path.join(suite_root, entry)),
        os.listdir(suite_root),
    ))
    try:
        assert set(found_dirs) == set(expected_dirs)
    except AssertionError:
        not_found = set(expected_dirs) - set(found_dirs)
        unexpected = set(found_dirs) - set(expected_dirs)
        print(f"Assertion failed: Missing directories: {not_found}, Extra directories: {unexpected}")
        raise
def extract_samples_and_mean(output):
    """Parse the sample count and mean from a histogram summary line.

    Looks for the first occurrence of ``recorded <N> samples, mean = <X>`` in
    ``output`` (for example
    ``... V8.ExecuteMicroSeconds recorded 581 samples, mean = 9993.9 ...``).

    Args:
        output: Text to search.

    Returns:
        ``[samples, mean]`` with ``samples`` as ``int`` and ``mean`` as
        ``float``.

    Raises:
        ValueError: If no such line is present.  Without this check the
            failure surfaced as an ``AttributeError`` on ``None``, which says
            nothing about the actual cause.
    """
    match = re.search(r"recorded (\d+) samples, mean = ([\d\.]+)", output)
    if match is None:
        raise ValueError("no 'recorded <N> samples, mean = <X>' line found in output")
    return [int(match.group(1)), float(match.group(2))]
def extract_cycle_counts(output):
    """Return every ``cpu_core/cpu-cycles/`` counter found in ``output``.

    Each counter is the comma-grouped number that precedes the event name;
    the grouping commas are removed and the value is returned as ``int``.
    Values are listed in order of appearance.
    """
    counts = []
    for hit in re.finditer(r"(\d+(?:,\d+)*)\s+cpu_core/cpu-cycles/", output):
        digits = hit.group(1).replace(',', '')
        counts.append(int(digits))
    return counts
def extract_summarize(output):
    """Parse the last CSV line of ``output`` into a list of integers.

    The first column (the benchmark path in the samples used by this file)
    is dropped; every remaining column is read as a float, so scientific
    notation is accepted, and then truncated to ``int``.
    """
    last_row = output.strip().split('\n')[-1]
    _first_column, *numeric_fields = last_row.split(',')
    return [int(float(field)) for field in numeric_fields]
def trace_match(metrics, testname):
    """Return the ``trace_match`` flag stored in the summary of ``testname``."""
    summary = metrics[testname]['summary']
    return summary['trace_match']

# Self-check for extract_cycle_counts: sample text with a DevTools banner and
# two "Performance counter stats" summaries; one cycle count is expected per
# summary, with the thousands separators removed.
test_input = """
DevTools listening on ws://127.0.0.1:9966/devtools/browser/f72191be-fbd6-4fbd-b21f-2703612f1f13
 Performance counter stats for 'CPU(s) 0-15':
    40,125,664,880      cpu_core/cpu-cycles/                                                  
       6.157604349 seconds time elapsed
 Performance counter stats for 'CPU(s) 0-15':
     2,702,581,574      cpu_core/cpu-cycles/                                                  
       0.267278301 seconds time elapsed
"""
assert extract_cycle_counts(test_input) == [40125664880, 2702581574]
# Self-check for extract_samples_and_mean: sample test-run output in which
# only the "Histogram: ... recorded N samples, mean = X" line should be parsed.
test_input_2 = """
================
Run online tests
================
WARNING: You need a working internet connection
WARNING: Tests depend on third party websites. If those websites changed since this testsuite was created, it might not work
fib  -Histogram: V8.ExecuteMicroSeconds recorded 581 samples, mean = 9993.9 (flags = 0x41)

581 9993.9
nvm
"""
assert extract_samples_and_mean(test_input_2) == [581, 9993.9]
# Self-checks for extract_summarize: a header line followed by one data row.
# The second sample has values in scientific notation, which must still come
# back as integers.
test_input_3 = """
benchmark,instr:static_total,instr:static_replay,instrs:dynamic_total,instrs:dynamic_replay,ticks:total,ticks:replay
/home/don/wasm-r3/tests/online/hydro/benchmark/bin_0/replay.wasm,344760,191,27138,59,228486,8934
"""
test_input_4 = """
benchmark,instr:static_total,instr:static_replay,instrs:dynamic_total,instrs:dynamic_replay,ticks:total,ticks:replay
/home/don/wasm-r3/tests/online/multiplyDouble/benchmark/bin_0/replay.wasm,256244,238157,2.47543e+09,2100082177,9918500160,8.88911e+09
"""
assert(extract_summarize(test_input_3) == [344760, 191, 27138, 59, 228486, 8934])
assert(extract_summarize(test_input_4) == [256244, 238157, 2475430000, 2100082177, 9918500160, 8889110000])

# Machine preconditions, checked before any measurement. Run ~/cpu.sh first
# (presumably the script that puts the machine into this state - confirm).
# CPUs 0-15 must use the `performance` governor; CPUs 16-31 must be offline.
check_cpu_governor(0, 15)
assert_cpus_disabled(16, 31)


# For each replay variant: an alternation of the path fragments that mark an
# artifact as belonging to a different variant (or to a custom build).  A
# candidate whose path matches the pattern is rejected.
_VARIANT_EXCLUDES = {
    'noopt': 'merge|split|custom|benchmark',
    'split': 'noopt|merge|custom|benchmark',
    'merge': 'noopt|split|custom|benchmark',
    'benchmark': 'noopt|split|merge|custom',
}


def _find_test_artifact(testname, opt, filename, base_path=None):
    """Locate ``filename`` for variant ``opt`` of the online test ``testname``.

    Walks ``<base>/tests/online/<testname>`` and keeps every entry named
    ``filename`` whose path below the test directory does not mention another
    variant.  Only the part of the path below the test directory is filtered,
    so an installation path or test name that happens to contain a variant
    keyword no longer hides every result.

    Args:
        testname: Directory name of the test under ``tests/online``.
        opt: One of ``'noopt'``, ``'split'``, ``'merge'``, ``'benchmark'``.
        filename: Exact file name to look for.
        base_path: Checkout root; ``None`` means the module-level ``r3_path``.

    Returns:
        The matching path, ``''`` when nothing matches (including a missing
        test directory), or several newline-separated paths when more than
        one entry matches.

    Raises:
        ValueError: If ``opt`` is not a known variant.
    """
    if opt not in _VARIANT_EXCLUDES:
        raise ValueError(
            f"invalid opt {opt!r}; expected one of {sorted(_VARIANT_EXCLUDES)}"
        )
    other_variants = re.compile(_VARIANT_EXCLUDES[opt])
    root = r3_path if base_path is None else base_path
    test_dir = f"{root}/tests/online/{testname}"

    hits = []
    for dirpath, dirnames, filenames in os.walk(test_dir):
        dirnames.sort()  # fixed traversal order, so the result is deterministic
        if filename in filenames or filename in dirnames:
            candidate = os.path.join(dirpath, filename)
            if not other_variants.search(candidate[len(test_dir):]):
                hits.append(candidate)
    return "\n".join(hits)


def get_replay_wasm(testname, opt, base_path=None):
    """Return the path of ``replay.wasm`` for variant ``opt`` of ``testname``.

    Returns ``''`` when no such file exists.  Raises ``ValueError`` for an
    unknown ``opt`` (previously the interpreter session was terminated).
    ``base_path`` overrides the module-level ``r3_path`` checkout root.
    """
    return _find_test_artifact(testname, opt, 'replay.wasm', base_path)


def get_pure_js(testname, opt, base_path=None):
    """Return the path of ``pure.js`` for variant ``opt`` of ``testname``.

    Returns ``''`` when no such file exists.  Raises ``ValueError`` for an
    unknown ``opt`` (previously the interpreter session was terminated).
    ``base_path`` overrides the module-level ``r3_path`` checkout root.
    """
    return _find_test_artifact(testname, opt, 'pure.js', base_path)


def get_glue_js(testname, opt, base_path=None):
    """Return the path of ``replay.js`` for variant ``opt`` of ``testname``.

    Returns ``''`` when no such file exists.  Raises ``ValueError`` for an
    unknown ``opt`` (previously the interpreter session was terminated).
    ``base_path`` overrides the module-level ``r3_path`` checkout root.
    """
    return _find_test_artifact(testname, opt, 'replay.js', base_path)

# Setup evaluation suite

# Every online test that takes part in the evaluation.
eval_set = ['fractals', 'parquet', 'ogv', 'factorial', 'gotemplate', 'sandspiel', 'hydro', 'hnset-bench', 'wasmsh', 'boa', 'livesplit', 'ffmpeg', 'takahirox', 'pathfinding', 'bullet', 'rustpython', 'timestretch', 'riconpacker', 'rguistyler', 'wheel', 'game-of-life', 'jsc', 'multiplyInt', 'fib', 'guiicons', 'tic-tac-toe', 'funky-kart', 'playnox', 'jqkungfu', 'figma-startpage', 'sqlpractice', 'mandelbrot', 'pacalc', 'waforth', 'roslyn', 'lichess', 'rtexpacker', 'image-convolute', 'commanderkeen', 'onnxjs', 'rguilayout', 'rfxgen', 'rtexviewer', 'multiplyDouble', 'sqlgui']

# Tests that run_wasmr3 reports as failed without running them.
skip_set = [
    'ogv' # The record run is abnormal, but the test framework does not filter it out because it still produces output.
]

# These are excluded because they appear in neither Made with WebAssembly (https://madewithwebassembly.com/) nor Awesome-Wasm (https://github.com/mbasso/awesome-wasm).
excluded_set = [
    "handy-tools",
    'heatmap',
    "kittygame",
    'visual6502remix',
    'noisereduction',
    'skeletal',
    'uarm',
    'virtualkc',
]

print('union: ', len(eval_set))
print('exclude: ', len(excluded_set))
# Every directory under tests/online must be either evaluated or explicitly excluded.
assert_cover_all(eval_set + excluded_set)

# Tests processed by the cells below (currently a single test).
testset = ['sqlpractice']
# Empty result skeleton per test; the experiment cells fill these in.
metrics = {testname: { 'summary': {}, 'record_metrics': {}, 'replay_metrics': {}} for testname in testset }
# Writing the skeleton to disk is disabled; the following cells load an existing metrics.json.
# with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

union:  45
exclude:  8


In [7]:
import subprocess 
import json

# Start from the metrics already stored on disk; this replaces the in-memory skeleton.
with open('metrics.json', 'r') as f: metrics = json.load(f)

# Trace difference experiment
# Limit, in seconds, handed to the `timeout` command that wraps each `npm test` run.
timeout = 120

def run_wasmr3(testname):
    """Run the test suite for one test and report whether it exited cleanly.

    Tests listed in ``skip_set`` are not run and count as failed.  Otherwise
    ``npm test -- -t <testname>`` is run through the shell under ``timeout``;
    a non-zero exit status counts as failure and the command line is printed.

    Returns:
        ``[testname, succeeded]``.
    """
    if testname in skip_set:
        return [testname, False]
    completed = subprocess.run(
        f". ~/.bashrc && timeout {timeout}s npm test -- -t {testname}",
        shell=True,
        capture_output=True,
        text=True,
    )
    succeeded = completed.returncode == 0
    if not succeeded:
        print(completed.args)
    return [testname, succeeded]

# Run every selected test once and record whether it passed.
results = [run_wasmr3(testname) for testname in testset]
for testname, isNormal in results:
    metrics[testname]['summary']['trace_match'] = isNormal
    if isNormal:
        # A passing run is expected to have left stats.json next to the benchmark binary; merge it into the summary.
        with open(f"{r3_path}/tests/online/{testname}/benchmark/bin_0/stats.json", 'r') as f: stats = json.load(f)
        metrics[testname]['summary'] |= stats

with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)
# Sanity checks for the path helpers. They use 'game-of-life', which is not in
# `testset`, so its benchmark artifacts must already exist on disk.
assert get_replay_wasm('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/replay.wasm"
assert get_glue_js('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/replay.js"
assert get_pure_js('game-of-life', 'benchmark') == f"{r3_path}/tests/online/game-of-life/benchmark/bin_0/pure.js"

In [11]:
import subprocess, csv, json

# Reload the metrics written by the previous cell.
with open('metrics.json', 'r') as f: metrics = json.load(f)

# Replay characteristic experiment
timeout = 180 # seconds
# Engine and replay-variant labels under which this cell stores its results.
wizard_engine_kind = ['wizeng-int']
wizard_opt_kind = ['benchmark']

def run_icount(testname, engine, opt):
    """Collect per-function instruction counts for a test's replay binary.

    Runs ``run-icount.bash`` on the replay binary of variant ``opt`` and reads
    the CSV it is expected to write to ``DATA_FILE``.

    NOTE (from the original author): the label is misleading, because the
    numbers are actually collected in JIT mode, not interpreter mode.
    ``engine`` is not used by this function.

    Returns:
        ``{function: {'static': ..., 'dynamic': ...}}`` with values as the
        strings found in the CSV, or ``{}`` if anything fails.
    """
    data_path = f"/home/don/wasm-r3/tests/data/{testname}-icount.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && DATA_FILE={data_path} /home/don/wasm-r3-paper/oopsla/data/run-icount.bash {replay_path} wizeng.x86-64-linux'
    try:
        subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        counts = {}
        with open(data_path, 'r') as f:
            for row in csv.DictReader(f):
                counts[row['Function']] = {'static': row['static'], 'dynamic': row['dynamic']}
        return counts
    except Exception:
        print("Failed to run:")
        print(cmd)
        return {}
    
def run_fprofile(testname, engine, opt):
    """Collect the function profile for a test's replay binary.

    Runs ``run-fprofile.bash`` on the replay binary of variant ``opt`` and
    reads the CSV it is expected to write to ``DATA_FILE``.  Rows whose
    ``Function`` starts with ``r3`` are per-function records.  Every other
    row is treated as a ``name: value`` line and split at its last colon.
    ``engine`` is not used by this function.

    Returns:
        ``(per_function, summary)``: the first maps a function name to its
        ``count``, ``cycles`` and ``percent`` strings, the second maps each
        summary name to its value.  ``({}, {})`` if anything fails.
    """
    data_path = f"/home/don/wasm-r3/tests/data/{testname}-fprofile.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && DATA_FILE={data_path} /home/don/wasm-r3-paper/oopsla/data/run-fprofile.bash {replay_path} wizeng.x86-64-linux'
    try:
        subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        per_function = {}
        summary = {}
        with open(data_path, 'r') as f:
            for row in csv.DictReader(f):
                label = row['Function']
                if not label.startswith('r3'):
                    name, value = label.rsplit(':', 1)
                    summary[name.strip()] = value.strip()
                    continue
                per_function[label] = {'count': row['count'], 'cycles': row['cycles'], 'percent': row['percent']}
        return per_function, summary
    except Exception:
        print("Failed to run:")
        print(cmd)
        return {}, {}
    
def run_summarize(testname, engine, opt):
    """Summarize the instruction-count and profile data of one test.

    Runs ``summarize.bash`` with the icount and fprofile CSV paths of
    ``testname`` and parses the last line of its stdout with
    ``extract_summarize``.  Exactly six values are expected.  ``engine`` is
    not used by this function.

    Returns:
        A dict with the six summary counters, or ``{}`` if anything fails
        (including a line with a different number of values).
    """
    icount_path = f"/home/don/wasm-r3/tests/data/{testname}-icount.csv"
    ticks_path = f"/home/don/wasm-r3/tests/data/{testname}-fprofile.csv"
    replay_path = get_replay_wasm(testname, opt)
    cmd = f'. ~/.bashrc && ICOUNT_FILE={icount_path} TICKS_FILE={ticks_path} /home/don/wasm-r3-paper/oopsla/data/summarize.bash {replay_path}'
    field_names = (
        'instr_static_total',
        'instr_static_replay',
        'instrs_dynamic_total',
        'instr_dynamic_replay',
        'ticks_total',
        'ticks_replay',
    )
    try:
        completed = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, text=True)
        values = extract_summarize(completed.stdout)
        # strict=True keeps the "exactly six values" requirement: a mismatch raises ValueError.
        return dict(zip(field_names, values, strict=True))
    except Exception:
        print("Failed to run:")
        print(cmd)
        return {}
    
# NOTE(review): `results` is assigned here but never used in this cell.
results = []
for testname in testset:
    # Only tests whose trace matched are profiled.
    if trace_match(metrics, testname): 
        for engine in wizard_engine_kind:
            for opt in wizard_opt_kind:
                # Create the nested result dicts on first use; existing entries are kept.
                if not metrics[testname]['replay_metrics'].get(engine): metrics[testname]['replay_metrics'][engine] = {}
                if not metrics[testname]['replay_metrics'][engine].get(opt): metrics[testname]['replay_metrics'][engine][opt] = {}
                # IPython shell escape (not plain Python). It creates ./data in the current
                # directory; the helpers write to /home/don/wasm-r3/tests/data, so this
                # presumably assumes the notebook runs from the tests directory - TODO confirm.
                !mkdir -p data
                metrics[testname]['replay_metrics'][engine][opt]['icount'] = run_icount(testname, engine, opt) 
                # summary_dict is returned by run_fprofile but not stored.
                output_dict, summary_dict = run_fprofile(testname, engine, opt) 
                metrics[testname]['replay_metrics'][engine][opt]['fprofile'] = output_dict
                metrics[testname]['summary'] |= {**run_summarize(testname, engine, opt)}

with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

In [12]:
import subprocess, json, time

# Replay characteristic experiment

with open('metrics.json', 'r') as f: metrics = json.load(f)

timeout = 180 # seconds
# All engine configurations; each name is passed as ENGINES=<name> to compare-engines.bash.
engine_kind = ['sm', 'sm-base', 'sm-opt', 'v8', 'v8-liftoff', 'v8-turbofan', 'jsc', 'jsc-int','jsc-bbq','jsc-omg', 'wizeng','wizeng-int','wizeng-jit','wizeng-dyn','wasmtime','wasmer','wasmer-base']
# Subsets of engine_kind (web engines and Wizard engine modes).
web_engine_kind = ['sm', 'sm-base', 'sm-opt', 'v8', 'v8-liftoff', 'v8-turbofan', 'jsc', 'jsc-int','jsc-bbq','jsc-omg']
wizard_engine_kind = ['wizeng','wizeng-int','wizeng-jit','wizeng-dyn']
# Replay variants understood by the get_* path helpers.
opt_kind = ['noopt', 'split', 'merge', 'benchmark']
# Maps a web engine configuration to a command name.
# NOTE(review): nothing in this cell reads this mapping.
web_engine_to_cmd = {
    'sm': 'sm',
    'sm-base': 'sm',
    'sm-opt': 'sm',
    'v8': 'v8',
    'v8-liftoff': 'v8-liftoff',
    'v8-turbofan': 'v8-turbofan',
    'jsc': 'jsc',
    'jsc-int': 'jsc',
    'jsc-bbq': 'jsc',
    'jsc-omg': 'jsc',    
}

def run_wish_you_were_fast(testname, engine, opt):
    """Time one replay binary on one engine and store the runtime in ``metrics``.

    Runs ``compare-engines.bash`` once (``RUNS=1``) for ``engine`` on the
    replay binary of variant ``opt``, wrapped in ``timeout``.  The text after
    the last ``:`` of stdout is parsed as a float and stored under
    ``metrics[testname]['replay_metrics'][engine][opt]['runtime']``.

    On any failure the entry for this engine and variant is reset to ``{}``
    and the reason is printed.  A non-zero exit now reports the exit status
    and stderr; the bare ``Exception`` raised before left the message empty.

    Args:
        testname: Test whose replay binary is measured.
        engine: Engine name passed to the script via ``ENGINES``.
        opt: Replay variant used to locate the binary.
    """
    try:
        replay_path = get_replay_wasm(testname, opt)
        command = f". ~/.bashrc && RUNS=1 ENGINES={engine} timeout {timeout}s compare-engines.bash {replay_path}"
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        if result.returncode != 0:
            # GNU `timeout` exits with status 124 when the time limit is hit.
            reason = 'timed out' if result.returncode == 124 else 'command failed'
            raise RuntimeError(
                f"{reason} (exit status {result.returncode}): {result.stderr.strip()}"
            )
        runtime = float(result.stdout.split(":")[-1].strip())
        metrics[testname]['replay_metrics'][engine][opt] |= {'runtime': runtime}
    except Exception as e:
        print(f"Failed to run {testname} with {opt}, engine: {engine}, error: {e}")
        metrics[testname]['replay_metrics'][engine][opt] = {}

def run_js(testname, engine, opt):
    """Time the glue-JS and pure-JS replays of a test under ``node``.

    Runs ``replay.js`` from its own directory, then ``pure.js run``, and
    stores the wall-clock duration of each as ``gluejs_runtime`` and
    ``purejs_runtime`` in ``metrics[testname]['replay_metrics'][engine][opt]``.
    If either command exits non-zero, or anything else fails, the failure is
    printed and that entry is reset to ``{}``.
    """
    try:
        glue_script = get_glue_js(testname, opt)
        glue_cmd = f". ~/.bashrc && node {glue_script}"
        began = time.time()
        glue_run = subprocess.run(glue_cmd, shell=True, capture_output=True, text=True, cwd=os.path.dirname(glue_script))
        finished = time.time()
        if glue_run.returncode != 0:
            raise Exception(glue_cmd)
        metrics[testname]['replay_metrics'][engine][opt]['gluejs_runtime'] = finished - began

        pure_script = get_pure_js(testname, opt)
        pure_cmd = f". ~/.bashrc && node {pure_script} run"
        began = time.time()
        pure_run = subprocess.run(pure_cmd, shell=True, capture_output=True, text=True)
        finished = time.time()
        if pure_run.returncode != 0:
            raise Exception(pure_cmd)
        metrics[testname]['replay_metrics'][engine][opt]['purejs_runtime'] = finished - began
    except Exception as err:
        print(f"Failed to run {testname} with {opt}")
        print(err)
        metrics[testname]['replay_metrics'][engine][opt] = {}

def run_wizard(testname, engine, opt):
    """Record file size and Wizard profile metrics for one replay binary.

    Stores the size of the replay binary as ``file_size``, then runs
    ``wizeng.x86-64-linux --metrics --monitors=profile`` on it under
    ``timeout``.  Everything in stdout from ``pregen:time_us`` onward is read
    as ``name: value`` lines, split at the last colon, with any ``μs`` suffix
    removed from the value.  These are merged into
    ``metrics[testname]['replay_metrics'][engine][opt]``.  On any failure a
    message is printed and that entry is reset to ``{}``.
    """
    try:
        wasm_path = get_replay_wasm(testname, opt)
        metrics[testname]['replay_metrics'][engine][opt]['file_size'] = os.path.getsize(wasm_path)
        command = f". ~/.bashrc && timeout {timeout}s  wizeng.x86-64-linux --metrics --monitors=profile {wasm_path}"
        completed = subprocess.run(command, shell=True, capture_output=True, text=True)
        if completed.returncode != 0:
            raise Exception
        # The marker must occur exactly once; otherwise the unpacking fails and is handled below.
        _, tail = completed.stdout.split("pregen:time_us")
        report = 'pregen:time_us' + tail
        parsed = {}
        for line in report.split("\n"):
            if not line:
                continue
            pieces = line.rsplit(":", 1)
            parsed[pieces[0].strip()] = pieces[1].strip().replace("μs", "").strip()
        metrics[testname]['replay_metrics'][engine][opt] |= parsed
    except Exception:
        print(f"Failed to run {testname} with {opt}, engine: {engine}")
        metrics[testname]['replay_metrics'][engine][opt] = {}

for testname in testset:
    # Only tests whose trace matched are measured.
    if trace_match(metrics, testname):
        # Reset replay_metrics to an empty dict for every engine/variant pair.
        # This discards anything stored there earlier, e.g. by the icount/fprofile cell.
        for engine in engine_kind:
            metrics[testname]['replay_metrics'][engine] = {}
            for opt in opt_kind:
                metrics[testname]['replay_metrics'][engine][opt] = {}
        # Runtime of the 'benchmark' variant on every engine.
        for engine in engine_kind:
            for opt in ['benchmark']:
                run_wish_you_were_fast(testname, engine, opt)
        # The JS replay timing is currently disabled.
        # for engine in web_engine_kind:
        #     for opt in ['benchmark']:
        #         run_js(testname, engine, opt)
        # File size and profile metrics for every variant, stored under 'wizeng-int'.
        for engine in ['wizeng-int']:
            for opt in opt_kind:
                run_wizard(testname, engine, opt)

with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

In [1]:
import subprocess, time, json, os

# Load the metrics stored by the earlier experiments; this cell adds 'record_metrics'.
with open('metrics.json', 'r') as f: metrics = json.load(f)
def trace_match(metrics, testname):
    """Return the ``trace_match`` flag stored in the summary of ``testname``."""
    summary = metrics[testname]['summary']
    return summary['trace_match']

# Record overhead experiment
timeout = 120 # seconds
# Chromium binary to launch. This used to read WASMR3_PATH (the wasm-r3 checkout
# location used elsewhere in this file), so setting that variable silently
# replaced the browser path. It now has its own override, CHROMIUM_PATH.
chromium_path = os.getenv('CHROMIUM_PATH', '/home/don/.cache/ms-playwright/chromium-1105/chrome-linux/chrome')
# Script used as the renderer command prefix. This used os.path.join('PERFSH_PATH', <absolute path>),
# which always returns the absolute path, so the PERFSH_PATH override never took effect.
perf_sh_path = os.getenv('PERFSH_PATH', '/home/don/wasm-r3/tests/perf.sh')
# Remote-debugging port. It is exported so that child processes see the same value.
CDP_PORT = os.getenv('CDP_PORT', 9997)
os.environ['CDP_PORT'] = str(CDP_PORT)
# Extra `npm test` flag per measurement mode: 'original' adds --noRecord, 'instrumented' adds nothing.
option_to_cmd = {
    'original': '--noRecord',
    'instrumented': '',
}
def run_command(testname, option):
    """Run one record-overhead measurement of ``testname``.

    Kills any running ``chrome`` processes, launches Chromium in the
    background with its output redirected to ``<testname>_<option>_output.txt``,
    runs ``npm test -- --evalRecord`` for the test, and then parses both outputs.

    Args:
        testname: Test passed to ``npm test`` via ``-t``.
        option: Key of ``option_to_cmd`` selecting the extra ``npm test`` flag.

    Returns:
        ``[testname, option, cycle_counts, samples, mean]``, where
        ``cycle_counts`` is the list parsed from the Chromium output file and
        ``samples``/``mean`` come from the ``npm test`` stdout.  On any
        exception: ``[testname, option, -1, -1, -1]``.
    """
    try:
        # Start from a clean state; the exit status of killall is not checked.
        subprocess.run(["killall", "-9", "chrome"])
        chromium_cmd = f". ~/.bashrc && {chromium_path} --renderer-process-limit=1 --no-sandbox --remote-debugging-port={CDP_PORT} --js-flags='--slow-histograms' --renderer-cmd-prefix='bash {perf_sh_path}'"
        wasmr3_cmd = f". ~/.bashrc && timeout {timeout}s npm test -- --evalRecord {option_to_cmd[option]} -t {testname}"
        output_path = f"{testname}_{option}_output.txt"
        # Popen returns immediately and the browser is never waited on here; it keeps
        # running until the next call's killall (or until it is stopped externally).
        with open(output_path, 'w') as f: subprocess.Popen(chromium_cmd, shell=True, stdout=f , stderr=f)
        result = subprocess.run(wasmr3_cmd, shell=True, stdout=subprocess.PIPE, text=True)
        # Presumably gives the browser side time to finish writing its output file - TODO confirm.
        time.sleep(3)
        with open(output_path, 'r') as f: output = f.read()
        cycle_counts = extract_cycle_counts(output)
        samples, mean = extract_samples_and_mean(result.stdout)
        return [testname, option, cycle_counts, samples, mean]
    except Exception as e:
        print(f"Failed to run {testname} with {option}, error: {e}")
        return [testname, option, -1, -1, -1]
    
# NOTE(review): `results` is assigned here but never used in this cell.
results = []
# Iterates over every test in metrics.json, not only `testset`.
for testname in metrics:
    if trace_match(metrics, testname):
        for option in ['original', 'instrumented']:
            # Any earlier measurements for this option are discarded.
            metrics[testname]['record_metrics'][option] = []
            print(f"Running {testname} with {option}")
            # Ten repetitions per option.
            for i in range(10):
                # run_command returns the testname it was given, so rebinding the loop variable here changes nothing.
                testname, _, cycles, samples, mean = run_command(testname, option) 
                metrics[testname]['record_metrics'][option].append({'samples': samples, 'mean': mean, 'cycles': cycles})
                # Saved after every repetition (presumably so partial results survive an interruption).
                with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

with open('metrics.json', 'w') as f: json.dump(metrics, f, indent=4)

NameError: name 'trace_match' is not defined