Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
143 commits
Select commit Hold shift + click to select a range
4327f32
Add python front-end
BolunThompson Aug 12, 2025
5f05807
Add python hs to ci
BolunThompson Aug 16, 2025
29f4539
Add egg-info and build to .gitignore
BolunThompson Aug 15, 2025
f6ae431
feat: add preprocessing based python front-end
BolunThompson Aug 29, 2025
7711c58
fix: environment
BolunThompson Oct 10, 2025
a43ae2b
fix dev dependencies
BolunThompson Oct 10, 2025
9c87dcf
test formatting
BolunThompson Oct 10, 2025
6de6d29
formatting
BolunThompson Oct 10, 2025
b6b1792
feat: basic chipseq pipeline
BolunThompson Oct 10, 2025
1c1eb76
add dockerfile for chipseq benchmark
BolunThompson Oct 10, 2025
336d2ec
add complex variable support to preprocessor
BolunThompson Oct 10, 2025
9521b33
add unsafe support to runtime
BolunThompson Oct 10, 2025
a348a72
fix for python_hs docker
BolunThompson Oct 10, 2025
8b5daae
log stderr in tests
BolunThompson Oct 10, 2025
40594ec
cleanup dockerfile
BolunThompson Oct 11, 2025
a912ac6
add bioinfo download data script
BolunThompson Oct 11, 2025
a829edf
write python hs benchmark orchestration
Oct 11, 2025
dcd5294
update docker ignore
Oct 12, 2025
cc1e09a
move downloading out of benchmarking script
BolunThompson Oct 12, 2025
3bd29a2
improve download data error msg
BolunThompson Oct 12, 2025
bada3fc
fix
BolunThompson Oct 12, 2025
68f1447
only install data download dep if not installed
BolunThompson Oct 12, 2025
bf9e52a
delete runtime data for benchmark
BolunThompson Oct 12, 2025
d258817
remove unused test.sh
BolunThompson Oct 12, 2025
1c8b8d4
add benchmark readme
BolunThompson Oct 12, 2025
a3c1df9
expand benchmark gitignore
BolunThompson Oct 12, 2025
ad01c51
fix: python hs bioinfo
BolunThompson Oct 14, 2025
1d02093
fix sigterm handler
BolunThompson Oct 14, 2025
8e487d6
fix: only delete spec files before next run
BolunThompson Oct 14, 2025
abcc921
pass extra args to start_server
BolunThompson Oct 14, 2025
df6a13b
fix: docker priv for python hs
BolunThompson Oct 14, 2025
f878755
add support for with statement stdout to preprocessor
BolunThompson Oct 15, 2025
a0aeb71
add assumptions
BolunThompson Oct 22, 2025
df1e4cd
fix: trap cleanup_server
BolunThompson Oct 22, 2025
85a3f08
fix: newline at end of partial order
BolunThompson Oct 22, 2025
562d23d
fix: typo
BolunThompson Oct 22, 2025
02a35da
fix: linting
BolunThompson Oct 22, 2025
52310cc
feat: respect shell paramater
BolunThompson Oct 22, 2025
df078d0
feat: log stderr
BolunThompson Oct 22, 2025
06e89ca
add biostars script
BolunThompson Oct 22, 2025
1dddd01
improve python hs benchmark runner
BolunThompson Oct 22, 2025
77a32ad
Add Makefile for running python benchmarks
BolunThompson Oct 26, 2025
3dfe8e9
add java flags to biostars to prevent java conflicts
BolunThompson Nov 3, 2025
020a70f
fix python report benchmark runner bugs
BolunThompson Nov 3, 2025
d01b4a3
fix: do not lint benchmarks
BolunThompson Nov 4, 2025
07c0ec6
add rainbowcake ffmpeg benchmark
BolunThompson Nov 4, 2025
58fa1c7
fix: output plural
BolunThompson Nov 4, 2025
be448e1
fix docker and make bugs
BolunThompson Nov 7, 2025
5e08940
feat: generate python hs speedup plot
BolunThompson Nov 7, 2025
3aa9a5e
fix docker in ci by not using .git for PASH_TOP
BolunThompson Nov 9, 2025
8778ed2
refactor: move hs docker copy to end
BolunThompson Nov 10, 2025
0134b5c
use uv pip in python hs docker
BolunThompson Nov 10, 2025
24df843
increase amount of data used for python hs benchmarks
BolunThompson Nov 9, 2025
049856b
feat: latex table script
BolunThompson Nov 11, 2025
69dedc5
feat: bigger data for video processing bench
BolunThompson Nov 11, 2025
8a0def4
fix: re-add create venv in docker
BolunThompson Nov 11, 2025
2f020cd
feat: remove trim/duration limit from video processing bench
BolunThompson Nov 11, 2025
170486f
feat: generate stats in python post process
BolunThompson Nov 11, 2025
7cb502f
add python hs fixes
BolunThompson Nov 15, 2025
1e42e1b
temp: new benchmarks
BolunThompson Nov 16, 2025
d003320
filter python hs data to a reasonable amount
BolunThompson Nov 21, 2025
b6ee562
update python benchmark table script
BolunThompson Nov 21, 2025
ed77078
add multiprocessing versions of python benchmarks
BolunThompson Nov 21, 2025
c0cbad9
add multiprocess benchmarks
BolunThompson Nov 23, 2025
f25b678
rename download_data.sh -> download_data
BolunThompson Nov 23, 2025
02aa01e
add .git to dockerignore
BolunThompson Nov 23, 2025
1b1344d
fix docerfile typo
BolunThompson Nov 23, 2025
a98ab16
fix: remove unusued original sra_id
BolunThompson Nov 23, 2025
d6b0273
Revert "add .git to dockerignore"
BolunThompson Nov 23, 2025
e6ae2bf
modify nemo to use ml commons dataset
BolunThompson Nov 23, 2025
72f054f
fix: copy benchmarks as last action in Dockerfile
BolunThompson Nov 23, 2025
2f5c403
fix: do not format preprocess result
BolunThompson Nov 23, 2025
ec9fd1f
increase amount of data used for nemo benchmark
BolunThompson Nov 23, 2025
16906f6
fix: skip already done bioinfo
BolunThompson Nov 23, 2025
0773289
fix: run on all bioinfo files
BolunThompson Nov 23, 2025
bba524a
fix: prefetch to transient errors
BolunThompson Nov 25, 2025
a3e288d
fix: use downsampled in bioinfo benchmark
BolunThompson Nov 25, 2025
b5aa434
fix: disable perf data dir rather than redirect
BolunThompson Nov 25, 2025
5a594c8
refactor speedup plot
BolunThompson Nov 25, 2025
dda6b3c
add plots to ignores
BolunThompson Nov 25, 2025
6d5383a
fix: seed macs3 callpeaks
BolunThompson Nov 25, 2025
1fddc39
compare name and value output correctness
BolunThompson Nov 25, 2025
f0b2c79
fix to download_data
BolunThompson Nov 25, 2025
86ac309
update benchmark READMEs
BolunThompson Nov 25, 2025
a890475
feat: include captk as a dataset from kaggle
BolunThompson Nov 25, 2025
0980fa5
move tools download to separate script
BolunThompson Nov 26, 2025
509faf7
add no conflicting flags to trimmomatic
BolunThompson Nov 26, 2025
b7f38c3
fix: make input dir for rainbowcake
BolunThompson Nov 26, 2025
e60e238
logging: add message when correctness check passed
BolunThompson Nov 26, 2025
d34b4c0
fix: cap size of rainbowcake videos downloaded
BolunThompson Nov 26, 2025
2d22cfe
docker fix
BolunThompson Nov 26, 2025
145d080
Move mkdir for python benchmarks to separate loop
BolunThompson Nov 26, 2025
ae879d2
use three runs by default
BolunThompson Nov 26, 2025
aa26f80
fix: custom video identical check
BolunThompson Nov 27, 2025
a124ca9
formatting
BolunThompson Nov 27, 2025
1bec2fb
fix: slightly decrease data
BolunThompson Nov 27, 2025
a1a46ec
fix: ensure each benchmark is a cold start
BolunThompson Nov 27, 2025
42d2997
rename run specific to benchmark
BolunThompson Nov 27, 2025
7bbdbb3
delete unused kaggle captk data after download
BolunThompson Nov 27, 2025
42af665
use window of 30
BolunThompson Nov 27, 2025
378966a
fix env vars in benchmark.sh
BolunThompson Nov 27, 2025
2748fe0
add bad biostars script
BolunThompson Nov 27, 2025
3eb3d41
fix: multiprocess uses 30 workers
BolunThompson Nov 27, 2025
d4f80f5
fix perms in benchmark.sh
BolunThompson Nov 27, 2025
e81640b
reduce biostars data
BolunThompson Nov 27, 2025
868389e
Revert "use window of 30"
BolunThompson Nov 28, 2025
02f1f62
Revert "fix: multiprocess uses 30 workers"
BolunThompson Nov 28, 2025
f5a2ee5
fix: less data for bioinfo
BolunThompson Nov 28, 2025
5aa6466
feat: order python benchmarks by interestingness
BolunThompson Nov 28, 2025
0aea2e8
move results after each run
BolunThompson Nov 28, 2025
edbb681
add slowdowns to process_results.py
BolunThompson Nov 28, 2025
f876c94
add correctnes check skip
BolunThompson Nov 28, 2025
52a0b04
process_results bugfixes
BolunThompson Nov 28, 2025
42d85d9
plot fixes
BolunThompson Nov 28, 2025
c752d4c
add cloudlab results
BolunThompson Nov 28, 2025
336f4f1
add manual experiment scripts
BolunThompson Nov 28, 2025
7ea49b7
add spectre-after results
BolunThompson Nov 29, 2025
e8ce232
add biostars 2/4 results
BolunThompson Nov 30, 2025
d3dc7a2
remove results from old cloudlab
BolunThompson Nov 30, 2025
87f51e4
fix: enforce run ordering in makefile
BolunThompson Nov 30, 2025
3ccb833
fix: ignore failures in nemo download data
BolunThompson Nov 30, 2025
d1e778d
correct benchmark description
BolunThompson Dec 3, 2025
2f73474
fix: visit conditionals for arguments
BolunThompson Dec 3, 2025
9e32977
fix: replace unsupported += with =
BolunThompson Dec 3, 2025
190f132
fix: touch downloaded in nemo
BolunThompson Dec 1, 2025
b1e7306
fix ulimit for fds in docker
BolunThompson Dec 1, 2025
5d5fbdc
refactor three runs to use makefile
BolunThompson Dec 1, 2025
a27c917
add first final osdi 2025 python results
BolunThompson Dec 3, 2025
ffd9751
fix kaggle captk data download
BolunThompson Dec 3, 2025
44451fd
delete bad final run 1 results
BolunThompson Dec 3, 2025
f3f6537
add corrected bioinfo results
BolunThompson Dec 4, 2025
a7010d5
add captk python benchmark results and processed.json
BolunThompson Dec 5, 2025
7d9993a
remove unused benchmarking scripts
BolunThompson Dec 5, 2025
ca1037a
remove sequential captk multi benchmark
BolunThompson Dec 5, 2025
541ff59
remove old runtime plot
BolunThompson Dec 5, 2025
d93ca7d
typo
BolunThompson Dec 5, 2025
9f78739
align table.py with paper changes
BolunThompson Dec 5, 2025
1faa92b
modify benchmark scripts to use new aliases
BolunThompson Dec 5, 2025
b3c7f4b
cleanup: remove python hs results
BolunThompson Dec 6, 2025
1c5b33b
Revert "Add python hs to ci"
BolunThompson Dec 6, 2025
48064a2
feat: target to build an individial benchmark
BolunThompson Dec 6, 2025
72a408a
add README for python hs report
BolunThompson Dec 6, 2025
fb930f3
cleanup: remove random file
BolunThompson Dec 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ report/benchmarks
report/resources
report/output
report/archive
.venv
python_hs/.venv
python_hs/report/
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
scripts/vars.sh
*__pycache__*
*egg-info*
.DS_Store
*~
\#*\#
Expand All @@ -9,3 +10,4 @@ report/output/
test/output_bash/
test/output_orch/
test/results/
build/
26 changes: 16 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,30 @@ FROM debian:12
RUN mkdir -p /srv/hs
WORKDIR /srv/hs
SHELL ["/bin/bash", "-c"]
RUN apt update
RUN apt install -y vim sudo git python3 python3.11-venv strace wget make python3-cram file graphviz libtool python3-matplotlib libcap2-bin util-linux
# pash distro deps
RUN apt install -y bc curl graphviz bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3-pip python3-setuptools python3-testresources wamerican-insane
# try deps
RUN apt install -y expect mergerfs attr

# https://docs.docker.com/build/cache/optimize/#use-cache-mounts
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt update \
&& apt install -y \
# hs deps
vim sudo git python3 python3.11-venv strace wget make python3-cram file graphviz libtool python3-matplotlib libcap2-bin util-linux \
# pash deps
curl graphviz bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3-pip python3-setuptools python3-testresources wamerican-insane \
# try deps
expect mergerfs attr
RUN git config --global --add safe.directory /srv
COPY . .
RUN python3 -m venv .venv
RUN source .venv/bin/activate
ENV PASH_SPEC_TOP=/srv/hs
ENV PASH_TOP=/srv/hs/deps/pash
# pash, try
COPY deps/ deps/
WORKDIR /srv/hs/deps/try
RUN make -C utils
RUN mv utils/try-commit /bin
RUN mv utils/try-summary /bin
WORKDIR /srv/hs/deps/pash
RUN ./scripts/setup-pash.sh
WORKDIR /srv/hs
RUN chmod +x entrypoint.sh
RUN python3 -m venv .venv
COPY . .
ENTRYPOINT ["/srv/hs/entrypoint.sh"]
Empty file modified entrypoint.sh
100644 → 100755
Empty file.
15 changes: 9 additions & 6 deletions parallel-orch/scheduler_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@
## A scheduler server
##

def handler(signum, frame):
logging.debug(f'Signal: {signum} caught')
shutdown()

signal.signal(signal.SIGTERM, handler)

def parse_args():
parser = argparse.ArgumentParser(add_help=False)
## TODO: Import the arguments so that they are not duplicated here and in orch
Expand Down Expand Up @@ -80,6 +74,12 @@ def __init__(self, socket_file, window):
self.waiting_for_response = {}
self.partial_program_order = None

def handler(signum, frame):
logging.debug(f'Signal: {signum} caught')
self.shutdown()

signal.signal(signal.SIGTERM, handler)

def handle_init(self, input_cmd: str):
assert(input_cmd.startswith("Init"))
partial_order_file = input_cmd.split(":")[1].rstrip()
Expand Down Expand Up @@ -215,6 +215,9 @@ def shutdown(self):
self.terminate_pending_commands()

def terminate_pending_commands(self):
if self.partial_program_order is None:
return

for node in self.partial_program_order.get_executing_normal_and_spec_nodes():
node.reset_to_ready()
# proc.terminate()
Expand Down
7 changes: 6 additions & 1 deletion pash-spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ export PASH_SPEC_SCHEDULER_SOCKET="${PASH_SPEC_TMP_PREFIX}/scheduler_socket"

## TODO: Replace this with a call to pa.sh (which will start the scheduler on its own).
# python3 "$PASH_SPEC_TOP/parallel-orch/orch.py" "$@"
"$PASH_TOP/pa.sh" --speculative "$@"
if [ "$1" = "--python" ]; then
shift
"$PASH_SPEC_TOP/python_hs/entrypoint.sh" "$@"
else
"$PASH_TOP/pa.sh" --speculative "$@"
fi
EXITCODE=$?

if [ -w /sys/fs/cgroup/ ]; then
Expand Down
12 changes: 12 additions & 0 deletions python_hs/assumptions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
## Correctness

- Ignores python file-state changes (ex: `open`)
- Ignores stderr
- Assumes sandboxed evaluation context cannot make changes to system.

## Performance

- Only straight-line code plus `for` loops
- Statically known arguments to commands
- Can only speculate on loops with statically known iterators
- Only `with open(...)` statements that do a simple redirect of a command's output
19 changes: 19 additions & 0 deletions python_hs/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Entry point for the Python front-end.
#
# Usage: entrypoint.sh FILE [start_server args...]
#
# Sources PaSh's runtime setup in speculative mode, starts the server with
# any extra arguments, and then runs FILE through python_hs/entrypoint.py.
# PASH_TOP and PASH_SPEC_TOP must already be set in the environment.

set -eu

# Fail with a usage message rather than bash's bare "unbound variable" error
# when no input file is given.
readonly FILE="${1:?usage: entrypoint.sh FILE [start_server args...]}"
shift

# point to the local downloaded folders
export PYTHONPATH="$PASH_TOP/python_pkgs/:$PASH_SPEC_TOP/python_hs/:${PYTHONPATH:-}"
# for start_server
# Assign first and export afterwards: `export VAR="$(cmd)"` returns the status
# of `export`, so a failing mktemp would go unnoticed even under `set -e`.
PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/"
export PASH_TMP_PREFIX

# presumably defines start_server and cleanup_server -- confirm against PaSh
source "$PASH_TOP/compiler/orchestrator_runtime/pash_init_setup.sh" --speculative
# sets daemon_pid
start_server "$@"
# Double quotes are intentional: $daemon_pid is expanded now, when the trap
# is installed, not when it fires.
trap "cleanup_server $daemon_pid" EXIT INT TERM

python3 "$PASH_SPEC_TOP/python_hs/python_hs/entrypoint.py" "$FILE"

60 changes: 60 additions & 0 deletions python_hs/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[build-system]
requires = ["uv_build>=0.8.11,<0.9.0"]
build-backend = "uv_build"

[project]
name = "python_hs"
version = "0.1.0"
description = "Python HS"
authors = [{name = "The PaSh Authors"}]
requires-python = ">=3.11"
dependencies = [] # besides hS and the pash deps

[project.scripts]
preprocessor = "python_hs.preprocessor:main"

[tool.uv.build-backend]
module-name = "python_hs"
module-root = ""

[tool.ruff]
line-length = 88
target-version = "py311"

[tool.ruff.lint]
select = [
"E",
"F",
"I",
"B",
"UP",
"C4",
"SIM",
"RUF",
]
ignore = [
"B011",
"E501",
]
exclude = ["report/benchmarks/*/**.py"]

[tool.ruff.format]
docstring-code-format = true
docstring-code-line-length = 72

[tool.pyright]
include = ["python_hs", "tests"]
typeCheckingMode = "basic"
pythonVersion = "3.11"

[tool.pytest.ini_options]
log_cli = true
log_cli_level = "DEBUG"

[dependency-groups]
dev = [
"pyright>=1.1.406",
# only for the test runner; tests are written with unittest
"pytest>=8.4.2",
"ruff>=0.14.0",
]
3 changes: 3 additions & 0 deletions python_hs/python-hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Convenience launcher: forwards all arguments to pash-spec.sh (one directory
# above this script) with --python prepended.
exec "$(dirname -- "${BASH_SOURCE[0]}")"/../pash-spec.sh --python "$@"
3 changes: 3 additions & 0 deletions python_hs/python_hs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from python_hs.runtime import hs_run

__all__ = ["hs_run"]
21 changes: 21 additions & 0 deletions python_hs/python_hs/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
from pathlib import Path


def _get_path(env_var: str) -> Path | None:
try:
return Path(os.environ[env_var])
except KeyError:
return None


# Repository root: three directory levels above this file
# (python_hs/python_hs/constants.py).
PASH_SPEC_TOP = Path(__file__).parent.parent.parent
# NOTE(review): this requires the checkout directory to be literally named
# "hs" (the Dockerfile uses /srv/hs); importing this module from a clone with
# any other directory name fails here -- confirm that is intended.
assert PASH_SPEC_TOP.name == "hs"

# Locations derived from the repository root.
PASH_TOP = PASH_SPEC_TOP / "deps" / "pash"
PYTHON_SPEC_TOP = PASH_SPEC_TOP / "python_hs"

RUNTIME_DIR = PASH_TOP / "compiler" / "orchestrator_runtime"

# Read from the environment at import time; each is None when its variable
# is not set.
PASH_SPEC_SCHEDULER_SOCKET = _get_path("PASH_SPEC_SCHEDULER_SOCKET")
PASH_SPEC_TMP_PREFIX = _get_path("PASH_SPEC_TMP_PREFIX")
32 changes: 32 additions & 0 deletions python_hs/python_hs/entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import argparse
import runpy
from pathlib import Path

from python_hs import logging_


def parse_args():
    """Parse the process's command-line arguments.

    Returns:
        An ``argparse.Namespace`` with ``file`` (a ``Path`` to the input
        script) and ``debug`` (``True`` when ``--debug`` was given).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", type=Path, help="Input file to speculate")
    cli.add_argument("--debug", help="Enable debug logging", action="store_true")
    namespace = cli.parse_args()
    return namespace


def main():
    """Preprocess the input file, initialise the scheduler, and run the result.

    The file named on the command line is passed to ``preprocess_file``
    together with ``PASH_SPEC_TMP_PREFIX``; after ``init_scheduler()`` the
    preprocessed file is executed with ``runpy.run_path``.

    Raises:
        RuntimeError: If ``PASH_SPEC_TMP_PREFIX`` is not set in the
            environment.
    """
    args = parse_args()
    # Set the flag before the imports below -- presumably so that loggers
    # those modules create at import time pick up the requested level.
    logging_.DEBUG = args.debug

    from python_hs.constants import PASH_SPEC_TMP_PREFIX
    from python_hs.preprocessor import preprocess_file
    from python_hs.runtime import init_scheduler

    # An explicit check instead of ``assert``: asserts are stripped under
    # ``python -O``, which would let ``None`` reach ``preprocess_file``.
    if PASH_SPEC_TMP_PREFIX is None:
        raise RuntimeError(
            "PASH_SPEC_TMP_PREFIX is not set; "
            "launch this through pash-spec.sh --python"
        )

    preprocessed_file = preprocess_file(PASH_SPEC_TMP_PREFIX, args.file)
    init_scheduler()
    runpy.run_path(str(preprocessed_file))


if __name__ == "__main__":
main()
33 changes: 33 additions & 0 deletions python_hs/python_hs/logging_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

import logging
import sys

DEBUG = True


def setup_logger(name: str) -> logging.Logger:
    """Return the logger called *name*, reconfigured to write to stderr.

    Handlers already attached to the logger are removed and replaced by a
    single stream handler. Both the logger and the handler use level DEBUG
    when the module-level ``DEBUG`` flag is true, and WARNING otherwise.

    Args:
        name: Logger name.
    Returns:
        Configured logger instance.
    """
    verbosity = logging.DEBUG if DEBUG else logging.WARNING

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(verbosity)
    stderr_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )

    log = logging.getLogger(name)
    # Drop earlier handlers so repeated calls do not stack duplicates.
    log.handlers.clear()
    log.addHandler(stderr_handler)
    log.setLevel(verbosity)
    log.debug("Logger initiated")

    return log
Loading
Loading