Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce timing overhead #273

Merged
merged 30 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8431793
WIP performance work
joerick Oct 12, 2023
4acef86
Add some WIP timing thread work
joerick Oct 12, 2023
b09ac14
Lower-overhead timing - support CLOCK_MONOTONIC_COARSE and timing thread
joerick Oct 14, 2023
7530139
Fix imports for linux
joerick Oct 14, 2023
c37e508
Fix an issue with the call into setstatprofile
joerick Oct 14, 2023
0d250ab
Fix ctypes issue on linux
joerick Oct 15, 2023
ab3d160
Add Python.h import for MS_WINDOWS macro define
joerick Oct 15, 2023
49cb6b3
Add dllexport tags to the header files for windows ctypes access
joerick Oct 16, 2023
34a32d9
More compatible header file syntax
joerick Oct 16, 2023
133d7bf
make test more resilient to failures
joerick Oct 16, 2023
ea0b7de
Longer wait times for windows
joerick Oct 27, 2023
8f1cc18
Hook the timer thread into the main program
joerick Oct 28, 2023
a7d1d06
Merge remote-tracking branch 'origin/main' into perf
joerick Oct 28, 2023
b2f91f7
WIP python implementation
joerick Nov 8, 2023
db2b185
Add python impl of timing thread
joerick Nov 26, 2023
fef3ff9
Try to work around emulated test weirdness
joerick Nov 26, 2023
e572e84
Merge remote-tracking branch 'origin/main' into perf
joerick Jul 30, 2024
c562e88
Merge remote-tracking branch 'origin/main' into perf
joerick Jul 30, 2024
c409581
add back 3.13 fix lost in merge
joerick Jul 30, 2024
14fc429
nox fixes for 3.13
joerick Jul 30, 2024
e005693
Add code to detect high-overhead timers and warn users, with actionab…
joerick Jul 31, 2024
8257274
Fix compile on linux
joerick Jul 31, 2024
ad234a0
uv-friendly nox setup
joerick Jul 31, 2024
2d607b5
fix linux compile
joerick Jul 31, 2024
f5c96a1
Remove some warnings on windows compilers
joerick Jul 31, 2024
a3fddd1
Define a windows shim
joerick Jul 31, 2024
4c63068
Import fixes
joerick Jul 31, 2024
af3ae54
Fix extra output
joerick Jul 31, 2024
c20ec44
Emulated tests and multithreading is strange
joerick Jul 31, 2024
fbc1f18
Fix missing floatclock-type param
joerick Aug 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ jobs:
- name: Install dependencies and test
run: |
docker run --rm -v ${{ github.workspace }}:/ws:rw --workdir=/ws \
-e QEMU_EMULATED=1 \
${{ env.img }} \
bash -exc '${{ env.py }} -m venv .env && \
source .env/bin/activate && \
Expand Down
50 changes: 50 additions & 0 deletions examples/sympy_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# All right, here is a reproducer (sympy 1.12, pyinstrument 4.5.3, Python 3.11.5).
# With python sympy_instrument.py, prints This took 0:00:00.636278
# With pyinstrument sympy_instrument.py, prints This took 0:00:12.355938

from datetime import datetime

from sympy import FF, Poly, Rational, symbols, sympify # type: ignore


def do_thing():
# Some elliptic curve crypto stuff that is not important
field = 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF
params = {
"a": 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC,
"b": 0x5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B,
}
k = FF(field)
expr = sympify(f"3*b - b3", evaluate=False)
for curve_param, value in params.items():
expr = expr.subs(curve_param, k(value))
param = str(expr.free_symbols.pop())

def resolve(expression, k):
if not expression.args:
return expression
args = []
for arg in expression.args:
if isinstance(arg, Rational):
a = arg.p
b = arg.q
res = k(a) / k(b)
else:
res = resolve(arg, k)
args.append(res)
return expression.func(*args)

expr = resolve(expr, k)
poly = Poly(expr, symbols(param), domain=k)
roots = poly.ground_roots()
for root in roots:
params[param] = int(root)
break


if __name__ == "__main__":
start = datetime.now()
for _ in range(1000):
do_thing()
end = datetime.now()
print("This took", end - start)
33 changes: 33 additions & 0 deletions metrics/count_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import time

import pyinstrument


def do_nothing():
pass


def busy_wait(duration: float):
start = time.time()
while time.time() - start < duration:
do_nothing()


def count_samples(duration: float, interval: float, use_timing_thread: bool):
profiler = pyinstrument.Profiler(interval=interval, use_timing_thread=use_timing_thread)
profiler.start()
busy_wait(duration)
session = profiler.stop()

reference = duration / interval
sample_count = len(session.frame_records)
print(f"Interval: {interval}, use_timing_thread: {use_timing_thread}")
print(
f"Expected samples: {reference}, actual samples: {sample_count}, {sample_count / reference:.2f}x"
)


count_samples(0.1, 0.001, False)
count_samples(0.1, 0.001, True)
count_samples(0.1, 0.0001, False)
count_samples(0.1, 0.0001, True)
9 changes: 8 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import os

import nox

nox.needs_version = ">=2024.4.15"
nox.options.default_venv_backend = "uv|virtualenv"


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
def test(session):
session.install("-e", ".[test]")
session.env["UV_PRERELEASE"] = "allow"
session.install("-e", ".[test]", "setuptools")
session.run("python", "setup.py", "build_ext", "--inplace")
session.run("pytest")
18 changes: 17 additions & 1 deletion pyinstrument/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,16 @@ def store_and_consume_remaining(
),
default=0.001,
)
parser.add_option(
"",
"--use-timing-thread",
dest="use_timing_thread",
action="store_true",
help=(
"Use a separate thread to time the interval between stack samples. "
"This can reduce the overhead of sampling on some systems."
),
)

# parse the options

Expand Down Expand Up @@ -359,7 +369,12 @@ def store_and_consume_remaining(
# because it will always be capturing the whole program, we never want
# any execution to be <out-of-context>, and it avoids duplicate
# profiler errors.
profiler = Profiler(interval=options.interval, async_mode="disabled")
profiler = Profiler(
interval=options.interval,
async_mode="disabled",
use_timing_thread=options.use_timing_thread,
)

profiler.start()

try:
Expand Down Expand Up @@ -607,6 +622,7 @@ class CommandLineOptions:
renderer: str | None
timeline: bool
interval: float
use_timing_thread: bool | None


class ValueWithRemainingArgs:
Expand Down
102 changes: 102 additions & 0 deletions pyinstrument/low_level/pyi_floatclock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include "pyi_floatclock.h"

#include <Python.h>
#include <time.h> // gettimeofday, clock()
#include <float.h> // DBL_MAX


/*
The windows implementations mostly stolen from timemodule.c
*/

#if defined(MS_WINDOWS) && !defined(__BORLANDC__)
#include <windows.h>

double pyi_monotonic_coarse_resolution(void)
{
return DBL_MAX;
}

/* use QueryPerformanceCounter on Windows */

double pyi_floatclock(PYIFloatClockType timer)
{
if (timer == PYI_FLOATCLOCK_MONOTONIC_COARSE) {
warn_once("CLOCK_MONOTONIC_COARSE not available on this system.");
}
static LARGE_INTEGER ctrStart;
static double divisor = 0.0;
LARGE_INTEGER now;
double diff;

if (divisor == 0.0) {
LARGE_INTEGER freq;
QueryPerformanceCounter(&ctrStart);
if (!QueryPerformanceFrequency(&freq) || freq.QuadPart == 0) {
/* Unlikely to happen - this works on all intel
machines at least! Revert to clock() */
return ((double)clock()) / CLOCKS_PER_SEC;
}
divisor = (double)freq.QuadPart;
}
QueryPerformanceCounter(&now);
diff = (double)(now.QuadPart - ctrStart.QuadPart);
return diff / divisor;
}

#else /* !MS_WINDOWS */

#include <unistd.h>
#include <sys/time.h> // clock_gettime

static double SEC_PER_NSEC = 1e-9;
static double SEC_PER_USEC = 1e-6;

double pyi_monotonic_coarse_resolution(void)
{
#ifdef CLOCK_MONOTONIC_COARSE
static double resolution = -1;
if (resolution == -1) {
struct timespec res;
int success = clock_getres(CLOCK_MONOTONIC_COARSE, &res);
if (success == 0) {
resolution = res.tv_sec + res.tv_nsec * SEC_PER_NSEC;
} else {
// clock_getres failed, so let's set the resolution to something
// so this timer is never used.
resolution = DBL_MAX;
}
}
return resolution;
#else
return DBL_MAX;
#endif
}

double pyi_floatclock(PYIFloatClockType timer)
{
// gets the current time in seconds, as quickly as possible.
#ifdef _POSIX_TIMERS
struct timespec t;
int res;
if (timer == PYI_FLOATCLOCK_MONOTONIC_COARSE) {
# ifdef CLOCK_MONOTONIC_COARSE
res = clock_gettime(CLOCK_MONOTONIC_COARSE, &t);
if (res == 0) return t.tv_sec + t.tv_nsec * SEC_PER_NSEC;
# else
warn_once("CLOCK_MONOTONIC_COARSE not available on this system.");
# endif
}
# ifdef CLOCK_MONOTONIC
res = clock_gettime(CLOCK_MONOTONIC, &t);
if (res == 0) return t.tv_sec + t.tv_nsec * SEC_PER_NSEC;
# endif
res = clock_gettime(CLOCK_REALTIME, &t);
if (res == 0) return t.tv_sec + t.tv_nsec * SEC_PER_NSEC;
#endif
struct timeval tv;
gettimeofday(&tv, (struct timezone *)NULL);
return (double)tv.tv_sec + tv.tv_usec * SEC_PER_USEC;
}

#endif /* MS_WINDOWS */
15 changes: 15 additions & 0 deletions pyinstrument/low_level/pyi_floatclock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef PYI_FLOATCLOCK_H
#define PYI_FLOATCLOCK_H

#include <Python.h>
#include "pyi_shared.h"

typedef enum {
PYI_FLOATCLOCK_DEFAULT = 0,
PYI_FLOATCLOCK_MONOTONIC_COARSE = 1,
} PYIFloatClockType;

Py_EXPORTED_SYMBOL double pyi_monotonic_coarse_resolution(void);
Py_EXPORTED_SYMBOL double pyi_floatclock(PYIFloatClockType timer);

#endif
32 changes: 32 additions & 0 deletions pyinstrument/low_level/pyi_shared.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef PYI_SHARED_H
#define PYI_SHARED_H

#include <Python.h>
#include <stdio.h>

#ifndef __has_attribute
# define __has_attribute(x) 0 // Compatibility with non-clang compilers.
#endif

// Define Py_EXPORTED_SYMBOL to be the appropriate symbol for exporting, it's not set in Python 3.8.
#ifndef Py_EXPORTED_SYMBOL
# if defined(_WIN32) || defined(__CYGWIN__)
# define Py_EXPORTED_SYMBOL __declspec(dllexport)
# elif (defined(__GNUC__) && (__GNUC__ >= 4)) ||\
(defined(__clang__) && __has_attribute(visibility))
# define Py_EXPORTED_SYMBOL __attribute__ ((visibility ("default")))
# else
# define Py_EXPORTED_SYMBOL
# endif
#endif

#define warn_once(msg) \
do { \
static int warned = 0; \
if (!warned) { \
fprintf(stderr, "pyinstrument: %s\n", msg); \
warned = 1; \
} \
} while (0)

#endif /* PYI_SHARED_H */
Loading
Loading