diff --git a/README.md b/README.md index e98c7bbd3330..44d4bc7b6b16 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Examples: - tools/[btrfsdist](tools/btrfsdist.py): Summarize btrfs operation latency distribution as a histogram. [Examples](tools/btrfsdist_example.txt). - tools/[btrfsslower](tools/btrfsslower.py): Trace slow btrfs operations. [Examples](tools/btrfsslower_example.txt). - tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt). +- tools/[cachetop](tools/cachetop.py): Trace page cache hit/miss ratio by process. [Examples](tools/cachetop_example.txt). - tools/[cpudist](tools/cpudist.py): Summarize on- and off-CPU time per task as a histogram. [Examples](tools/cpudist_example.txt) - tools/[dcsnoop](tools/dcsnoop.py): Trace directory entry cache (dcache) lookups. [Examples](tools/dcsnoop_example.txt). - tools/[dcstat](tools/dcstat.py): Directory entry cache (dcache) stats. [Examples](tools/dcstat_example.txt). diff --git a/man/man8/cachetop.8 b/man/man8/cachetop.8 new file mode 100644 index 000000000000..5642fa1dc9aa --- /dev/null +++ b/man/man8/cachetop.8 @@ -0,0 +1,91 @@ +.TH cachetop 8 "2016-01-30" "USER COMMANDS" +.SH NAME +cachetop \- Statistics for Linux page cache hit/miss ratios per process. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B cachetop +[interval] +.SH DESCRIPTION +This traces four kernel functions and prints per-process summaries every +\fBinterval\fR seconds. This can be useful for process workload characterization, +and looking for patterns in operation usage over time. It provides a \fBtop\fR-like interface +which by default sorts by \fBHITS\fR in ascending order. + +This works by tracing kernel page cache functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize which functions are traced. + +Since this uses BPF, only the root user can use this tool. 
+.SH KEYBINDINGS +The following keybindings can be used to control the output of \fBcachetop\fR. +.TP +.B < +Use the previous column for sorting. +.TP +.B > +Use the next column for sorting. +.TP +.B r +Toggle sorting order (default ascending). +.TP +.B q +Quit cachetop. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Update summaries every five seconds: +# +.B cachetop +.TP +Print summaries each second: +# +.B cachetop 1 +.SH FIELDS +.TP +PID +Process ID of the process causing the cache activity. +.TP +UID +User ID of the process causing the cache activity. +.TP +HITS +Number of page cache hits. +.TP +MISSES +Number of page cache misses. +.TP +DIRTIES +Number of dirty pages added to the page cache. +.TP +READ_HIT% +Read hit percent of page cache usage. +.TP +WRITE_HIT% +Write hit percent of page cache usage. +.TP +BUFFERS_MB +Buffers size taken from /proc/meminfo. +.TP +CACHED_MB +Cached amount of data in current page cache taken from /proc/meminfo. +.SH OVERHEAD +This traces various kernel page cache functions and maintains in-kernel counts, which +are asynchronously copied to user-space. The rate of operations can +be very high (>1G/sec), in which case the overhead can reach 34%; this is still a relatively efficient way to trace +these events, and the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. 
#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# cachetop      Count cache kernel function calls per processes
#               For Linux, uses BCC, eBPF.
#
# USAGE: cachetop
# Taken from cachestat by Brendan Gregg
#
# Copyright (c) 2016-present, Facebook, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Jul-2016   Emmanuel Bretelle first version

from __future__ import absolute_import
from __future__ import division
# Do not import unicode_literals until #623 is fixed
# from __future__ import unicode_literals
from __future__ import print_function

from collections import defaultdict
from time import sleep
from time import strftime

import argparse
import curses
import pwd
import re
import signal

# Column order of the table; the index into this tuple is also the index
# into each row tuple returned by get_processes_stats().
FIELDS = (
    "PID",
    "UID",
    "CMD",
    "HITS",
    "MISSES",
    "DIRTIES",
    "READ_HIT%",
    "WRITE_HIT%"
)
DEFAULT_FIELD = "HITS"

# Kernel functions instrumented with a kprobe, in attach order.
TRACED_FUNCTIONS = (
    "add_to_page_cache_lru",
    "mark_page_accessed",
    "account_page_dirtied",
    "mark_buffer_dirty",
)

# BPF program: counts calls keyed by (probed ip, pid, uid, comm).
# NOTE(review): the low 32 bits of bpf_get_current_pid_tgid() are stored as
# "pid"; confirm against the bcc reference whether the tgid (upper 32 bits)
# was intended for a per-process view.
BPF_TEXT = """

    #include <uapi/linux/ptrace.h>
    struct key_t {
        u64 ip;
        u32 pid;
        u32 uid;
        char comm[16];
    };

    BPF_HASH(counts, struct key_t);

    int do_count(struct pt_regs *ctx) {
        struct key_t key = {};
        u64 zero = 0 , *val;
        u64 pid = bpf_get_current_pid_tgid();
        u32 uid = bpf_get_current_uid_gid();

        key.ip = PT_REGS_IP(ctx);
        key.pid = pid & 0xFFFFFFFF;
        key.uid = uid & 0xFFFFFFFF;
        bpf_get_current_comm(&(key.comm), 16);

        val = counts.lookup_or_init(&key, &zero);  // update counter
        (*val)++;
        return 0;
    }

"""


# signal handler
def signal_ignore(signal, frame):
    """Swallow a signal, only emitting a newline so the shell prompt is clean."""
    print()


def get_meminfo():
    """Parse /proc/meminfo into a dict.

    Returns:
        dict mapping each field name (text before the ':') to the first
        number on that line, as an int.  Lines without a ':' or without a
        value are skipped.
    """
    result = {}
    # 'with' guarantees the handle is closed; this runs once per refresh.
    with open('/proc/meminfo') as meminfo:
        for line in meminfo:
            name, sep, rest = line.partition(':')
            values = rest.split()
            if sep and values:
                result[name] = int(values[0])
    return result


def get_processes_stats(
        bpf,
        sort_field=FIELDS.index(DEFAULT_FIELD),
        sort_reverse=False):
    """Collect and reset the per-process page cache counters.

    Args:
        bpf: object exposing get_table("counts") and ksym(ip).  The table
            must provide items() and clear(); keys carry pid, uid, comm and
            ip attributes, values carry a value attribute.
        sort_field: index into FIELDS / the row tuple to sort by.
        sort_reverse: sort descending when True.

    Returns:
        A sorted list of tuples
        (pid, uid, comm, hits, misses, dirties, read_hit%, write_hit%),
        where pid is an int and uid is the decimal string of the user id.
        The "counts" table is cleared before returning.
    """
    counts = bpf.get_table("counts")
    stats = defaultdict(lambda: defaultdict(int))
    for k, v in counts.items():
        stats["%d-%d-%s" % (k.pid, k.uid, k.comm)][k.ip] = v.value
    stats_list = []

    for proc_key, per_ip in sorted(stats.items(), key=lambda stat: stat[0]):
        # Counters MUST be reset for every process: a process that did not
        # hit one of the probes would otherwise inherit the previous
        # process's count (and hit percentages).
        mpa = mbd = apcl = apd = 0

        for ip, value in per_ip.items():
            symbol = bpf.ksym(ip)
            value = max(value, 0)
            if re.match('mark_page_accessed', symbol) is not None:
                mpa = value
            if re.match('mark_buffer_dirty', symbol) is not None:
                mbd = value
            if re.match('add_to_page_cache_lru', symbol) is not None:
                apcl = value
            if re.match('account_page_dirtied', symbol) is not None:
                apd = value

        # access = total cache access incl. reads(mpa) and writes(mbd)
        # misses = total of add to lru which we do when we write(mbd)
        # and also the mark the page dirty(same as mbd)
        access = mpa + mbd
        misses = apcl + apd
        total = access + misses

        # rhits / whits are the read / write hit % during the sample period.
        # total is necessarily > 0 whenever mpa or apcl is > 0.
        rhits = 100 * float(mpa) / total if mpa > 0 else 0.0
        whits = 100 * float(apcl) / total if apcl > 0 else 0.0

        # comm may itself contain '-', hence the bounded split.
        _pid, uid, comm = proc_key.split('-', 2)
        stats_list.append(
            (int(_pid), uid, comm,
             access, misses, mbd,
             rhits, whits))

    stats_list = sorted(
        stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse
    )
    counts.clear()
    return stats_list


def _username(uid):
    """Return the login name for uid, or the uid itself if it has no entry."""
    try:
        return pwd.getpwuid(int(uid))[0]
    except KeyError:
        # e.g. a process running under a uid that is not in the passwd db
        return str(uid)


def _draw_line(stdscr, row, text, attr=curses.A_NORMAL):
    """Write text at the start of row, clipped to the window.

    Returns False (and draws nothing) when row is below the window, so the
    caller can stop emitting rows.
    """
    height, width = stdscr.getmaxyx()
    if row >= height or width < 2:
        return False
    # Stop one column short of the right edge so a long line neither wraps
    # nor runs off the bottom-right corner of the window.
    stdscr.addstr(row, 0, text[:width - 1], attr)
    return True


def handle_loop(stdscr, args):
    """Run the top-like display until 'q' or Ctrl-C.

    Args:
        stdscr: curses window, as supplied by curses.wrapper().
        args: parsed arguments; args.interval is the refresh period in
            seconds.
    """
    # Imported here rather than at module level so that importing this
    # module does not require bcc.
    from bcc import BPF

    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = FIELDS.index(DEFAULT_FIELD)
    sort_reverse = False

    # load BPF program
    b = BPF(text=BPF_TEXT)
    for event in TRACED_FUNCTIONS:
        b.attach_kprobe(event=event, fn_name="do_count")

    exiting = 0

    while 1:
        s = stdscr.getch()
        if s == ord('q'):
            exiting = 1
        elif s == ord('r'):
            sort_reverse = not sort_reverse
        elif s == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif s == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)
        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = 1
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get memory info
        mem = get_meminfo()
        cached = int(mem["Cached"]) / 1024
        buff = int(mem["Buffers"]) / 1024

        process_stats = get_processes_stats(
            b,
            sort_field=sort_field,
            sort_reverse=sort_reverse)
        stdscr.clear()
        _draw_line(
            stdscr, 0,
            "%-8s Buffers MB: %.0f / Cached MB: %.0f" % (
                strftime("%H:%M:%S"), buff, cached
            )
        )

        # header
        _draw_line(
            stdscr, 1,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format(
                *FIELDS
            ),
            curses.A_REVERSE
        )
        for i, stat in enumerate(process_stats):
            row_text = (
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:8} {6:9.1f}% {7:9.1f}%".format(
                    *stat, username=_username(stat[1])
                )
            )
            # stop once the rows no longer fit on screen
            if not _draw_line(stdscr, i + 2, row_text):
                break
        stdscr.refresh()
        if exiting:
            print("Detaching...")
            return


def parse_arguments():
    """Parse the command line; returns a namespace with an int 'interval'."""
    parser = argparse.ArgumentParser(
        description='show Linux page cache hit/miss statistics including read '
                    'and write hit % per processes in a UI like top.'
    )
    parser.add_argument(
        'interval', type=int, default=5, nargs='?',
        help='Interval between probes.'
    )

    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_arguments()
    curses.wrapper(handle_loop, args)
+ +optional arguments: + -h, --help show this help message and exit + +examples: + ./cachetop # run with the default delay of 5 seconds + ./cachetop 1 # print hit/miss stats every second + +# ./cachetop 5 +13:01:01 Buffers MB: 76 / Cached MB: 114 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 1 root systemd 2 0 0 100.0% 0.0% + 680 root vminfo 3 4 2 14.3% 42.9% + 567 syslog rs:main Q:Reg 10 4 2 57.1% 21.4% + 986 root kworker/u2:2 10 2457 4 0.2% 99.5% + 988 root kworker/u2:2 10 9 4 31.6% 36.8% + 877 vagrant systemd 18 4 2 72.7% 13.6% + 983 root python 148 3 143 3.3% 1.3% + 981 root strace 419 3 143 65.4% 0.5% + 544 messageb dbus-daemon 455 371 454 0.1% 0.4% + 243 root jbd2/dm-0-8 457 371 454 0.4% 0.4% + 985 root (mount) 560 2457 4 18.4% 81.4% + 987 root systemd-udevd 566 9 4 97.7% 1.2% + 988 root systemd-cgroups 569 9 4 97.8% 1.2% + 986 root modprobe 578 9 4 97.8% 1.2% + 287 root systemd-journal 598 371 454 14.9% 0.3% + 985 root mount 692 2457 4 21.8% 78.0% + 984 vagrant find 9529 2457 4 79.5% 20.5% + +Above shows the run of `find /` on a newly booted system. + +Command used to generate the activity +# find / + +Below shows that the hit rate increases as we run find a second time and it gets
+# ./cachetop.py +13:01:01 Buffers MB: 76 / Cached MB: 115 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 2 2 1 25.0% 50.0% + 680 root vminfo 2 2 1 25.0% 50.0% + 243 root jbd2/dm-0-8 3 2 1 40.0% 40.0% + 1068 root python 5 0 0 100.0% 0.0% + 1071 vagrant bash 350 0 0 100.0% 0.0% + 1071 vagrant find 12959 0 0 100.0% 0.0% + + +Below shows that the dirty pages increases as a file of 80M is created running +# dd if=/dev/urandom of=/tmp/c bs=8192 count=10000 + +# ./cachetop.py 10 +13:01:01 Buffers MB: 77 / Cached MB: 193 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 9 10 7 10.5% 15.8% + 680 root vminfo 9 10 7 10.5% 15.8% + 1109 root python 22 0 0 100.0% 0.0% + 243 root jbd2/dm-0-8 25 10 7 51.4% 8.6% + 1070 root kworker/u2:2 85 0 0 100.0% 0.0% + 1110 vagrant bash 366 0 0 100.0% 0.0% + 1110 vagrant dd 42183 40000 20000 27.0% 24.3% + +The file copied into page cache was named /tmp/c with a size of 81920000 (81920000/4096) = 20000