From 57abe5b433e7aae3c60e7684c34de8bcf29b6244 Mon Sep 17 00:00:00 2001 From: unixtest Date: Sun, 31 Jan 2016 10:47:03 +0000 Subject: [PATCH] update with cachestat , squashed commit Added .txt to exmaple file to allow link to work duplicate file duplicate of .txt version Author: allan mcaleavy --- README.md | 1 + man/man8/cachestat.8 | 85 +++++++++++++++ tools/cachestat.py | 211 ++++++++++++++++++++++++++++++++++++ tools/cachestat_example.txt | 75 +++++++++++++ 4 files changed, 372 insertions(+) create mode 100644 man/man8/cachestat.8 create mode 100755 tools/cachestat.py create mode 100644 tools/cachestat_example.txt diff --git a/README.md b/README.md index 4db485151ba..ac6d249f0af 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ Tools: - tools/[bashreadline](tools/bashreadline.py): Print entered bash commands system wide. [Examples](tools/bashreadline_example.txt). - tools/[biolatency](tools/biolatency.py): Summarize block device I/O latency as a histogram. [Examples](tools/biolatency_example.txt). - tools/[biosnoop](tools/biosnoop.py): Trace block device I/O with PID and latency. [Examples](tools/biosnoop_example.txt). +- tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt). - tools/[funccount](tools/funccount.py): Count kernel function calls. [Examples](tools/funccount_example.txt). - tools/[funclatency](tools/funclatency.py): Time kernel functions and show their latency distribution. [Examples](tools/funclatency_example.txt). - tools/[gethostlatency](tools/gethostlatency.py): Show latency for getaddrinfo/gethostbyname[2] calls. [Examples](tools/gethostlatency_example.txt). diff --git a/man/man8/cachestat.8 b/man/man8/cachestat.8 new file mode 100644 index 00000000000..db7227c6a01 --- /dev/null +++ b/man/man8/cachestat.8 @@ -0,0 +1,85 @@ +.TH cachestat 8 "2016-01-30" "USER COMMANDS" +.SH NAME +cachestat \- Statistics for linux page cache hit/miss ratios. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B cachestat +[-T] [interval [count]] +.SH DESCRIPTION +This traces four kernel functions and prints per-second summaries. This can +be useful for general workload characterization, and looking for patterns +in operation usage over time. + +This works by tracing kernel page cache functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize which functions are traced. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Print summaries every five second: +# +.B cachestat +.TP +Print summaries every five seconds with timestamp: +# +.B cachestat -T +.TP +Print summaries each second: +# +.B cachestat 1 +.TP +Print output every five seconds, three times: +# +.B cachestat 5 3 +.TP +Print output with timetsmap every five seconds, three times: +# +.B cachestat -T 5 3 +.SH FIELDS +.TP +TIME +Timestamp. +.TP +HITS +Number of page cache hits. +.TP +MISSES +Number of page cache misses. +.TP +DIRTIES +Number of dirty pages added to the page cache. +.TP +READ_HIT% +Read hit percent of page cache usage. +.TP +WRITE_HIT% +Write hit percent of page cache usage. +.TP +BUFFERS_MB +Buffers size taken from /proc/meminfo. +.TP +CACHED_MB +Cached amount of data in current page cache taken from /proc/meminfo. +.SH OVERHEAD +This traces various kernel page cache functions and maintains in-kernel counts, which +are asynchronously copied to user-space. While the rate of operations can +be very high (>1G/sec) we can have up to 34% overhead, this is still a relatively efficient way to trace +these events, and so the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Allan McAleavy +.SH SEE ALSO +https://github.com/brendangregg/perf-tools/blob/master/fs/cachestat diff --git a/tools/cachestat.py b/tools/cachestat.py new file mode 100755 index 00000000000..71ee4953607 --- /dev/null +++ b/tools/cachestat.py @@ -0,0 +1,211 @@ +#!/usr/bin/python +# +# cachestat Count cache kernel function calls. +# For Linux, uses BCC, eBPF. See .c file. +# +# USAGE: cachestat +# Taken from funccount by Brendan Gregg +# This is a rewrite of cachestat from perf to bcc +# https://github.com/brendangregg/perf-tools/blob/master/fs/cachestat +# +# Copyright (c) 2016 Allan McAleavy. +# Copyright (c) 2015 Brendan Gregg. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 09-Sep-2015 Brendan Gregg Created this. +# 06-Nov-2015 Allan McAleavy +# 13-Jan-2016 Allan McAleavy run pep8 against program + +from __future__ import print_function +from bcc import BPF +from time import sleep, strftime +import signal +import re +from sys import argv + +# signal handler +def signal_ignore(signal, frame): + print() + +# Function to gather data from /proc/meminfo +# return dictionary for quicker lookup of both values +def get_meminfo(): + result = dict() + + for line in open('/proc/meminfo'): + k = line.split(':', 3) + v = k[1].split() + result[k[0]] = int(v[0]) + return result + +# set global variables +rtaccess = 0 +wtaccess = 0 +mpa = 0 +mbd = 0 +apcl = 0 +apd = 0 +access = 0 +misses = 0 +rhits = 0 +whits = 0 +debug = 0 + +# args +def usage(): + print("USAGE: %s [-T] [ interval [count] ]" % argv[0]) + exit() + +# arguments +interval = 5 +count = -1 +tstamp = 0 + +if len(argv) > 1: + if str(argv[1]) == '-T': + tstamp = 1 + +if len(argv) > 1 and tstamp == 0: + try: + if int(argv[1]) > 0: + interval = int(argv[1]) + if len(argv) > 2: + if int(argv[2]) > 0: + count = int(argv[2]) + except: + usage() + +elif len(argv) > 2 and tstamp == 1: + try: + if int(argv[2]) > 0: + interval = int(argv[2]) + if len(argv) >= 4: + if int(argv[3]) > 0: + count = int(argv[3]) + except: + usage() + +# load BPF program +bpf_text = """ + +#include +struct key_t { + u64 ip; +}; + +BPF_HASH(counts, struct key_t); + +int do_count(struct pt_regs *ctx) { + struct key_t key = {}; + u64 zero = 0, *val; + u64 ip; + + key.ip = ctx->ip; + val = counts.lookup_or_init(&key, &zero); // update counter + (*val)++; + return 0; +} + +""" +b = BPF(text=bpf_text) +b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count") +b.attach_kprobe(event="mark_page_accessed", fn_name="do_count") +b.attach_kprobe(event="account_page_dirtied", fn_name="do_count") +b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count") + +# header +if tstamp: + print("%-8s " % "TIME", end="") +print("%8s %8s %8s %10s %10s %12s %10s" % + ("HITS", "MISSES", "DIRTIES", + "READ_HIT%", "WRITE_HIT%", "BUFFERS_MB", "CACHED_MB")) + +loop = 0 +exiting = 0 +while 1: + if count > 0: + loop += 1 + if loop > count: + exit() + + try: + sleep(interval) + except KeyboardInterrupt: + exiting = 1 + # as cleanup can take many seconds, trap Ctrl-C: + signal.signal(signal.SIGINT, signal_ignore) + + counts = b.get_table("counts") + for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): + + if re.match('mark_page_accessed', b.ksym(k.ip)) is not None: + mpa = v.value + if mpa < 0: + mpa = 0 + + if re.match('mark_buffer_dirty', b.ksym(k.ip)) is not None: + mbd = v.value + if mbd < 0: + mbd = 0 + + if re.match('add_to_page_cache_lru', b.ksym(k.ip)) is not None: + apcl = v.value + if apcl < 0: + apcl = 0 + + if re.match('account_page_dirtied', b.ksym(k.ip)) is not None: + apd = v.value + if apd < 0: + apd = 0 + + # access = total cache access incl. reads(mpa) and writes(mbd) + # misses = total of add to lru which we do when we write(mbd) + # and also the mark the page dirty(same as mbd) + access = (mpa + mbd) + misses = (apcl + apd) + + # rtaccess is the read hit % during the sample period. + # wtaccess is the write hit % during the smaple period. + if mpa > 0: + rtaccess = float(mpa) / (access + misses) + if apcl > 0: + wtaccess = float(apcl) / (access + misses) + + if wtaccess != 0: + whits = 100 * wtaccess + if rtaccess != 0: + rhits = 100 * rtaccess + + if debug: + print("%d %d %d %d %d %d %f %f %d %d\n" % + (mpa, mbd, apcl, apd, access, misses, + rtaccess, wtaccess, rhits, whits)) + + counts.clear() + + # Get memory info + mem = get_meminfo() + cached = int(mem["Cached"]) / 1024 + buff = int(mem["Buffers"]) / 1024 + + if tstamp == 1: + print("%-8s " % strftime("%H:%M:%S"), end="") + print("%8d %8d %8d %9.1f%% %9.1f%% %12.0f %10.0f" % + (access, misses, mbd, rhits, whits, buff, cached)) + + mpa = 0 + mbd = 0 + apcl = 0 + apd = 0 + access = 0 + misses = 0 + rhits = 0 + cached = 0 + buff = 0 + whits = 0 + rtaccess = 0 + wtaccess = 0 + + if exiting: + print("Detaching...") + exit() diff --git a/tools/cachestat_example.txt b/tools/cachestat_example.txt new file mode 100644 index 00000000000..6cebc7b6ee2 --- /dev/null +++ b/tools/cachestat_example.txt @@ -0,0 +1,75 @@ +# ./cachestat -h +USAGE: ./cachestat [-T] [ interval [count] ] + +show Linux page cache hit/miss statistics including read and write hit % + +optional arguments: + -T include timestamp on output + +examples: + ./cachestat # run with default option of 5 seconds delay + ./cachestat -T # run with default option of 5 seconds delay with timestamps + ./cachestat 1 # print every second hit/miss stats + ./cachestat -T 1 # include timestamps with one second samples + ./cachestat 1 5 # run with interval of one second for five iterations + ./cachestat -T 1 5 # include timestamps with interval of one second for five iterations + + +# ./cachestat 1 + HITS MISSES DIRTIES READ_HIT% WRITE_HIT% BUFFERS_MB CACHED_MB + 0 58 0 0.0% 100.0% 0 11334 + 146113 0 0 100.0% 0.0% 0 11334 + 244143 0 0 100.0% 0.0% 0 11334 + 216833 0 0 100.0% 0.0% 0 11334 + 248209 0 0 100.0% 0.0% 0 11334 + 205825 0 0 100.0% 0.0% 0 11334 + 286654 0 0 100.0% 0.0% 0 11334 + 275850 0 0 100.0% 0.0% 0 11334 + 272883 0 0 100.0% 0.0% 0 11334 + 261633 0 0 100.0% 0.0% 0 11334 + 252826 0 0 100.0% 0.0% 0 11334 + 235253 70 3 100.0% 0.0% 0 11335 + 204946 0 0 100.0% 0.0% 0 11335 + 0 0 0 0.0% 0.0% 0 11335 + 0 0 0 0.0% 0.0% 0 11335 + 0 0 0 0.0% 0.0% 0 11335 + +Above shows the reading of a 12GB file already cached in the OS page cache and again below with timestamps. + +# ./cachestat -T 1 +TIME HITS MISSES DIRTIES READ_HIT% WRITE_HIT% BUFFERS_MB CACHED_MB +16:07:10 0 0 0 0.0% 0.0% 0 11336 +16:07:11 0 0 0 0.0% 0.0% 0 11336 +16:07:12 117849 0 0 100.0% 0.0% 0 11336 +16:07:13 212558 0 0 100.0% 0.0% 0 11336 +16:07:14 302559 1 0 100.0% 0.0% 0 11336 +16:07:15 309230 0 0 100.0% 0.0% 0 11336 +16:07:16 305701 0 0 100.0% 0.0% 0 11336 +16:07:17 312754 0 0 100.0% 0.0% 0 11336 +16:07:18 308406 0 0 100.0% 0.0% 0 11336 +16:07:19 298185 0 0 100.0% 0.0% 0 11336 +16:07:20 236128 0 0 100.0% 0.0% 0 11336 +16:07:21 257616 0 0 100.0% 0.0% 0 11336 +16:07:22 179792 0 0 100.0% 0.0% 0 11336 + +Command used to generate the activity +# dd if=/root/mnt2/testfile of=/dev/null bs=8192 +1442795+0 records in +1442795+0 records out +11819376640 bytes (12 GB) copied, 3.9301 s, 3.0 GB/s + +Below shows the dirty ratio increasing as we add a file to the page cache +# ./cachestat.py + HITS MISSES DIRTIES READ_HIT% WRITE_HIT% BUFFERS_MB CACHED_MB + 1074 44 13 94.9% 2.9% 1 223 + 2195 170 8 92.5% 6.8% 1 143 + 182 53 56 53.6% 1.3% 1 143 + 62480 40960 20480 40.6% 19.8% 1 223 + 7 2 5 22.2% 22.2% 1 223 + 348 0 0 100.0% 0.0% 1 223 + 0 0 0 0.0% 0.0% 1 223 + 0 0 0 0.0% 0.0% 1 223 + 0 0 0 0.0% 0.0% 1 223 + 0 0 0 0.0% 0.0% 1 223 + +The file copied into page cache was named 80m with a size of 83886080 (83886080/4096) = 20480, this matches what we see under dirties