-
Notifications
You must be signed in to change notification settings - Fork 9
/
ahas-kernel-3.10.yaml
580 lines (508 loc) · 17.2 KB
/
ahas-kernel-3.10.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
programs:
  # Block IO latency and size histograms (per namespace/container/disk/op),
  # adapted for 3.10-class kernels.
  # See:
  # * https://github.com/iovisor/bcc/blob/master/tools/biolatency.py
  # * https://github.com/iovisor/bcc/blob/master/tools/biolatency_example.txt
  #
  # See also: bio-tracepoints.yaml
  - name: bio
    metrics:
      histograms:
        - name: bio_latency_seconds
          help: Block IO latency histogram
          table: io_latency
          bucket_type: exp2
          bucket_min: 0
          bucket_max: 26
          bucket_multiplier: 0.000001 # microseconds to seconds
          labels:
            # Key struct is disk_key_t: u64 pid, u64 op, char disk[32], u64 slot.
            # app_namespace (reuse: true) decodes the same pid bytes that
            # app_container consumes.
            - name: app_namespace
              size: 8
              reuse: true
              decoders:
                - name: kube_podnamespace
            - name: app_container
              size: 8
              reuse: false
              decoders:
                - name: kube_containername
            - name: operation
              size: 8
              reuse: false
              decoders:
                - name: uint
                - name: static_map
                  static_map:
                    1: read
                    2: write
            - name: device
              size: 32
              reuse: false
              decoders:
                - name: string
            - name: bucket
              size: 8
              reuse: false
              decoders:
                - name: uint
        - name: bio_size_bytes
          help: Block IO size histogram with kibibyte buckets
          table: io_size
          bucket_type: exp2
          bucket_min: 0
          bucket_max: 15
          bucket_multiplier: 1024 # kibibytes to bytes
          labels:
            - name: app_namespace
              size: 8
              reuse: true
              decoders:
                - name: kube_podnamespace
            - name: app_container
              size: 8
              reuse: false
              decoders:
                - name: kube_containername
            - name: operation
              size: 8
              reuse: false
              decoders:
                - name: uint
                - name: static_map
                  static_map:
                    1: read
                    2: write
            - name: device
              size: 32
              reuse: false
              decoders:
                - name: string
            - name: bucket
              size: 8
              reuse: false
              decoders:
                - name: uint
    kprobes:
      blk_start_request: trace_req_start
      blk_mq_start_request: trace_req_start
      blk_account_io_completion: trace_req_completion
    # NOTE(review): blk_account_io_completion can run in interrupt context, so
    # the pid captured at completion may belong to the interrupted task rather
    # than the submitter — confirm whether per-pid attribution is meaningful.
    # NOTE(review): the histogram maps are sized by max_disks only, but keys
    # also vary by pid and op — verify sizing is sufficient under load.
    code: |
      #include <linux/blkdev.h>
      #include <linux/blk_types.h>
      typedef struct disk_key {
          u64 pid;
          u64 op;
          char disk[32];
          u64 slot;
      } disk_key_t;
      // Max number of disks we expect to see on the host
      const u8 max_disks = 255;
      // 27 buckets for latency, max range is 33.6s .. 67.1s
      const u8 max_latency_slot = 26;
      // 16 buckets per disk in kib, max range is 16mib .. 32mib
      const u8 max_size_slot = 15;
      // Hash to temporarily hold the start time of each bio request, max 10k in-flight by default
      BPF_HASH(start, struct request *);
      // Histograms to record latencies
      BPF_HISTOGRAM(io_latency, disk_key_t, (max_latency_slot + 2) * max_disks);
      // Histograms to record sizes
      BPF_HISTOGRAM(io_size, disk_key_t, (max_size_slot + 2) * max_disks);
      // Record start time of a request
      int trace_req_start(struct pt_regs *ctx, struct request *req) {
          u64 ts = bpf_ktime_get_ns();
          start.update(&req, &ts);
          return 0;
      }
      // Calculate request duration and store in appropriate histogram bucket
      int trace_req_completion(struct pt_regs *ctx, struct request *req, unsigned int bytes) {
          u64 *tsp, delta;
          // Fetch timestamp and calculate delta
          tsp = start.lookup(&req);
          if (tsp == 0) {
              return 0; // missed issue
          }
          // There are write requests with zero length on sector zero,
          // which do not seem to be real writes to device.
          if (req->__sector == 0 && req->__data_len == 0) {
              start.delete(&req);
              return 0;
          }
          // Disk that received the request
          struct gendisk *disk = req->rq_disk;
          // Delta in nanoseconds
          delta = bpf_ktime_get_ns() - *tsp;
          // Skip entries with backwards time: temp workaround for https://github.com/iovisor/bcc/issues/728
          if ((s64) delta < 0) {
              start.delete(&req);
              return 0;
          }
          // Convert to microseconds
          delta /= 1000;
          // Latency histogram key
          u64 latency_slot = bpf_log2l(delta);
          // Cap latency bucket at max value
          if (latency_slot > max_latency_slot) {
              latency_slot = max_latency_slot;
          }
          u64 pid = bpf_get_current_pid_tgid() >> 32;
          disk_key_t latency_key = { .pid = pid, .slot = latency_slot };
          bpf_probe_read(&latency_key.disk, sizeof(latency_key.disk), &disk->disk_name);
          // Size in kibibytes
          u64 size_kib = bytes / 1024;
          // Request size histogram key (bpf_log2l: size_kib is 64 bit)
          u64 size_slot = bpf_log2l(size_kib);
          // Cap size bucket at max value
          if (size_slot > max_size_slot) {
              size_slot = max_size_slot;
          }
          disk_key_t size_key = { .pid = pid, .slot = size_slot };
          bpf_probe_read(&size_key.disk, sizeof(size_key.disk), &disk->disk_name);
          // Classify read vs write. On kernel 3.10 REQ_WRITE is a flag *bit*
          // that can be set together with other flags (REQ_SYNC, REQ_META, ...),
          // so it must be tested as a bit — comparing the masked flags for
          // equality misclassifies such writes as reads. On newer kernels the
          // operation is a value packed into cmd_flags instead.
      #ifdef REQ_WRITE
          u64 op = (req->cmd_flags & REQ_WRITE) ? 2 : 1;
      #elif defined(REQ_OP_SHIFT)
          u64 op = ((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE) ? 2 : 1;
      #else
          u64 op = ((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) ? 2 : 1;
      #endif
          latency_key.op = op;
          size_key.op = op;
          io_latency.increment(latency_key);
          io_size.increment(size_key);
          // Increment sum keys
          latency_key.slot = max_latency_slot + 1;
          io_latency.increment(latency_key, delta);
          size_key.slot = max_size_slot + 1;
          io_size.increment(size_key, size_kib);
          start.delete(&req);
          return 0;
      }
  # Count EADDRINUSE errors, that can be triggered by either error
  # or by running out of free sockets on the machine.
  # Also counts connect() failures with ETIMEDOUT and ECONNREFUSED.
  - name: tcpconnecterror
    # NOTE(review): sink_root / sink_mode are not upstream ebpf_exporter
    # settings — presumably vendor extensions of the ahas agent; confirm
    # against the agent's config loader.
    sink_root: /ahas-workspace/data/ahas/ahas-agent/ebpf-exporter/data
    metrics:
      counters:
        - name: tcp_connect_error_total
          help: Calls resulted in EADDRINUSE
          table: counts
          sink_mode: 2
          labels:
            # Key struct is key_t: u64 pid, u64 op, char comm[128].
            # app_namespace (reuse: true) decodes the same pid bytes that
            # app_container consumes.
            - name: app_namespace
              size: 8
              reuse: true
              decoders:
                - name: kube_podnamespace
            - name: app_container
              size: 8
              reuse: false
              decoders:
                - name: kube_containername
            - name: function
              size: 8
              reuse: false
              decoders:
                - name: uint
                - name: static_map
                  static_map:
                    1: bind_erruse
                    2: connect_erruse
                    3: connect_timeout
                    4: connect_refuse
            - name: command
              size: 128
              reuse: false
              decoders:
                - name: string
    tracepoints:
      syscalls:sys_exit_bind: tracepoint__syscalls__sys_exit_bind
      syscalls:sys_exit_connect: tracepoint__syscalls__sys_exit_connect
    code: |
      #include <linux/errno.h>
      // Values for key_t.op, decoded by the static_map above
      enum stats {
          S_BIND = 1,
          S_CONNECT = 2,
          S_CONNECT_TIMEOUT = 3,
          S_CONNECT_REFUSED = 4,
      };
      struct key_t {
          u64 pid;
          u64 op;
          char comm[128];
      };
      BPF_HASH(counts, struct key_t);
      // Count bind() calls that returned EADDRINUSE, keyed by pid/op/comm
      static int count_bind_return(int ret) {
          if (ret == -EADDRINUSE) {
              struct key_t key = { .op = S_BIND };
              bpf_get_current_comm(&key.comm, sizeof(key.comm));
              u64 pid = bpf_get_current_pid_tgid() >> 32;
              key.pid = pid;
              counts.increment(key);
          }
          return 0;
      }
      // Count connect() calls that returned EADDRINUSE, ETIMEDOUT or ECONNREFUSED
      static int count_connect_return(int ret) {
          u64 pid = bpf_get_current_pid_tgid() >> 32;
          struct key_t key = { .pid = pid};
          bpf_get_current_comm(&key.comm, sizeof(key.comm));
          if (ret == -EADDRINUSE) {
              key.op = S_CONNECT;
              counts.increment(key);
          }
          if (ret == -ETIMEDOUT) {
              key.op = S_CONNECT_TIMEOUT;
              counts.increment(key);
          }
          if (ret == -ECONNREFUSED) {
              key.op = S_CONNECT_REFUSED;
              counts.increment(key);
          }
          return 0;
      }
      // Generates function tracepoint__syscalls__sys_exit_bind
      TRACEPOINT_PROBE(syscalls, sys_exit_bind) {
          return count_bind_return(args->ret);
      }
      // Generates function tracepoint__syscalls__sys_exit_connect
      TRACEPOINT_PROBE(syscalls, sys_exit_connect) {
          return count_connect_return(args->ret);
      }
  # TCP connect latency histogram (per /16 subnet) plus a per-destination
  # connect counter. Latency is measured from tcp_v4_connect/tcp_v6_connect
  # entry until the socket leaves TCP_SYN_SENT in tcp_rcv_state_process.
  - name: tcpconnectinfo
    # NOTE(review): sink_root / sink_mode are not upstream ebpf_exporter
    # settings — presumably vendor extensions of the ahas agent; confirm.
    sink_root: /ahas-workspace/data/ahas/ahas-agent/ebpf-exporter/data
    metrics:
      histograms:
        - name: tcp_connect_latency_seconds
          help: Tcp connect latency histogram
          table: tcp_connect_latency
          bucket_type: exp2
          bucket_min: 0
          bucket_max: 26
          bucket_multiplier: 0.000001 # microseconds to seconds
          labels:
            # Key struct is pid_key_t: u32 pid, u32 addr, u64 slot (16 bytes).
            # app_namespace (reuse: true) decodes the same pid bytes that
            # app_container consumes.
            - name: app_namespace
              size: 4
              reuse: true
              decoders:
                - name: kube_podnamespace
            - name: app_container
              size: 4
              reuse: false
              decoders:
                - name: kube_containername
            - name: subnet
              size: 4
              reuse: false
              decoders:
                - name: inet_ip
            - name: bucket
              size: 8
              reuse: false
              decoders:
                - name: uint
      counters:
        - name: tcp_connect_total
          help: Tcp connect counter
          table: tcp_connect_total
          sink_mode: 2
          labels:
            # Key struct is counter_key_t: u64 pid, u32 addr, u32 port.
            # Both namespace and container reuse the pid bytes; app_pid
            # consumes them.
            - name: app_namespace
              size: 8
              reuse: true
              decoders:
                - name: kube_podnamespace
            - name: app_container
              size: 8
              reuse: true
              decoders:
                - name: kube_containername
            - name: app_pid
              size: 8
              reuse: false
              decoders:
                - name: uint
            - name: conn_dst_addr
              size: 4
              reuse: false
              decoders:
                - name: inet_ip
            - name: conn_dst_port
              size: 4
              reuse: false
              decoders:
                - name: uint
    kprobes:
      tcp_v4_connect: trace_connect
      tcp_v6_connect: trace_connect
      tcp_rcv_state_process: trace_tcp_rcv_state_process
    # NOTE(review): tcp_rcv_state_process runs in softirq context, so
    # bpf_get_current_pid_tgid() there may name the interrupted task rather
    # than the connecting process — confirm whether the pid label is reliable.
    # NOTE(review): only AF_INET fills addr/port/subnet; AF_INET6 connects
    # are recorded with zeroed address labels.
    code: |
      #include <uapi/linux/ptrace.h>
      #include <net/sock.h>
      #include <net/tcp_states.h>
      #include <bcc/proto.h>
      typedef struct pid_key {
          u32 pid;
          u32 addr;
          u64 slot;
      } pid_key_t;
      typedef struct counter_key {
          u64 pid;
          u32 addr;
          u32 port;
      } counter_key_t;
      // Max number of subnets
      const u8 max_subnets = 255;
      // 27 buckets for latency, max range is 33.6s .. 67.1s
      const u8 max_latency_slot = 26;
      // Histograms to record latencies
      BPF_HISTOGRAM(tcp_connect_latency, pid_key_t, (max_latency_slot + 2) * max_subnets);
      // Counter to record tcp connects
      BPF_HASH(tcp_connect_total, counter_key_t);
      // Connect start timestamps, keyed by socket pointer
      BPF_HASH(start, struct sock *, u64);
      // Record start time of a connect
      int trace_connect(struct pt_regs *ctx, struct sock *sk) {
          u64 ts = bpf_ktime_get_ns();
          start.update(&sk, &ts);
          return 0;
      }
      // Calculate latency
      int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *skp) {
          // will be in TCP_SYN_SENT for handshake
          if (skp->__sk_common.skc_state != TCP_SYN_SENT)
              return 0;
          // check start and calculate delta
          u64 *tsp = start.lookup(&skp);
          if (tsp == 0) {
              return 0; // missed entry or filtered
          }
          // Latency in nanoseconds at this point; converted to us below
          u64 latency_us = bpf_ktime_get_ns() - *tsp;
          // Skip entries with backwards time: temp workaround for https://github.com/iovisor/bcc/issues/728
          if ((s64) latency_us < 0) {
              start.delete(&skp);
              return 0;
          }
          // Convert to microseconds
          latency_us /= 1000;
          // Latency histogram key
          u64 latency_slot = bpf_log2l(latency_us);
          // Cap latency bucket at max value
          if (latency_slot > max_latency_slot) {
              latency_slot = max_latency_slot;
          }
          u64 id = bpf_get_current_pid_tgid();
          u32 pid = id >> 32; // PID is higher part
          pid_key_t latency_key = { .pid = pid, .slot = latency_slot };
          counter_key_t counter_key = { .pid = pid};
          u32 daddr = 0;
          u32 dport = 0;
          u32 addr = 0;
          u32 port = 0;
          u32 subnet = 0;
          u16 family = 0;
          family = skp->__sk_common.skc_family;
          if (family == AF_INET) {
              daddr = skp->__sk_common.skc_daddr;
              dport = skp->__sk_common.skc_dport;
              addr = daddr;
              // dport is big-endian; this extracts it in host order
              port = ntohl(dport) >> 16;
              // first two octets of the (big-endian) address, i.e. a /16
              subnet = daddr & 0xFFFF;
          }
          latency_key.addr = subnet;
          counter_key.addr = addr;
          counter_key.port = port;
          // Increment counter key
          tcp_connect_total.increment(counter_key);
          // Increment bucket key
          tcp_connect_latency.increment(latency_key);
          // Increment sum key
          latency_key.slot = max_latency_slot + 1;
          tcp_connect_latency.increment(latency_key, latency_us);
          // Remove enqueued task
          start.delete(&skp);
          return 0;
      }
- name: drsnoop
metrics:
histograms:
- name: direct_reclaim_latency_seconds
help: Direct reclaim memory latency histogram
table: direct_reclaim_latency
bucket_type: exp2
bucket_min: 0
bucket_max: 26
bucket_multiplier: 0.000001 # microseconds to seconds
labels:
- name: app_namespace
size: 8
reuse: true
decoders:
- name: kube_podnamespace
- name: app_container
size: 8
reuse: false
decoders:
- name: kube_containername
- name: bucket
size: 8
reuse: false
decoders:
- name: uint
tracepoints:
vmscan:mm_vmscan_direct_reclaim_begin: tracepoint__vmscan__mm_vmscan_direct_reclaim_begin
vmscan:mm_vmscan_direct_reclaim_end: tracepoint__vmscan__mm_vmscan_direct_reclaim_end
code: |
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mmzone.h>
typedef struct pid_key {
u64 pid;
u64 slot;
} pid_key_t;
// 27 buckets for latency, max range is 33.6s .. 67.1s
const u8 max_latency_slot = 26;
// Histograms to record latencies
BPF_HISTOGRAM(direct_reclaim_latency, pid_key_t, max_latency_slot + 2);
struct val_t {
u64 id;
u64 ts; // start time
};
BPF_HASH(start, u64, struct val_t);
TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_begin) {
u64 id = bpf_get_current_pid_tgid();
struct val_t val = {.id = id};
val.ts = bpf_ktime_get_ns();
start.update(&id, &val);
return 0;
}
TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_end) {
u64 id = bpf_get_current_pid_tgid();
u64 pid = id >> 32; // PID is higher part
struct val_t *valp;
u64 ts = bpf_ktime_get_ns();
valp = start.lookup(&id);
if (valp == NULL) {
// missed entry
return 0;
}
// Latency in microseconds
u64 latency_us = bpf_ktime_get_ns() - valp->ts;
// Skip entries with backwards time: temp workaround for https://github.com/iovisor/bcc/issues/728
if ((s64) latency_us < 0) {
start.delete(&id);
return 0;
}
// Convert to microseconds
latency_us /= 1000;
// Latency histogram key
u64 latency_slot = bpf_log2l(latency_us);
// Cap latency bucket at max value
if (latency_slot > max_latency_slot) {
latency_slot = max_latency_slot;
}
pid_key_t latency_key = { .pid = pid, .slot = latency_slot };
// Increment bucket key
direct_reclaim_latency.increment(latency_key);
// Increment sum key
latency_key.slot = max_latency_slot + 1;
direct_reclaim_latency.increment(latency_key, latency_us);
start.delete(&id);
return 0;
}