Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trace connections using pinned eBPF map #2057

Merged
merged 9 commits into from Jul 30, 2019
5 changes: 5 additions & 0 deletions include/h2o.h
Expand Up @@ -1189,6 +1189,11 @@ void h2o_accept_setup_memcached_ssl_resumption(h2o_memcached_context_t *ctx, uns
*/
void h2o_accept_setup_redis_ssl_resumption(const char *host, uint16_t port, unsigned expiration, const char *prefix);

/**
 * Reports whether probes should fire for the given connection.
 * The answer is the tracing state of the socket returned by the connection's `get_socket` callback, as computed by
 * `h2o_socket_is_traced`; a return value of 1 means "traced".
 */
int h2o_conn_is_traced(h2o_conn_t *conn);

/**
* returns the protocol version (e.g. "HTTP/1.1", "HTTP/2")
*/
Expand Down
40 changes: 40 additions & 0 deletions include/h2o/ebpf.h
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2019 Fastly Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef h2o__ebpf_h
#define h2o__ebpf_h

/**
 * Key identifying one connection in the pinned eBPF map (H2O_EBPF_MAP_PATH).
 * The server zero-fills the whole key before populating it, so unused bytes are zero.
 */
typedef struct h2o_ebpf_map_key_t {
/* local endpoint of the socket (filled from getsockname) */
struct {
uint8_t ip[16]; /* raw address bytes; an IPv4 address occupies the first 4 bytes, the rest stays zero */
uint16_t port; /* copied verbatim from sin_port / sin6_port, i.e. network byte order */
} source;
/* remote endpoint of the socket (filled from getpeername) */
struct {
uint8_t ip[16]; /* same encoding as source.ip */
uint16_t port; /* same encoding as source.port */
} destination;
uint8_t family; /* 6 when the local address is AF_INET6, 4 otherwise */
uint8_t protocol; /* value of the SO_TYPE socket option, narrowed to 8 bits */
} h2o_ebpf_map_key_t;

#define H2O_EBPF_MAP_PATH "/sys/fs/bpf/h2o_map"

#endif
14 changes: 11 additions & 3 deletions include/h2o/probes.h
Expand Up @@ -31,23 +31,31 @@
#include "picotls.h"
#include "h2o-probes.h"

/*
 * Fires probe `label` for a connection. The probe is emitted only when H2O_H2O_<label>_ENABLED() is true AND
 * h2o_conn_is_traced() returns exactly 1 for the connection. `conn` is evaluated once (captured in `_conn`) and is
 * passed as the first probe argument, followed by the variadic arguments. The variadic arguments are evaluated only
 * when the probe actually fires.
 */
#define H2O_PROBE_CONN(label, conn, ...) \
do { \
h2o_conn_t *_conn = (conn); \
if (PTLS_UNLIKELY(H2O_H2O_##label##_ENABLED()) && h2o_conn_is_traced(_conn) == 1) { \
H2O_H2O_##label(_conn, __VA_ARGS__); \
} \
} while (0)
kazuho marked this conversation as resolved.
Show resolved Hide resolved

/*
 * Fires probe `label` with the given arguments whenever H2O_H2O_<label>_ENABLED() is true. Unlike H2O_PROBE_CONN, no
 * per-connection filtering is applied. The arguments are evaluated only when the probe is enabled.
 */
#define H2O_PROBE(label, ...) \
do { \
if (PTLS_UNLIKELY(H2O_H2O_##label##_ENABLED())) { \
H2O_H2O_##label(__VA_ARGS__); \
} \
} while (0)

/*
 * Hex-dumps `l` bytes starting at `s` by calling ptls_hexdump() with a scratch buffer of `l * 2 + 1` bytes obtained
 * from alloca(). The macro is a GNU statement expression that evaluates to the return value of ptls_hexdump()
 * (presumably the buffer itself -- confirm against picotls). `l` is evaluated once. Because the buffer comes from
 * alloca(), it belongs to the stack frame of the function in which the macro is expanded and must not outlive it.
 */
#define H2O_PROBE_HEXDUMP(s, l) \
({ \
size_t _l = (l); \
ptls_hexdump(alloca(_l * 2 + 1), (s), _l); \
})

#else

/* probes are compiled out in this branch: each macro expands to nothing, so its arguments are never evaluated */
#define H2O_PROBE_CONN(label, conn, ...)
#define H2O_PROBE(label, ...)
#define H2O_PROBE_HEXDUMP(s, l)

#endif

#endif
#endif
9 changes: 9 additions & 0 deletions include/h2o/socket.h
Expand Up @@ -108,6 +108,11 @@ struct st_h2o_socket_t {
* total bytes written (above the TLS layer)
*/
size_t bytes_written;
/**
* ternary flag to track if sock is being traced. 0 indicates value unknown, 1 means is traced, -1 is not traced
*/
int _is_traced;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if this should be a ternary flag.

If I understand correctly, the intent of defining this as a ternary flag is to lazy-load the information from the eBPF map. However, I am not sure if that should be done, because the map is an LRU.

I think we should try to load the information as soon as the server side of the socket is created, to avoid the risk of the entry corresponding to the created socket being evicted from the eBPF map. A positive side effect of making such a change would be that this can then be an ordinary boolean flag, because the "unknown" state becomes unnecessary.

WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> If I understand correctly, the intent of defining this as a ternary flag is to lazy-load the information from the eBPF map

Yes, exactly - but the LRU's relatively small data retention is a good point indeed!

My initial idea behind the lazily-loaded ternary state was to completely isolate the tracing path from the rest of the code. h2o_socket_is_traced is then the only gateway to the tracing code, which is only executed once a probe is attached.

This works if h2o_socket_is_traced isn't called too late after the arrival of the connection, so the relevant information is hopefully still within the map at this time.

I'm not sure what to conclude from here. I implemented the non-ternary-state version in this commit - feel free to tell me what you think, I can cherry-pick it to this branch if you think it's a better setup 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the non-ternary-state version. Applied.


struct {
void (*cb)(void *data);
void *data;
Expand Down Expand Up @@ -339,6 +344,10 @@ void h2o_ssl_register_alpn_protocols(SSL_CTX *ctx, const h2o_iovec_t *protocols)
* registers the protocol list to be used for NPN
*/
void h2o_ssl_register_npn_protocols(SSL_CTX *ctx, const char *protocols);
/**
 * Reports whether the socket is to be traced according to the pinned eBPF map.
 * Returns 1 when the socket is traced; this is also the answer when the map is not available or when eBPF filtering
 * is not compiled in. Returns -1 when the map has no entry for the socket, and 0 when `sock` is NULL or the lookup key
 * could not be built.
 */
int h2o_socket_is_traced(h2o_socket_t *sock);

void h2o_socket__write_pending(h2o_socket_t *sock);
void h2o_socket__write_on_complete(h2o_socket_t *sock, int status);
Expand Down
113 changes: 113 additions & 0 deletions lib/common/socket.c
Expand Up @@ -1470,3 +1470,116 @@ void h2o_sliding_counter_stop(h2o_sliding_counter_t *counter, uint64_t now)
/* recalc average */
counter->average = counter->prev.sum / (sizeof(counter->prev.slots) / sizeof(counter->prev.slots[0]));
}

#if H2O_USE_DTRACE && defined(__linux__)
#include <linux/bpf.h>
#include <linux/unistd.h>
#include "h2o-probes.h"
#include "include/h2o/ebpf.h"
#include <sys/stat.h>

/* per-thread file descriptor referring to the pinned eBPF map; -1 while the map is not open */
static __thread int tracing_map_fd = -1;
/* per-thread timestamp (as returned by h2o_now) of the last time the map path was checked */
static __thread uint64_t tracing_map_last_attempt = 0;

/**
 * Keeps the per-thread descriptor `tracing_map_fd` in sync with the pinned eBPF map at H2O_EBPF_MAP_PATH.
 *
 * At most once per second (measured with the event loop clock of `sock`) the path is stat()ed:
 *  - if the path is gone, a previously opened descriptor is closed and reset to -1;
 *  - if the path exists and no descriptor is held, the map is opened with BPF_OBJ_GET.
 * On failure to open, `tracing_map_fd` ends up being -1 (the return value of the syscall).
 */
static void open_tracing_map(h2o_socket_t *sock)
{
    /* only check every second; the subtraction is unsigned, so it must be `now - last` (elapsed time) and not the
     * other way around, which would wrap to a huge value and defeat the rate limit */
    uint64_t now = h2o_now(h2o_socket_get_loop(sock));
    if (now - tracing_map_last_attempt < 1000)
        return;

    tracing_map_last_attempt = now;

    /* check if the map exists at the pinned path */
    struct stat s;
    if (stat(H2O_EBPF_MAP_PATH, &s) == -1) {
        /* map path unavailable, release the descriptor if we hold one and leave */
        if (tracing_map_fd >= 0) {
            close(tracing_map_fd);
            tracing_map_fd = -1;
        }
        return;
    }

    if (tracing_map_fd >= 0)
        return; /* map still exists and we already have a descriptor */

    /* map exists, try to obtain a descriptor for it */
    union bpf_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.pathname = (uint64_t)(uintptr_t)H2O_EBPF_MAP_PATH;
    tracing_map_fd = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}

/*
 * Looks up `key` in the eBPF map referred to by `tracing_map_fd` using BPF_MAP_LOOKUP_ELEM.
 * Returns 1 if the syscall succeeds (key present) and -1 if it fails; the 1 / -1 encoding matches the ternary
 * `_is_traced` flag of the socket, in which 0 is reserved for "unknown".
 * NOTE(review): `value` is declared const although it is handed to the kernel as the output buffer of the lookup, and
 * it must be at least as large as the value size of the map -- confirm that callers pass a large enough buffer.
 */
static int lookup_map(const void *key, const void *value)
{
union bpf_attr attr;
memset(&attr, 0, sizeof(attr));
attr.map_fd = tracing_map_fd;
attr.key = (uint64_t)key;
attr.value = (uint64_t)value;
return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) == -1 ? -1 : 1; // return 1 if found, -1 otherwise
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the returned value a boolean? Assuming that it is, I think using 1 (true) / 0 (false) would make the code consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The syscall returns 0 if the key is found, -1 otherwise. As the output is going to the ternary state variable _is_traced, we need to shift the output (1 if present within map, -1 otherwise).

I changed this to a 1 if present 0 otherwise setup in the non-ternary-state version discussed below.

}

/*
 * Copies the address and port of `sa` into one endpoint of an eBPF map key.
 * For AF_INET the 4 address bytes are written to the start of `ip`; for AF_INET6 all 16 bytes are written. The port
 * is stored exactly as found in the sockaddr (no byte-order conversion). Any other address family leaves both
 * outputs untouched.
 */
static inline void set_ebpf_map_key_tuples(struct sockaddr *sa, uint8_t *ip, uint16_t *port)
{
    switch (sa->sa_family) {
    case AF_INET: {
        struct sockaddr_in *v4 = (void *)sa;
        memcpy(ip, &v4->sin_addr, sizeof(v4->sin_addr));
        *port = v4->sin_port;
        break;
    }
    case AF_INET6: {
        struct sockaddr_in6 *v6 = (void *)sa;
        memcpy(ip, &v6->sin6_addr, sizeof(v6->sin6_addr));
        *port = v6->sin6_port;
        break;
    }
    default:
        /* not an IP address: nothing to record */
        break;
    }
}

/*
 * Builds the eBPF map key describing `sock`. The key is zero-filled first, then populated with the local address and
 * port (from h2o_socket_getsockname) as `source`, the peer address and port (from h2o_socket_getpeername) as
 * `destination`, `family` (6 if the local address is AF_INET6, 4 for anything else) and `protocol` (the SO_TYPE
 * socket option). Returns 1 on success; returns 0 if either name lookup yields 0 or getsockopt fails.
 * NOTE(review): `sock_type_len` is initialised with sizeof(sock_type_len) rather than sizeof(sock_type); both are
 * unsigned int so the value is the same, but the latter states the intent.
 * NOTE(review): for a non-IP socket the address fields stay zeroed and family is reported as 4 -- confirm intended.
 */
static inline int init_ebpf_map_key(h2o_ebpf_map_key_t *key, h2o_socket_t *sock)
{
struct sockaddr_storage sockname, peername;
unsigned int sock_type, sock_type_len = sizeof(sock_type_len);
memset(key, 0, sizeof(*key));

// fetch sock/peer name and socket type
if (h2o_socket_getsockname(sock, (void *)&sockname) == 0 ||
h2o_socket_getpeername(sock, (void *)&peername) == 0 ||
getsockopt(h2o_socket_get_fd(sock), SOL_SOCKET, SO_TYPE, &sock_type, &sock_type_len) == -1)
return 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we might call init_ebpf_map_key function with a UNIX socket as an argument?

Assuming that that could happen, I think we might call getsockopt first, bail out immediately if sock_type is neither IPv4 ar IPv6.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand here - is the idea to early exit if we can't determine the socket type ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FTR, the problem was that we were failing to setup the eBPF key when the socket was unix socket. Fixed in 262f9c9.


set_ebpf_map_key_tuples((void *)&sockname, &key->source.ip[0], &key->source.port);
set_ebpf_map_key_tuples((void *)&peername, &key->destination.ip[0], &key->destination.port);
key->family = sockname.ss_family == AF_INET6 ? 6 : 4;
key->protocol = sock_type;
return 1;
}

/**
 * Decides whether probes should fire for `sock`, consulting the pinned eBPF map and caching the verdict on the socket.
 *
 * Return values:
 *   1  the socket is traced (map entry found), or the map is not available so every socket is accepted;
 *  -1  the map is available but holds no entry for this socket;
 *   0  `sock` is NULL or the lookup key could not be built.
 * Only the outcome of an actual map lookup (1 or -1) is stored in `sock->_is_traced`; the other outcomes are
 * re-evaluated on the next call.
 */
int h2o_socket_is_traced(h2o_socket_t *sock)
{
    if (sock == NULL)
        return 0;

    /* a non-zero flag means the map has already been consulted for this socket */
    if (sock->_is_traced != 0)
        return sock->_is_traced;

    /* try to open the map if it is not opened yet */
    open_tracing_map(sock);
    /* -1 is the "not open" sentinel; 0 is a valid descriptor and must not be mistaken for it */
    if (tracing_map_fd < 0)
        return 1; /* map is not connected, fall back to accepting the probe */

    /* define key/vals - we are only interested in the presence of the key, the value is discarded */
    /* NOTE(review): the value buffer is pointer-sized; confirm the value size of the map does not exceed it */
    h2o_ebpf_map_key_t key;
    void *vals = NULL;

    /* init key - fall back to refusing the probe if the key cannot be initialized */
    if (init_ebpf_map_key(&key, sock) == 0)
        return 0;

    /* look up the map for our key and remember the verdict */
    return sock->_is_traced = lookup_map(&key, &vals);
}
#else
/* fallback used when eBPF-based filtering is not compiled in: every socket is reported as traced */
int h2o_socket_is_traced(h2o_socket_t *sock)
{
    (void)sock; /* the argument plays no role in this build */
    return 1;
}
#endif
5 changes: 5 additions & 0 deletions lib/core/util.c
Expand Up @@ -944,3 +944,8 @@ void h2o_cleanup_thread(void)
h2o_mem_clear_recycle(&h2o_http2_wbuf_buffer_prototype.allocator);
h2o_mem_clear_recycle(&h2o_socket_buffer_prototype.allocator);
}

/**
 * Connection-level wrapper around h2o_socket_is_traced: resolves the socket of `conn` through the `get_socket`
 * callback and returns the tracing verdict for that socket unchanged.
 */
int h2o_conn_is_traced(h2o_conn_t *conn)
{
    int traced = h2o_socket_is_traced(conn->callbacks->get_socket(conn));
    return traced;
}
pldubouilh marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 4 additions & 2 deletions lib/http1.c
Expand Up @@ -527,8 +527,10 @@ static void handle_incoming_request(struct st_h2o_http1_conn_t *conn)
send_bad_request(conn, "line folding of header fields is not supported");
return;
}
H2O_PROBE(RECEIVE_REQUEST_HEADERS, &conn->super, conn->_req_index, &conn->req.input.method, &conn->req.input.authority,
&conn->req.input.path, conn->req.version, conn->req.headers.entries, conn->req.headers.size);

H2O_PROBE_CONN(RECEIVE_REQUEST_HEADERS, &conn->super, conn->_req_index, &conn->req.input.method, &conn->req.input.authority,
&conn->req.input.path, conn->req.version, conn->req.headers.entries, conn->req.headers.size);

if (entity_body_header_index != -1) {
conn->req.timestamps.request_body_begin_at = h2o_gettimeofday(conn->super.ctx->loop);
if (expect.base != NULL) {
Expand Down