Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

UDP header now contains offset to first protocol data

Summary: This implements the "offset to first byte of protocol text"
         feature that should make UDP resynchronization easier.

Reviewed By: marc

Test Plan: Updated the UDP test cases in the built-in test suite.

Revert: OK



git-svn-id: http://svn.facebook.com/svnroot/projects/memcached/trunk@64270 2c7ba8d8-a2f7-0310-a573-de162e16dcc7
  • Loading branch information...
commit a4a44a1069c09231948a20fc7eeb53b500edcb03 1 parent ff6881f
sgrimm authored
Showing with 85 additions and 29 deletions.
  1. +10 −10 binary_sm.c
  2. +10 −1 doc/protocol.txt
  3. +35 −10 memcached.c
  4. +1 −1  memcached.h
  5. +29 −7 t/udp.t
View
20 binary_sm.c
@@ -630,7 +630,7 @@ static void handle_echo_cmd(conn* c)
// nothing special for the echo command to do, so just add ourselves to the
// list of buffers to transmit.
- if (add_iov(c, rep, sizeof(empty_rep_t))) {
+ if (add_iov(c, rep, sizeof(empty_rep_t), true)) {
bp_write_err_msg(c, "couldn't build response");
return;
}
@@ -657,8 +657,8 @@ static void handle_version_cmd(conn* c)
// nothing special for the echo command to do, so just add ourselves to the
// list of buffers to transmit.
- if (add_iov(c, rep, sizeof(string_rep_t)) ||
- add_iov(c, VERSION, sizeof(VERSION) - 1)) {
+ if (add_iov(c, rep, sizeof(string_rep_t), true) ||
+ add_iov(c, VERSION, sizeof(VERSION) - 1, false)) {
bp_write_err_msg(c, "couldn't build response");
return;
}
@@ -726,8 +726,8 @@ static void handle_get_cmd(conn* c)
rep->body_length = htonl((sizeof(*rep) - BINARY_PROTOCOL_REPLY_HEADER_SZ) +
it->nbytes - 2); // chop off the '\r\n'
- if (add_iov(c, rep, sizeof(value_rep_t)) ||
- add_iov(c, ITEM_data(it), it->nbytes - 2)) {
+ if (add_iov(c, rep, sizeof(value_rep_t), true) ||
+ add_iov(c, ITEM_data(it), it->nbytes - 2, false)) {
bp_write_err_msg(c, "couldn't build response");
return;
}
@@ -740,7 +740,7 @@ static void handle_get_cmd(conn* c)
rep->status = mcc_res_notfound;
rep->body_length = htonl((sizeof(*rep) - BINARY_PROTOCOL_REPLY_HEADER_SZ));
- if (add_iov(c, rep, sizeof(value_rep_t))) {
+ if (add_iov(c, rep, sizeof(value_rep_t), true)) {
bp_write_err_msg(c, "couldn't build response");
return;
}
@@ -811,7 +811,7 @@ static void handle_update_cmd(conn* c)
item_remove(c->item);
c->item = NULL;
- if (add_iov(c, rep, sizeof(empty_rep_t))) {
+ if (add_iov(c, rep, sizeof(empty_rep_t), true)) {
bp_write_err_msg(c, "couldn't build response");
return;
}
@@ -875,7 +875,7 @@ static void handle_delete_cmd(conn* c)
rep->status = mcc_res_notfound;
}
- if (add_iov(c, rep, sizeof(empty_rep_t))) {
+ if (add_iov(c, rep, sizeof(empty_rep_t), true)) {
bp_write_err_msg(c, "couldn't build response");
}
@@ -932,7 +932,7 @@ static void handle_arith_cmd(conn* c)
rep->status = mcc_res_notfound;
}
- if (add_iov(c, rep, sizeof(number_rep_t))) {
+ if (add_iov(c, rep, sizeof(number_rep_t), true)) {
bp_write_err_msg(c, "couldn't build response");
}
@@ -992,7 +992,7 @@ static void bp_write_err_msg(conn* c, const char* str)
rep->opaque = 0;
rep->body_length = htonl(strlen(str) + (sizeof(*rep) - BINARY_PROTOCOL_REPLY_HEADER_SZ));
- if (add_iov(c, c->wbuf, sizeof(string_rep_t)) ||
+ if (add_iov(c, c->wbuf, sizeof(string_rep_t), true) ||
(c->udp && build_udp_headers(c))) {
if (settings.verbose > 0) {
fprintf(stderr, "Couldn't build response\n");
View
11 doc/protocol.txt
@@ -452,7 +452,7 @@ in network byte order, high byte first):
0-1 Request ID
2-3 Sequence number
4-5 Total number of datagrams in this message
-6-7 Reserved for future use; must be 0
+6-7 Offset of first response line, or 0 if none
The request ID is supplied by the client. Typically it will be a
monotonically increasing value starting from a random seed, but the client
@@ -468,3 +468,12 @@ datagrams in the message. The client should concatenate the payloads of the
datagrams for a given response in sequence number order; the resulting byte
stream will contain a complete response in the same format as the TCP
protocol (including terminating \r\n sequences).
+
+Since responses can contain a mix of data and memcached protocol keywords,
+the offset in the last two bytes of the header points to the first byte of
+the first line of memcached protocol text in the datagram. This offset is
+inclusive of the header (that is, a value of 8 means there is protocol data
+at the start of the datagram's payload). A value of 0 means the entire
+datagram is full of non-protocol data, e.g., the middle of a large value
+being sent in response to a "get" request. This field may be used by clients
+to recover from dropped packets in the middle of long UDP responses.
View
45 memcached.c
@@ -220,7 +220,7 @@ int add_msghdr(conn *c)
if (c->udp) {
/* Leave room for the UDP header, which we'll fill in later. */
- return add_iov(c, NULL, UDP_HEADER_SIZE);
+ return add_iov(c, NULL, UDP_HEADER_SIZE, false);
}
return 0;
@@ -567,10 +567,13 @@ static int ensure_iov_space(conn *c) {
* Adds data to the list of pending data that will be written out to a
* connection.
*
+ * is_start should be true if this data represents the start of a protocol
+ * response, e.g., a "VALUE" line.
+ *
* Returns 0 on success, -1 on out-of-memory.
*/
-int add_iov(conn *c, const void *buf, int len) {
+int add_iov(conn *c, const void *buf, int len, bool is_start) {
struct msghdr *m;
int leftover;
bool limit_to_mtu;
@@ -608,12 +611,23 @@ int add_iov(conn *c, const void *buf, int len) {
m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
m->msg_iov[m->msg_iovlen].iov_len = len;
+ /*
+ * If this is the start of a response (e.g., a "VALUE" line),
+ * and it's the first one so far in this message, mark it as
+ * such so we can put its offset in the UDP header.
+ */
+ if (c->udp && is_start && ! m->msg_flags) {
+ m->msg_flags = 1;
+ m->msg_controllen = m->msg_iovlen;
+ }
+
c->msgbytes += len;
c->iovused++;
m->msg_iovlen++;
buf = ((char *)buf) + len;
len = leftover;
+ is_start = false;
} while (leftover > 0);
return 0;
@@ -624,7 +638,7 @@ int add_iov(conn *c, const void *buf, int len) {
* Constructs a set of UDP headers and attaches them to the outgoing messages.
*/
int build_udp_headers(conn *c) {
- int i;
+ int i, j, offset;
unsigned char *hdr;
assert(c != NULL);
@@ -645,14 +659,25 @@ int build_udp_headers(conn *c) {
for (i = 0; i < c->msgused; i++) {
c->msglist[i].msg_iov[0].iov_base = hdr;
c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
+
+ /* Find the offset of the first response line in the message, if any */
+ offset = 0;
+ if (c->msglist[i].msg_flags) {
+ for (j = 0; j < c->msglist[i].msg_controllen; j++) {
+ offset += c->msglist[i].msg_iov[j].iov_len;
+ }
+ c->msglist[i].msg_flags = 0;
+ c->msglist[i].msg_controllen = 0;
+ }
+
*hdr++ = c->request_id / 256;
*hdr++ = c->request_id % 256;
*hdr++ = i / 256;
*hdr++ = i % 256;
*hdr++ = c->msgused / 256;
*hdr++ = c->msgused % 256;
- *hdr++ = 0;
- *hdr++ = 0;
+ *hdr++ = offset / 256;
+ *hdr++ = offset % 256;
assert((void *) hdr == (void *)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
}
@@ -1100,9 +1125,9 @@ static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens)
* key
* " " + flags + " " + data length + "\r\n" + data (with \r\n)
*/
- if (add_iov(c, "VALUE ", 6) != 0 ||
- add_iov(c, ITEM_key(it), it->nkey) != 0 ||
- add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0)
+ if (add_iov(c, "VALUE ", 6, true) != 0 ||
+ add_iov(c, ITEM_key(it), it->nkey, false) != 0 ||
+ add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes, false) != 0)
{
break;
}
@@ -1147,7 +1172,7 @@ static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens)
if (settings.verbose > 1)
fprintf(stderr, ">%d END\n", c->sfd);
- add_iov(c, "END\r\n", 5);
+ add_iov(c, "END\r\n", 5, true);
if (c->udp && build_udp_headers(c) != 0) {
out_string(c, "SERVER_ERROR out of memory");
@@ -2033,7 +2058,7 @@ static void drive_machine(conn *c) {
* list for TCP or a two-entry list for UDP).
*/
if (c->iovused == 0 || (c->udp && c->iovused == 1)) {
- if (add_iov(c, c->wcurr, c->wbytes) != 0 ||
+ if (add_iov(c, c->wcurr, c->wbytes, true) != 0 ||
(c->udp && build_udp_headers(c) != 0)) {
if (settings.verbose > 0)
fprintf(stderr, "Couldn't build response\n");
View
2  memcached.h
@@ -312,7 +312,7 @@ conn *conn_new(const int sfd, const int init_state, const int event_flags, const
void conn_close(conn *c);
void accept_new_conns(const bool do_accept, const bool is_binary);
bool update_event(conn *c, const int new_flags);
-int add_iov(conn *c, const void *buf, int len);
+int add_iov(conn *c, const void *buf, int len, bool is_start);
int add_msghdr(conn *c);
#include "stats.h"
View
36 t/udp.t
@@ -1,7 +1,7 @@
#!/usr/bin/perl
use strict;
-use Test::More tests => 33;
+use Test::More tests => 43;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
@@ -39,7 +39,7 @@ ok($res->{0}, "only got seq number 0");
is(hexify(substr($res->{0}, 0, 2)), hexify(pack("n", 404)), "sequence number 404 correct");
is(substr($res->{0}, 8), "END\r\n");
-# test multi-packet response
+# test multi-packet response with a big value
{
my $big = "abcd" x 1024;
my $len = length $big;
@@ -48,9 +48,31 @@ is(substr($res->{0}, 8), "END\r\n");
mem_get_is($sock, "big", $big, "big value matches");
my $res = send_udp_request($usock, 999, "get big\r\n");
is(scalar keys %$res, 3, "three packet response");
- like($res->{0}, qr/VALUE big 0 4096/, "first packet has value line");
- like($res->{2}, qr/\r\nEND\r\n/, "last packet has end");
+ like(substr($res->{0}, 8), qr/^VALUE big 0 4096/, "first packet has value line");
is(hexify(substr($res->{1}, 0, 2)), hexify(pack("n", 999)), "sequence number of middle packet is correct");
+ is(hexify(substr($res->{0}, 6, 2)), "0008", "response offset of first packet points to start of payload");
+ is(hexify(substr($res->{1}, 6, 2)), "0000", "response offset of middle packet is zero since it is all data");
+ my ($resid, $seq, $this_numpkts, $offset) = unpack("nnnn", substr($res->{2}, 0, 8));
+ is(substr($res->{2}, $offset), "END\r\n", "offset of last packet points to END");
+}
+
+# test multi-packet response with several small values, to make sure
+# value-offset field is correct
+{
+ my $big = "abcd" x 100;
+ my $len = length $big;
+ print $sock "set big 0 0 $len\r\n$big\r\n";
+ is(scalar <$sock>, "STORED\r\n", "stored big");
+ mem_get_is($sock, "big", $big, "big value matches");
+ my $multi = " big" x 6;
+ my $res = send_udp_request($usock, 999, "get$multi\r\n");
+ is(scalar keys %$res, 2, "three packet response");
+ like(substr($res->{0}, 8), qr/^VALUE big 0 400/, "first packet has value line");
+ is(hexify(substr($res->{1}, 0, 2)), hexify(pack("n", 999)), "sequence number of middle packet is correct");
+ is(hexify(substr($res->{0}, 6, 2)), "0008", "response offset of first packet points to start of payload");
+ my ($resid, $seq, $this_numpkts, $offset) = unpack("nnnn", substr($res->{1}, 0, 8));
+ like(substr($res->{1}, $offset), qr/VALUE big 0 400/, "offset of middle packet points to VALUE line");
+ is(hexify(substr($res->{1}, 6, 2)), "0124", "response offset of middle packet points to first VALUE line");
}
sub test_single {
@@ -69,7 +91,7 @@ sub test_single {
$sender = $usock->recv($res, 1500, 0);
my $id = pack("n", 45);
- is(hexify(substr($res, 0, 8)), hexify($id) . '0000' . '0001' . '0000', "header is correct");
+ is(hexify(substr($res, 0, 8)), hexify($id) . '0000' . '0001' . '0008', "header is correct");
is(length $res, 36, '');
is(substr($res, 8), "VALUE foo 0 6\r\nfooval\r\nEND\r\n", "payload is as expected");
}
@@ -107,9 +129,9 @@ sub send_udp_request {
my $res;
my $sender = $sock->recv($res, 1500, 0);
- my ($resid, $seq, $this_numpkts, $resv) = unpack("nnnn", substr($res, 0, 8));
+ my ($resid, $seq, $this_numpkts, $offset) = unpack("nnnn", substr($res, 0, 8));
die "Response ID of $resid doesn't match request if of $reqid" unless $resid == $reqid;
- die "Reserved area not zero" unless $resv == 0;
+ die "Offset to first response out of bounds" if ($offset < 8 && $offset != 0) || $offset > length($res);
die "num packets changed midstream!" if defined $numpkts && $this_numpkts != $numpkts;
$numpkts = $this_numpkts;
$ret->{$seq} = $res;
Please sign in to comment.
Something went wrong with that request. Please try again.