Skip to content

Commit

Permalink
cassandra: Fallback consistency fix - it wasn't used permanently
Browse files Browse the repository at this point in the history
The "do I want to retry using primary consistency?" check was based on
the timestamp of the first failure, so once 60 seconds had passed since
that failure the check always returned TRUE. Instead, the check should use
the timestamp of the last query that was sent with the primary consistency.
  • Loading branch information
sirainen committed Mar 27, 2017
1 parent a8a5f2f commit 2a24f35
Showing 1 changed file with 18 additions and 4 deletions.
22 changes: 18 additions & 4 deletions src/lib-sql/driver-cassandra.c
Expand Up @@ -102,7 +102,7 @@ struct cassandra_db {
struct timeout *to_metrics;
uint64_t counters[CASSANDRA_COUNTER_COUNT];

struct timeval first_fallback_sent[CASSANDRA_QUERY_TYPE_COUNT];
struct timeval primary_query_last_sent[CASSANDRA_QUERY_TYPE_COUNT];
time_t last_fallback_warning[CASSANDRA_QUERY_TYPE_COUNT];
unsigned int fallback_failures[CASSANDRA_QUERY_TYPE_COUNT];

Expand Down Expand Up @@ -782,8 +782,7 @@ static void query_resend_with_fallback(struct cassandra_result *result)
db->last_fallback_warning[result->query_type] = ioloop_time;
}
i_free_and_null(result->error);
if (db->fallback_failures[result->query_type]++ == 0)
db->first_fallback_sent[result->query_type] = ioloop_timeval;
db->fallback_failures[result->query_type]++;

result->consistency = result->fallback_consistency;
driver_cassandra_result_send_query(result);
Expand Down Expand Up @@ -899,14 +898,27 @@ driver_cassandra_want_fallback_query(struct cassandra_result *result)

if (failure_count == 0)
return FALSE;
tv = db->first_fallback_sent[result->query_type];
/* double the retries every time. */
for (i = 1; i < failure_count; i++) {
msecs *= 2;
if (msecs >= CASSANDRA_FALLBACK_MAX_RETRY_MSECS) {
msecs = CASSANDRA_FALLBACK_MAX_RETRY_MSECS;
break;
}
}
/* If (the timestamp of the last sent primary query + msecs) is older
than the current time, we need to retry the primary query. Note that in
practice this prevents multiple primary queries from being attempted
simultaneously, because the caller updates primary_query_last_sent
immediately after this check returns.
The only time multiple primary queries can be running in parallel is
when an earlier query is slow and hasn't finished in time. This could
even be a desirable feature: while the first query might have to wait
for a timeout, Cassandra could have been fixed in the meantime, so the
second query finishes successfully. */
tv = db->primary_query_last_sent[result->query_type];
timeval_add_msecs(&tv, msecs);
return timeval_cmp(&ioloop_timeval, &tv) < 0;
}
Expand Down Expand Up @@ -943,6 +955,8 @@ static int driver_cassandra_send_query(struct cassandra_result *result)

if (driver_cassandra_want_fallback_query(result))
result->consistency = result->fallback_consistency;
else
db->primary_query_last_sent[result->query_type] = ioloop_timeval;

driver_cassandra_result_send_query(result);
result->query_sent = TRUE;
Expand Down

0 comments on commit 2a24f35

Please sign in to comment.