From 97eb4eaf077a475625c517adcde74e4596305365 Mon Sep 17 00:00:00 2001 From: Alexey Andriyanov Date: Thu, 15 Jan 2015 14:14:36 +0400 Subject: [PATCH] ipvs: fix problems with config reload Commit 7bf6fc contained a flawed attempt to fix the issue where an alive RS does not appear in a new VSG entry on reload. It did not fix the original issue and introduced a new one: vs_groups lose quorum on config reload. This commit fixes the issue properly, and also handles the case where an RS in a VSG is in inhibit mode. A reloaded flag is added to virtual_server_group_entry_t. ipvs_group_sync_entry: adds alive destinations to the newly created vsge. It is aware of inhibit-on-failure destinations. sync_service_vsg: calls the former for each newly created vsg entry. vsge_exist: changed in the same way as the other *_exist routines. --- keepalived/check/ipvswrapper.c | 101 +++++++++++++++++++++++++++++++ keepalived/check/ipwrapper.c | 72 +++++++++++++++------- keepalived/include/check_data.h | 1 + keepalived/include/ipvswrapper.h | 1 + 4 files changed, 154 insertions(+), 21 deletions(-) diff --git a/keepalived/check/ipvswrapper.c b/keepalived/check/ipvswrapper.c index ca1a4792b..9415556d5 100644 --- a/keepalived/check/ipvswrapper.c +++ b/keepalived/check/ipvswrapper.c @@ -273,6 +273,51 @@ ipvs_cmd(int cmd, virtual_server_t * vs, real_server_t * rs) return err; } + +/* add alive destinations to the newly created vsge */ +int +ipvs_group_sync_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge) +{ + real_server_t *rs; + int err = 0; + element e; + list l = vs->rs; + + /* Clean target rules */ + memset(urule, 0, sizeof (struct ip_vs_rule_user)); + + /* Process realserver queue */ + for (e = LIST_HEAD(l); e; ELEMENT_NEXT(e)) { + rs = ELEMENT_DATA(e); + + if (rs->reloaded && (rs->alive || rs->inhibit && rs->set)) { + /* Prepare the IPVS rule */ + if (urule->daddr) { + /* Setting IPVS rule with vs root rs */ + ipvs_set_rule(IP_VS_SO_SET_ADDDEST, vs, rs); + } else { + urule->daddr = 
inet_sockaddrip4(&rs->addr); + urule->dport = inet_sockaddrport(&rs->addr); + } + urule->weight = rs->inhibit && ! rs->alive ? 0: rs->weight; + + /* Set vs rule */ + if (vsge->range) { + ipvs_group_range_cmd(IP_VS_SO_SET_ADDDEST, vsge); + } else { + urule->vfwmark = vsge->vfwmark; + urule->vaddr = inet_sockaddrip4(&vsge->addr); + urule->vport = inet_sockaddrport(&vsge->addr); + + /* Talk to the IPVS channel */ + err = ipvs_talk(IP_VS_SO_SET_ADDDEST); + } + } + } + + return IPVS_SUCCESS; +} + /* Remove a specific vs group entry */ int ipvs_group_remove_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge) @@ -319,6 +364,7 @@ ipvs_group_remove_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge err = ipvs_group_range_cmd(IP_VS_SO_SET_DEL, vsge); else err = ipvs_talk(IP_VS_SO_SET_DEL); + UNSET_ALIVE(vsge); return err; } @@ -619,6 +665,60 @@ ipvs_cmd(int cmd, virtual_server_t * vs, real_server_t * rs) return IPVS_SUCCESS; } +/* add alive destinations to the newly created vsge */ +int +ipvs_group_sync_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge) +{ + real_server_t *rs; + element e; + list l = vs->rs; + + /* Clean target rules */ + memset(srule, 0, sizeof(ipvs_service_t)); + memset(drule, 0, sizeof(ipvs_dest_t)); + + /* Process realserver queue */ + for (e = LIST_HEAD(l); e; ELEMENT_NEXT(e)) { + rs = ELEMENT_DATA(e); + + if (rs->reloaded && (rs->alive || (rs->inhibit && rs->set))) { + /* Prepare the IPVS rule */ + if (!drule->addr.ip) { + /* Setting IPVS rule with vs root rs */ + ipvs_set_rule(IP_VS_SO_SET_ADDDEST, vs, rs); + } else { + drule->af = rs->addr.ss_family; + if (rs->addr.ss_family == AF_INET6) + inet_sockaddrip6(&rs->addr, &drule->addr.in6); + else + drule->addr.ip = inet_sockaddrip4(&rs->addr); + drule->port = inet_sockaddrport(&rs->addr); + } + drule->weight = rs->inhibit && ! rs->alive ? 
0: rs->weight; + + /* Set vs rule */ + if (vsge->range) { + ipvs_group_range_cmd(IP_VS_SO_SET_ADDDEST, vsge); + } else { + srule->af = vsge->addr.ss_family; + if (vsge->addr.ss_family == AF_INET6) + inet_sockaddrip6(&vsge->addr, &srule->addr.in6); + else + srule->addr.ip = inet_sockaddrip4(&vsge->addr); + srule->port = inet_sockaddrport(&vsge->addr); + srule->fwmark = vsge->vfwmark; + drule->u_threshold = rs->u_threshold; + drule->l_threshold = rs->l_threshold; + + /* Talk to the IPVS channel */ + ipvs_talk(IP_VS_SO_SET_ADDDEST); + } + } + } + + return IPVS_SUCCESS; +} + /* Remove a specific vs group entry */ int ipvs_group_remove_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge) @@ -675,6 +775,7 @@ ipvs_group_remove_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge ipvs_group_range_cmd(IP_VS_SO_SET_DEL, vsge); else ipvs_talk(IP_VS_SO_SET_DEL); + UNSET_ALIVE(vsge); return IPVS_SUCCESS; } diff --git a/keepalived/check/ipwrapper.c b/keepalived/check/ipwrapper.c index 4562114cf..7d26c5988 100644 --- a/keepalived/check/ipwrapper.c +++ b/keepalived/check/ipwrapper.c @@ -159,13 +159,8 @@ init_service_rs(virtual_server_t * vs) for (e = LIST_HEAD(vs->rs); e; ELEMENT_NEXT(e)) { rs = ELEMENT_DATA(e); /* Do not re-add failed RS instantly on reload */ - if (rs->reloaded) { - /* force re-adding of the rs into vs_group: - * we may have new vsg entries */ - if (vs->vsgname) - UNSET_ALIVE(rs); + if (rs->reloaded) continue; - } /* In alpha mode, be pessimistic (or realistic?) and don't * add real servers into the VS pool. They will get there * later upon healthchecks recovery (if ever). 
@@ -180,6 +175,38 @@ init_service_rs(virtual_server_t * vs) return 1; } +static void +sync_service_vsg(virtual_server_t * vs) +{ + virtual_server_group_t *vsg; + virtual_server_group_entry_t *vsge; + list *l; + element e; + + vsg = vs->vsg; + list ll[] = { + vsg->addr_ip, + vsg->vfwmark, + vsg->range, + NULL, + }; + + for (l = ll; *l; l++) + for (e = LIST_HEAD(*l); e; ELEMENT_NEXT(e)) { + vsge = ELEMENT_DATA(e); + if (vs->reloaded && !vsge->reloaded) { + log_message(LOG_INFO, "VS [%s:%d:%u] added into group %s" + , inet_sockaddrtopair(&vsge->addr) + , vsge->range + , vsge->vfwmark + , vs->vsgname); + /* add all reloaded and alive/inhibit-set dests + * to the newly created vsg item */ + ipvs_group_sync_entry(vs, vsge); + } + } +} + /* Set a virtualserver IPVS rules */ static int init_service_vs(virtual_server_t * vs) @@ -196,9 +223,14 @@ init_service_vs(virtual_server_t * vs) if (!init_service_rs(vs)) return 0; - /* if the service was reloaded, we may have got/lost quorum due to quorum setting changed */ - if (vs->reloaded) + if (vs->reloaded) { + if (vs->vsgname) + /* add reloaded dests into new vsg entries */ + sync_service_vsg(vs); + + /* we may have got/lost quorum due to quorum setting changed */ update_quorum_state(vs); + } return 1; } @@ -474,7 +506,7 @@ update_svr_checker_state(int alive, checker_id_t cid, virtual_server_t *vs, real } /* Check if a vsg entry is in new data */ -static int +static virtual_server_group_entry_t * vsge_exist(virtual_server_group_entry_t *vsg_entry, list l) { element e; @@ -482,30 +514,28 @@ vsge_exist(virtual_server_group_entry_t *vsg_entry, list l) for (e = LIST_HEAD(l); e; ELEMENT_NEXT(e)) { vsge = ELEMENT_DATA(e); - if (VSGE_ISEQ(vsg_entry, vsge)) { - /* - * If vsge exist this entry - * is alive since only rs entries - * are changing from alive state. 
- */ - SET_ALIVE(vsge); - return 1; - } + if (VSGE_ISEQ(vsg_entry, vsge)) + return vsge; } - return 0; + return NULL; } /* Clear the diff vsge of old group */ static int clear_diff_vsge(list old, list new, virtual_server_t * old_vs) { - virtual_server_group_entry_t *vsge; + virtual_server_group_entry_t *vsge, *new_vsge; element e; for (e = LIST_HEAD(old); e; ELEMENT_NEXT(e)) { vsge = ELEMENT_DATA(e); - if (!vsge_exist(vsge, new)) { + new_vsge = vsge_exist(vsge, new); + if (new_vsge) { + new_vsge->alive = vsge->alive; + new_vsge->reloaded = 1; + } + else { log_message(LOG_INFO, "VS [%s:%d:%u] in group %s no longer exist" , inet_sockaddrtopair(&vsge->addr) , vsge->range diff --git a/keepalived/include/check_data.h b/keepalived/include/check_data.h index dbedb0f29..c795f6768 100644 --- a/keepalived/include/check_data.h +++ b/keepalived/include/check_data.h @@ -99,6 +99,7 @@ typedef struct _virtual_server_group_entry { uint8_t range; uint32_t vfwmark; int alive; + int reloaded; } virtual_server_group_entry_t; typedef struct _virtual_server_group { diff --git a/keepalived/include/ipvswrapper.h b/keepalived/include/ipvswrapper.h index 2e795929f..f69038a73 100644 --- a/keepalived/include/ipvswrapper.h +++ b/keepalived/include/ipvswrapper.h @@ -92,6 +92,7 @@ do { \ extern int ipvs_start(void); extern void ipvs_stop(void); extern virtual_server_group_t *ipvs_get_group_by_name(char *, list); +extern int ipvs_group_sync_entry(virtual_server_t *vs, virtual_server_group_entry_t *vsge); extern int ipvs_group_remove_entry(virtual_server_t *, virtual_server_group_entry_t *); extern int ipvs_cmd(int, virtual_server_t *, real_server_t *); extern int ipvs_syncd_cmd(int, char *, int, int);