From 1cc28eb9ca06ae6834a779ee889f8b9c3ab8c2b9 Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Fri, 24 Jun 2016 16:21:05 -0700 Subject: [PATCH 1/6] TS-4475: Modified the event handlers in LogCollationClientSM.cc to handle and ignore the VC_EVENT_INACTIVITY_TIMEOUT event. Previously, this event was un-handled and would cause a core dump. (cherry picked from commit 0aefbe5349432e44031401ca6a60535a8516adcb) --- proxy/logging/LogCollationClientSM.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proxy/logging/LogCollationClientSM.cc b/proxy/logging/LogCollationClientSM.cc index 0d459e8612a..3eb7ca083f3 100644 --- a/proxy/logging/LogCollationClientSM.cc +++ b/proxy/logging/LogCollationClientSM.cc @@ -266,6 +266,9 @@ LogCollationClientSM::client_auth(int event, VIO * /* vio ATS_UNUSED */) return client_send(LOG_COLL_EVENT_SWITCH, NULL); + case VC_EVENT_INACTIVITY_TIMEOUT: + Debug("log-coll", "[%d]client::client_auth - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return EVENT_CONT; case VC_EVENT_EOS: case VC_EVENT_ERROR: { Debug("log-coll", "[%d]client::client_auth - EOS|ERROR", m_id); @@ -449,6 +452,9 @@ LogCollationClientSM::client_idle(int event, void * /* data ATS_UNUSED */) m_client_state = LOG_COLL_CLIENT_IDLE; return EVENT_CONT; + case VC_EVENT_INACTIVITY_TIMEOUT: + Debug("log-coll", "[%d]client::client_idle - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return EVENT_CONT; case VC_EVENT_EOS: case VC_EVENT_ERROR: Debug("log-coll", "[%d]client::client_idle - EOS|ERROR", m_id); @@ -663,6 +669,9 @@ LogCollationClientSM::client_send(int event, VIO * /* vio ATS_UNUSED */) // switch back to client_send return client_send(LOG_COLL_EVENT_SWITCH, NULL); + case VC_EVENT_INACTIVITY_TIMEOUT: + Debug("log-coll", "[%d]client::client_send - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return EVENT_CONT; case VC_EVENT_EOS: case VC_EVENT_ERROR: { Debug("log-coll", "[%d]client::client_send - EOS|ERROR", m_id); From c4344440715e72fc87fa5c3ad4b788317f013e80 Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Wed, 3 Aug 2016 12:20:45 -0700 Subject: [PATCH 2/6] TS-4475 : Handle VC_EVENT_INACTIVITY_TIMEOUT in LogCollationClientSM.cc Based on suggestions from Oknet Xu, we now close the client connection on the timeout event instead of ignoring it. Also, we set the net_vc associated with the client connection to have a timeout of one day or 86400 seconds instead of the default value. (cherry picked from commit efd3582b92d825649216705521dbe92784a7279b) --- proxy/logging/LogCollationClientSM.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/proxy/logging/LogCollationClientSM.cc b/proxy/logging/LogCollationClientSM.cc index 3eb7ca083f3..564b0c6b348 100644 --- a/proxy/logging/LogCollationClientSM.cc +++ b/proxy/logging/LogCollationClientSM.cc @@ -267,8 +267,8 @@ LogCollationClientSM::client_auth(int event, VIO * /* vio ATS_UNUSED */) return client_send(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_auth - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return EVENT_CONT; + Debug("log-coll", "[%d]client::client_auth - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: { Debug("log-coll", "[%d]client::client_auth - EOS|ERROR", m_id); @@ -453,8 +453,8 @@ LogCollationClientSM::client_idle(int event, void * /* data ATS_UNUSED */) return EVENT_CONT; case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_idle - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return EVENT_CONT; + Debug("log-coll", "[%d]client::client_idle - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: Debug("log-coll", "[%d]client::client_idle - EOS|ERROR", m_id); @@ -553,6 +553,9 @@ LogCollationClientSM::client_open(int event, NetVConnection *net_vc) ink_assert(net_vc != NULL); m_host_vc = net_vc; + // assign a non-default-inactivity-timeout + net_vc->set_inactivity_timeout(HRTIME_SECONDS(86400)); + // setup a client reader just for detecting a host disconnnect // (iocore should call back this function with and EOS/ERROR) m_abort_vio = m_host_vc->do_io_read(this, 1, m_abort_buffer); @@ -670,8 +673,8 @@ LogCollationClientSM::client_send(int event, VIO * /* vio ATS_UNUSED */) return client_send(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_send - Ignoring VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return EVENT_CONT; + Debug("log-coll", "[%d]client::client_send - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); + return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: { Debug("log-coll", "[%d]client::client_send - EOS|ERROR", m_id); From 569407f970489f3d16b835cc7f990d902e946300 Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Fri, 5 Aug 2016 16:35:09 -0700 Subject: [PATCH 3/6] TS-4475 : Handle VC_EVENT_INACTIVITY_TIMEOUT in LogCollationClientSM.cc Based on suggestions from Susan Hinrich, we now also handle the VC_EVENT_ACTIVE_TIMEOUT event. In addition, we also perform the same clean-up activities performed for the EOS and ERROR events in addition to closing the client connection. (cherry picked from commit 483412986c5875ed2451b72328e8548dfc7ef207) --- proxy/logging/LogCollationClientSM.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/proxy/logging/LogCollationClientSM.cc b/proxy/logging/LogCollationClientSM.cc index 564b0c6b348..3b743c37333 100644 --- a/proxy/logging/LogCollationClientSM.cc +++ b/proxy/logging/LogCollationClientSM.cc @@ -266,12 +266,11 @@ LogCollationClientSM::client_auth(int event, VIO * /* vio ATS_UNUSED */) return client_send(LOG_COLL_EVENT_SWITCH, NULL); + case VC_EVENT_ACTIVE_TIMEOUT: case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_auth - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: { - Debug("log-coll", "[%d]client::client_auth - EOS|ERROR", m_id); + Debug("log-coll", "[%d]client::client_auth - TIMEOUT|EOS|ERROR", m_id); int64_t read_avail = m_auth_reader->read_avail(); if (read_avail > 0) { @@ -452,12 +451,11 @@ LogCollationClientSM::client_idle(int event, void * /* data ATS_UNUSED */) m_client_state = LOG_COLL_CLIENT_IDLE; return EVENT_CONT; + case VC_EVENT_ACTIVE_TIMEOUT: case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_idle - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: - Debug("log-coll", "[%d]client::client_idle - EOS|ERROR", m_id); + Debug("log-coll", "[%d]client::client_idle - TIMEOUT|EOS|ERROR", m_id); return client_fail(LOG_COLL_EVENT_SWITCH, NULL); default: @@ -672,12 +670,11 @@ LogCollationClientSM::client_send(int event, VIO * /* vio ATS_UNUSED */) // switch back to client_send return client_send(LOG_COLL_EVENT_SWITCH, NULL); + case VC_EVENT_ACTIVE_TIMEOUT: case VC_EVENT_INACTIVITY_TIMEOUT: - Debug("log-coll", "[%d]client::client_send - closing on VC_EVENT_INACTIVITY_TIMEOUT", m_id); - return client_fail(LOG_COLL_EVENT_SWITCH, NULL); case VC_EVENT_EOS: case VC_EVENT_ERROR: { - Debug("log-coll", "[%d]client::client_send - EOS|ERROR", m_id); + Debug("log-coll", "[%d]client::client_send - TIMEOUT|EOS|ERROR", m_id); int64_t read_avail = m_send_reader->read_avail(); if (read_avail > 0) { From 6f3795631011da4232ebe99c94206168318a9b47 Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Tue, 9 Aug 2016 16:52:47 -0700 Subject: [PATCH 4/6] TS-4475: Fix un-handled event crash in LogCollationClientSM.cc I modified the comment and used the post-assignment pointer (instead of the pre-assignment pointer) to implement an explicit inactivity time-out value for Log Collation client net-vcs so that they do not use the default defined in the config. (cherry picked from commit cb2fa3c5159f17dc5a8e75bfaa4e3cacd8251071) --- proxy/logging/LogCollationClientSM.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy/logging/LogCollationClientSM.cc b/proxy/logging/LogCollationClientSM.cc index 3b743c37333..abbf8bb1cba 100644 --- a/proxy/logging/LogCollationClientSM.cc +++ b/proxy/logging/LogCollationClientSM.cc @@ -551,8 +551,8 @@ LogCollationClientSM::client_open(int event, NetVConnection *net_vc) ink_assert(net_vc != NULL); m_host_vc = net_vc; - // assign a non-default-inactivity-timeout - net_vc->set_inactivity_timeout(HRTIME_SECONDS(86400)); + // assign an explicit inactivity timeout so that it will not get the default value later + m_host_vc->set_inactivity_timeout(HRTIME_SECONDS(86400)); // setup a client reader just for detecting a host disconnnect // (iocore should call back this function with and EOS/ERROR) From a23a620c53c50e1f15e10cbdc3057877697a59ad Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Mon, 22 Aug 2016 12:56:18 -0700 Subject: [PATCH 5/6] TS-4475: Fix un-handled time-out event crash in Log Collation 1. For collation hosts and clients, handle VC_EVENT_ACTIVE_TIMEOUT and VC_EVENT_INACTIVITY_TIMEOUT events and treat them the same as other error conditions (EOS and ERROR) including performing any clean-up operations. 2. Added two configuration options for log collation VC time-outs -- - proxy.config.log.collation_host_timeout INT 86390 - proxy.config.log.collation_client_timeout INT 86400 These options control how long before the host and client ends of the log collation net-vc will time-out after being inactive. The host end defaults to 10 fewer seconds to avoid any race condition. If the host disconnects first, the client will re-establish with a fresh connection. (cherry picked from commit b6ed91805d9769394131b577045fb6331a25f5c8) --- doc/admin-guide/files/records.config.en.rst | 8 ++++++++ mgmt/RecordsConfig.cc | 4 ++++ proxy/logging/LogCollationClientSM.cc | 7 ++++++- proxy/logging/LogCollationHostSM.cc | 16 ++++++++++++++-- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index 58e57c65c24..d40ca1ad0a7 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -2427,6 +2427,14 @@ refer to :ref:`admin-monitoring-logging-formats` and :file:`logs_xml.config`. The number of seconds between collation server connection retries. +.. ts:cv:: CONFIG proxy.config.log.collation_host_timeout INT 86390 + + The number of seconds before active or inactivity time-out events for the server side. + +.. ts:cv:: CONFIG proxy.config.log.collation_client_timeout INT 86400 + + The number of seconds before active or inactivity time-out events for the client side. + .. ts:cv:: CONFIG proxy.config.log.rolling_enabled INT 1 :reloadable: diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc index 221d0cae60b..0d56c326442 100644 --- a/mgmt/RecordsConfig.cc +++ b/mgmt/RecordsConfig.cc @@ -1172,6 +1172,10 @@ static const RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.log.collation_preproc_threads", RECD_INT, "1", RECU_DYNAMIC, RR_REQUIRED, RECC_INT, "[1-128]", RECA_NULL} , + {RECT_CONFIG, "proxy.config.log.collation_host_timeout", RECD_INT, "86390", RECU_NULL, RR_NULL, RECC_NULL, NULL, RECA_NULL} + , + {RECT_CONFIG, "proxy.config.log.collation_client_timeout", RECD_INT, "86400", RECU_NULL, RR_NULL, RECC_NULL, NULL, RECA_NULL} + , {RECT_CONFIG, "proxy.config.log.rolling_enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-4]", RECA_NULL} , {RECT_CONFIG, "proxy.config.log.rolling_interval_sec", RECD_INT, "86400", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL} diff --git a/proxy/logging/LogCollationClientSM.cc b/proxy/logging/LogCollationClientSM.cc index abbf8bb1cba..8f46e9e3e7a 100644 --- a/proxy/logging/LogCollationClientSM.cc +++ b/proxy/logging/LogCollationClientSM.cc @@ -520,6 +520,8 @@ LogCollationClientSM::client_init(int event, void * /* data ATS_UNUSED */) int LogCollationClientSM::client_open(int event, NetVConnection *net_vc) { + RecInt rec_timeout; + int timeout = 86400; ip_port_text_buffer ipb; Debug("log-coll", "[%d]client::client_open", m_id); @@ -552,7 +554,10 @@ LogCollationClientSM::client_open(int event, NetVConnection *net_vc) m_host_vc = net_vc; // assign an explicit inactivity timeout so that it will not get the default value later - m_host_vc->set_inactivity_timeout(HRTIME_SECONDS(86400)); + if (RecGetRecordInt("proxy.config.log.collation_client_timeout", &rec_timeout) == REC_ERR_OKAY) { + timeout = rec_timeout; + } + m_host_vc->set_inactivity_timeout(HRTIME_SECONDS(timeout)); // setup a client reader just for detecting a host disconnnect // (iocore should call back this function with and EOS/ERROR) diff --git a/proxy/logging/LogCollationHostSM.cc b/proxy/logging/LogCollationHostSM.cc index e081f8c851b..916314c5fee 100644 --- a/proxy/logging/LogCollationHostSM.cc +++ b/proxy/logging/LogCollationHostSM.cc @@ -74,10 +74,18 @@ LogCollationHostSM::LogCollationHostSM(NetVConnection *client_vc) m_client_port(0), m_id(ID++) { + RecInt rec_timeout; + int timeout = 86390; Debug("log-coll", "[%d]host::constructor", m_id); ink_assert(m_client_vc != NULL); + // assign an explicit inactivity timeout so that it will not get the default value later + if (RecGetRecordInt("proxy.config.log.collation_host_timeout", &rec_timeout) == REC_ERR_OKAY) { + timeout = rec_timeout; + } + m_client_vc->set_inactivity_timeout(HRTIME_SECONDS(timeout)); + // get client info m_client_ip = m_client_vc->get_remote_ip(); m_client_port = m_client_vc->get_remote_port(); @@ -412,9 +420,11 @@ LogCollationHostSM::read_hdr(int event, VIO *vio) ink_assert(m_read_bytes_wanted == m_read_bytes_received); return read_body(LOG_COLL_EVENT_SWITCH, NULL); + case VC_EVENT_ACTIVE_TIMEOUT: + case VC_EVENT_INACTIVITY_TIMEOUT: case VC_EVENT_EOS: case VC_EVENT_ERROR: - Debug("log-coll", "[%d]host::read_hdr - EOS|ERROR", m_id); + Debug("log-coll", "[%d]host::read_hdr - TIMEOUT|EOS|ERROR", m_id); return read_done(LOG_COLL_EVENT_ERROR, NULL); default: @@ -470,9 +480,11 @@ LogCollationHostSM::read_body(int event, VIO *vio) ink_assert(m_read_bytes_wanted == m_read_bytes_received); return read_done(LOG_COLL_EVENT_READ_COMPLETE, NULL); + case VC_EVENT_ACTIVE_TIMEOUT: + case VC_EVENT_INACTIVITY_TIMEOUT: case VC_EVENT_EOS: case VC_EVENT_ERROR: - Debug("log-coll", "[%d]host::read_body - EOS|ERROR", m_id); + Debug("log-coll", "[%d]host::read_body - TIMEOUT|EOS|ERROR", m_id); return read_done(LOG_COLL_EVENT_ERROR, NULL); default: From fb4ad2cdc92f6023b7cf61b8836a3535f3e41a0b Mon Sep 17 00:00:00 2001 From: Peter Chou Date: Thu, 1 Sep 2016 12:06:24 -0700 Subject: [PATCH 6/6] TS-4475: Fix un-handled time-out event crash in Log Collation Updated the documenation to explain the difference in host/client default time-outs. (cherry picked from commit 4c03b08851eb55ee92537029fbc99cf760542ed6) --- doc/admin-guide/files/records.config.en.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index d40ca1ad0a7..f1aa73ac5ec 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -2429,11 +2429,21 @@ refer to :ref:`admin-monitoring-logging-formats` and :file:`logs_xml.config`. .. ts:cv:: CONFIG proxy.config.log.collation_host_timeout INT 86390 - The number of seconds before active or inactivity time-out events for the server side. + The number of seconds before inactivity time-out events for the host side. + This setting over-rides the default set with proxy.config.net.default_inactivity_timeout + for log collation connections. + + The default is set for 10s less on the host side to help prevent any possible race + conditions. If the host disconnects first, the client will see the disconnect + before its own time-out and re-connect automatically. If the client does not see + the disconnect, i.e., connection is "locked-up" for some reason, it will disconnect + when it reaches its own time-out and then re-connect automatically. .. ts:cv:: CONFIG proxy.config.log.collation_client_timeout INT 86400 - The number of seconds before active or inactivity time-out events for the client side. + The number of seconds before inactivity time-out events for the client side. + This setting over-rides the default set with proxy.config.net.default_inactivity_timeout + for log collation connections. .. ts:cv:: CONFIG proxy.config.log.rolling_enabled INT 1 :reloadable: