From 9ff031dacdad0d91253b6d746dcd03adb6f6de1e Mon Sep 17 00:00:00 2001 From: penge Date: Sat, 4 Apr 2020 14:31:51 +0000 Subject: [PATCH 1/2] Fix flaky test The problem was caused by not waiting for the asynchronous kill() to finish, before making a request to get connectionPool.size --- test/behavior/resurrect.test.js | 6 ++-- test/behavior/sniff.test.js | 59 ++++++++++++++++++++++----------- test/utils/buildCluster.js | 42 ++++++++++++++--------- 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/test/behavior/resurrect.test.js b/test/behavior/resurrect.test.js index 18bb4c164..28ba78e87 100644 --- a/test/behavior/resurrect.test.js +++ b/test/behavior/resurrect.test.js @@ -49,8 +49,7 @@ test('Should execute the recurrect API with the ping strategy', t => { }) q.add((q, done) => { - cluster.kill('node0') - setTimeout(done, 100) + cluster.kill('node0', done) }) q.add((q, done) => { @@ -173,8 +172,7 @@ test('Should execute the recurrect API with the optimistic strategy', t => { }) q.add((q, done) => { - cluster.kill('node0') - setTimeout(done, 100) + cluster.kill('node0', done) }) q.add((q, done) => { diff --git a/test/behavior/sniff.test.js b/test/behavior/sniff.test.js index e1d686c4d..28e73687c 100644 --- a/test/behavior/sniff.test.js +++ b/test/behavior/sniff.test.js @@ -111,8 +111,8 @@ test('Should handle hostnames in publish_address', t => { }) }) -test('Sniff interval', { skip: 'Flaky on CI' }, t => { - t.plan(10) +test('Sniff interval', t => { + t.plan(13) buildCluster(({ nodes, shutdown, kill }) => { const client = new Client({ @@ -120,29 +120,50 @@ test('Sniff interval', { skip: 'Flaky on CI' }, t => { sniffInterval: 50 }) - // this event will be triggered by api calls - client.on(events.SNIFF, (err, request) => { + // Should be 1 because SNIFF wasn't run yet + t.strictEqual(client.connectionPool.size, 1) + + // SNIFF is triggered by API calls + // See Transport.js => makeRequest() => getConnection() => sniff() + let run = 0 + let expectedSize + client.on(events.SNIFF, (err, result) => { + run += 1 + if (run > 3) { return } + t.error(err) - const { hosts, reason } = request.meta.sniff - t.strictEqual( - client.connectionPool.size, - hosts.length - ) + const { reason, hosts } = result.meta.sniff t.strictEqual(reason, Transport.sniffReasons.SNIFF_INTERVAL) - }) - t.strictEqual(client.connectionPool.size, 1) - setTimeout(() => client.info(t.error), 60) + // Test assumptions about connectionPool and hosts + t.strictEqual(client.connectionPool.size, expectedSize) + t.strictEqual(hosts.length, expectedSize) - setTimeout(() => { - // let's kill a node - kill('node1') - client.info(t.error) - }, 150) + if (run === 3) { + return // at this point, 'node1' and 'node2' should be killed + } + + // Should kill the node and run SNIFF again + // Should get here 2x, to kill 'node1' and 'node2' + kill(`node${run}`, () => { + setTimeout(() => { + expectedSize = 4 - run // from 4 to 3, later 2 + client.info() + }, 60) // wait > sniffInterval + }) + }) + // SNIFF should be run only when: + // 1) delay is greater than sniffInterval + // 2) delay is greater than time of last sniff + sniffInterval + setTimeout(() => client.info(), 20) + setTimeout(() => client.info(), 30) setTimeout(() => { - t.strictEqual(client.connectionPool.size, 3) - }, 200) + expectedSize = 4 + client.info() + }, 60) // meets 1) and 2) + setTimeout(() => client.info(), 70) + setTimeout(() => client.info(), 80) t.teardown(shutdown) }) diff --git a/test/utils/buildCluster.js b/test/utils/buildCluster.js index dbbaf04ab..629de5009 100644 --- a/test/utils/buildCluster.js +++ b/test/utils/buildCluster.js @@ -55,31 +55,41 @@ function buildCluster (options, callback) { } function shutdown () { - debug(`Shutting down cluster '${clusterId}'`) - Object.keys(nodes).forEach(kill) - } + const nodeIds = Object.keys(nodes) + debug(`Shutting down cluster '${clusterId}' with ${nodeIds.length} nodes`) - function kill (id) { - debug(`Shutting down cluster node '${id}' (cluster id: '${clusterId}')`) - nodes[id].server.stop() - delete nodes[id] - delete sniffResult.nodes[id] + let shut = 0 + nodeIds.forEach(id => kill(id, (shutNodeId) => { + debug(`Shut down node '${shutNodeId}' (cluster '${clusterId}')`) + + shut += 1 + if (shut === nodeIds.length) { + debug(`Shutting down cluster '${clusterId}' DONE!`) + } + })) } - function spawn (id, callback) { - debug(`Spawning cluster node '${id}' (cluster id: '${clusterId}')`) - q.add(bootNode, { id }) - q.add((q, done) => { - callback() - done() + function kill (id, callback) { + if (!nodes[id]) { + callback(null) + return + } + + debug(`Shutting down cluster node '${id}' (cluster id: '${clusterId}')`) + nodes[id].server.stop((err) => { + if (err) { + throw err // Failed to stop + } + delete nodes[id] + delete sniffResult.nodes[id] + callback(id) }) } const cluster = { nodes, shutdown, - kill, - spawn + kill } q.drain(done => { From b070706a3d950303cd6d192ba13aba45887c088a Mon Sep 17 00:00:00 2001 From: penge Date: Sat, 11 Apr 2020 16:43:04 +0000 Subject: [PATCH 2/2] Check if conn exists before marking it dead/alive --- lib/pool/ConnectionPool.js | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/lib/pool/ConnectionPool.js b/lib/pool/ConnectionPool.js index fa3bc04fb..a124aa73e 100644 --- a/lib/pool/ConnectionPool.js +++ b/lib/pool/ConnectionPool.js @@ -31,6 +31,24 @@ class ConnectionPool extends BaseConnectionPool { ) } + /** + * Checks if the connection is present in this.connections + * + * @param {number} connectionId + * + * @returns {boolean} True if the connection is found, otherwise False + */ + hasConnection (connectionId) { + return this.connections.find(conn => conn.id === connectionId) + } + + removeDead (connectionId) { + const index = this.dead.indexOf(connectionId) + if (index > -1) { + this.dead.splice(index, 1) + } + } + /** * Marks a connection as 'alive'. * If needed removes the connection from the dead list @@ -40,9 +58,14 @@ class ConnectionPool extends BaseConnectionPool { */ markAlive (connection) { const { id } = connection - debug(`Marking as 'alive' connection '${id}'`) - const index = this.dead.indexOf(id) - if (index > -1) this.dead.splice(index, 1) + if (!this.hasConnection(id)) { + debug(`markAlive: Cannot mark connection '${id}' as alive (connection not found)`) + this.removeDead(id) + return + } + + debug(`markAlive: Marking as 'alive' connection '${id}'`) + this.removeDead(id) connection.status = Connection.statuses.ALIVE connection.deadCount = 0 connection.resurrectTimeout = 0 @@ -58,7 +81,12 @@ class ConnectionPool extends BaseConnectionPool { */ markDead (connection) { const { id } = connection - debug(`Marking as 'dead' connection '${id}'`) + if (!this.hasConnection(id)) { + debug(`markDead: Cannot mark connection '${id}' as dead (connection not found)`) + return + } + + debug(`markDead: Marking as 'dead' connection '${id}'`) if (this.dead.indexOf(id) === -1) { this.dead.push(id) }