Skip to content

Commit

Permalink
Allow Epmd strategy to reconnect after connection failures (#183)
Browse files Browse the repository at this point in the history
Co-authored-by: Paul Schoenfelder <paulschoenfelder@fastmail.com>
  • Loading branch information
ronaldwind and bitwalker committed Jun 22, 2023
1 parent d908239 commit fcf8a4c
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased

- Use new cipher names
- Allow Epmd strategy to reconnect after connection failures
- Detect Self Signed Certificate Authority for Kubernetes Strategy
- Remove calls to deprecated `Logger.warn/2`

Expand Down
39 changes: 37 additions & 2 deletions lib/strategy/epmd.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,56 @@ defmodule Cluster.Strategy.Epmd do
epmd_example: [
strategy: #{__MODULE__},
config: [
timeout: 30_000,
hosts: [:"a@127.0.0.1", :"b@127.0.0.1"]]]]
An optional `:timeout` (in milliseconds) can be specified in the config. It is
used as the GenServer timeout of the strategy worker: whenever the worker has
been idle for that long, it tries to connect to the configured hosts again.
It defaults to `:infinity`, meaning that the connection process will only
happen once, when the worker is started. In the example above, it has been
configured for 30 seconds.
"""
use GenServer
use Cluster.Strategy

alias Cluster.Strategy.State

# Starts the strategy worker for the given topology state.
#
# Returns `:ignore` when the config contains no `:hosts` (nothing to connect
# to, so no process is needed). Otherwise a GenServer is started with the
# state; a `:hosts` value that is not a list raises a `CaseClauseError`.
@impl true
def start_link([%State{config: config} = state]) do
  case Keyword.get(config, :hosts, []) do
    [] ->
      :ignore

    nodes when is_list(nodes) ->
      # The hosts are connected by `init/1` (and again on every timeout), so
      # no connection attempt is made here; doing so would connect twice.
      GenServer.start_link(__MODULE__, [state])
  end
end

# GenServer initialisation: performs the first connection attempt and arms
# the configured timeout so that `handle_info/2` receives `:timeout` when the
# worker stays idle for that long.
@impl true
def init([state]) do
  _ = connect_hosts(state)
  timeout = configured_timeout(state)

  {:ok, state, timeout}
end

# Handles both the GenServer idle `:timeout` and an explicit `:connect`
# message the same way: attempt to connect to the configured hosts and re-arm
# the configured timeout. Any other message does not match this clause.
@impl true
def handle_info(message, state) when message in [:timeout, :connect] do
  _ = connect_hosts(state)
  timeout = configured_timeout(state)

  {:noreply, state, timeout}
end

# Reads the `:timeout` option from the strategy config, falling back to
# `:infinity` when the option is absent.
@spec configured_timeout(State.t()) :: integer() | :infinity
defp configured_timeout(%State{} = state) do
  case Keyword.fetch(state.config, :timeout) do
    {:ok, timeout} -> timeout
    :error -> :infinity
  end
end

# Asks `Cluster.Strategy.connect_nodes/4` to connect to every node listed
# under `:hosts` in the config (an empty list when the key is missing) and
# returns the state unchanged.
@spec connect_hosts(State.t()) :: State.t()
defp connect_hosts(
       %State{topology: topology, connect: connect, list_nodes: list_nodes} = state
     ) do
  hosts = Keyword.get(state.config, :hosts, [])
  Cluster.Strategy.connect_nodes(topology, connect, list_nodes, hosts)

  state
end
end
63 changes: 48 additions & 15 deletions test/epmd_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,56 @@ defmodule Cluster.Strategy.EpmdTest do

alias Cluster.Strategy.Epmd

import ExUnit.CaptureLog
require Cluster.Nodes

describe "start_link/1" do
  # Both tests hand the test process to the `connect` MFA and expect one
  # `{:connect, node}` message per connection attempt.

  @tag capture_log: true
  test "starts GenServer and connects nodes" do
    {:ok, pid} =
      Epmd.start_link([
        %Cluster.Strategy.State{
          topology: :name,
          config: [hosts: [:foo@bar]],
          connect: {Cluster.Nodes, :connect, [self()]},
          list_nodes: {Cluster.Nodes, :list_nodes, [[]]}
        }
      ])

    assert is_pid(pid)

    assert_receive {:connect, :foo@bar}, 5_000
  end

  @tag capture_log: true
  test "reconnects every time the configured timeout was reached" do
    timeout = 500
    # Monotonic time is used so that wall-clock adjustments cannot distort
    # the measured duration.
    started_at = System.monotonic_time(:millisecond)

    {:ok, _pid} =
      Epmd.start_link([
        %Cluster.Strategy.State{
          topology: :name,
          config: [hosts: [:foo@bar], timeout: timeout],
          connect: {Cluster.Nodes, :connect, [self()]},
          list_nodes: {Cluster.Nodes, :list_nodes, [[]]}
        }
      ])

    # Initial connect
    assert_receive {:connect, :foo@bar}, 5_000

    # First reconnect should not have happened right away,
    # but it should happen after a timeout
    refute_received {:connect, _}
    assert_receive {:connect, :foo@bar}, 2 * timeout

    # A consecutive reconnect should not have happened right away,
    # but it should happen after a timeout
    refute_received {:connect, _}
    assert_receive {:connect, :foo@bar}, 2 * timeout

    # Two full timeouts must have elapsed by now. `>=` is used because the
    # elapsed time is truncated to whole milliseconds.
    duration = System.monotonic_time(:millisecond) - started_at
    assert duration >= 2 * timeout
  end
end
end

0 comments on commit fcf8a4c

Please sign in to comment.