Skip to content

Commit

Permalink
feat(gateway): add HTTP health check (#4120)
Browse files Browse the repository at this point in the history
This adds to the gateway the same kind of HTTP health-check that is already
present in the relay. The health-check returns 200 OK for as long as
the gateway is active. The gateway automatically shuts down on fatal
errors (like authentication failures with the portal).

To enable this, I've extracted a crate `http-health-check` that shares
this code between the relay and the gateway.

Resolves: #2465.

---------

Signed-off-by: Thomas Eizinger <thomas@eizinger.io>
Co-authored-by: Reactor Scram <ReactorScram@users.noreply.github.com>
  • Loading branch information
thomaseizinger and ReactorScram committed Mar 13, 2024
1 parent 4c77aae commit 9767bdd
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 25 deletions.
12 changes: 11 additions & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ members = [
"phoenix-channel",
"relay",
"gui-client/src-tauri",
"http-health-check",
]

resolver = "2"
Expand Down Expand Up @@ -45,6 +46,7 @@ firezone-relay = { path = "relay"}
connlib-shared = { path = "connlib/shared"}
firezone-tunnel = { path = "connlib/tunnel"}
phoenix-channel = { path = "phoenix-channel"}
http-health-check = { path = "http-health-check"}

[patch.crates-io]
boringtun = { git = "https://github.com/cloudflare/boringtun", branch = "master" }
Expand Down
1 change: 1 addition & 0 deletions rust/gateway/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ ip_network = { version = "0.4", default-features = false }
dns-lookup = { workspace = true }
libc = { version = "0.2", default-features = false, features = ["std", "const-extern-fn", "extra_traits"] }
either = "1"
http-health-check = { workspace = true }

[dev-dependencies]
serde_json = { version = "1.0", default-features = false, features = ["std"] }
6 changes: 6 additions & 0 deletions rust/gateway/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ async fn try_main() -> Result<()> {

let ctrl_c = pin!(ctrl_c().map_err(anyhow::Error::new));

tokio::spawn(http_health_check::serve(cli.health_check.health_check_addr));

match future::try_select(task, ctrl_c)
.await
.map_err(|e| e.factor_first().0)?
Expand Down Expand Up @@ -127,6 +129,10 @@ impl Callbacks for CallbackHandler {
struct Cli {
#[command(flatten)]
common: CommonArgs,

#[command(flatten)]
health_check: http_health_check::HealthCheckArgs,

/// Identifier generated by the portal to identify and display the device.
#[arg(short = 'i', long, env = "FIREZONE_ID")]
pub firezone_id: Option<String>,
Expand Down
12 changes: 12 additions & 0 deletions rust/http-health-check/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "http-health-check"
# mark:automatic-version
version = "1.0.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] }
tokio = { version = "1.36.0", features = ["net"] }
clap = { version = "4.5.2", features = ["derive", "env"] }
27 changes: 27 additions & 0 deletions rust/http-health-check/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use axum::routing::get;
use axum::Router;
use std::net::SocketAddr;

/// Serves the HTTP health-check endpoint on `addr`.
///
/// Every `GET /healthz` request is answered with 200 OK and an empty body; there is no
/// dedicated "unhealthy" response. To signal an unhealthy state, stop the task that is
/// driving this future.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if binding the TCP listener to `addr` fails
/// or if the server terminates with an error.
pub async fn serve(addr: impl Into<SocketAddr>) -> std::io::Result<()> {
    let socket_addr: SocketAddr = addr.into();

    // The handler yields an empty body: being able to reach it is the health signal.
    let app = Router::new().route("/healthz", get(|| async { "" }));

    let listener = tokio::net::TcpListener::bind(socket_addr).await?;

    // The server future already resolves to `std::io::Result<()>`, so hand it back as-is.
    axum::serve(listener, app.into_make_service()).await
}

// Command-line arguments that configure the HTTP health-check server.
//
// Derives `clap::Args`, so a binary can embed this group into its own CLI definition
// with `#[command(flatten)]` and pass `health_check_addr` on to `serve`.
//
// NOTE(review): plain `//` comments are used here on purpose. With clap's derive,
// `///` doc comments are picked up as help text, so the existing field docs below are
// left untouched and no new doc comments are added.
#[derive(clap::Args, Debug, Clone)]
pub struct HealthCheckArgs {
/// The address of the local interface where we should serve our health-check endpoint.
///
/// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`.
// `long`: exposed as a long-form command-line flag.
// `env`: the value may also come from an environment variable (presumably
//        `HEALTH_CHECK_ADDR`, clap's default derivation from the field name — confirm).
// `hide = true`: the option is omitted from the generated help output.
// `default_value`: falls back to `0.0.0.0:8080` when nothing is supplied.
#[arg(long, env, hide = true, default_value = "0.0.0.0:8080")]
pub health_check_addr: SocketAddr,
}
2 changes: 1 addition & 1 deletion rust/relay/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ url = "2.4.1"
serde = { version = "1.0.196", features = ["derive"] }
trackable = "1.3.0"
socket2 = "0.5.6"
axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] }
backoff = "0.4"
http-health-check = { workspace = true }

[dev-dependencies]
redis = { version = "0.25.0", default-features = false, features = ["tokio-comp"] }
Expand Down
16 changes: 0 additions & 16 deletions rust/relay/src/health_check.rs

This file was deleted.

1 change: 0 additions & 1 deletion rust/relay/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ mod sleep;
mod time_events;
mod udp_socket;

pub mod health_check;
#[cfg(feature = "proptest")]
pub mod proptest;

Expand Down
12 changes: 6 additions & 6 deletions rust/relay/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ struct Args {
/// The public (i.e. internet-reachable) IPv6 address of the relay server.
#[arg(long, env)]
public_ip6_addr: Option<Ipv6Addr>,
/// The address of the local interface where we should serve our health-check endpoint.
///
/// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`.
#[arg(long, env, hide = true, default_value = "0.0.0.0:8080")]
health_check_addr: SocketAddr,
// See https://www.rfc-editor.org/rfc/rfc8656.html#name-allocations
/// The lowest port used for TURN allocations.
#[arg(long, env, hide = true, default_value = "49152")]
Expand Down Expand Up @@ -86,6 +81,9 @@ struct Args {
/// OTLP is vendor-agnostic but for spans to be correctly recognised by Google Cloud, they need the project ID to be set.
#[arg(long, env, hide = true)]
google_cloud_project_id: Option<String>,

#[command(flatten)]
health_check: http_health_check::HealthCheckArgs,
}

#[derive(clap::ValueEnum, Debug, Clone, Copy)]
Expand Down Expand Up @@ -134,7 +132,9 @@ async fn main() -> Result<()> {

let mut eventloop = Eventloop::new(server, channel, public_addr)?;

tokio::spawn(firezone_relay::health_check::serve(args.health_check_addr));
tokio::spawn(http_health_check::serve(
args.health_check.health_check_addr,
));

tracing::info!(target: "relay", "Listening for incoming traffic on UDP port 3478");

Expand Down

0 comments on commit 9767bdd

Please sign in to comment.