Skip to content

Commit

Permalink
fix(portal): Fix edge cases with OIDC discovered in logs (#4777)
Browse files Browse the repository at this point in the history
Can be reviewed commit by commit.
  • Loading branch information
AndrewDryga committed May 11, 2024
1 parent 90e2033 commit f5b4736
Show file tree
Hide file tree
Showing 15 changed files with 107 additions and 88 deletions.
15 changes: 13 additions & 2 deletions elixir/apps/api/lib/api/client/channel.ex
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,19 @@ defmodule API.Client.Channel do

OpenTelemetry.Tracer.with_span "client.create_log_sink" do
case Instrumentation.create_remote_log_sink(socket.assigns.client, actor_name, account_slug) do
{:ok, signed_url} -> {:reply, {:ok, signed_url}, socket}
{:error, :disabled} -> {:reply, {:error, %{reason: :disabled}}, socket}
{:ok, signed_url} ->
{:reply, {:ok, signed_url}, socket}

{:error, :disabled} ->
{:reply, {:error, %{reason: :disabled}}, socket}

{:error, reason} ->
Logger.error("Failed to create log sink for client",
client_id: socket.assigns.client.id,
reason: inspect(reason)
)

{:reply, {:error, %{reason: :retry_later}}, socket}
end
end
end
Expand Down
19 changes: 19 additions & 0 deletions elixir/apps/api/test/api/client/channel_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,25 @@ defmodule API.Client.ChannelTest do
assert_reply ref, :error, %{reason: :disabled}
end

test "returns error when google api is not available", %{socket: socket} do
bypass = Bypass.open()

GoogleCloudPlatform.override_endpoint_url(
:metadata_endpoint_url,
"http://localhost:#{bypass.port}/"
)

GoogleCloudPlatform.override_endpoint_url(
:sign_endpoint_url,
"http://localhost:#{bypass.port}/service_accounts/"
)

Bypass.down(bypass)

ref = push(socket, "create_log_sink", %{})
assert_reply ref, :error, %{reason: :retry_later}
end

test "returns a signed URL which can be used to upload the logs", %{
account: account,
socket: socket,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
defmodule Domain.Auth.Adapter.DirectorySync do
defmodule Domain.Auth.Adapter.OpenIDConnect.DirectorySync do
alias Domain.Repo
alias Domain.Jobs.Executors.Concurrent
alias Domain.{Auth, Actors}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
defmodule Domain.Auth.Adapter.OpenIDConnect.RefreshTokens do
require Logger

def refresh_access_tokens(name, adapter) do
providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(name)
Logger.debug("Refreshing access tokens for #{length(providers)} #{name} providers")

Enum.each(providers, fn provider ->
Logger.metadata(
provider_id: provider.id,
account_id: provider.account_id,
adapter: name
)

Logger.debug("Refreshing access token")

fn -> adapter.refresh_access_token(provider) end
|> with_sync_retries()
|> case do
{:ok, _provider} ->
Logger.debug("Finished refreshing access token")

{:error, reason} ->
Logger.error("Failed refreshing access token",
reason: inspect(reason)
)
end
end)
end

defp with_sync_retries(cb, retries_left \\ 3, retry_timeout \\ 100) do
case cb.() do
{:ok, result} ->
{:ok, result}

{:error, _reason} when retries_left > 0 ->
Process.sleep(retry_timeout)
with_sync_retries(cb, retries_left - 1, retry_timeout)

{:error, reason} ->
{:error, reason}
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.GoogleWorkspace.Jobs.RefreshAccessTokens do
every: :timer.minutes(5),
executor: Domain.Jobs.Executors.GloballyUnique

alias Domain.Auth.Adapters.GoogleWorkspace
require Logger

@impl true
def execute(_config) do
providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:google_workspace)
Logger.debug("Refreshing access tokens for #{length(providers)} providers")

Enum.each(providers, fn provider ->
Logger.metadata(
account_id: provider.account_id,
provider_id: provider.id,
provider_adapter: provider.adapter
)

Logger.debug("Refreshing access token")

case GoogleWorkspace.refresh_access_token(provider) do
{:ok, _provider} ->
Logger.debug("Finished refreshing access token")

{:error, reason} ->
Logger.error("Failed refreshing access token",
reason: inspect(reason)
)
end
end)
Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens(
:google_workspace,
Domain.Auth.Adapters.GoogleWorkspace
)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.GoogleWorkspace.Jobs.SyncDirectory do
every: :timer.minutes(2),
executor: Domain.Jobs.Executors.Concurrent

alias Domain.Auth.Adapter.DirectorySync
alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync
alias Domain.Auth.Adapters.GoogleWorkspace
require Logger
require OpenTelemetry.Tracer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.MicrosoftEntra.Jobs.RefreshAccessTokens do
every: :timer.minutes(5),
executor: Domain.Jobs.Executors.GloballyUnique

alias Domain.Auth.Adapters.MicrosoftEntra
require Logger

@impl true
def execute(_config) do
providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:microsoft_entra)
Logger.debug("Refreshing access tokens for #{length(providers)} Microsoft Entra providers")

Enum.each(providers, fn provider ->
Logger.metadata(
account_id: provider.account_id,
provider_id: provider.id,
provider_adapter: provider.adapter
)

Logger.debug("Refreshing access token")

case MicrosoftEntra.refresh_access_token(provider) do
{:ok, _provider} ->
Logger.debug("Finished refreshing access token")

{:error, reason} ->
Logger.error("Failed refreshing access token",
reason: inspect(reason)
)
end
end)
Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens(
:microsoft_entra,
Domain.Auth.Adapters.MicrosoftEntra
)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.MicrosoftEntra.Jobs.SyncDirectory do
every: :timer.minutes(5),
executor: Domain.Jobs.Executors.Concurrent

alias Domain.Auth.Adapter.DirectorySync
alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync
alias Domain.Auth.Adapters.MicrosoftEntra
require Logger
require OpenTelemetry.Tracer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.Okta.Jobs.RefreshAccessTokens do
every: :timer.minutes(5),
executor: Domain.Jobs.Executors.GloballyUnique

alias Domain.Auth.Adapters.Okta
require Logger

@impl true
def execute(_config) do
providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:okta)
Logger.debug("Refreshing access tokens for #{length(providers)} Okta providers")

Enum.each(providers, fn provider ->
Logger.metadata(
account_id: provider.account_id,
provider_id: provider.id,
provider_adapter: provider.adapter
)

Logger.debug("Refreshing access token")

case Okta.refresh_access_token(provider) do
{:ok, _provider} ->
Logger.debug("Finished refreshing access token")

{:error, reason} ->
Logger.error("Failed refreshing access token",
reason: inspect(reason)
)
end
end)
Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens(
:okta,
Domain.Auth.Adapters.Okta
)
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.Okta.Jobs.SyncDirectory do
every: :timer.minutes(5),
executor: Domain.Jobs.Executors.Concurrent

alias Domain.Auth.Adapter.DirectorySync
alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync
alias Domain.Auth.Adapters.Okta
require Logger
require OpenTelemetry.Tracer
Expand Down
14 changes: 11 additions & 3 deletions elixir/apps/domain/lib/domain/auth/adapters/openid_connect.ex
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do

with {:ok, tokens} <- OpenIDConnect.fetch_tokens(config, token_params),
{:ok, claims} <- OpenIDConnect.verify(config, tokens["id_token"]),
{:ok, userinfo} <- OpenIDConnect.fetch_userinfo(config, tokens["access_token"]) do
{:ok, userinfo} <- fetch_userinfo(config, tokens) do
expires_at =
cond do
not is_nil(tokens["expires_in"]) ->
Expand Down Expand Up @@ -274,15 +274,15 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do
{:error, :invalid_token}

{:error, {status, _reason} = other} when status in 400..499 ->
Logger.info("Failed to connect OpenID Connect provider",
Logger.info("Failed to fetch OpenID Connect state",
provider_id: provider.id,
reason: inspect(other)
)

{:error, :invalid_token}

{:error, other} ->
Logger.error("Failed to connect OpenID Connect provider",
Logger.error("Failed to fetch OpenID Connect state",
provider_id: provider.id,
account_id: provider.account_id,
reason: inspect(other)
Expand All @@ -292,6 +292,14 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do
end
end

defp fetch_userinfo(config, tokens) do
case OpenIDConnect.fetch_userinfo(config, tokens["access_token"]) do
{:ok, userinfo} -> {:ok, userinfo}
{:error, :userinfo_endpoint_is_not_implemented} -> {:ok, %{}}
{:error, _reason} -> {:error, :invalid_token}
end
end

defp config_for_provider(%Provider{} = provider) do
Ecto.embedded_load(Settings, provider.adapter_config, :json)
|> Map.from_struct()
Expand Down
2 changes: 1 addition & 1 deletion elixir/apps/domain/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ defmodule Domain.MixProject do
# Auth-related deps
{:plug_crypto, "~> 2.0"},
{:openid_connect,
github: "firezone/openid_connect", ref: "76c1a1c9a9da3b8be7b8270306a9240d80d7696f"},
github: "firezone/openid_connect", ref: "dee689382699fce7a6ca70084ccbc8bc351d3246"},
{:argon2_elixir, "~> 4.0"},

# Erlang Clustering
Expand Down
2 changes: 1 addition & 1 deletion elixir/mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
"number": {:hex, :number, "1.0.4", "3e6e6032a3c1d4c3760e77a42c580a57a15545dd993af380809da30fe51a032c", [:mix], [{:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "16f7516584ef2be812af4f33f2eaf3f9b9f6ed8892f45853eb93113f83721e42"},
"observer_cli": {:hex, :observer_cli, "1.7.4", "3c1bfb6d91bf68f6a3d15f46ae20da0f7740d363ee5bc041191ce8722a6c4fae", [:mix, :rebar3], [{:recon, "~> 2.5.1", [hex: :recon, repo: "hexpm", optional: false]}], "hexpm", "50de6d95d814f447458bd5d72666a74624eddb0ef98bdcee61a0153aae0865ff"},
"openid_connect": {:git, "https://github.com/firezone/openid_connect.git", "76c1a1c9a9da3b8be7b8270306a9240d80d7696f", [ref: "76c1a1c9a9da3b8be7b8270306a9240d80d7696f"]},
"openid_connect": {:git, "https://github.com/firezone/openid_connect.git", "dee689382699fce7a6ca70084ccbc8bc351d3246", [ref: "dee689382699fce7a6ca70084ccbc8bc351d3246"]},
"opentelemetry": {:hex, :opentelemetry, "1.4.0", "f928923ed80adb5eb7894bac22e9a198478e6a8f04020ae1d6f289fdcad0b498", [:rebar3], [{:opentelemetry_api, "~> 1.3.0", [hex: :opentelemetry_api, repo: "hexpm", optional: false]}, {:opentelemetry_semantic_conventions, "~> 0.2", [hex: :opentelemetry_semantic_conventions, repo: "hexpm", optional: false]}], "hexpm", "50b32ce127413e5d87b092b4d210a3449ea80cd8224090fe68d73d576a3faa15"},
"opentelemetry_api": {:hex, :opentelemetry_api, "1.3.0", "03e2177f28dd8d11aaa88e8522c81c2f6a788170fe52f7a65262340961e663f9", [:mix, :rebar3], [{:opentelemetry_semantic_conventions, "~> 0.2", [hex: :opentelemetry_semantic_conventions, repo: "hexpm", optional: false]}], "hexpm", "b9e5ff775fd064fa098dba3c398490b77649a352b40b0b730a6b7dc0bdd68858"},
"opentelemetry_cowboy": {:hex, :opentelemetry_cowboy, "0.3.0", "0144b211fa6cda0e6211c340cebd1bbd9158e350099ea3bf3d838f993cb4b90e", [:rebar3], [{:cowboy_telemetry, "~> 0.4", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:opentelemetry_api, "~> 1.2", [hex: :opentelemetry_api, repo: "hexpm", optional: false]}, {:opentelemetry_telemetry, "~> 1.0", [hex: :opentelemetry_telemetry, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "4f44537b4c7430018198d480f55bc88a40f7d0582c3ad927a5bab4ceb39e80ea"},
Expand Down
2 changes: 1 addition & 1 deletion terraform/environments/production/relays.tf
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ resource "google_monitoring_alert_policy" "connected_relays_count" {
comparison = "COMPARISON_GT"

# at least one relay per region must be always online
threshold_value = length(module.relays[0].instances)
threshold_value = length(module.relays[0].instances) - 1
duration = "0s"

trigger {
Expand Down
2 changes: 1 addition & 1 deletion terraform/modules/google-cloud/ops/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ resource "google_monitoring_alert_policy" "sql_disk_utiliziation_policy" {
}
}

resource "google_monitoring_alert_policy" "genservers_crash_policy" {
resource "google_monitoring_alert_policy" "errors" {
project = var.project_id

display_name = "Errors in logs"
Expand Down

0 comments on commit f5b4736

Please sign in to comment.