Skip to content

Commit

Permalink
enhancement: Configurable database connection retries (#1926)
Browse files Browse the repository at this point in the history
Allow users to configure the retry settings for establishing a database
connection. This is helpful in CI environments where the database
service might not be available right away.

Fixes #1919

Signed-off-by: Charith Ellawala <charith@cerbos.dev>

Signed-off-by: Charith Ellawala <charith@cerbos.dev>
  • Loading branch information
charithe committed Dec 28, 2023
1 parent 89dcf2c commit ddcc341
Show file tree
Hide file tree
Showing 14 changed files with 142 additions and 46 deletions.
11 changes: 11 additions & 0 deletions docs/modules/configuration/pages/storage.adoc
Expand Up @@ -240,6 +240,11 @@ storage:
maxIdle: 5
----

=== Connection retries

include::partial$connretry.adoc[]


[#postgres-schema]
=== Database object definitions

Expand Down Expand Up @@ -329,6 +334,9 @@ storage:
maxIdle: 5
----

=== Connection retries

include::partial$connretry.adoc[]

[#mysql-schema]
=== Database object definitions
Expand Down Expand Up @@ -382,6 +390,9 @@ storage:
maxIdle: 5
----

=== Connection retries

include::partial$connretry.adoc[]

[#sqlserver-schema]
=== Database object definitions
Expand Down
7 changes: 7 additions & 0 deletions docs/modules/configuration/partials/connretry.adoc
@@ -0,0 +1,7 @@
Cerbos attempts to connect to the database on startup and exits if a connection cannot be established after three attempts. You can configure the connection retry settings using the `connRetry` options.

`maxAttempts`:: Maximum number of connection attempts before giving up
`initialInterval`:: The time to wait before the second connection attempt. Subsequent attempts have increasing wait times (exponential backoff) derived from a combination of this value and the retry attempt number
`maxInterval`:: Maximum amount of time to wait between retries. This affects the maximum value produced by the exponential backoff algorithm.

CAUTION: Changing the retry settings affects the availability of Cerbos and the time it takes to detect and recover from a failure. For example, if the database connection details are incorrect or have changed, it will take longer for a Cerbos PDP to fail on startup because of retries.
12 changes: 12 additions & 0 deletions docs/modules/configuration/partials/fullconfiguration.adoc
Expand Up @@ -170,6 +170,10 @@ storage:
maxIdleTime: 45s
maxOpen: 4
maxIdle: 1
connRetry:
maxAttempts: 3
initialInterval: 0.5s
maxInterval: 60s
dsn: "user:password@tcp(localhost:3306)/db?interpolateParams=true" # Required. DSN is the data source connection string.
serverPubKey:
mykey: testdata/server_public_key.pem
Expand All @@ -192,6 +196,10 @@ storage:
maxIdleTime: 45s
maxOpen: 4
maxIdle: 1
connRetry:
maxAttempts: 3
initialInterval: 0.5s
maxInterval: 60s
skipSchemaCheck: false # SkipSchemaCheck skips checking for required database tables on startup.
url: "postgres://user:password@localhost:port/db" # Required. URL is the Postgres connection URL. See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
sqlite3:
Expand All @@ -204,6 +212,10 @@ storage:
maxIdleTime: 45s
maxOpen: 4
maxIdle: 1
connRetry:
maxAttempts: 3
initialInterval: 0.5s
maxInterval: 60s
skipSchemaCheck: false # SkipSchemaCheck skips checking for required database tables on startup.
url: "sqlserver://username:password@host/instance?param1=value&param2=value" # Required. URL is the SQL Server connection URL. See https://github.com/microsoft/go-mssqldb#connection-parameters-and-dsn.
telemetry:
Expand Down
82 changes: 82 additions & 0 deletions internal/storage/db/internal/conf.go
@@ -0,0 +1,82 @@
// Copyright 2021-2023 Zenauth Ltd.
// SPDX-License-Identifier: Apache-2.0

package internal

import (
"errors"
"time"

"github.com/cenkalti/backoff/v4"
"github.com/jmoiron/sqlx"
)

const (
// defaultRetryMaxAttempts is the retry limit that BackoffConf passes to
// backoff.WithMaxRetries when it is called on a nil *ConnRetryConf.
defaultRetryMaxAttempts = 3
)

// ConnPoolConf holds common SQL connection pool settings.
// The values are applied to a *sqlx.DB by Configure.
type ConnPoolConf struct {
// MaxLifetime is handed to db.SetConnMaxLifetime.
MaxLifetime time.Duration `yaml:"maxLifeTime"`
// MaxIdleTime is handed to db.SetConnMaxIdleTime.
MaxIdleTime time.Duration `yaml:"maxIdleTime"`
// MaxOpen is converted to int and handed to db.SetMaxOpenConns.
MaxOpen uint `yaml:"maxOpen"`
// MaxIdle is converted to int and handed to db.SetMaxIdleConns.
MaxIdle uint `yaml:"maxIdle"`
}

// Configure copies the pool settings held by cc onto db.
// Calling it on a nil receiver is a no-op, so callers may pass an
// unset (nil) pool configuration straight through.
func (cc *ConnPoolConf) Configure(db *sqlx.DB) {
	if cc == nil {
		return
	}

	// Connection age limits.
	lifetime, idleTime := cc.MaxLifetime, cc.MaxIdleTime
	db.SetConnMaxLifetime(lifetime)
	db.SetConnMaxIdleTime(idleTime)

	// Pool size limits: the idle limit is applied before the open limit,
	// in the same order as before.
	idleLimit, openLimit := int(cc.MaxIdle), int(cc.MaxOpen)
	db.SetMaxIdleConns(idleLimit)
	db.SetMaxOpenConns(openLimit)
}

// ConnRetryConf holds common retry settings for establishing a database connection.
// It is exposed to users under the `connRetry` configuration key.
type ConnRetryConf struct {
	// MaxAttempts is the maximum number of retries to attempt before giving up.
	// BackoffConf passes it to backoff.WithMaxRetries unchanged.
	MaxAttempts uint64 `yaml:"maxAttempts"`
	// InitialInterval is the initial wait period between retry attempts.
	// Subsequent waits grow with the attempt number. A zero value means
	// "not set": BackoffConf then keeps the backoff library's default.
	InitialInterval time.Duration `yaml:"initialInterval"`
	// MaxInterval is the maximum amount of time to wait between retry attempts.
	// A zero value means "not set": BackoffConf then keeps the backoff
	// library's default.
	MaxInterval time.Duration `yaml:"maxInterval"`
}

// Validate reports every problem found in the retry settings, joined into a
// single error with errors.Join. It returns nil when rc is nil or when all
// settings are acceptable.
//
// Rules:
//   - initialInterval and maxInterval must not be negative;
//   - when both are explicitly set (non-zero), maxInterval must not be
//     smaller than initialInterval.
func (rc *ConnRetryConf) Validate() (outErr error) {
	if rc == nil {
		return nil
	}

	if rc.InitialInterval < 0 {
		outErr = errors.Join(outErr, errors.New("connRetry.initialInterval must not be negative"))
	}

	if rc.MaxInterval < 0 {
		outErr = errors.Join(outErr, errors.New("connRetry.maxInterval must not be negative"))
	}

	// Zero means "use the default" (see BackoffConf), so the ordering check
	// only applies when both values were explicitly configured. Without this
	// guard, setting initialInterval alone would be rejected because the
	// unset maxInterval (0) compares as smaller.
	if rc.InitialInterval > 0 && rc.MaxInterval > 0 && rc.MaxInterval < rc.InitialInterval {
		outErr = errors.Join(outErr, errors.New("connRetry.maxInterval must not be smaller than connRetry.initialInterval"))
	}

	return outErr
}

// BackoffConf builds the backoff policy used while connecting to the database.
//
// The policy is an exponential backoff capped by a maximum retry count.
// With a nil receiver the library defaults are used together with
// defaultRetryMaxAttempts. Otherwise MaxAttempts is used as the retry cap
// as-is, and InitialInterval / MaxInterval override the library defaults
// only when they are greater than zero.
func (rc *ConnRetryConf) BackoffConf() backoff.BackOff {
	policy := backoff.NewExponentialBackOff()
	retryLimit := uint64(defaultRetryMaxAttempts)

	if rc != nil {
		retryLimit = rc.MaxAttempts

		if rc.InitialInterval > 0 {
			policy.InitialInterval = rc.InitialInterval
		}

		if rc.MaxInterval > 0 {
			policy.MaxInterval = rc.MaxInterval
		}
	}

	return backoff.WithMaxRetries(policy, retryLimit)
}
29 changes: 0 additions & 29 deletions internal/storage/db/internal/connpool.go

This file was deleted.

6 changes: 2 additions & 4 deletions internal/storage/db/internal/funcs.go
Expand Up @@ -52,9 +52,7 @@ func ansiConcatWithSep(sep string, args ...any) exp.Expression {
}
}

const DBConnectionRetries = 3

func ConnectWithRetries(driverName, connStr string, retries uint64) (*sqlx.DB, error) {
func ConnectWithRetries(driverName, connStr string, retryConf *ConnRetryConf) (*sqlx.DB, error) {
var db *sqlx.DB

connectFn := func() error {
Expand All @@ -63,7 +61,7 @@ func ConnectWithRetries(driverName, connStr string, retries uint64) (*sqlx.DB, e
return err
}

err := backoff.Retry(connectFn, backoff.WithMaxRetries(backoff.NewExponentialBackOff(), retries))
err := backoff.Retry(connectFn, retryConf.BackoffConf())
if err != nil {
return nil, err
}
Expand Down
8 changes: 4 additions & 4 deletions internal/storage/db/internal/funcs_test.go
Expand Up @@ -131,30 +131,30 @@ func TestConnectWithRetries(t *testing.T) {

t.Run("connect_with_no_retries", func(t *testing.T) {
defer resetConn()
_, err := internal.ConnectWithRetries(driverName, "", 0)
_, err := internal.ConnectWithRetries(driverName, "", &internal.ConnRetryConf{MaxAttempts: 0})
require.NoError(t, err)
require.Equal(t, 0, mc.attempts)
})

t.Run("connect_with_no_failures", func(t *testing.T) {
defer resetConn()
_, err := internal.ConnectWithRetries(driverName, "", 1)
_, err := internal.ConnectWithRetries(driverName, "", &internal.ConnRetryConf{MaxAttempts: 1})
require.NoError(t, err)
require.Equal(t, 0, mc.attempts)
})

t.Run("connect_with_retry", func(t *testing.T) {
defer resetConn()
mc.nFailures = 1
_, err := internal.ConnectWithRetries(driverName, "", 1)
_, err := internal.ConnectWithRetries(driverName, "", &internal.ConnRetryConf{MaxAttempts: 1})
require.NoError(t, err)
require.Equal(t, 1, mc.attempts)
})

t.Run("connect_with_error", func(t *testing.T) {
defer resetConn()
mc.nFailures = 2
_, err := internal.ConnectWithRetries(driverName, "", 1)
_, err := internal.ConnectWithRetries(driverName, "", &internal.ConnRetryConf{MaxAttempts: 1})
require.Error(t, err)
require.Equal(t, 2, mc.attempts)
})
Expand Down
11 changes: 8 additions & 3 deletions internal/storage/db/mysql/conf.go
Expand Up @@ -13,9 +13,10 @@ const confKey = storage.ConfKey + ".mysql"
// Conf is required (if driver is set to 'mysql') configuration for mysql driver.
// +desc=This section is required only if storage.driver is mysql.
type Conf struct {
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
TLS map[string]TLSConf `yaml:"tls" conf:",example=\n mytls:\n cert: /path/to/certificate\n key: /path/to/private_key\n caCert: /path/to/CA_certificate"`
ServerPubKey map[string]string `yaml:"serverPubKey" conf:",example=\n mykey: testdata/server_public_key.pem"`
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
ConnRetry *internal.ConnRetryConf `yaml:"connRetry" conf:",example=\n maxAttempts: 3\n initialInterval: 0.5s\n maxInterval: 60s"`
TLS map[string]TLSConf `yaml:"tls" conf:",example=\n mytls:\n cert: /path/to/certificate\n key: /path/to/private_key\n caCert: /path/to/CA_certificate"`
ServerPubKey map[string]string `yaml:"serverPubKey" conf:",example=\n mykey: testdata/server_public_key.pem"`
// DSN is the data source connection string.
DSN string `yaml:"dsn" conf:"required,example=\"user:password@tcp(localhost:3306)/db?interpolateParams=true\""`
// SkipSchemaCheck skips checking for required database tables on startup.
Expand All @@ -31,3 +32,7 @@ type TLSConf struct {
// Key returns confKey, the configuration key of the mysql storage section.
func (c *Conf) Key() string {
return confKey
}

// Validate checks the mysql storage configuration. It delegates to
// ConnRetry.Validate, which is safe to call when ConnRetry is nil.
func (c *Conf) Validate() error {
return c.ConnRetry.Validate()
}
2 changes: 1 addition & 1 deletion internal/storage/db/mysql/mysql.go
Expand Up @@ -57,7 +57,7 @@ func NewStore(ctx context.Context, conf *Conf) (*Store, error) {
return nil, err
}

db, err := internal.ConnectWithRetries("mysql", dsn, internal.DBConnectionRetries)
db, err := internal.ConnectWithRetries("mysql", dsn, conf.ConnRetry)
if err != nil {
return nil, fmt.Errorf("failed to connect to database: %w", err)
}
Expand Down
7 changes: 6 additions & 1 deletion internal/storage/db/postgres/conf.go
Expand Up @@ -13,7 +13,8 @@ const confKey = storage.ConfKey + ".postgres"
// Conf is required (if driver is set to 'postgres') configuration for postgres driver.
// +desc=This section is required only if storage.driver is postgres.
type Conf struct {
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
ConnRetry *internal.ConnRetryConf `yaml:"connRetry" conf:",example=\n maxAttempts: 3\n initialInterval: 0.5s\n maxInterval: 60s"`
// URL is the Postgres connection URL. See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
URL string `yaml:"url" conf:"required,example=\"postgres://user:password@localhost:port/db\""`
// SkipSchemaCheck skips checking for required database tables on startup.
Expand All @@ -23,3 +24,7 @@ type Conf struct {
// Key returns confKey, the configuration key of the postgres storage section.
func (c *Conf) Key() string {
return confKey
}

// Validate checks the postgres storage configuration. It delegates to
// ConnRetry.Validate, which is safe to call when ConnRetry is nil.
func (c *Conf) Validate() error {
return c.ConnRetry.Validate()
}
2 changes: 1 addition & 1 deletion internal/storage/db/postgres/postgres.go
Expand Up @@ -58,7 +58,7 @@ func NewStore(ctx context.Context, conf *Conf) (*Store, error) {
log.Info("Initializing Postgres storage", zap.String("host", pgConf.Host), zap.String("database", pgConf.Database))

connStr := stdlib.RegisterConnConfig(pgConf)
db, err := internal.ConnectWithRetries("pgx", connStr, internal.DBConnectionRetries)
db, err := internal.ConnectWithRetries("pgx", connStr, conf.ConnRetry)
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion internal/storage/db/sqlite3/sqlite3.go
Expand Up @@ -88,7 +88,7 @@ func NewStore(ctx context.Context, conf *Conf) (*Store, error) {
log := logging.FromContext(ctx).Named("sqlite3")
log.Info("Initializing sqlite3 storage", zap.String("DSN", conf.DSN))

db, err := internal.ConnectWithRetries("sqlite", conf.DSN, internal.DBConnectionRetries)
db, err := internal.ConnectWithRetries("sqlite", conf.DSN, nil)
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
}
Expand Down
7 changes: 6 additions & 1 deletion internal/storage/db/sqlserver/conf.go
Expand Up @@ -13,7 +13,8 @@ const confKey = storage.ConfKey + ".sqlserver"
// Conf is required (if driver is set to 'sqlserver') configuration for mssql driver.
// +desc=This section is required only if storage.driver is sqlserver.
type Conf struct {
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
ConnPool *internal.ConnPoolConf `yaml:"connPool" conf:",example=\n maxLifeTime: 60m\n maxIdleTime: 45s\n maxOpen: 4\n maxIdle: 1"`
ConnRetry *internal.ConnRetryConf `yaml:"connRetry" conf:",example=\n maxAttempts: 3\n initialInterval: 0.5s\n maxInterval: 60s"`
// URL is the SQL Server connection URL. See https://github.com/microsoft/go-mssqldb#connection-parameters-and-dsn.
URL string `yaml:"url" conf:"required,example=\"sqlserver://username:password@host/instance?param1=value&param2=value\""`
// SkipSchemaCheck skips checking for required database tables on startup.
Expand All @@ -23,3 +24,7 @@ type Conf struct {
// Key returns confKey, the configuration key of the sqlserver storage section.
func (c *Conf) Key() string {
return confKey
}

// Validate checks the sqlserver storage configuration. It delegates to
// ConnRetry.Validate, which is safe to call when ConnRetry is nil.
func (c *Conf) Validate() error {
return c.ConnRetry.Validate()
}
2 changes: 1 addition & 1 deletion internal/storage/db/sqlserver/sqlserver.go
Expand Up @@ -56,7 +56,7 @@ func NewStore(ctx context.Context, conf *Conf) (*Store, error) {
log := logging.FromContext(ctx).Named("sqlserver")
log.Info("Initialising SQL Server storage")

db, err := internal.ConnectWithRetries(DriverName, conf.URL, internal.DBConnectionRetries)
db, err := internal.ConnectWithRetries(DriverName, conf.URL, conf.ConnRetry)
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
}
Expand Down

0 comments on commit ddcc341

Please sign in to comment.