Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CBG-3925: Add FileLogger RotationInterval #6835

Merged
merged 19 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions base/logger_console.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"strconv"
"sync"

"github.com/natefinch/lumberjack"
"gopkg.in/natefinch/lumberjack.v2"
)

// ConsoleLogger is a file logger with a default output of stderr, and tunable log level/keys.
Expand Down Expand Up @@ -197,7 +197,7 @@ func (lcc *ConsoleLoggerConfig) init() error {
Filename: filepath.FromSlash(lcc.FileOutput),
MaxSize: *lcc.Rotation.MaxSize,
MaxAge: *lcc.Rotation.MaxAge,
Compress: false,
Compress: BoolDefault(lcc.Rotation.Compress, false),
}
}

Expand Down
79 changes: 60 additions & 19 deletions base/logger_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,27 @@ import (
"sync"
"time"

"github.com/natefinch/lumberjack"
"github.com/pkg/errors"
"gopkg.in/natefinch/lumberjack.v2"
)

var (
ErrInvalidLogFilePath = errors.New("invalid log file path")
ErrUnwritableLogFilePath = errors.New("cannot write to log file path directory")

maxAgeLimit = 9999 // days
defaultMaxSize = 100 // 100 MB
defaultMaxAgeMultiplier = 2 // e.g. 90 minimum == 180 default maxAge
defaultCumulativeMaxSizeBeforeDeletion = 1024 // 1 GB
minRotatedLogsSizeLimit = 10 // 10 MB
rotatedLogsLowWatermarkMultiplier = 0.8 // eg. 100 minRotatedLogsSizeLimit == 80 low watermark
maxAgeLimit = 9999 // days
defaultMaxSize = 100 // 100 MB
defaultMaxAgeMultiplier = 2 // e.g. 90 minimum == 180 default maxAge
defaultCumulativeMaxSizeBeforeDeletion = 1024 // 1 GB
minRotatedLogsSizeLimit = 10 // 10 MB
rotatedLogsLowWatermarkMultiplier = 0.8 // eg. 100 minRotatedLogsSizeLimit == 80 low watermark
minLogRotationInterval = 15 * time.Minute // 15 minutes
maxLogRotationInterval = 7 * 24 * time.Hour // 7 days

logFilePrefix = "sg_"

belowMinValueFmt = "%s for %v was set to %d which is below the minimum of %d"
aboveMaxValueFmt = "%s for %v was set to %d which is above the maximum of %d"
belowMinValueFmt = "%s for %v was set to %v which is below the minimum of %v"
aboveMaxValueFmt = "%s for %v was set to %v which is above the maximum of %v"
)

type FileLogger struct {
Expand Down Expand Up @@ -69,10 +71,12 @@ type FileLoggerConfig struct {
}

type logRotationConfig struct {
MaxSize *int `json:"max_size,omitempty"` // The maximum size in MB of the log file before it gets rotated.
MaxAge *int `json:"max_age,omitempty"` // The maximum number of days to retain old log files.
LocalTime *bool `json:"localtime,omitempty"` // If true, it uses the computer's local time to format the backup timestamp.
RotatedLogsSizeLimit *int `json:"rotated_logs_size_limit,omitempty"` // Max Size of log files before deletion
MaxSize *int `json:"max_size,omitempty"` // The maximum size in MB of the log file before it gets rotated.
MaxAge *int `json:"max_age,omitempty"` // The maximum number of days to retain old log files.
LocalTime *bool `json:"localtime,omitempty"` // If true, it uses the computer's local time to format the backup timestamp.
RotatedLogsSizeLimit *int `json:"rotated_logs_size_limit,omitempty"` // Max Size of log files before deletion
RotationInterval *ConfigDuration `json:"rotation_interval,omitempty"` // Interval at which logs are rotated
Compress *bool `json:"-"` // Enable log compression - not exposed in config
}

// NewFileLogger returns a new FileLogger from a config.
Expand Down Expand Up @@ -139,6 +143,13 @@ func (l *FileLogger) Rotate() error {
return errors.New("can't rotate non-lumberjack log output")
}

func (l *FileLogger) Close() error {
if c, ok := l.output.(io.Closer); ok {
return c.Close()
}
return nil
}

func (l *FileLogger) String() string {
return "FileLogger(" + l.level.String() + ")"
}
Expand Down Expand Up @@ -188,12 +199,15 @@ func (lfc *FileLoggerConfig) init(ctx context.Context, level LogLevel, name stri
return err
}

var rotateableLogger *lumberjack.Logger
if lfc.Output == nil {
lfc.Output = newLumberjackOutput(
rotateableLogger = newLumberjackOutput(
filepath.Join(filepath.FromSlash(logFilePath), logFilePrefix+name+".log"),
*lfc.Rotation.MaxSize,
*lfc.Rotation.MaxAge,
BoolDefault(lfc.Rotation.Compress, true),
)
lfc.Output = rotateableLogger
}

if lfc.CollationBufferSize == nil {
Expand All @@ -205,20 +219,39 @@ func (lfc *FileLoggerConfig) init(ctx context.Context, level LogLevel, name stri
lfc.CollationBufferSize = &bufferSize
}

ticker := time.NewTicker(time.Hour)
var rotationTicker *time.Ticker
var rotationTickerCh <-chan time.Time
if i := lfc.Rotation.RotationInterval.Value(); i > 0 && rotateableLogger != nil {
rotationTicker = time.NewTicker(i)
rotationTickerCh = rotationTicker.C
}

logDeletionTicker := time.NewTicker(rotatedLogDeletionInterval)
go func() {
defer func() {
if panicked := recover(); panicked != nil {
WarnfCtx(ctx, "Panic when deleting rotated log files: \n %s", panicked, debug.Stack())
WarnfCtx(ctx, "Panic when rotating or deleting rotated log files: %s\n%s", panicked, debug.Stack())
}
}()
if rotationTicker != nil {
defer rotationTicker.Stop()
}
defer logDeletionTicker.Stop()
for {
select {
case <-ticker.C:
case <-ctx.Done():
return
case <-logDeletionTicker.C:
err := runLogDeletion(ctx, logFilePath, level.String(), int(float64(*lfc.Rotation.RotatedLogsSizeLimit)*rotatedLogsLowWatermarkMultiplier), *lfc.Rotation.RotatedLogsSizeLimit)
if err != nil {
WarnfCtx(ctx, "%s", err)
}
case <-rotationTickerCh:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think this reads just a bit easier to see rotationTicker.C since I had to look a back to see if this was a normal ticker.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up writing it this way because rotationTicker can be nil if you haven't enabled a rotation interval. The select panics trying to access C on a nil ticker.

You can however, safely select over a nil channel, and this avoids having to write a separate conditional select if you have log rotation enabled.

DebugfCtx(ctx, KeyAll, "Rotating log file %s based on interval %q", name, lfc.Rotation.RotationInterval.Value())
err := rotateableLogger.Rotate()
if err != nil {
WarnfCtx(ctx, "Error rotating log file: %v", err)
}
}
}
}()
Expand Down Expand Up @@ -252,15 +285,23 @@ func (lfc *FileLoggerConfig) initRotationConfig(name string, defaultMaxSize, min
return fmt.Errorf(belowMinValueFmt, "RotatedLogsSizeLimit", name, *lfc.Rotation.RotatedLogsSizeLimit, minRotatedLogsSizeLimit)
}

if i := lfc.Rotation.RotationInterval; i != nil {
if i.Value() < minLogRotationInterval {
return fmt.Errorf(belowMinValueFmt, "RotationInterval", name, i.Value().String(), minLogRotationInterval.String())
} else if i.Value() > maxLogRotationInterval {
return fmt.Errorf(aboveMaxValueFmt, "RotationInterval", name, i.Value().String(), maxLogRotationInterval.String())
}
}

return nil
}

func newLumberjackOutput(filename string, maxSize, maxAge int) *lumberjack.Logger {
func newLumberjackOutput(filename string, maxSize, maxAge int, compress bool) *lumberjack.Logger {
return &lumberjack.Logger{
Filename: filename,
MaxSize: maxSize,
MaxAge: maxAge,
Compress: true,
Compress: compress,
}
}

Expand Down
2 changes: 2 additions & 0 deletions base/logging_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ const (
// we flush to the output if we don't fill the buffer.
consoleLoggerCollateFlushTimeout = 1 * time.Millisecond
fileLoggerCollateFlushTimeout = 10 * time.Millisecond

rotatedLogDeletionInterval = time.Hour // not configurable
)

// ErrUnsetLogFilePath is returned when no log_file_path, or --defaultLogFilePath fallback can be used.
Expand Down
69 changes: 68 additions & 1 deletion base/logging_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,17 @@ package base

import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
"time"

"github.com/natefinch/lumberjack"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/natefinch/lumberjack.v2"
)

func TestRedactedLogFuncs(t *testing.T) {
Expand Down Expand Up @@ -57,6 +59,71 @@ func Benchmark_LoggingPerformance(b *testing.B) {
}
}

func TestLogRotationInterval(t *testing.T) {
LongRunningTest(t)

// override min rotation interval for testing
originalMinRotationInterval := minLogRotationInterval
minLogRotationInterval = time.Millisecond * 10
defer func() { minLogRotationInterval = originalMinRotationInterval }()

rotationInterval := time.Millisecond * 100
config := &FileLoggerConfig{
Enabled: BoolPtr(true),
CollationBufferSize: IntPtr(0),
Rotation: logRotationConfig{
RotationInterval: NewConfigDuration(rotationInterval),
Compress: BoolPtr(false),
},
}

// On Windows, cleanup of t.TempDir() fails due to open log file handle from Lumberjack. Cannot be fixed from SG.
// https://github.com/natefinch/lumberjack/issues/185
var logPath string
if runtime.GOOS == "windows" {
var err error
logPath, err = os.MkdirTemp("", t.Name())
require.NoError(t, err)
t.Cleanup(func() {
if err := os.RemoveAll(logPath); err != nil {
// log instead of error because it's likely this is going to fail on Windows for this test.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// log instead of error because it's likely this is going to fail on Windows for this test.
// log instead of error because it's likely this is going to fail on Windows for this test if lumberjack has not finished rotation. The `time.Sleep` at the end of the tests tries to mitigate this.

I wonder if the windows build machines are set up to clear temporary files, this could end up filing up the disks on the machine. We could consider writing to cwd if the builds do a fresh checkpoint of sync_gateway or do a git clean -fdx

Any reason here to use os.MkdirTemp instead of t.TempDir ?

Copy link
Member Author

@bbrks bbrks Jun 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

t.TempDir fails because it asserts there was no error from RemoveAll. I originally wrote it like that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I battled with this for a day and ended up on this solution. The sleep doesn't prevent this error, because Lumberjack keeps the current file handle open for some reason, even after calling Close()

On *nix this doesn't matter because the OS will still let us remove the file/directory.

t.Logf("couldn't remove temp dir: %v", err)
}
})
} else {
logPath = t.TempDir()
}

countBefore := numFilesInDir(t, logPath, false)
t.Logf("countBefore: %d", countBefore)

ctx, ctxCancel := context.WithCancel(TestCtx(t))
defer ctxCancel()

fl, err := NewFileLogger(ctx, config, LevelTrace, "test", logPath, 0, nil)
require.NoError(t, err)
defer func() { require.NoError(t, fl.Close()) }()

fl.logf("test 1")
countAfter1 := numFilesInDir(t, logPath, false)
t.Logf("countAfter1: %d", countAfter1)
assert.Greater(t, countAfter1, countBefore)

time.Sleep(rotationInterval * 5)
countAfterSleep := numFilesInDir(t, logPath, false)
t.Logf("countAfterSleep: %d", countAfterSleep)
assert.Greater(t, countAfterSleep, countAfter1)

fl.logf("test 2")
countAfter2 := numFilesInDir(t, logPath, false)
t.Logf("countAfter2: %d", countAfter2)
assert.GreaterOrEqual(t, countAfter2, countAfterSleep)

// Wait for Lumberjack to finish its async log compression work
// we have no way of waiting for this to finish, or even stopping the millRun() process inside Lumberjack.
time.Sleep(time.Millisecond * 500)
}

// Benchmark the time it takes to write x bytes of data to a logger, and optionally rotate and compress it.
func BenchmarkLogRotation(b *testing.B) {

Expand Down
19 changes: 19 additions & 0 deletions base/util_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"log"
"math/rand"
"os"
Expand Down Expand Up @@ -935,3 +936,21 @@ func MustJSONMarshal(t testing.TB, v interface{}) []byte {
require.NoError(t, err)
return b
}

// numFilesInDir counts the number of files in a given directory
func numFilesInDir(t *testing.T, dir string, recursive bool) int {
numFiles := 0
err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
if d.Name() == filepath.Base(dir) {
// skip counting the root directory
return nil
}
if !recursive && d.IsDir() {
return fs.SkipDir
}
numFiles++
return nil
})
require.NoError(t, err)
return numFiles
}
7 changes: 7 additions & 0 deletions docs/api/components/schemas.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,13 @@ Log-rotation-config-readonly:
description: Max Size (in mb) of log files before deletion
type: integer
default: 1024
rotation_interval:
description: |-
If set, the interval at which log files are rotated, even if max_size is not reached.

This is a duration and therefore can be provided with units "h", "m", "s", "ms", "us", and "ns". For example, 5 hours, 20 minutes, and 30 seconds would be `5h20m30s`.
type: string
default: 0
readOnly: true
title: Log-rotation-config
Console-logging-config:
Expand Down
4 changes: 1 addition & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ require (
github.com/gorilla/mux v1.8.1
github.com/json-iterator/go v1.1.12
github.com/kardianos/service v1.2.2
github.com/natefinch/lumberjack v2.0.0+incompatible
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/client_model v0.3.0
Expand All @@ -37,11 +36,11 @@ require (
golang.org/x/exp v0.0.0-20230905200255-921286631fa9
golang.org/x/net v0.25.0
golang.org/x/oauth2 v0.20.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
gopkg.in/square/go-jose.v2 v2.6.0
)

require (
github.com/BurntSushi/toml v1.3.2 // indirect
github.com/aws/aws-sdk-go v1.44.299 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
Expand Down Expand Up @@ -89,7 +88,6 @@ require (
google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect
google.golang.org/grpc v1.63.2 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/sourcemap.v1 v1.0.5 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
nhooyr.io/websocket v1.8.10 // indirect
Expand Down
6 changes: 0 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aov
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY=
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0/go.mod h1:2e8rMJtl2+2j+HXbTBwnyGpm5Nou7KhvSfxOq8JpTag=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/aws/aws-sdk-go v1.44.299 h1:HVD9lU4CAFHGxleMJp95FV/sRhtg7P4miHD1v88JAQk=
github.com/aws/aws-sdk-go v1.44.299/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
Expand Down Expand Up @@ -154,8 +152,6 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/natefinch/lumberjack v2.0.0+incompatible h1:4QJd3OLAMgj7ph+yZTuX13Ld4UpgHp07nNdFX7mqFfM=
github.com/natefinch/lumberjack v2.0.0+incompatible/go.mod h1:Wi9p2TTF5DG5oU+6YfsmYQpsTIOm0B1VNzQg9Mw6nPk=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
Expand Down Expand Up @@ -353,8 +349,6 @@ gopkg.in/square/go-jose.v2 v2.6.0/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
Loading
Loading