Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Purge old Rendered Templates #2167

Merged
merged 11 commits into from Oct 18, 2017
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/bosun/conf/conf.go
Expand Up @@ -70,6 +70,8 @@ type SystemConfProvider interface {

GetAuthConf() *AuthConf

GetMaxRenderedTemplateAge() int

// Contexts
GetTSDBContext() opentsdb.Context
GetGraphiteContext() graphite.Context
Expand Down
8 changes: 8 additions & 0 deletions cmd/bosun/conf/system.go
Expand Up @@ -52,6 +52,8 @@ type SystemConf struct {

AuthConf *AuthConf

MaxRenderedTemplateAge int // in days

EnableSave bool
EnableReload bool
CommandHookPath string
Expand Down Expand Up @@ -427,6 +429,12 @@ func (sc *SystemConf) GetInternetProxy() string {
return sc.InternetProxy
}

// GetMaxRenderedTemplateAge returns the maximum time, in days, to keep rendered
// templates after the incident end date. The visible caller in main only starts
// the cleanup goroutine when this value is non-zero.
func (sc *SystemConf) GetMaxRenderedTemplateAge() int {
return sc.MaxRenderedTemplateAge
}

// SaveEnabled returns if saving via the UI and config editing API endpoints should be enabled
func (sc *SystemConf) SaveEnabled() bool {
return sc.EnableSave
Expand Down
1 change: 0 additions & 1 deletion cmd/bosun/database/search_data.go
Expand Up @@ -161,7 +161,6 @@ func (d *dataAccess) GetMetricTagSets(metric string, tags opentsdb.TagSet) (map[
result[mts] = t
}
}

if cursor == "" || cursor == "0" {
break
}
Expand Down
128 changes: 126 additions & 2 deletions cmd/bosun/database/state_data.go
Expand Up @@ -67,6 +67,9 @@ type StateDataAccess interface {

SetRenderedTemplates(incidentId int64, rt *models.RenderedTemplates) error
GetRenderedTemplates(incidentId int64) (*models.RenderedTemplates, error)
GetRenderedTemplateKeys() ([]string, error)
CleanupOldRenderedTemplates(olderThan time.Duration)
DeleteRenderedTemplates(incidentIds []int64) error

Forget(ak models.AlertKey) error
SetUnevaluated(ak models.AlertKey, uneval bool) error
Expand All @@ -93,16 +96,80 @@ func (d *dataAccess) GetRenderedTemplates(incidentId int64) (*models.RenderedTem
defer conn.Close()

b, err := redis.Bytes(conn.Do("GET", renderedTemplatesKey(incidentId)))
renderedT := &models.RenderedTemplates{}
if err != nil {
if err == redis.ErrNil {
return renderedT, nil
}
return nil, slog.Wrap(err)
}
renderedT := &models.RenderedTemplates{}
if err = json.Unmarshal(b, renderedT); err != nil {
return nil, slog.Wrap(err)
}
return renderedT, nil
}

func (d *dataAccess) GetRenderedTemplateKeys() ([]string, error) {
conn := d.Get()
defer conn.Close()

//ledis uses XSCAN cursor "KV" MATCH foo
//redis uses SCAN cursor MATCH foo
cmd := "SCAN"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe Redis and Ledis SCAN could be a function outside of this that can be reused (and make this func shorter)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we did that with some others, but the re-ordering of the args makes it tricky. I will factor out.

args := []interface{}{"0", "MATCH", "renderedTemplatesById:*"}
cursorIdx := 0
if !d.isRedis {
cmd = "XSCAN"
args = append([]interface{}{"KV"}, args...)
cursorIdx = 1
}
found := []string{}
for {
vals, err := redis.Values(conn.Do(cmd, args...))
if err != nil {
return nil, slog.Wrap(err)
}
cursor, err := redis.String(vals[0], nil)
if err != nil {
return nil, slog.Wrap(err)
}
args[cursorIdx] = cursor
keys, err := redis.Strings(vals[1], nil)
if err != nil {
return nil, slog.Wrap(err)
}
found = append(found, keys...)
if cursor == "" || cursor == "0" {
break
}
}
return found, nil
}

// DeleteRenderedTemplates removes the rendered templates stored for the given
// incident ids. Keys are deleted in batches of at most 1000 per DEL command;
// the first command that fails stops the run and its wrapped error is returned.
// An empty id list issues no commands and returns nil.
func (d *dataAccess) DeleteRenderedTemplates(incidentIds []int64) error {
	const batchSize = 1000

	conn := d.Get()
	defer conn.Close()

	// A single scratch slice is reused for every batch of keys.
	keys := make([]interface{}, 0, batchSize)
	for start := 0; start < len(incidentIds); start += batchSize {
		end := start + batchSize
		if end > len(incidentIds) {
			end = len(incidentIds)
		}
		keys = keys[:0]
		for i := start; i < end; i++ {
			keys = append(keys, renderedTemplatesKey(incidentIds[i]))
		}
		if _, err := conn.Do("DEL", keys...); err != nil {
			return slog.Wrap(err)
		}
	}
	return nil
}

// State returns the receiver itself as a StateDataAccess.
func (d *dataAccess) State() StateDataAccess {
return d
}
Expand Down Expand Up @@ -380,10 +447,12 @@ func (d *dataAccess) Forget(ak models.AlertKey) error {
return slog.Wrap(err)
}
for _, id := range ids {

if _, err = conn.Do("DEL", incidentStateKey(id)); err != nil {
return slog.Wrap(err)
}
if _, err = conn.Do("DEL", renderedTemplatesKey(id)); err != nil {
return slog.Wrap(err)
}
}
if _, err := conn.Do(d.LCLEAR(), incidentsForAlertKeyKey(ak)); err != nil {
return slog.Wrap(err)
Expand Down Expand Up @@ -447,3 +516,58 @@ func (d *dataAccess) transact(conn redis.Conn, f func() error) error {
}
return nil
}

// CleanupOldRenderedTemplates loops forever, periodically purging old rendered templates.
func (d *dataAccess) CleanupOldRenderedTemplates(olderThan time.Duration) {
// run after 5 minutes (to let bosun stabilize)
// and then every hour
time.Sleep(time.Minute * 5)
for {
conn := d.Get()
slog.Infof("Cleaning out old rendered templates")
earliestOk := time.Now().UTC().Add(-1 * olderThan)
func() {
toPurge := []int64{}
keys, err := d.GetRenderedTemplateKeys()
if err != nil {
slog.Error(err)
return
}
for _, key := range keys {
parts := strings.Split(key, ":")
if len(parts) != 2 {
slog.Errorf("Invalid rendered template redis key found: %s", key)
continue
}
id, err := strconv.ParseInt(parts[1], 10, 64)
if err != nil {
slog.Error(err)
continue
}
state, err := d.getIncident(id, conn)
if err != nil {
if strings.Contains(err.Error(), "nil returned") {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

strings.Contains on error, combined with the match not being a constant doesn't seem safe in the long run.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the problem there is that the error will already be wrapped at that point, and the inner error is not accessible. I guess I can also make a package function for that test though.

toPurge = append(toPurge, id)
continue
}
slog.Error(err)
continue
}
if state.End != nil && (*state.End).Before(earliestOk) {
toPurge = append(toPurge, id)
}
}
if len(toPurge) == 0 {
return
}
slog.Infof("Deleting %d old rendered templates", len(toPurge))
if err = d.DeleteRenderedTemplates(toPurge); err != nil {
slog.Error(err)
return
}
}()
conn.Close()
slog.Info("Done cleaning rendered templates")
time.Sleep(time.Hour)
}
}
3 changes: 3 additions & 0 deletions cmd/bosun/main.go
Expand Up @@ -141,6 +141,9 @@ func main() {
if err != nil {
slog.Fatal(err)
}
if sysProvider.GetMaxRenderedTemplateAge() != 0 {
go da.State().CleanupOldRenderedTemplates(time.Hour * 24 * time.Duration(sysProvider.GetMaxRenderedTemplateAge()))
}
var annotateBackend backend.Backend
if sysProvider.AnnotateEnabled() {
index := sysProvider.GetAnnotateIndex()
Expand Down
139 changes: 70 additions & 69 deletions cmd/bosun/web/static.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cmd/bosun/web/static/js/bosun.js
Expand Up @@ -3281,7 +3281,7 @@ bosunControllers.controller('IncidentCtrl', ['$scope', '$http', '$location', '$r
}
v.error = null;
v.doneLoading = false;
if (i == $scope.lastNonUnknownAbnormalIdx) {
if (i == $scope.lastNonUnknownAbnormalIdx && $scope.body) {
v.subject = $scope.incident.Subject;
v.body = $scope.body;
v.doneLoading = true;
Expand Down Expand Up @@ -3317,7 +3317,7 @@ bosunControllers.controller('IncidentCtrl', ['$scope', '$http', '$location', '$r
$scope.configLink = configUrl($scope.incident.AlertKey, moment.unix($scope.incident.LastAbnormalTime));
for (var i = 0; i < $scope.events.length; i++) {
var e = $scope.events[i];
if (e.Status != 'normal' && e.Status != 'unknown') {
if (e.Status != 'normal' && e.Status != 'unknown' && $scope.body) {
$scope.lastNonUnknownAbnormalIdx = i;
$scope.collapse(i, e); // Expand the panel of the current body
break;
Expand Down