Skip to content

Commit

Permalink
fix: grafonnet-lib -based Postgres query charts
Browse files Browse the repository at this point in the history
  • Loading branch information
chris13524 committed May 20, 2024
1 parent b9a8429 commit ed8043d
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 9 deletions.
1 change: 1 addition & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ jobs:
TF_VAR_jwt_secret: ${{ secrets.PROD_JWT_SECRET }}
TF_VAR_image_version: ${{ inputs.image_tag }}
TF_VAR_relay_public_key: ${{ secrets.RELAY_PUBLIC_KEY }}
# `notification_channels` is declared as list(any) in Terraform, and Terraform
# parses environment values for non-string variables as HCL/JSON expressions.
# Pass the channel as a list of `{ uid }` objects rather than a bare id.
TF_VAR_notification_channels: '[{"uid":"NNOynGwVz"}]'
with:
environment: "prod"

Expand Down
1 change: 1 addition & 0 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ module "monitoring" {
prometheus_workspace_id = aws_prometheus_workspace.prometheus.id
load_balancer_arn = module.ecs.load_balancer_arn
environment = local.environment
notification_channels = var.notification_channels
}

data "aws_ecr_repository" "repository" {
Expand Down
55 changes: 55 additions & 0 deletions terraform/monitoring/dashboard.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
local grafana = import 'grafonnet-lib/grafana.libsonnet';
local panels = import 'panels/panels.libsonnet';

local dashboard = grafana.dashboard;
local row = grafana.row;
local annotation = grafana.annotation;
local layout = grafana.layout;

// Grafana data source references handed to each panel constructor.
// Each UID is read from an external variable of the same name.
local ds = {
  local ref(kind, uid_var) = { type: kind, uid: std.extVar(uid_var) },

  prometheus: ref('prometheus', 'prometheus_uid'),
  cloudwatch: ref('cloudwatch', 'cloudwatch_uid'),
};

// Shared values handed to each panel constructor alongside `ds`.
local vars = {
  // The `notifications` external variable is a JSON-encoded string.
  local raw_notifications = std.extVar('notifications'),

  namespace: 'Push',
  environment: std.extVar('environment'),
  notifications: std.parseJson(raw_notifications),
};

////////////////////////////////////////////////////////////////////////////////

// Height handed to `layout.pos`; the resulting `pos` object supplies the
// `gridPos` value mixed into each panel below.
local height = 8;
local pos = layout.pos(height);

// Annotation registered on the dashboard via `.addAnnotation`.
local dashboard_annotation = annotation.new(
  target = {
    limit: 100,
    matchAny: false,
    tags: [],
    type: 'dashboard',
  },
);

// Row header followed by its panels, in the order given to
// `layout.generate_grid`.
local application_panels = [
  row.new('Application'),
  panels.app.postgres_query_rate(ds, vars) { gridPos: pos._6 },
  panels.app.postgres_query_latency(ds, vars) { gridPos: pos._6 },
];

// The file's result: the dashboard object. Title and UID come from external
// variables.
dashboard.new(
  title = std.extVar('dashboard_title'),
  uid = std.extVar('dashboard_uid'),
  editable = true,
  graphTooltip = dashboard.graphTooltips.sharedCrosshair,
  timezone = dashboard.timezones.utc,
)
.addAnnotation(dashboard_annotation)
.addPanels(layout.generate_grid(application_panels))
34 changes: 25 additions & 9 deletions terraform/monitoring/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@ locals {
# the Grafana provider e.g.
# net/prod-relay-load-balancer/e9a51c46020a0f85
load_balancer = join("/", slice(split("/", var.load_balancer_arn), 1, 4))
opsgenie_notification_channel = "NNOynGwVz"
notifications = (
var.environment == "prod" ?
[{ uid = local.opsgenie_notification_channel }] :
[]
)
}

resource "grafana_data_source" "prometheus" {
Expand All @@ -34,7 +28,29 @@ resource "grafana_data_source" "cloudwatch" {
})
}

# Renders dashboard.jsonnet, supplying the external string variables that the
# Jsonnet source reads through std.extVar.
# NOTE(review): this block reads `module.this.stage`; confirm `module.this` is
# declared in this module.
data "jsonnet_file" "dashboard" {
  source = join("/", [path.module, "dashboard.jsonnet"])

  ext_str = {
    # Dashboard identity.
    dashboard_uid   = "push-${module.this.stage}"
    dashboard_title = "Push Server - ${title(module.this.stage)}"

    # UIDs of the Grafana data sources defined in this module.
    cloudwatch_uid = grafana_data_source.cloudwatch.uid
    prometheus_uid = grafana_data_source.prometheus.uid

    # The channel list is JSON-encoded into a string here; the Jsonnet side
    # decodes it with std.parseJson.
    notifications = jsonencode(var.notification_channels)
    environment   = module.this.stage
  }
}

# Grafana dashboard whose JSON body is the rendered output of the
# `jsonnet_file.dashboard` data source.
resource "grafana_dashboard" "at_a_glance" {
  config_json = data.jsonnet_file.dashboard.rendered

  message   = "Updated by Terraform"
  overwrite = true
}


resource "grafana_dashboard" "at_a_glance_old" {
overwrite = true
message = "Updated by Terraform"
config_json = jsonencode({
Expand Down Expand Up @@ -533,7 +549,7 @@ resource "grafana_dashboard" "at_a_glance" {
"name" : "${var.environment} Echo Server 5XX alert",
"noDataState" : "no_data",
"message" : "Echo server - Prod - 5XX error",
"notifications" : local.notifications
"notifications" : var.notification_channels
},
"datasource" : {
"type" : "cloudwatch",
Expand Down Expand Up @@ -804,8 +820,8 @@ resource "grafana_dashboard" "at_a_glance" {
},
"timepicker" : {},
"timezone" : "",
"title" : var.app_name,
"uid" : var.app_name,
"title" : "${var.app_name} - old",
"uid" : "${var.app_name}-old",
"version" : 13,
"weekStart" : ""
})
Expand Down
25 changes: 25 additions & 0 deletions terraform/monitoring/panels/app/postgres_query_latency.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local panels = grafana.panels;
local targets = grafana.targets;

{
  // Builds the 'Postgres Query Latency' timeseries panel.
  //   ds:   data source references; only `ds.prometheus` is read.
  //   vars: accepted but not referenced in this body.
  new(ds, vars)::
    local grouping = 'sum by (aws_ecs_task_revision, name)';
    local rated(metric) = '%s (rate(%s[$__rate_interval]))' % [grouping, metric];

    // Rate of the latency sum divided by the rate of the latency count,
    // per (task revision, query name).
    local mean_latency = '%s / %s' % [
      rated('postgres_query_latency_sum'),
      rated('postgres_query_latency_count'),
    ];

    local latency_target = targets.prometheus(
      datasource = ds.prometheus,
      expr = mean_latency,
      legendFormat = '{{name}} r{{aws_ecs_task_revision}}',
      exemplar = false,
      refId = 'PostgresQueryLatency',
    );

    local panel_config = defaults.configuration.timeseries.withUnit('ms');

    panels.timeseries(
      title = 'Postgres Query Latency',
      datasource = ds.prometheus,
    )
    .configure(panel_config)
    .addTarget(latency_target),
}
33 changes: 33 additions & 0 deletions terraform/monitoring/panels/app/postgres_query_rate.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local panels = grafana.panels;
local targets = grafana.targets;

{
  // Builds the 'Postgres Query Rate' timeseries panel with two Prometheus
  // targets:
  //   - PostgresQueryRate:      rate per (task revision, query name)
  //   - PostgresQueryRateTotal: rate per task revision, summed over all queries
  //
  //   ds:   data source references; only `ds.prometheus` is read.
  //   vars: accepted but not referenced in this body.
  new(ds, vars)::
    panels.timeseries(
      title = 'Postgres Query Rate',
      datasource = ds.prometheus,
    )
    .configure(
      defaults.configuration.timeseries
      .withUnit('cps')
    )

    .addTarget(targets.prometheus(
      datasource = ds.prometheus,
      expr = 'sum by (aws_ecs_task_revision, name) (rate(postgres_queries_total[$__rate_interval]))',
      legendFormat = '{{name}} r{{aws_ecs_task_revision}}',
      exemplar = true,
      refId = 'PostgresQueryRate',
    ))

    .addTarget(targets.prometheus(
      datasource = ds.prometheus,
      // Aggregate by task revision: a bare `sum(...)` drops every label, which
      // would leave `{{aws_ecs_task_revision}}` in the legend empty and merge
      // all revisions into a single series.
      expr = 'sum by (aws_ecs_task_revision) (rate(postgres_queries_total[$__rate_interval]))',
      legendFormat = 'r{{aws_ecs_task_revision}}',
      exemplar = true,
      refId = 'PostgresQueryRateTotal',
    )),
}
8 changes: 8 additions & 0 deletions terraform/monitoring/panels/panels.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
local panels = (import '../grafonnet-lib/defaults.libsonnet').panels;

// Index of panel constructors, grouped by area. Each entry is the `new`
// function exported by the corresponding panel file.
{
  app: {
    local query_rate = import 'app/postgres_query_rate.libsonnet',
    local query_latency = import 'app/postgres_query_latency.libsonnet',

    postgres_query_rate: query_rate.new,
    postgres_query_latency: query_latency.new,
  },
}
5 changes: 5 additions & 0 deletions terraform/monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ variable "prometheus_workspace_id" {
variable "load_balancer_arn" {
type = string
}

# Notification channel entries for this module's dashboards.
# No default is declared, so the calling module must supply a value.
variable "notification_channels" {
  type        = list(any)
  description = "The notification channels to send alerts to"
}
9 changes: 9 additions & 0 deletions terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,12 @@ variable "relay_public_key" {
type = string
sensitive = true
}

#-------------------------------------------------------------------------------
# Alerting / Monitoring

# Root-level notification channel list. Defaults to an empty list when the
# variable is not set.
variable "notification_channels" {
  type        = list(any)
  default     = []
  description = "The notification channels to send alerts to"
}

0 comments on commit ed8043d

Please sign in to comment.