Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ workflows:
jobs:
lint:
docker:
- image: grafana/cortex-jsonnet-build-image:e19ece2
- image: grafana/cortex-jsonnet-build-image:3527936
steps:
- checkout
- run:
name: "Check white noise"
command: make check-white-noise
- run:
name: "Lint mixin"
command: make lint-mixin
Expand All @@ -23,7 +26,7 @@ jobs:

build:
docker:
- image: grafana/cortex-jsonnet-build-image:e19ece2
- image: grafana/cortex-jsonnet-build-image:3527936
steps:
- checkout
- run: make build-mixin
Expand All @@ -32,7 +35,7 @@ jobs:

test-readme:
docker:
- image: grafana/cortex-jsonnet-build-image:e19ece2
- image: grafana/cortex-jsonnet-build-image:3527936
steps:
- checkout
- run: make test-readme
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## master / unreleased

* [ENHANCEMENT] Cortex-mixin: Include `cortex-gw-internal` naming variation in default `gateway` job names. #328
* [ENHANCEMENT] Cortex-mixin: Include `cortex-gw-internal` naming variation in default `gateway` job names. #328
* [CHANGE] `namespace` template variable in dashboards now only selects namespaces for selected clusters. #311
* [CHANGE] Alertmanager: mounted overrides configmap to alertmanager too. #315
* [CHANGE] Memcached: upgraded memcached from `1.5.17` to `1.6.9`. #316
Expand Down
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

JSONNET_FMT := jsonnetfmt

# Prefer the GNU tools that brew installs on OSX under the names gsed/gfind.
# Where those are absent (e.g. on Linux) fall back to the stock sed/find.
# A value already supplied by the caller (environment or command line) is kept;
# the lookup stays lazy, so `which` only runs when the variable is expanded.
ifeq ($(origin SED),undefined)
SED = $(shell which gsed 2>/dev/null || which sed)
endif
ifeq ($(origin FIND),undefined)
FIND = $(shell which gfind 2>/dev/null || which find)
endif

lint: lint-mixin lint-playbooks

lint-mixin: lint-mixin-with-mixtool lint-mixin-with-jsonnetfmt
Expand Down Expand Up @@ -50,3 +55,10 @@ test-readme:
cp -r ../cortex ./vendor/ && \
cp vendor/cortex/cortex-manifests.jsonnet.example environments/default/main.jsonnet && \
PAGER=cat tk show environments/default

# Strip trailing whitespace, in place, from every Markdown (*.md) and
# *.libsonnet file below the current directory.
#
# - The regex anchors on a literal dot so that only real extensions match
#   (a file merely ending in "md", such as "cmd", is left alone).
# - File names are handed over NUL-separated (-print0 / xargs -0) so paths
#   containing spaces or quotes reach the script intact.
# - -regextype is a GNU find option, hence $(FIND) rather than a bare `find`.
# - The sed binary to use is passed to the script through SED_BIN.
.PHONY: clean-white-noise
clean-white-noise:
	@$(FIND) . -type f -regextype posix-extended -regex '.*\.(md|libsonnet)' -print0 | \
		SED_BIN="$(SED)" xargs -0 ./scripts/cleanup-white-noise.sh

# Run the cleanup, then fail if the working tree differs from the git index.
# Note that any unstaged modification trips this check, not only the
# whitespace fixes made by clean-white-noise.
.PHONY: check-white-noise
check-white-noise: clean-white-noise
	@git diff --exit-code --quiet || (echo "Please remove trailing whitespaces running 'make clean-white-noise'" && false)
2 changes: 1 addition & 1 deletion build-image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ FROM golang:1.15-alpine AS mixtool-builder
RUN GO111MODULE=on go get github.com/monitoring-mixins/mixtool/cmd/mixtool@ae18e31161ea10545b9c1ac0d23c10122f2c12b5

FROM alpine:3.13
RUN apk add --no-cache git make libgcc libstdc++ zip
RUN apk add --no-cache git make libgcc libstdc++ zip findutils sed
COPY --from=jsonnet-builder /usr/bin/jsonnetfmt /usr/bin
COPY --from=jsonnet-builder /usr/bin/jsonnet /usr/bin
COPY --from=jb-builder /usr/bin/jb /usr/bin
Expand Down
2 changes: 1 addition & 1 deletion cortex-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@
container_memory_working_set_bytes{container="etcd"}
/
container_spec_memory_limit_bytes{container="etcd"}
) > 0.65
) > 0.65
|||,
'for': '15m',
labels: {
Expand Down
18 changes: 9 additions & 9 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
name="%(cacheName)s"
}[$__rate_interval]
)
)
/
)
/
sum(
rate(
thanos_cache_memcached_requests_total{
Expand All @@ -405,20 +405,20 @@ local utils = import 'mixin-utils/utils.libsonnet';
ignoring(%s) group_right() (
label_replace(
count by(
%s,
%s,
%s,
%s,
device
)
)
(
container_fs_writes_bytes_total{
%s,
container="%s",
device!~".*sda.*"
}
),
"device",
"$1",
"device",
),
"device",
"$1",
"device",
"/dev/(.*)"
) * 0
)
Expand Down
26 changes: 13 additions & 13 deletions cortex-mixin/dashboards/reads.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ local utils = import 'mixin-utils/utils.libsonnet';
Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).
<br/>
For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.
</p>
</p>
<p>
The dashboard also shows metrics for the 4 optional caches that can be deployed with Cortex:
the query results cache, the metadata cache, the chunks cache, and the index cache.
The dashboard also shows metrics for the 4 optional caches that can be deployed with Cortex:
the query results cache, the metadata cache, the chunks cache, and the index cache.
<br/>
These panels will show “no data” if the caches are not deployed.
These panels will show “no data” if the caches are not deployed.
</p>
<p>
Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.
Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.
</p>
|||),
)
Expand All @@ -45,7 +45,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
route=~"(prometheus|api_prom)_api_v1_query"
}[$__rate_interval]
)
) +
) +
sum(
rate(
cortex_prometheus_rule_evaluations_total{
Expand All @@ -61,7 +61,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
'Instant queries per second',
|||
Rate of instant queries per second being made to the system.
Includes both queries made to the <tt>/prometheus</tt> API as
Includes both queries made to the <tt>/prometheus</tt> API as
well as queries from the ruler.
|||
),
Expand All @@ -83,8 +83,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'Range queries per second',
|||
Rate of range queries per second being made to
Cortex via the <tt>/prometheus</tt> API.
Rate of range queries per second being made to
Cortex via the <tt>/prometheus</tt> API.
|||
),
)
Expand Down Expand Up @@ -135,7 +135,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
The query scheduler is an optional service that moves
the internal queue from the query-frontend into a
separate component.
If this service is not deployed,
If this service is not deployed,
these panels will show "No data."
</p>
|||
Expand Down Expand Up @@ -286,8 +286,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
%s
}[$__rate_interval]
)
)
/
)
/
sum by(item_type) (
rate(
thanos_store_index_cache_requests_total{
Expand All @@ -307,7 +307,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
'Hit Ratio',
|||
Even if you do not set up memcached for the blocks index cache, you will still see data in this panel because Cortex by default has an
in-memory blocks index cache.
in-memory blocks index cache.
|||
),
)
Expand Down
20 changes: 10 additions & 10 deletions cortex-mixin/dashboards/writes.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.textPanel('', |||
<p>
This dashboard shows various health metrics for the Cortex write path.
It is broken into sections for each service on the write path,
It is broken into sections for each service on the write path,
and organized by the order in which the write request flows.
<br/>
Incoming metrics data travels from the gateway → distributor → ingester.
<br/>
For each service, there are 3 panels showing
(1) requests per second to that service,
(2) average, median, and p99 latency of requests to that service, and
(1) requests per second to that service,
(2) average, median, and p99 latency of requests to that service, and
(3) p99 latency of requests to each instance of that service.
</p>
</p>
<p>
It also includes metrics for the key-value (KV) stores used to manage
the high-availability tracker and the ingesters.
Expand Down Expand Up @@ -216,7 +216,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'Uploaded blocks / sec',
|||
The rate of blocks being uploaded from the ingesters
The rate of blocks being uploaded from the ingesters
to object storage.
|||
),
Expand All @@ -227,7 +227,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'Upload latency',
|||
The average, median (50th percentile), and 99th percentile time
The average, median (50th percentile), and 99th percentile time
the ingesters take to upload blocks to object storage.
|||
),
Expand All @@ -247,7 +247,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
|||
Ingesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each
active time series; these blocks get periodically compacted (by default, every 2h).
This panel shows the rate of compaction operations across all TSDBs on all ingesters.
This panel shows the rate of compaction operations across all TSDBs on all ingesters.
|||
),
)
Expand Down Expand Up @@ -275,7 +275,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'WAL truncations per second',
|||
The WAL is truncated each time a new TSDB block is written. This panel measures the rate of
The WAL is truncated each time a new TSDB block is written. This panel measures the rate of
truncations.
|||
),
Expand All @@ -289,7 +289,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'Checkpoints created per second',
|||
Checkpoints are created as part of the WAL truncation process.
Checkpoints are created as part of the WAL truncation process.
This metric measures the rate of checkpoint creation.
|||
),
Expand All @@ -301,7 +301,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.panelDescription(
'WAL truncations latency (including checkpointing)',
|||
Average time taken to perform a full WAL truncation,
Average time taken to perform a full WAL truncation,
including the time taken for the checkpointing to complete.
|||
),
Expand Down
6 changes: 3 additions & 3 deletions cortex-mixin/docs/playbooks.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ In events you're looking for things like:
```
57m Normal NodeControllerEviction Pod Marking for deletion Pod ingester-01 from Node cloud-provider-node-01
37m Normal SuccessfulDelete ReplicaSet (combined from similar events): Deleted pod: ingester-01
32m Normal NodeNotReady Node Node cloud-provider-node-01 status is now: NodeNotReady
32m Normal NodeNotReady Node Node cloud-provider-node-01 status is now: NodeNotReady
28m Normal DeletingAllPods Node Node cloud-provider-node-01 event: Deleting all Pods from Node cloud-provider-node-01.
```

Expand Down Expand Up @@ -313,7 +313,7 @@ gsutil mv gs://BUCKET/TENANT/BLOCK gs://BUCKET/TENANT/corrupted-BLOCK

### CortexBucketIndexNotUpdated

This alert fires when the bucket index, for a given tenant, is not updated since a long time. The bucket index is expected to be periodically updated by the compactor and is used by queriers and store-gateways to get an almost-updated view over the bucket store.
This alert fires when the bucket index, for a given tenant, is not updated since a long time. The bucket index is expected to be periodically updated by the compactor and is used by queriers and store-gateways to get an almost-updated view over the bucket store.

How to **investigate**:
- Ensure the compactor is successfully running
Expand Down Expand Up @@ -557,7 +557,7 @@ metadata:
spec:
accessModes:
- ReadWriteOnce
capacity:
capacity:
storage: 150Gi
gcePersistentDisk:
fsType: ext4
Expand Down
2 changes: 1 addition & 1 deletion cortex-mixin/groups.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
if alert_aggregation_labels_override != null
then std.trace(
|||
Deprecated: _config.alert_aggregation_labels
Deprecated: _config.alert_aggregation_labels
This field has been explicitly overridden to "%s".
Instead, express the override in terms of _config.cluster_labels.
E.g., cluster_labels: %s will automatically convert to "%s".
Expand Down
4 changes: 4 additions & 0 deletions scripts/cleanup-white-noise.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Strip trailing spaces and tabs, in place, from every file given as an argument.
#
# Usage: cleanup-white-noise.sh FILE...
#
# Environment:
#   SED_BIN  Path of the sed binary to run (default: "sed"). It is invoked as
#            `sed -i SCRIPT FILE...`, i.e. it must accept -i without a backup
#            suffix argument, as GNU sed does.
SED_BIN=${SED_BIN:-sed}

# With no files there is nothing to clean; `sed -i` would otherwise fail
# complaining about missing input files.
[ "$#" -gt 0 ] || exit 0

# Use a real tab character in the bracket expression: "\t" inside [...] is only
# understood by GNU sed, other implementations treat it as a backslash and "t".
TAB=$(printf '\t')

# SED_BIN is quoted so that a binary path containing spaces still works.
"${SED_BIN}" -i "s/[ ${TAB}]*\$//" "$@"