[!] add metrics for PostgreSQL 16 (#669)
kmoppel-cognite committed Aug 4, 2023
1 parent f3b8632 · commit 2d1f942
Showing 6 changed files with 557 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@

# pgwatch2

Flexible self-contained PostgreSQL metrics monitoring/dashboarding solution. Supports monitoring PG versions 9.0 to 15 out of the box.
Flexible self-contained PostgreSQL metrics monitoring/dashboarding solution. Supports monitoring PG versions 9.0 to 16 out of the box.

# Demo

103 changes: 103 additions & 0 deletions pgwatch2/metrics/index_stats/16/metric.sql
@@ -0,0 +1,103 @@
/* NB! Does not return stats for all indexes - only the biggest, most scanned, biggest unused and invalid ones */
WITH q_locked_rels AS ( /* pgwatch2_generated */
select relation from pg_locks where mode = 'AccessExclusiveLock'
),
q_index_details AS (
select
sui.schemaname,
sui.indexrelname,
sui.relname,
sui.indexrelid,
coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b,
sui.idx_scan,
sui.idx_tup_read,
sui.idx_tup_fetch,
io.idx_blks_read,
io.idx_blks_hit,
i.indisvalid,
i.indisprimary,
i.indisunique,
i.indisexclusion,
extract(epoch from now() - last_idx_scan)::int as last_idx_scan_s
from
pg_stat_user_indexes sui
join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid
join pg_index i on i.indexrelid = sui.indexrelid
where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%'])
and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid)
),
q_top_indexes AS (
/* biggest */
select *
from (
select indexrelid
from q_index_details
where idx_scan > 1
order by index_size_b desc
limit 200
) x
union
/* most block traffic */
select *
from (
select indexrelid
from q_index_details
order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc
limit 200
) y
union
/* most scans */
select *
from (
select indexrelid
from q_index_details
order by idx_scan desc nulls last
limit 200
) z
union
/* biggest unused non-constraint */
select *
from (
select q.indexrelid
from q_index_details q
where idx_scan = 0
and not (indisprimary or indisunique or indisexclusion)
order by index_size_b desc
limit 200
) z
union
/* all invalid */
select *
from (
select q.indexrelid
from q_index_details q
where not indisvalid
) zz
)
select /* pgwatch2_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
schemaname::text as tag_schema,
indexrelname::text as tag_index_name,
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name,
relname::text as tag_table_name,
quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name,
coalesce(idx_scan, 0) as idx_scan,
coalesce(idx_tup_read, 0) as idx_tup_read,
coalesce(idx_tup_fetch, 0) as idx_tup_fetch,
coalesce(index_size_b, 0) as index_size_b,
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val,
md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash,
regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def,
case when not indisvalid then 1 else 0 end as is_invalid_int,
case when indisprimary then 1 else 0 end as is_pk_int,
case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc,
system_identifier::text as tag_sys_id,
last_idx_scan_s
FROM
q_index_details id
JOIN
pg_control_system() ON true
WHERE
indexrelid IN (select indexrelid from q_top_indexes)
ORDER BY
id.schemaname, id.relname, id.indexrelname;
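
The last_idx_scan_s field above uses the last_idx_scan column that pg_stat_all_indexes / pg_stat_user_indexes gained in PostgreSQL 16. As a rough standalone sketch (not part of the commit; assumes a PG 16+ server, and the 30-day / 20-row thresholds are arbitrary), unused-looking indexes can be listed directly like this:

-- indexes with no scan recorded in the last 30 days (or never), biggest first
select schemaname, relname, indexrelname,
       pg_size_pretty(pg_relation_size(indexrelid)) as index_size,
       last_idx_scan
from pg_stat_user_indexes
where last_idx_scan is null
   or last_idx_scan < now() - interval '30 days'
order by pg_relation_size(indexrelid) desc
limit 20;
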
19 changes: 19 additions & 0 deletions pgwatch2/metrics/stat_io/16/metric.sql
@@ -0,0 +1,19 @@
SELECT /* pgwatch2_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
coalesce(backend_type, 'total') as tag_backend_type,
sum(coalesce(reads, 0))::int8 as reads,
(sum(coalesce(reads, 0) * op_bytes) / 1e6)::int8 as read_bytes_mb,
sum(coalesce(read_time, 0))::int8 as read_time_ms,
sum(coalesce(writes, 0))::int8 as writes,
(sum(coalesce(writes, 0) * op_bytes) / 1e6)::int8 as write_bytes_mb,
sum(coalesce(write_time, 0))::int8 as write_time_ms,
sum(coalesce(writebacks, 0))::int8 as writebacks,
(sum(coalesce(writebacks, 0) * op_bytes) / 1e6)::int8 as writeback_bytes_mb,
sum(coalesce(writeback_time, 0))::int8 as writeback_time_ms,
sum(coalesce(fsyncs, 0))::int8 fsyncs,
sum(coalesce(fsync_time, 0))::int8 fsync_time_ms,
max(extract(epoch from now() - stats_reset)::int) as stats_reset_s
FROM
pg_stat_io
GROUP BY
ROLLUP (backend_type);
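
pg_stat_io itself is new in PostgreSQL 16, and the GROUP BY ROLLUP above adds a 'total' row on top of the per-backend-type sums. A minimal sketch (not part of the commit; assumes PG 16+) for inspecting the raw rows behind the aggregation:

-- raw per backend_type / object / context rows that feed the metric above
select backend_type, object, context,
       reads, read_time, writes, write_time,
       writebacks, fsyncs, op_bytes, stats_reset
from pg_stat_io
where coalesce(reads, 0) + coalesce(writes, 0) > 0
order by backend_type, object, context;
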
2 changes: 2 additions & 0 deletions pgwatch2/metrics/stat_io/metric_attrs.yaml
@@ -0,0 +1,2 @@
---
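# pg_stat_io is cluster-wide, so this metric only needs to be collected once per instance rather than per monitored DB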
is_instance_level: true
140 changes: 140 additions & 0 deletions pgwatch2/metrics/table_stats/16/metric.sql
@@ -0,0 +1,140 @@
with recursive /* pgwatch2_generated */
q_root_part as (
select c.oid,
c.relkind,
n.nspname root_schema,
c.relname root_relname
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
where relkind in ('p', 'r')
and relpersistence != 't'
and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%'])
and not exists(select * from pg_inherits where inhrelid = c.oid)
and exists(select * from pg_inherits where inhparent = c.oid)
),
q_parts (relid, relkind, level, root) as (
select oid, relkind, 1, oid
from q_root_part
union all
select inhrelid, c.relkind, level + 1, q.root
from pg_inherits i
join q_parts q on inhparent = q.relid
join pg_class c on c.oid = i.inhrelid
),
q_tstats as (
select (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
relid, -- not sent to final output
quote_ident(schemaname) as tag_schema,
quote_ident(ut.relname) as tag_table_name,
quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name,
pg_table_size(relid) as table_size_b,
abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,..
pg_total_relation_size(relid) as total_relation_size_b,
case when c.reltoastrelid != 0 then pg_total_relation_size(c.reltoastrelid) else 0::int8 end as toast_size_b,
(extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum,
(extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze,
case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum,
seq_scan,
seq_tup_read,
coalesce(idx_scan, 0) as idx_scan,
coalesce(idx_tup_fetch, 0) as idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_tup_hot_upd,
n_live_tup,
n_dead_tup,
vacuum_count,
autovacuum_count,
analyze_count,
autoanalyze_count,
age(c.relfrozenxid) as tx_freeze_age,
extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s
from pg_stat_user_tables ut
join pg_class c on c.oid = ut.relid
left join pg_class t on t.oid = c.reltoastrelid
left join pg_index ti on ti.indrelid = t.oid
left join pg_class tir on tir.oid = ti.indexrelid
where
-- leaving out fully locked tables as pg_relation_size also wants a lock and would wait
not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock')
and c.relpersistence != 't' -- and temp tables
order by case when c.relkind = 'p' then 1e9::int else coalesce(c.relpages, 0) + coalesce(t.relpages, 0) + coalesce(tir.relpages, 0) end desc
limit 1500 /* NB! When changing the bottom final LIMIT also adjust this limit. Should be at least 5x bigger as approx sizes depend a lot on vacuum frequency.
The general idea is to reduce filesystem "stat"-ing on tables that won't make it to final output anyways based on approximate size */
)

select /* pgwatch2_generated */
epoch_ns,
tag_schema,
tag_table_name,
tag_table_full_name,
0 as is_part_root,
table_size_b,
tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,..
total_relation_size_b,
toast_size_b,
seconds_since_last_vacuum,
seconds_since_last_analyze,
no_autovacuum,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_tup_hot_upd,
n_live_tup,
n_dead_tup,
vacuum_count,
autovacuum_count,
analyze_count,
autoanalyze_count,
tx_freeze_age,
last_seq_scan_s
from q_tstats
where not tag_schema like E'\\_timescaledb%'
and not exists (select * from q_root_part where oid = q_tstats.relid)

union all

select * from (
select
epoch_ns,
quote_ident(qr.root_schema) as tag_schema,
quote_ident(qr.root_relname) as tag_table_name,
quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name,
1 as is_part_root,
sum(table_size_b)::int8 table_size_b,
abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)),
0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,..
sum(total_relation_size_b)::int8 total_relation_size_b,
sum(toast_size_b)::int8 toast_size_b,
min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum,
min(seconds_since_last_analyze)::int8 seconds_since_last_analyze,
sum(no_autovacuum)::int8 no_autovacuum,
sum(seq_scan)::int8 seq_scan,
sum(seq_tup_read)::int8 seq_tup_read,
sum(idx_scan)::int8 idx_scan,
sum(idx_tup_fetch)::int8 idx_tup_fetch,
sum(n_tup_ins)::int8 n_tup_ins,
sum(n_tup_upd)::int8 n_tup_upd,
sum(n_tup_del)::int8 n_tup_del,
sum(n_tup_hot_upd)::int8 n_tup_hot_upd,
sum(n_live_tup)::int8 n_live_tup,
sum(n_dead_tup)::int8 n_dead_tup,
sum(vacuum_count)::int8 vacuum_count,
sum(autovacuum_count)::int8 autovacuum_count,
sum(analyze_count)::int8 analyze_count,
sum(autoanalyze_count)::int8 autoanalyze_count,
max(tx_freeze_age)::int8 tx_freeze_age,
min(last_seq_scan_s)::int8 last_seq_scan_s
from
q_tstats ts
join q_parts qp on qp.relid = ts.relid
join q_root_part qr on qr.oid = qp.root
group by
1, 2, 3, 4
) x
order by table_size_b desc nulls last limit 300;
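
Analogously to the index metric, last_seq_scan_s relies on the last_seq_scan column added to pg_stat_all_tables / pg_stat_user_tables in PostgreSQL 16. A minimal standalone sketch (not part of the commit; assumes PG 16+, with arbitrary 100 MB / 1-day / 20-row thresholds) for spotting larger tables that are still being sequentially scanned:

-- tables over ~100 MB that got a sequential scan within the last day
select schemaname, relname, seq_scan, last_seq_scan,
       pg_size_pretty(pg_table_size(relid)) as table_size
from pg_stat_user_tables
where last_seq_scan > now() - interval '1 day'
  and pg_table_size(relid) > 100 * 1024 * 1024
order by pg_table_size(relid) desc
limit 20;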
