Skip to content

Commit

Permalink
No longer use the "user_catalog_table" storage option.
Browse files Browse the repository at this point in the history
Now that CreateInitDecodingContext() accepts "need_full_snapshot" argument, we
no longer need to (mis)use the "user_catalog_table" storage option to protect
the table being squeezed against VACUUM.

While getting rid of the "user_catalog_table" option, also removed the code
that deals with "autovacuum_enabled". That wasn't very useful because the
window between disabling the autovacuum and starting the squeeze is usually
very short, so the chance that the setting it avoids concurrent autovacuum is
rather low. And since we never used any other storage options than these two,
the removal of "autovacuum_enabled" allows for further simplification of the
code.
  • Loading branch information
Antonin Houska committed Apr 9, 2018
1 parent e31d434 commit db733c2
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 427 deletions.
4 changes: 2 additions & 2 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ Following is the complete description of table metadata.
the first processing of the corresponding task failed. Typical reason to
retry the processing is that table definition got changed while the table
was being squeezed. If the number of retries is achieved, processing of the
table is considered complete. The next task is created as soon as time
specified by "task_interval" column of "squeeze.tables" table has elapsed.
table is considered complete. The next task is created as soon as the next
scheduled time is reached.

The default value of "max_retry" is 0 (i.e. do not retry).

Expand Down
26 changes: 3 additions & 23 deletions expected/squeeze.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,7 @@ INSERT INTO a(i, j)
SELECT x, x
FROM generate_series(1, 10) AS g(x);
-- The trivial case.
SELECT squeeze.set_reloptions('public', 'a', true, false, false);
set_reloptions
----------------

(1 row)

SELECT squeeze.squeeze_table('public', 'a', NULL, NULL, NULL, false, false);
SELECT squeeze.squeeze_table('public', 'a', NULL, NULL, NULL);
squeeze_table
---------------

Expand All @@ -33,14 +27,7 @@ SELECT * FROM a;

-- Clustering by index.
CREATE INDEX a_i_idx_desc ON a(i DESC);
SELECT squeeze.set_reloptions('public', 'a', true, false, false);
set_reloptions
----------------

(1 row)

SELECT squeeze.squeeze_table('public', 'a', 'a_i_idx_desc', NULL, NULL, false,
false);
SELECT squeeze.squeeze_table('public', 'a', 'a_i_idx_desc', NULL, NULL);
squeeze_table
---------------

Expand Down Expand Up @@ -76,14 +63,7 @@ SELECT reltoastrelid > 0 FROM pg_class WHERE relname='b';
CREATE TABLE b_copy (LIKE b INCLUDING ALL);
INSERT INTO b_copy(i, t) SELECT i, t FROM b;
-- Squeeze.
SELECT squeeze.set_reloptions('public', 'b', true, false, false);
set_reloptions
----------------

(1 row)

SELECT squeeze.squeeze_table('public', 'b', NULL, NULL, NULL, false,
false);
SELECT squeeze.squeeze_table('public', 'b', NULL, NULL, NULL);
squeeze_table
---------------

Expand Down
150 changes: 146 additions & 4 deletions pg_squeeze--1.0--1.1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ ALTER TABLE tables ALTER COLUMN schedule SET NOT NULL;
ALTER TABLE tables DROP COLUMN first_check;
ALTER TABLE tables DROP COLUMN task_interval;

ALTER TABLE tasks DROP COLUMN autovac;
ALTER TABLE tasks DROP COLUMN autovac_toast;

-- No longer needed.
DROP FUNCTION is_autovacuum_enabled(oid);

-- Reflect the changes of both "tables" and "tasks" tables.
CREATE OR REPLACE FUNCTION add_new_tasks() RETURNS void
LANGUAGE sql
AS $$
Expand Down Expand Up @@ -47,7 +53,7 @@ AS $$
u.s <= now()::timetz AND
-- ... and it should be one for which no
-- task was created yet.
(u.s > i.last_task_created::timetz OR
(u.s > i.last_task_created::timetz OR
i.last_task_created ISNULL)
)
-- Ignore tables for which a task currently exists.
Expand Down Expand Up @@ -102,9 +108,145 @@ AS $$

-- now() is supposed to return the same value as it did in the previous
-- query.
INSERT INTO squeeze.tasks(table_id, autovac, autovac_toast)
SELECT table_id, squeeze.is_autovacuum_enabled(i.class_id),
squeeze.is_autovacuum_enabled(i.class_id_toast)
INSERT INTO squeeze.tasks(table_id)
SELECT table_id
FROM squeeze.tables_internal i
WHERE i.last_task_created = now();
$$;

-- No longer used.
DROP FUNCTION set_reloptions(name, name, bool, bool, bool);

-- start_next_task() used to call set_reloptions(), so modify it accordingly.
DROP FUNCTION start_next_task();

CREATE OR REPLACE FUNCTION start_next_task()
RETURNS void
LANGUAGE plpgsql
AS $$
DECLARE
v_tabschema name;
v_tabname name;
BEGIN
PERFORM
FROM squeeze.tasks WHERE active;
IF FOUND THEN
RETURN;
END IF;

UPDATE squeeze.tasks t
INTO v_tabschema, v_tabname
SET active = true
FROM squeeze.tables tb
WHERE
tb.id = t.table_id AND
t.id = (SELECT id FROM squeeze.tasks ORDER BY id LIMIT 1)
RETURNING tb.tabschema, tb.tabname;

IF NOT FOUND THEN
RETURN;
END IF;
END;
$$;


-- process_current_task() used to call set_reloptions(), so modify it
-- accordingly.
DROP FUNCTION process_current_task()

CREATE OR REPLACE FUNCTION process_current_task()
RETURNS void
LANGUAGE plpgsql
AS $$
DECLARE
v_tabschema name;
v_tabname name;
v_cl_index name;
v_rel_tbsp name;
v_ind_tbsps name[][];
v_task_id int;
v_tried int;
v_last_try bool;
v_skip_analyze bool;
v_stmt text;
v_start timestamptz;

-- Error info to be logged.
v_sql_state text;
v_err_msg text;
v_err_detail text;
BEGIN
SELECT tb.tabschema, tb.tabname, tb.clustering_index,
tb.rel_tablespace, tb.ind_tablespaces, t.id, t.tried,
t.tried >= tb.max_retry, tb.skip_analyze
INTO v_tabschema, v_tabname, v_cl_index, v_rel_tbsp, v_ind_tbsps,
v_task_id, v_tried, v_last_try, v_skip_analyze
FROM squeeze.tasks t, squeeze.tables tb
WHERE t.table_id = tb.id AND t.active;

IF NOT FOUND THEN
-- Unexpected deletion by someone else?
RETURN;
END IF;

-- Do the actual work.
BEGIN
v_start := clock_timestamp();

-- Do the actual processing.
--
-- If someone dropped the table in between, the exception
-- handler below should log the error and cleanup the task.
PERFORM squeeze.squeeze_table(v_tabschema, v_tabname,
v_cl_index, v_rel_tbsp, v_ind_tbsps);

INSERT INTO squeeze.log(tabschema, tabname, started, finished)
VALUES (v_tabschema, v_tabname, v_start, clock_timestamp());

PERFORM squeeze.cleanup_task(v_task_id);

IF NOT v_skip_analyze THEN
-- Analyze the new table, unless user rejects it
-- explicitly.
--
-- XXX Besides updating planner statistics in general,
-- this sets pg_class(relallvisible) to 0, so that
-- planner is not too optimistic about this
-- figure. The preferrable solution would be to run
-- (lazy) VACUUM (with the ANALYZE option) to
-- initialize visibility map. However, to make the
-- effort worthwile, we shouldn't do it until all
-- transactions can see all the changes done by
-- squeeze_table() function. What's the most suitable
-- way to wait? Asynchronous execution of the VACUUM
-- is probably needed in any case.
v_stmt := 'ANALYZE "' || v_tabschema || '"."' ||
v_tabname || '"';

EXECUTE v_stmt;
END IF;
EXCEPTION
WHEN OTHERS THEN
GET STACKED DIAGNOSTICS v_sql_state := RETURNED_SQLSTATE;
GET STACKED DIAGNOSTICS v_err_msg := MESSAGE_TEXT;
GET STACKED DIAGNOSTICS v_err_detail := PG_EXCEPTION_DETAIL;

INSERT INTO squeeze.errors(tabschema, tabname,
sql_state, err_msg, err_detail)
VALUES (v_tabschema, v_tabname, v_sql_state, v_err_msg,
v_err_detail);

-- If the active task failed too many times, delete
-- it. start_next_task() will prepare the next one.
IF v_last_try THEN
PERFORM squeeze.cleanup_task(v_task_id);
RETURN;
ELSE
-- Account for the current attempt.
UPDATE squeeze.tasks
SET tried = tried + 1
WHERE id = v_task_id;
END IF;
END;
END;
$$;
73 changes: 7 additions & 66 deletions pg_squeeze--1.1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,7 @@ CREATE TABLE tasks (

-- How many times did we try to process the task? The common use case
-- is that a concurrent DDL broke the processing.
tried int NOT NULL DEFAULT 0,

-- The initial value of "autovacuum_enabled" relation option is stored
-- here, and will be restored when we're done.
autovac bool NOT NULL,
autovac_toast bool NOT NULL
tried int NOT NULL DEFAULT 0
);

-- Make sure there is at most one active task anytime.
Expand Down Expand Up @@ -202,13 +197,6 @@ COMMENT ON COLUMN errors.err_msg IS
COMMENT ON COLUMN errors.err_detail IS
'Detailed error message, if available.';

CREATE FUNCTION is_autovacuum_enabled (
relid oid
)
RETURNS bool
AS 'MODULE_PATHNAME', 'is_autovacuum_enabled'
LANGUAGE C;

CREATE FUNCTION get_heap_fillfactor(a_relid oid)
RETURNS int
AS 'MODULE_PATHNAME', 'get_heap_fillfactor'
Expand Down Expand Up @@ -334,9 +322,8 @@ AS $$

-- now() is supposed to return the same value as it did in the previous
-- query.
INSERT INTO squeeze.tasks(table_id, autovac, autovac_toast)
SELECT table_id, squeeze.is_autovacuum_enabled(i.class_id),
squeeze.is_autovacuum_enabled(i.class_id_toast)
INSERT INTO squeeze.tasks(table_id)
SELECT table_id
FROM squeeze.tables_internal i
WHERE i.last_task_created = now();
$$;
Expand Down Expand Up @@ -368,21 +355,6 @@ BEGIN
IF NOT FOUND THEN
RETURN;
END IF;

-- squeeze_table() function requires the "user_catalog_option" to be
-- set, but cannot do it in its own transaction. So do it now.
--
-- Also disable autovacuum for the table and its TOAST relation. As
-- we're gonna "squeeze" the table, VACUUM no longer makes sense.
--
-- (The join to pg_class and pg_namespace should ensure that we don't
-- cause ERROR if someone happened to drop the table w/o unregistering
-- it firs.)
PERFORM squeeze.set_reloptions(v_tabschema, v_tabname, true, false,
false)
FROM pg_class c, pg_namespace n
WHERE c.relname = v_tabname AND c.relnamespace = n.oid AND
n.nspname = v_tabschema;
END;
$$;

Expand Down Expand Up @@ -418,8 +390,6 @@ DECLARE
v_ind_tbsps name[][];
v_task_id int;
v_tried int;
v_autovac bool;
v_autovac_toast bool;
v_last_try bool;
v_skip_analyze bool;
v_stmt text;
Expand All @@ -432,9 +402,9 @@ DECLARE
BEGIN
SELECT tb.tabschema, tb.tabname, tb.clustering_index,
tb.rel_tablespace, tb.ind_tablespaces, t.id, t.tried,
t.tried >= tb.max_retry, tb.skip_analyze, t.autovac, t.autovac_toast
t.tried >= tb.max_retry, tb.skip_analyze
INTO v_tabschema, v_tabname, v_cl_index, v_rel_tbsp, v_ind_tbsps,
v_task_id, v_tried, v_last_try, v_skip_analyze, v_autovac, v_autovac_toast
v_task_id, v_tried, v_last_try, v_skip_analyze
FROM squeeze.tasks t, squeeze.tables tb
WHERE t.table_id = tb.id AND t.active;

Expand All @@ -452,7 +422,7 @@ t.tried >= tb.max_retry, tb.skip_analyze, t.autovac, t.autovac_toast
-- If someone dropped the table in between, the exception
-- handler below should log the error and cleanup the task.
PERFORM squeeze.squeeze_table(v_tabschema, v_tabname,
v_cl_index, v_rel_tbsp, v_ind_tbsps, v_autovac, v_autovac_toast);
v_cl_index, v_rel_tbsp, v_ind_tbsps);

INSERT INTO squeeze.log(tabschema, tabname, started, finished)
VALUES (v_tabschema, v_tabname, v_start, clock_timestamp());
Expand Down Expand Up @@ -494,22 +464,6 @@ t.tried >= tb.max_retry, tb.skip_analyze, t.autovac, t.autovac_toast
-- it. start_next_task() will prepare the next one.
IF v_last_try THEN
PERFORM squeeze.cleanup_task(v_task_id);

-- squeeze_table() resets the options on
-- successful completion, but here we must do
-- it explicitly on error.
--
-- (Join to catalog tables should ensure that
-- no ERROR is raised here if the table was
-- dropped recently.)
PERFORM squeeze.set_reloptions(v_tabschema,
v_tabname, false, v_autovac,
v_autovac_toast)
FROM pg_class c, pg_namespace n
WHERE c.relname = v_tabname AND
c.relnamespace = n.oid AND
n.nspname = v_tabschema;

RETURN;
ELSE
-- Account for the current attempt.
Expand All @@ -521,25 +475,12 @@ t.tried >= tb.max_retry, tb.skip_analyze, t.autovac, t.autovac_toast
END;
$$;

CREATE FUNCTION set_reloptions (
tabchema name,
tabname name,
user_cat bool,
autovac bool,
autovac_toast bool
)
RETURNS void
AS 'MODULE_PATHNAME', 'set_reloptions'
LANGUAGE C;

CREATE FUNCTION squeeze_table(
tabchema name,
tabname name,
clustering_index name,
rel_tablespace name,
ind_tablespaces name[][],
autovac bool,
autovac_toast bool)
ind_tablespaces name[][])
RETURNS void
AS 'MODULE_PATHNAME', 'squeeze_table'
LANGUAGE C;
Expand Down
Loading

0 comments on commit db733c2

Please sign in to comment.