Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add locking mechanism for tenant monitoring probabilistic approach #7026

Merged
merged 19 commits into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions src/backend/distributed/shared_library_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -2472,17 +2472,6 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);


DefineCustomIntVariable(
"citus.stat_tenants_sample_rate_for_new_tenants",
gettext_noop("Sampling rate for new tenants in citus_stat_tenants."),
NULL,
&StatTenantsSampleRateForNewTenants,
100, 1, 100,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);

DefineCustomEnumVariable(
"citus.stat_tenants_track",
gettext_noop("Enables/Disables the stats collection for citus_stat_tenants."),
Expand All @@ -2496,6 +2485,16 @@ RegisterCitusConfigVariables(void)
GUC_STANDARD,
NULL, NULL, NULL);

DefineCustomRealVariable(
"citus.stat_tenants_untracked_sample_rate",
gettext_noop("Sampling rate for new tenants in citus_stat_tenants."),
NULL,
&StatTenantsSampleRateForNewTenants,
1, 0, 1,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);

DefineCustomBoolVariable(
"citus.subquery_pushdown",
gettext_noop("Usage of this GUC is highly discouraged, please read the long "
Expand Down
22 changes: 19 additions & 3 deletions src/backend/distributed/utils/citus_stat_tenants.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@

#include <time.h>

#if (PG_VERSION_NUM >= PG_VERSION_15)
#include "common/pg_prng.h"
#endif

static void AttributeMetricsIfApplicable(void);

ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
Expand Down Expand Up @@ -80,7 +84,7 @@ int StatTenantsLogLevel = CITUS_LOG_LEVEL_OFF;
int StatTenantsPeriod = (time_t) 60;
int StatTenantsLimit = 100;
int StatTenantsTrack = STAT_TENANTS_TRACK_NONE;
int StatTenantsSampleRateForNewTenants = 100;
double StatTenantsSampleRateForNewTenants = 1;

PG_FUNCTION_INFO_V1(citus_stat_tenants_local);
PG_FUNCTION_INFO_V1(citus_stat_tenants_local_reset);
Expand Down Expand Up @@ -281,13 +285,25 @@ AttributeTask(char *tenantId, int colocationId, CmdType commandType)

MultiTenantMonitor *monitor = GetMultiTenantMonitor();
bool found = false;

/* Acquire the lock in shared mode to check if the tenant is already in the hash table. */
LWLockAcquire(&monitor->lock, LW_SHARED);

hash_search(monitor->tenants, &key, HASH_FIND, &found);

LWLockRelease(&monitor->lock);

/* If the tenant is not in the hash table yet, sample it: track this query with probability StatTenantsSampleRateForNewTenants (a value in [0, 1]); otherwise skip it. */
if (!found)
{
int randomValue = rand() % 100;
bool shouldTrackQuery = randomValue < StatTenantsSampleRateForNewTenants;
#if (PG_VERSION_NUM >= PG_VERSION_15)
double randomValue = pg_prng_double(&pg_global_prng_state);
#else

/* Generate a random double between 0 and 1 */
double randomValue = (double) random() / MAX_RANDOM_VALUE;
#endif
bool shouldTrackQuery = randomValue <= StatTenantsSampleRateForNewTenants;
if (!shouldTrackQuery)
{
return;
Expand Down
2 changes: 1 addition & 1 deletion src/include/distributed/utils/citus_stat_tenants.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,6 @@ extern int StatTenantsLogLevel;
extern int StatTenantsPeriod;
extern int StatTenantsLimit;
extern int StatTenantsTrack;
extern int StatTenantsSampleRateForNewTenants;
extern double StatTenantsSampleRateForNewTenants;

#endif /*CITUS_ATTRIBUTE_H */
77 changes: 77 additions & 0 deletions src/test/regress/expected/citus_stat_tenants.out
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,83 @@ SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, q

\c - - - :master_port
SET search_path TO citus_stat_tenants;
SET citus.enable_schema_based_sharding TO OFF;
SELECT citus_stat_tenants_reset();
citus_stat_tenants_reset
---------------------------------------------------------------------

(1 row)

-- test sampling
-- set rate to 0 to disable sampling
SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM set citus.stat_tenants_untracked_sample_rate to 0;');
result
---------------------------------------------------------------------
ALTER SYSTEM
ALTER SYSTEM
ALTER SYSTEM
(3 rows)

SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
result
---------------------------------------------------------------------
t
t
t
(3 rows)

INSERT INTO dist_tbl VALUES (1, 'abcd');
INSERT INTO dist_tbl VALUES (2, 'abcd');
UPDATE dist_tbl SET b = a + 1 WHERE a = 3;
UPDATE dist_tbl SET b = a + 1 WHERE a = 4;
DELETE FROM dist_tbl WHERE a = 5;
SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stat_tenants ORDER BY tenant_attribute;
tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period
---------------------------------------------------------------------
(0 rows)

-- test sampling
-- set rate to 1 to track all tenants
SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM set citus.stat_tenants_untracked_sample_rate to 1;');
result
---------------------------------------------------------------------
ALTER SYSTEM
ALTER SYSTEM
ALTER SYSTEM
(3 rows)

SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
result
---------------------------------------------------------------------
t
t
t
(3 rows)

SELECT sleep_until_next_period();
sleep_until_next_period
---------------------------------------------------------------------

(1 row)

INSERT INTO dist_tbl VALUES (1, 'abcd');
INSERT INTO dist_tbl VALUES (2, 'abcd');
UPDATE dist_tbl SET b = a + 1 WHERE a = 3;
UPDATE dist_tbl SET b = a + 1 WHERE a = 4;
DELETE FROM dist_tbl WHERE a = 5;
SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period,
(cpu_usage_in_this_period>0) AS cpu_is_used_in_this_period, (cpu_usage_in_last_period>0) AS cpu_is_used_in_last_period
FROM citus_stat_tenants(true)
ORDER BY tenant_attribute;
tenant_attribute | read_count_in_this_period | read_count_in_last_period | query_count_in_this_period | query_count_in_last_period | cpu_is_used_in_this_period | cpu_is_used_in_last_period
---------------------------------------------------------------------
1 | 0 | 0 | 1 | 0 | t | f
2 | 0 | 0 | 1 | 0 | t | f
3 | 0 | 0 | 1 | 0 | t | f
4 | 0 | 0 | 1 | 0 | t | f
5 | 0 | 0 | 1 | 0 | t | f
(5 rows)

SET client_min_messages TO ERROR;
DROP SCHEMA citus_stat_tenants CASCADE;
DROP SCHEMA citus_stat_tenants_t1 CASCADE;
35 changes: 35 additions & 0 deletions src/test/regress/sql/citus_stat_tenants.sql
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,41 @@ SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, q
\c - - - :master_port
SET search_path TO citus_stat_tenants;

SET citus.enable_schema_based_sharding TO OFF;

SELECT citus_stat_tenants_reset();

-- test sampling
-- set rate to 0 to disable sampling
SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM set citus.stat_tenants_untracked_sample_rate to 0;');
SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()');

INSERT INTO dist_tbl VALUES (1, 'abcd');
INSERT INTO dist_tbl VALUES (2, 'abcd');
UPDATE dist_tbl SET b = a + 1 WHERE a = 3;
UPDATE dist_tbl SET b = a + 1 WHERE a = 4;
DELETE FROM dist_tbl WHERE a = 5;

SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period FROM citus_stat_tenants ORDER BY tenant_attribute;

-- test sampling
-- set rate to 1 to track all tenants
SELECT result FROM run_command_on_all_nodes('ALTER SYSTEM set citus.stat_tenants_untracked_sample_rate to 1;');
SELECT result FROM run_command_on_all_nodes('SELECT pg_reload_conf()');

SELECT sleep_until_next_period();

INSERT INTO dist_tbl VALUES (1, 'abcd');
INSERT INTO dist_tbl VALUES (2, 'abcd');
UPDATE dist_tbl SET b = a + 1 WHERE a = 3;
UPDATE dist_tbl SET b = a + 1 WHERE a = 4;
DELETE FROM dist_tbl WHERE a = 5;

SELECT tenant_attribute, read_count_in_this_period, read_count_in_last_period, query_count_in_this_period, query_count_in_last_period,
(cpu_usage_in_this_period>0) AS cpu_is_used_in_this_period, (cpu_usage_in_last_period>0) AS cpu_is_used_in_last_period
FROM citus_stat_tenants(true)
ORDER BY tenant_attribute;

SET client_min_messages TO ERROR;
DROP SCHEMA citus_stat_tenants CASCADE;
DROP SCHEMA citus_stat_tenants_t1 CASCADE;