diff --git a/README.md b/README.md index b28bda9..2672b5f 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ clickhouse: binlog_replicator: data_dir: '/home/user/binlog/' records_per_file: 100000 + binlog_retention_period: 43200 # optional, how long (in seconds) to keep binlog files, default 43200 (12 hours) databases: 'database_name_pattern_*' tables: '*' @@ -196,6 +197,7 @@ types_mapping: # optional - `log_level` - log level, default is `info`, you can set to `debug` to get maximum information (allowed values are `debug`, `info`, `warning`, `error`, `critical`) - `optimize_interval` - interval (seconds) between automatic `OPTIMIZE table FINAL` calls. Default 86400 (1 day). This is required to perform all merges guaranteed and avoid increasing of used storage and decreasing performance. - `auto_restart_interval` - interval (seconds) between automatic db_replicator restart. Default 3600 (1 hour). This is done to reduce memory usage. +- `binlog_retention_period` - how long (in seconds) local binlog files are kept before being automatically cleaned up. Must be a positive integer. Default 43200 (12 hours). Set inside the `binlog_replicator` section. - `indexes` - you may want to add some indexes to accelerate performance, eg. ngram index for full-test search, etc. To apply indexes you need to start replication from scratch. - `http_host`, `http_port` - http endpoint to control replication, use `/docs` for abailable commands - `types_mappings` - custom types mapping, eg. you can map char(36) to UUID instead of String, etc. 
diff --git a/mysql_ch_replicator/binlog_replicator.py b/mysql_ch_replicator/binlog_replicator.py index 824253d..cc0ab26 100644 --- a/mysql_ch_replicator/binlog_replicator.py +++ b/mysql_ch_replicator/binlog_replicator.py @@ -340,7 +340,6 @@ def save(self): class BinlogReplicator: SAVE_UPDATE_INTERVAL = 60 BINLOG_CLEAN_INTERVAL = 5 * 60 - BINLOG_RETENTION_PERIOD = 12 * 60 * 60 READ_LOG_INTERVAL = 0.3 def __init__(self, settings: Settings): @@ -378,7 +377,7 @@ def clear_old_binlog_if_required(self): return self.last_binlog_clear_time = curr_time - self.data_writer.remove_old_files(curr_time - BinlogReplicator.BINLOG_RETENTION_PERIOD) + self.data_writer.remove_old_files(curr_time - self.replicator_settings.binlog_retention_period) @classmethod def _try_parse_db_name_from_query(cls, query: str) -> str: diff --git a/mysql_ch_replicator/config.py b/mysql_ch_replicator/config.py index 72b23a0..d428fe9 100644 --- a/mysql_ch_replicator/config.py +++ b/mysql_ch_replicator/config.py @@ -75,6 +75,7 @@ def validate(self): class BinlogReplicatorSettings: data_dir: str = 'binlog' records_per_file: int = 100000 + binlog_retention_period: int = 43200 # 12 hours in seconds def validate(self): if not isinstance(self.data_dir, str): @@ -86,6 +87,12 @@ def validate(self): if self.records_per_file <= 0: raise ValueError('binlog_replicator records_per_file should be positive') + if not isinstance(self.binlog_retention_period, int): + raise ValueError(f'binlog_replicator binlog_retention_period should be int and not {stype(self.binlog_retention_period)}') + + if self.binlog_retention_period <= 0: + raise ValueError('binlog_replicator binlog_retention_period should be positive') + class Settings: DEFAULT_LOG_LEVEL = 'info' diff --git a/tests_config.yaml b/tests_config.yaml index 7ec8439..96fd998 100644 --- a/tests_config.yaml +++ b/tests_config.yaml @@ -1,4 +1,3 @@ - mysql: host: 'localhost' port: 9306 @@ -14,6 +13,7 @@ clickhouse: binlog_replicator: data_dir: '/app/binlog/' 
records_per_file: 100000 + binlog_retention_period: 43200 # 12 hours in seconds databases: '*test*' log_level: 'debug'