From eabcc26e4a7e38f47d9b83e1059d9148bbe3326b Mon Sep 17 00:00:00 2001 From: Adam Janovsky Date: Wed, 29 Mar 2023 16:33:42 +0200 Subject: [PATCH] switch to pydantic in settings management --- docs/configuration.md | 46 ++++- pyproject.toml | 1 + src/sec_certs/cli.py | 25 +-- src/sec_certs/config/__init__.py | 0 src/sec_certs/config/configuration.py | 44 ----- src/sec_certs/config/settings-schema.json | 182 -------------------- src/sec_certs/config/settings.yaml | 59 ------- src/sec_certs/configuration.py | 128 ++++++++++++++ src/sec_certs/dataset/cc.py | 2 +- src/sec_certs/dataset/dataset.py | 2 +- src/sec_certs/dataset/fips.py | 2 +- src/sec_certs/dataset/fips_iut.py | 2 +- src/sec_certs/dataset/fips_mip.py | 2 +- src/sec_certs/dataset/protection_profile.py | 2 +- src/sec_certs/sample/fips.py | 2 +- src/sec_certs/sample/fips_iut.py | 2 +- src/sec_certs/sample/fips_mip.py | 2 +- src/sec_certs/utils/parallel_processing.py | 6 +- src/sec_certs/utils/tqdm.py | 2 +- tests/conftest.py | 6 +- tests/data/settings_tests.yml | 60 +------ tests/test_config.py | 69 ++++++++ 22 files changed, 264 insertions(+), 382 deletions(-) delete mode 100644 src/sec_certs/config/__init__.py delete mode 100644 src/sec_certs/config/configuration.py delete mode 100644 src/sec_certs/config/settings-schema.json delete mode 100644 src/sec_certs/config/settings.yaml create mode 100644 src/sec_certs/configuration.py create mode 100644 tests/test_config.py diff --git a/docs/configuration.md b/docs/configuration.md index 93c926ba..dcf2b8d8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -6,21 +6,49 @@ mystnb: --- # Configuration -The configuration is stored in yaml file `settings.yaml` at `sec_certs.config` package. Below are the supported options, descriptions and default values. +The configuration class is defined in [configuration.py](https://github.com/crocs-muni/sec-certs/tree/main/src/sec_certs/configuration.py). 
From the CLI, you can load a custom configuration yaml with the `-c` or `--config` argument. From Python, you can replace the default configuration with + +```python +from pathlib import Path +import sec_certs.configuration as config_module + +config_module.config.load_from_yaml("/path/to/your/config.yaml") + +# or just set the individual key +config_module.config.log_filepath = Path("/some/path/where/log/will/be/stored.txt") +``` + +The configuration yaml is a simple flat dictionary of keys and values. The configuration file can specify only *some* of the fields. For each unspecified field, the environment variable with the `seccerts_` prefix (case insensitive) will be checked. If no such variable is set, the default value will be used. Content in the yaml always beats the environment variable. + +For instance, when the user provides the following yaml + +```yaml +log_filepath: my_own_log_file.txt +n_threads: 7 +``` + +and sets `SECCERTS_MINIMAL_TOKEN_LENGTH=4` as an environment variable, only these 3 keys will be loaded with `config.load_from_yaml()`; the others will be left untouched. + +```{tip} +You can load settings even without providing a yaml configuration. Simply set the corresponding environment variables or use a `.env` file. 
+``` + +## Configuration keys, types, default values and descriptions ```{code-cell} python -from sec_certs.config import configuration +from sec_certs.configuration import config, Configuration from myst_nb import glue from IPython.display import Markdown +import typing -cfg = configuration.config +type_hints = typing.get_type_hints(Configuration) text = "" -for key in cfg.__dict__: - text += f"`{key}`\n\n- Description: {cfg.get_desription(key)}\n" - text += f"- Default value: `{cfg.__getattribute__(key)}`\n\n" +for field, value in config.__fields__.items(): + text += f"`{field}`\n\n" + text += f"- type: `{type_hints[field]}`\n" + text += f"- default: `{value.default}`\n" + text += f"- description: {value.field_info.description}\n" + text += f"- env name: `{list(value.field_info.extra['env_names'])[0]}`\n\n" glue("text", Markdown(text)) ``` -```{glue:md} text -:format: myst -``` diff --git a/pyproject.toml b/pyproject.toml index 0c7f33d0..7b7715f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ "pySankeyBeta", "scipy>=1.9.0", "networkx", + "pydantic", ] [project.optional-dependencies] diff --git a/src/sec_certs/cli.py b/src/sec_certs/cli.py index 08ef6abd..d1a1eb90 100644 --- a/src/sec_certs/cli.py +++ b/src/sec_certs/cli.py @@ -9,8 +9,9 @@ from typing import Callable import click +from pydantic import ValidationError -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset import CCDataset, FIPSDataset from sec_certs.dataset.dataset import Dataset from sec_certs.utils.helpers import warn_if_missing_poppler, warn_if_missing_tesseract @@ -157,7 +158,7 @@ def build_or_load_dataset( "configpath", default=None, type=click.Path(file_okay=True, dir_okay=False, writable=True, readable=True), - help="Path to your own config yaml file that will override the default one.", + help="Path to your own config yaml file that will override the default config.", ) @click.option( "-i", @@ -176,6 +177,16 
@@ def main( quiet: bool, ): try: + if configpath: + try: + config.load_from_yaml(configpath) + except FileNotFoundError: + click.echo("Error: Bad path to configuration file", err=True) + sys.exit(EXIT_CODE_NOK) + except (ValueError, ValidationError) as e: + click.echo(f"Error: Bad format of configuration file: {e}", err=True) + sys.exit(EXIT_CODE_NOK) + file_handler = logging.FileHandler(config.log_filepath) stream_handler = logging.StreamHandler(sys.stderr) formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -185,16 +196,6 @@ def main( logging.basicConfig(level=logging.INFO, handlers=handlers) start = datetime.now() - if configpath: - try: - config.load(configpath) - except FileNotFoundError: - click.echo("Error: Bad path to configuration file", err=True) - sys.exit(EXIT_CODE_NOK) - except ValueError as e: - click.echo(f"Error: Bad format of configuration file: {e}", err=True) - sys.exit(EXIT_CODE_NOK) - actions_set = ( {"build", "process-aux-dsets", "download", "convert", "analyze"} if "all" in actions else set(actions) ) diff --git a/src/sec_certs/config/__init__.py b/src/sec_certs/config/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/sec_certs/config/configuration.py b/src/sec_certs/config/configuration.py deleted file mode 100644 index 43cfe698..00000000 --- a/src/sec_certs/config/configuration.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any - -import jsonschema -import yaml - - -class Configuration: - def load(self, filepath: str | Path) -> None: - with Path(filepath).open("r") as file: - state = yaml.load(file, Loader=yaml.FullLoader) - - script_dir = Path(__file__).parent - - with (Path(script_dir) / "settings-schema.json").open("r") as file: - schema = json.loads(file.read()) - - try: - jsonschema.validate(state, schema) - except jsonschema.exceptions.ValidationError as e: - print(f"{e}\n\nIn file 
{filepath}") - - for k, v in state.items(): - setattr(self, k, v) - - def __getattribute__(self, key: str) -> Any: - res = object.__getattribute__(self, key) - if isinstance(res, dict) and "value" in res: - return res["value"] - return object.__getattribute__(self, key) - - def get_desription(self, key: str) -> str | None: - res = object.__getattribute__(self, key) - if isinstance(res, dict) and "description" in res: - return res["description"] - return None - - -DEFAULT_CONFIG_PATH = Path(__file__).parent / "settings.yaml" -config = Configuration() -config.load(DEFAULT_CONFIG_PATH) diff --git a/src/sec_certs/config/settings-schema.json b/src/sec_certs/config/settings-schema.json deleted file mode 100644 index 2d5c17a6..00000000 --- a/src/sec_certs/config/settings-schema.json +++ /dev/null @@ -1,182 +0,0 @@ -{ - "title": "settings for sec-certs", - "type": "object", - "definitions": { - "settings_string_entry": { - "required": [ - "description", - "value" - ], - "type": "object", - "properties": { - "description": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "settings_boolean_entry": { - "type": "object", - "required": [ - "description", - "value" - ], - "properties": { - "description": { - "type": "string" - }, - "value": { - "type": "boolean" - } - } - }, - "settings_number_entry": { - "type": "object", - "required": [ - "description", - "value" - ], - "properties": { - "description": { - "type": "string" - }, - "value": { - "type": "number" - } - } - }, - "settings_url_entry": { - "type": "object", - "required": [ - "description", - "value" - ], - "properties": { - "description": { - "type": "string" - }, - "value": { - "type": "string", - "format": "uri", - "pattern": "^(https?|http?)://", - "minLength": 1, - "maxLength": 255 - } - } - } - }, - "properties": { - "log_filepath": { - "$ref": "#/definitions/settings_string_entry" - }, - "always_false_positive_fips_cert_id_threshold": { - "$ref": "#/definitions/settings_number_entry" - 
}, - "year_difference_between_validations": { - "allOf": [ - { - "$ref": "#/definitions/settings_number_entry" - }, - { - "properties": { - "value": { - "minimum": 0 - } - } - } - ] - }, - "n_threads": { - "allOf": [ - { - "$ref": "#/definitions/settings_number_entry" - }, - { - "properties": { - "value": { - "minimum": -1 - } - } - } - ] - }, - "cpe_n_matching_threshold": { - "allOf": [ - { - "$ref": "#/definitions/settings_number_entry" - }, - { - "properties": { - "value": { - "minimum": 0, - "maximum": 100 - } - } - } - ] - }, - "cpe_n_max_matches": { - "allOf": [ - { - "$ref": "#/definitions/settings_number_entry" - }, - { - "properties": { - "value": { - "exclusiveMinimum": 0 - } - } - } - ] - }, - "cc_latest_snapshot": { - "$ref": "#/definitions/settings_url_entry" - }, - "cc_maintenances_latest_snapshot": { - "$ref": "#/definitions/settings_url_entry" - }, - "pp_latest_snapshot": { - "$ref": "#/definitions/settings_url_entry" - }, - "ignore_first_page": { - "$ref": "#/definitions/settings_boolean_entry" - }, - "cert_threshold": { - "allOf": [ - { - "$ref": "#/definitions/settings_number_entry" - }, - { - "properties": { - "value": { - "minimum": 0 - } - } - } - ] - }, - "fips_latest_snapshot": { - "$ref": "#/definitions/settings_url_entry" - }, - "enable_progress_bars": { - "$ref": "#/definitions/settings_boolean_entry" - } - }, - "required": [ - "log_filepath", - "always_false_positive_fips_cert_id_threshold", - "year_difference_between_validations", - "n_threads", - "cpe_matching_threshold", - "cpe_n_max_matches", - "cc_latest_snapshot", - "cc_maintenances_latest_snapshot", - "pp_latest_snapshot", - "ignore_first_page", - "cert_threshold", - "fips_latest_snapshot", - "enable_progress_bars" - ] -} \ No newline at end of file diff --git a/src/sec_certs/config/settings.yaml b/src/sec_certs/config/settings.yaml deleted file mode 100644 index c69f7cdf..00000000 --- a/src/sec_certs/config/settings.yaml +++ /dev/null @@ -1,59 +0,0 @@ ---- -log_filepath: - 
description: Path to the file, relative to working directory, where the log will be stored - value: ./cert_processing_log.txt -always_false_positive_fips_cert_id_threshold: - description: - During validation we don't connect certificates with number lower than - _this_ to connections due to these numbers being typically false positives - value: 40 -year_difference_between_validations: - description: - During validation we don't connect certificates with validation dates - difference higher than _this_ - value: 7 -n_threads: - description: How many threads to use for parallel computations. Set to -1 to use all cores (*2 with multithreading). - value: -1 -cpe_matching_threshold: - description: Level of required string similarity between CPE and certificate name on CC CPE matching, 0-100. Lower values yield more false negatives, higher values more false positives - value: 92 -cpe_n_max_matches: - description: Maximum number of candidate CPE items that may be related to given certificate, >0 - value: 99 -cc_latest_snapshot: - description: URL from where to fetch the latest snapshot of fully processed CC dataset - value: https://seccerts.org/cc/dataset.json -cc_maintenances_latest_snapshot: - description: URL from where to fetch the latest snapshot of CC maintenance updates - value: https://seccerts.org/cc/maintenance_updates.json -pp_latest_snapshot: - description: URL from where to fetch the latest snapshot of the PP dataset - value: https://seccerts.org/static/pp.json -ignore_first_page: - description: During keyword search, first page usually contains addresses - ignore it. - value: true -cert_threshold: - description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty. 
- value: 5 -fips_latest_snapshot: - description: URL for the latest snapshot of FIPS dataset - value: https://seccerts.org/fips/dataset.json -fips_iut_dataset: - description: URL for the dataset of FIPS IUT data - value: https://seccerts.org/fips/iut/dataset.json -fips_iut_latest_snapshot: - description: URL for the latest snapshot of FIPS IUT data - value: https://seccerts.org/fips/iut/latest.json -fips_mip_dataset: - description: URL for the dataset of FIPS MIP data - value: https://seccerts.org/fips/mip/dataset.json -fips_mip_latest_snapshot: - description: URL for the latest snapshot of FIPS MIP data - value: https://seccerts.org/fips/mip/latest.json -minimal_token_length: - description: Minimal length of a string that will be considered as a token during keyword extraction in CVE matching - value: 3 -enable_progress_bars: - description: Whether to enable pretty-printed progress bars while processing. - value: true diff --git a/src/sec_certs/configuration.py b/src/sec_certs/configuration.py new file mode 100644 index 00000000..59ed0d34 --- /dev/null +++ b/src/sec_certs/configuration.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Optional + +import yaml +from pydantic import AnyHttpUrl, BaseSettings, Field + + +class Configuration(BaseSettings): + """ + Class that holds configuration. + While not a singleton, the `config` instance from this module is meant to be primarily used. 
+ """ + + class Config: + env_prefix = "seccerts_" + + log_filepath: Path = Field( + "./cert_processing_log.txt", + description="Path to the file, relative to working directory, where the log will be stored.", + ) + always_false_positive_fips_cert_id_threshold: int = Field( + 40, + description="During validation we don't connect certificates with number lower than _this_ to connections due to these numbers being typically false positives.", + ge=0, + ) + year_difference_between_validations: int = Field( + 7, + description=" During validation we don't connect certificates with validation dates difference higher than _this_.", + ) + n_threads: int = Field( + -1, description="How many threads to use for parallel computations. Set to -1 to use all logical cores.", ge=-1 + ) + cpe_matching_threshold: int = Field( + 92, + description="Level of required string similarity between CPE and certificate name on CC CPE matching, 0-100. Lower values yield more false negatives, higher values more false positives", + ge=0, + le=100, + ) + cpe_n_max_matches: int = Field( + 99, description="Maximum number of candidate CPE items that may be related to given certificate, >0", gt=0 + ) + cc_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/cc/dataset.json", + description="URL from where to fetch the latest snapshot of fully processed CC dataset.", + ) + cc_maintenances_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/cc/maintenance_updates.json", + description="URL from where to fetch the latest snapshot of CC maintenance updates", + ) + pp_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/static/pp.json", + description="URL from where to fetch the latest snapshot of the PP dataset.", + ) + fips_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/fips/dataset.json", description="URL for the latest snapshot of FIPS dataset." 
+ ) + fips_iut_dataset: AnyHttpUrl = Field( + "https://seccerts.org/fips/iut/dataset.json", description="URL for the dataset of FIPS IUT data." + ) + fips_iut_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/fips/iut/latest.json", description="URL for the latest snapshot of FIPS IUT data." + ) + fips_mip_dataset: AnyHttpUrl = Field( + "https://seccerts.org/fips/mip/dataset.json", description="URL for the dataset of FIPS MIP data" + ) + fips_mip_latest_snapshot: AnyHttpUrl = Field( + "https://seccerts.org/fips/mip/latest.json", description="URL for the latest snapshot of FIPS MIP data" + ) + minimal_token_length: int = Field( + 3, + description="Minimal length of a string that will be considered as a token during keyword extraction in CVE matching", + ge=0, + ) + ignore_first_page: bool = Field( + True, description="During keyword search, first page usually contains addresses - ignore it." + ) + cc_reference_annotator_dir: Optional[Path] = Field( # noqa: UP007 + None, + description="Path to directory with serialized reference annotator model. If set to `null`, tool will search default directory for the given dataset.", + ) + cc_reference_annotator_should_train: bool = Field( + True, description="True if new reference annotator model shall be build, False otherwise." + ) + + enable_progress_bars: bool = Field( + True, description="If true, progress bars will be printed to stdout during computation." + ) + + def _get_nondefault_keys(self) -> set[str]: + """ + Returns keys of the config that have non-default value, i.e. were provided as kwargs, env. vars. or additionaly set. 
+ """ + return {key for key, value in Configuration.__fields__.items() if getattr(self, key) != value.default} + + def _set_attrs_from_cfg(self, other_cfg: Configuration, fields_to_set: set[str] | None) -> None: + if not fields_to_set: + fields_to_set = set(Configuration.__fields__.keys()) + for field in [x for x in other_cfg.__fields__ if x in fields_to_set]: + setattr(self, field, getattr(other_cfg, field)) + + def load_from_yaml(self, yaml_path: str | Path) -> None: + """ + Will read configuration keys from `yaml_path` and overwrite the corresponding keys in `self`. + Also, will check environment variables with `seccerts_` prefix. + + :param str | Path yaml_path: path to yaml to read for configuration. + """ + with Path(yaml_path).open("r") as handle: + data = yaml.safe_load(handle) + other_cfg = Configuration.parse_obj(data) + keys_to_rewrite = set(data.keys()).union(other_cfg._get_nondefault_keys()) + self._set_attrs_from_cfg(other_cfg, keys_to_rewrite) + + def to_yaml(self, yaml_path: str | Path) -> None: + """ + Will dump the configuration to yaml file. + + :param str | Path yaml_path: path where the configuration will be dumped. 
+ """ + model_dict = json.loads(self.json()) # to assure that we have serializable values + with Path(yaml_path).open("w") as handle: + yaml.safe_dump(model_dict, handle) + + +config = Configuration() diff --git a/src/sec_certs/dataset/cc.py b/src/sec_certs/dataset/cc.py index c2849772..7d0fd207 100644 --- a/src/sec_certs/dataset/cc.py +++ b/src/sec_certs/dataset/cc.py @@ -18,7 +18,7 @@ import sec_certs.utils.sanitization from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset.cpe import CPEDataset from sec_certs.dataset.cve import CVEDataset from sec_certs.dataset.dataset import AuxiliaryDatasets, Dataset, logger diff --git a/src/sec_certs/dataset/dataset.py b/src/sec_certs/dataset/dataset.py index 350e5126..d388db2e 100644 --- a/src/sec_certs/dataset/dataset.py +++ b/src/sec_certs/dataset/dataset.py @@ -15,7 +15,7 @@ import pandas as pd from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset.cpe import CPEDataset from sec_certs.dataset.cve import CVEDataset from sec_certs.model.cpe_matching import CPEClassifier diff --git a/src/sec_certs/dataset/fips.py b/src/sec_certs/dataset/fips.py index ce7563a0..3c057266 100644 --- a/src/sec_certs/dataset/fips.py +++ b/src/sec_certs/dataset/fips.py @@ -12,7 +12,7 @@ from bs4 import BeautifulSoup, NavigableString from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset.cpe import CPEDataset from sec_certs.dataset.cve import CVEDataset from sec_certs.dataset.dataset import AuxiliaryDatasets, Dataset diff --git a/src/sec_certs/dataset/fips_iut.py b/src/sec_certs/dataset/fips_iut.py index ce0f2f76..0251bf9b 100644 --- a/src/sec_certs/dataset/fips_iut.py +++ b/src/sec_certs/dataset/fips_iut.py @@ -8,7 +8,7 @@ import requests from sec_certs import 
constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset.dataset import logger from sec_certs.dataset.json_path_dataset import JSONPathDataset from sec_certs.sample.fips_iut import IUTSnapshot diff --git a/src/sec_certs/dataset/fips_mip.py b/src/sec_certs/dataset/fips_mip.py index 05ca5854..1b0d2032 100644 --- a/src/sec_certs/dataset/fips_mip.py +++ b/src/sec_certs/dataset/fips_mip.py @@ -8,7 +8,7 @@ import requests from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.dataset.dataset import logger from sec_certs.dataset.json_path_dataset import JSONPathDataset from sec_certs.sample.fips_mip import MIPSnapshot diff --git a/src/sec_certs/dataset/protection_profile.py b/src/sec_certs/dataset/protection_profile.py index 9730a477..af7733a8 100644 --- a/src/sec_certs/dataset/protection_profile.py +++ b/src/sec_certs/dataset/protection_profile.py @@ -8,7 +8,7 @@ from pathlib import Path from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.sample.protection_profile import ProtectionProfile from sec_certs.serialization.json import get_class_fullname from sec_certs.utils import helpers diff --git a/src/sec_certs/sample/fips.py b/src/sec_certs/sample/fips.py index f3c82117..82aa1c18 100644 --- a/src/sec_certs/sample/fips.py +++ b/src/sec_certs/sample/fips.py @@ -16,7 +16,7 @@ from sec_certs import constants from sec_certs.cert_rules import FIPS_ALGS_IN_TABLE, fips_rules -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.sample.certificate import Certificate, References, logger from sec_certs.sample.certificate import Heuristics as BaseHeuristics from sec_certs.sample.certificate import PdfData as BasePdfData diff --git a/src/sec_certs/sample/fips_iut.py 
b/src/sec_certs/sample/fips_iut.py index f6010346..a603a2c2 100644 --- a/src/sec_certs/sample/fips_iut.py +++ b/src/sec_certs/sample/fips_iut.py @@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, Tag from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.serialization.json import ComplexSerializableType from sec_certs.utils.helpers import to_utc diff --git a/src/sec_certs/sample/fips_mip.py b/src/sec_certs/sample/fips_mip.py index 7e0ddff0..f1ef05c8 100644 --- a/src/sec_certs/sample/fips_mip.py +++ b/src/sec_certs/sample/fips_mip.py @@ -12,7 +12,7 @@ from bs4 import BeautifulSoup, Tag from sec_certs import constants -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.constants import FIPS_MIP_STATUS_RE from sec_certs.serialization.json import ComplexSerializableType from sec_certs.utils.helpers import to_utc diff --git a/src/sec_certs/utils/parallel_processing.py b/src/sec_certs/utils/parallel_processing.py index b3016695..ae3f0b44 100644 --- a/src/sec_certs/utils/parallel_processing.py +++ b/src/sec_certs/utils/parallel_processing.py @@ -2,12 +2,10 @@ import time from multiprocessing import cpu_count -from multiprocessing.pool import ThreadPool +from multiprocessing.pool import Pool, ThreadPool from typing import Any, Callable, Iterable -from billiard.pool import Pool - -from sec_certs.config.configuration import config +from sec_certs.configuration import config from sec_certs.utils.tqdm import tqdm diff --git a/src/sec_certs/utils/tqdm.py b/src/sec_certs/utils/tqdm.py index 77eeae94..581295ad 100644 --- a/src/sec_certs/utils/tqdm.py +++ b/src/sec_certs/utils/tqdm.py @@ -1,6 +1,6 @@ from tqdm import tqdm as tqdm_original -from sec_certs.config.configuration import config +from sec_certs.configuration import config def tqdm(*args, **kwargs): diff --git a/tests/conftest.py b/tests/conftest.py index b00c1bbe..1c8d23c0 100644 
--- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,10 +3,10 @@ import pytest import tests.data -from sec_certs.config.configuration import config +from sec_certs.configuration import config -@pytest.fixture(scope="session", autouse=True) +@pytest.fixture(scope="module", autouse=True) def load_test_config(): pth = Path(tests.data.__path__[0]) / "settings_tests.yml" - config.load(pth) + config.load_from_yaml(pth) diff --git a/tests/data/settings_tests.yml b/tests/data/settings_tests.yml index cf5a29b7..645f11ab 100644 --- a/tests/data/settings_tests.yml +++ b/tests/data/settings_tests.yml @@ -1,59 +1 @@ ---- -log_filepath: - description: Path to the file, relative to working directory, where the log will be stored - value: ./cert_processing_log.txt -always_false_positive_fips_cert_id_threshold: - description: - During validation we don't connect certificates with number lower than - _this_ to connections due to these numbers being typically false positives - value: 40 -year_difference_between_validations: - description: - During validation we don't connect certificates with validation dates - difference higher than _this_ - value: 7 -n_threads: - description: How many threads to use for parallel computations - value: 8 -cpe_matching_threshold: - description: Level of required string similarity between CPE and certificate name on CC CPE matching, 0-100. 
Lower values yield more false negatives, higher values more false positives - value: 92 -cpe_n_max_matches: - description: Maximum number of candidate CPE items that may be related to given certificate, >0 - value: 99 -cc_latest_snapshot: - description: URL from where to fetch the latest snapshot of fully processed CC dataset - value: https://seccerts.org/cc/dataset.json -cc_maintenances_latest_snapshot: - description: URL from where to fetch the latest snapshot of CC maintenance updates - value: https://seccerts.org/cc/maintenance_updates.json -pp_latest_snapshot: - description: URL from where to fetch the latest snapshot of the PP dataset - value: https://seccerts.org/static/pp.json -ignore_first_page: - description: During keyword search, first page usually contains addresses - ignore it. - value: true -cert_threshold: - description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty. - value: 5 -fips_latest_snapshot: - description: URL for the latest snapshot of FIPS dataset - value: https://seccerts.org/fips/dataset.json -fips_iut_dataset: - description: URL for the dataset of FIPS IUT data - value: https://seccerts.org/fips/iut/dataset.json -fips_iut_latest_snapshot: - description: URL for the latest snapshot of FIPS IUT data - value: https://seccerts.org/fips/iut/latest.json -fips_mip_dataset: - description: URL for the dataset of FIPS MIP data - value: https://seccerts.org/fips/mip/dataset.json -fips_mip_latest_snapshot: - description: URL for the latest snapshot of FIPS MIP data - value: https://seccerts.org/fips/mip/latest.json -minimal_token_length: - description: Minimal length of a string that will be considered as a token during keyword extraction in CVE matching - value: 3 -enable_progress_bars: - description: Whether to enable pretty-printed progress bars while processing. 
- value: False +enable_progress_bars: false diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..39102111 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any + +import pytest +import yaml + +import sec_certs.configuration as config_module +import tests.data + + +@pytest.fixture(autouse=True) +def load_test_config(): + pth = Path(tests.data.__path__[0]) / "settings_tests.yml" + config_module.config.load_from_yaml(pth) + + +@pytest.fixture +def simple_config_dict() -> dict[str, Any]: + return { + "always_false_positive_fips_cert_id_threshold": 42, + "cc_reference_annotator_should_train": False, + } + + +@pytest.fixture +def simple_config_yaml(simple_config_dict, tmp_path) -> Path: + yaml_path = tmp_path / "config.yaml" + with yaml_path.open("w") as handle: + yaml.safe_dump(simple_config_dict, handle) + return yaml_path + + +def test_config_from_yaml(simple_config_dict, simple_config_yaml: Path) -> None: + config_module.config.load_from_yaml(simple_config_yaml) + + for key, val in simple_config_dict.items(): + assert getattr(config_module.config, key) == val + + +def test_load_env_values(simple_config_dict, simple_config_yaml): + os.environ["seccerts_log_filepath"] = "/some/nonsense/path" + os.environ["always_false_positive_fips_cert_id_threshold"] = "10" + + config_module.config.load_from_yaml(simple_config_yaml) + + # this should also beat the env set above + for key, val in simple_config_dict.items(): + assert getattr(config_module.config, key) == val + + assert config_module.config.log_filepath == Path("/some/nonsense/path") + + +def test_complex_config_load(simple_config_dict, simple_config_yaml): + config_module.config.year_difference_between_validations = 123456789 + config_module.config.n_threads = 987654321 + os.environ["seccerts_n_threads"] = "1" + + config_module.config.load_from_yaml(simple_config_yaml) + for 
key, val in simple_config_dict.items(): + assert getattr(config_module.config, key) == val + + # year_difference_between_validations should not get overwritten + assert config_module.config.year_difference_between_validations == 123456789 + + # n_threads should get overwritten + assert config_module.config.n_threads == 1