From 6ed3e0fc0d9d1501ef7df7f94d2853ebed474b9d Mon Sep 17 00:00:00 2001 From: GeorgeFI Date: Sun, 22 Oct 2023 15:05:18 +0200 Subject: [PATCH] feat: Initial implementation of dataset versioning --- src/sec_certs/dataset/dataset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/sec_certs/dataset/dataset.py b/src/sec_certs/dataset/dataset.py index 9e49a347..d68aa34f 100644 --- a/src/sec_certs/dataset/dataset.py +++ b/src/sec_certs/dataset/dataset.py @@ -14,6 +14,7 @@ from typing import Any, Generic, Iterator, TypeVar, cast import pandas as pd +from setuptools_scm import get_version from sec_certs import constants from sec_certs.configuration import config @@ -50,6 +51,7 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl @dataclass class DatasetInternalState(ComplexSerializableType): + sec_certs_version: str = get_version(root="..", local_scheme="no-local-version") meta_sources_parsed: bool = False artifacts_downloaded: bool = False pdfs_converted: bool = False @@ -204,6 +206,12 @@ def from_dict(cls: type[DatasetSubType], dct: dict) -> DatasetSubType: @classmethod def from_json(cls: type[DatasetSubType], input_path: str | Path, is_compressed: bool = False) -> DatasetSubType: dset = cast("DatasetSubType", ComplexSerializableType.from_json(input_path, is_compressed)) + dset_version = dset.state.sec_certs_version + tool_version = get_version(root="..", local_scheme="no-local-version") + + if dset_version != tool_version: + logger.warning(f"Dataset version: {dset_version} does not match current version of tool: {tool_version}!") + dset._root_dir = Path(input_path).parent.absolute() dset._set_local_paths() return dset