From 2585d038187a6673b359ea8b7ce4f22dbb8e31fd Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum Date: Thu, 8 Jun 2023 16:07:06 +0200 Subject: [PATCH 1/4] edit --- src/dsp_tools/excel2xml.py | 21 +++++++++++++++---- src/dsp_tools/utils/excel_to_json_lists.py | 5 ++++- .../utils/excel_to_json_properties.py | 11 ++++++++-- .../utils/excel_to_json_resources.py | 15 ++++++++++--- src/dsp_tools/utils/id_to_iri.py | 7 ++++++- src/dsp_tools/utils/project_create.py | 16 +++++++++++--- src/dsp_tools/utils/rosetta.py | 5 ++++- src/dsp_tools/utils/shared.py | 12 +++++++++-- test/e2e/test_00A1_import_scripts.py | 5 ++++- 9 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/dsp_tools/excel2xml.py b/src/dsp_tools/excel2xml.py index 28e2a9fcf..e6ab4f291 100644 --- a/src/dsp_tools/excel2xml.py +++ b/src/dsp_tools/excel2xml.py @@ -257,7 +257,10 @@ def prepare_value( return [x if isinstance(x, PropertyElement) else PropertyElement(x) for x in value] -def make_root(shortcode: str, default_ontology: str) -> etree._Element: +def make_root( + shortcode: str, + default_ontology: str, +) -> etree._Element: """ Start building your XML document by creating the root element . @@ -444,7 +447,11 @@ def make_bitstream_prop( return prop_ -def _format_bool(unformatted: Union[bool, str, int], name: str, calling_resource: str) -> str: +def _format_bool( + unformatted: Union[bool, str, int], + name: str, + calling_resource: str, +) -> str: """ This method takes an unformatted boolean-like value, and transforms it into the string values "true" or "false". 
@@ -1769,7 +1776,10 @@ def create_json_list_mapping( return res -def _name_label_mapper_iterator(json_subset: list[dict[str, Any]], language_label: str) -> Iterable[tuple[str, str]]: +def _name_label_mapper_iterator( + json_subset: list[dict[str, Any]], + language_label: str, +) -> Iterable[tuple[str, str]]: """ returns (label, name) pairs of JSON project list entries """ @@ -1785,7 +1795,10 @@ def _name_label_mapper_iterator(json_subset: list[dict[str, Any]], language_labe # the actual values of the name and the label -def write_xml(root: etree._Element, filepath: str) -> None: +def write_xml( + root: etree._Element, + filepath: str +) -> None: """ Write the finished XML to a file diff --git a/src/dsp_tools/utils/excel_to_json_lists.py b/src/dsp_tools/utils/excel_to_json_lists.py index adcce263f..7413de6ad 100644 --- a/src/dsp_tools/utils/excel_to_json_lists.py +++ b/src/dsp_tools/utils/excel_to_json_lists.py @@ -195,7 +195,10 @@ def _get_values_from_excel( return row - 1, parentnode -def _make_json_lists_from_excel(excel_file_paths: list[str], verbose: bool = False) -> list[dict[str, Any]]: +def _make_json_lists_from_excel( + excel_file_paths: list[str], + verbose: bool = False, +) -> list[dict[str, Any]]: """ Reads Excel files and transforms them into a list of dictionaries that can be used as "lists" array of a JSON project file. 
diff --git a/src/dsp_tools/utils/excel_to_json_properties.py b/src/dsp_tools/utils/excel_to_json_properties.py index 29243217e..e8ab999f5 100644 --- a/src/dsp_tools/utils/excel_to_json_properties.py +++ b/src/dsp_tools/utils/excel_to_json_properties.py @@ -14,7 +14,10 @@ languages = ["en", "de", "fr", "it", "rm"] -def _validate_properties(properties_list: list[dict[str, Any]], excelfile: str) -> bool: +def _validate_properties( + properties_list: list[dict[str, Any]], + excelfile: str, +) -> bool: """ This function checks if the "properties" section of a JSON project file is valid according to the JSON schema, and if the property names are unique. @@ -75,7 +78,11 @@ def _validate_properties(properties_list: list[dict[str, Any]], excelfile: str) return True -def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]: +def _row2prop( + row: pd.Series, + row_count: int, + excelfile: str, +) -> dict[str, Any]: """ Takes a row from a pandas DataFrame, reads its content, and returns a dict object of the property diff --git a/src/dsp_tools/utils/excel_to_json_resources.py b/src/dsp_tools/utils/excel_to_json_resources.py index cf7930390..115df9ead 100644 --- a/src/dsp_tools/utils/excel_to_json_resources.py +++ b/src/dsp_tools/utils/excel_to_json_resources.py @@ -14,7 +14,10 @@ languages = ["en", "de", "fr", "it", "rm"] -def _validate_resources(resources_list: list[dict[str, Any]], excelfile: str) -> bool: +def _validate_resources( + resources_list: list[dict[str, Any]], + excelfile: str, +) -> bool: """ This function checks if the "resources" section of a JSON project file is valid according to the JSON schema, and if the resource names are unique. 
@@ -87,7 +90,10 @@ def _validate_resources(resources_list: list[dict[str, Any]], excelfile: str) -> return True -def _row2resource(row: pd.Series, excelfile: str) -> dict[str, Any]: +def _row2resource( + row: pd.Series, + excelfile: str, +) -> dict[str, Any]: """ Method that reads one row from the "classes" DataFrame, opens the corresponding details DataFrame, @@ -183,7 +189,10 @@ def _row2resource(row: pd.Series, excelfile: str) -> dict[str, Any]: return resource -def excel2resources(excelfile: str, path_to_output_file: Optional[str] = None) -> tuple[list[dict[str, Any]], bool]: +def excel2resources( + excelfile: str, + path_to_output_file: Optional[str] = None, +) -> tuple[list[dict[str, Any]], bool]: """ Converts resources described in an Excel file into a "resources" section which can be inserted into a JSON project file. diff --git a/src/dsp_tools/utils/id_to_iri.py b/src/dsp_tools/utils/id_to_iri.py index d7c5f2c9f..a78ea780f 100644 --- a/src/dsp_tools/utils/id_to_iri.py +++ b/src/dsp_tools/utils/id_to_iri.py @@ -12,7 +12,12 @@ from dsp_tools.models.exceptions import BaseError -def id_to_iri(xml_file: str, json_file: str, out_file: Optional[str], verbose: bool) -> bool: +def id_to_iri( + xml_file: str, + json_file: str, + out_file: Optional[str], + verbose: bool, +) -> bool: """ This function replaces all occurrences of internal IDs with their respective IRIs inside an XML file. It gets the mapping from the JSON file provided as parameter for this function. 
diff --git a/src/dsp_tools/utils/project_create.py b/src/dsp_tools/utils/project_create.py index f83d41ab5..4f4cc6408 100644 --- a/src/dsp_tools/utils/project_create.py +++ b/src/dsp_tools/utils/project_create.py @@ -126,7 +126,11 @@ def _update_basic_info_of_project( return project, False -def _create_groups(con: Connection, groups: list[dict[str, str]], project: Project) -> tuple[dict[str, Group], bool]: +def _create_groups( + con: Connection, + groups: list[dict[str, str]], + project: Project, +) -> tuple[dict[str, Group], bool]: """ Creates groups on a DSP server from the "groups" section of a JSON project file. If a group cannot be created, it is skipped and a warning is printed, but such a group will still be part of the returned dict. @@ -423,7 +427,10 @@ def _create_users( return overall_success -def _sort_resources(unsorted_resources: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: +def _sort_resources( + unsorted_resources: list[dict[str, Any]], + onto_name: str, +) -> list[dict[str, Any]]: """ This method sorts the resource classes in an ontology according to their inheritance order (parent classes first). @@ -455,7 +462,10 @@ def _sort_resources(unsorted_resources: list[dict[str, Any]], onto_name: str) -> return sorted_resources -def _sort_prop_classes(unsorted_prop_classes: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: +def _sort_prop_classes( + unsorted_prop_classes: list[dict[str, Any]], + onto_name: str, +) -> list[dict[str, Any]]: """ In case of inheritance, parent properties must be uploaded before their children. This method sorts the properties. 
diff --git a/src/dsp_tools/utils/rosetta.py b/src/dsp_tools/utils/rosetta.py index ff9f4fde2..55f6d4bea 100644 --- a/src/dsp_tools/utils/rosetta.py +++ b/src/dsp_tools/utils/rosetta.py @@ -31,7 +31,10 @@ def _update_possibly_existing_repo(rosetta_folder: Path) -> bool: return is_rosetta_up_to_date -def _clone_repo(rosetta_folder: Path, enclosing_folder: Path) -> None: +def _clone_repo( + rosetta_folder: Path, + enclosing_folder: Path, +) -> None: """ Clones the rosetta repo into the enclosing folder. diff --git a/src/dsp_tools/utils/shared.py b/src/dsp_tools/utils/shared.py index 4668a0f18..afcfe4b92 100644 --- a/src/dsp_tools/utils/shared.py +++ b/src/dsp_tools/utils/shared.py @@ -22,7 +22,11 @@ logger = get_logger(__name__) -def login(server: str, user: str, password: str) -> Connection: +def login( + server: str, + user: str, + password: str, +) -> Connection: """ Creates a connection, makes a login (handling temporary network interruptions), @@ -200,7 +204,11 @@ def _validate_xml_tags_in_text_properties(doc: Union[etree._ElementTree[etree._E return True -def prepare_dataframe(df: pd.DataFrame, required_columns: list[str], location_of_sheet: str) -> pd.DataFrame: +def prepare_dataframe( + df: pd.DataFrame, + required_columns: list[str], + location_of_sheet: str, +) -> pd.DataFrame: """ Takes a pandas DataFrame, strips the column headers from whitespaces and transforms them to lowercase, diff --git a/test/e2e/test_00A1_import_scripts.py b/test/e2e/test_00A1_import_scripts.py index 4b5ce5e99..81e81e901 100644 --- a/test/e2e/test_00A1_import_scripts.py +++ b/test/e2e/test_00A1_import_scripts.py @@ -71,7 +71,10 @@ def test_import_scripts(self) -> None: self.assertTrue(success_on_xmlupload) -def _derandomize_xsd_id(string: str, multiple_occurrences: bool = False) -> str: +def _derandomize_xsd_id( + string: str, + multiple_occurrences: bool = False, +) -> str: """ In some contexts, the random component of the output of make_xsd_id_compatible() is a hindrance, 
especially for testing. From 24d2b810c295753ab962a8f2f290a7752b6474e0 Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum Date: Thu, 8 Jun 2023 16:10:42 +0200 Subject: [PATCH 2/4] blacken --- src/dsp_tools/excel2xml.py | 12 ++++++------ src/dsp_tools/utils/excel_to_json_lists.py | 2 +- src/dsp_tools/utils/excel_to_json_properties.py | 6 +++--- src/dsp_tools/utils/excel_to_json_resources.py | 6 +++--- src/dsp_tools/utils/id_to_iri.py | 6 +++--- src/dsp_tools/utils/project_create.py | 8 ++++---- src/dsp_tools/utils/rosetta.py | 2 +- src/dsp_tools/utils/shared.py | 8 ++++---- test/e2e/test_00A1_import_scripts.py | 2 +- 9 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/dsp_tools/excel2xml.py b/src/dsp_tools/excel2xml.py index e6ab4f291..7a1c99f1d 100644 --- a/src/dsp_tools/excel2xml.py +++ b/src/dsp_tools/excel2xml.py @@ -258,7 +258,7 @@ def prepare_value( def make_root( - shortcode: str, + shortcode: str, default_ontology: str, ) -> etree._Element: """ @@ -448,8 +448,8 @@ def make_bitstream_prop( def _format_bool( - unformatted: Union[bool, str, int], - name: str, + unformatted: Union[bool, str, int], + name: str, calling_resource: str, ) -> str: """ @@ -1777,7 +1777,7 @@ def create_json_list_mapping( def _name_label_mapper_iterator( - json_subset: list[dict[str, Any]], + json_subset: list[dict[str, Any]], language_label: str, ) -> Iterable[tuple[str, str]]: """ @@ -1796,8 +1796,8 @@ def _name_label_mapper_iterator( def write_xml( - root: etree._Element, - filepath: str + root: etree._Element, + filepath: str, ) -> None: """ Write the finished XML to a file diff --git a/src/dsp_tools/utils/excel_to_json_lists.py b/src/dsp_tools/utils/excel_to_json_lists.py index 7413de6ad..1c542a363 100644 --- a/src/dsp_tools/utils/excel_to_json_lists.py +++ b/src/dsp_tools/utils/excel_to_json_lists.py @@ -196,7 +196,7 @@ def _get_values_from_excel( def _make_json_lists_from_excel( - excel_file_paths: list[str], + excel_file_paths: list[str], verbose: bool = False, ) -> 
list[dict[str, Any]]: """ diff --git a/src/dsp_tools/utils/excel_to_json_properties.py b/src/dsp_tools/utils/excel_to_json_properties.py index e8ab999f5..1bd6a7309 100644 --- a/src/dsp_tools/utils/excel_to_json_properties.py +++ b/src/dsp_tools/utils/excel_to_json_properties.py @@ -15,7 +15,7 @@ def _validate_properties( - properties_list: list[dict[str, Any]], + properties_list: list[dict[str, Any]], excelfile: str, ) -> bool: """ @@ -79,8 +79,8 @@ def _validate_properties( def _row2prop( - row: pd.Series, - row_count: int, + row: pd.Series, + row_count: int, excelfile: str, ) -> dict[str, Any]: """ diff --git a/src/dsp_tools/utils/excel_to_json_resources.py b/src/dsp_tools/utils/excel_to_json_resources.py index 115df9ead..c7a3e330b 100644 --- a/src/dsp_tools/utils/excel_to_json_resources.py +++ b/src/dsp_tools/utils/excel_to_json_resources.py @@ -15,7 +15,7 @@ def _validate_resources( - resources_list: list[dict[str, Any]], + resources_list: list[dict[str, Any]], excelfile: str, ) -> bool: """ @@ -91,7 +91,7 @@ def _validate_resources( def _row2resource( - row: pd.Series, + row: pd.Series, excelfile: str, ) -> dict[str, Any]: """ @@ -190,7 +190,7 @@ def _row2resource( def excel2resources( - excelfile: str, + excelfile: str, path_to_output_file: Optional[str] = None, ) -> tuple[list[dict[str, Any]], bool]: """ diff --git a/src/dsp_tools/utils/id_to_iri.py b/src/dsp_tools/utils/id_to_iri.py index a78ea780f..a6efefb49 100644 --- a/src/dsp_tools/utils/id_to_iri.py +++ b/src/dsp_tools/utils/id_to_iri.py @@ -13,9 +13,9 @@ def id_to_iri( - xml_file: str, - json_file: str, - out_file: Optional[str], + xml_file: str, + json_file: str, + out_file: Optional[str], verbose: bool, ) -> bool: """ diff --git a/src/dsp_tools/utils/project_create.py b/src/dsp_tools/utils/project_create.py index 4f4cc6408..6b165396f 100644 --- a/src/dsp_tools/utils/project_create.py +++ b/src/dsp_tools/utils/project_create.py @@ -127,8 +127,8 @@ def _update_basic_info_of_project( def 
_create_groups( - con: Connection, - groups: list[dict[str, str]], + con: Connection, + groups: list[dict[str, str]], project: Project, ) -> tuple[dict[str, Group], bool]: """ @@ -428,7 +428,7 @@ def _create_users( def _sort_resources( - unsorted_resources: list[dict[str, Any]], + unsorted_resources: list[dict[str, Any]], onto_name: str, ) -> list[dict[str, Any]]: """ @@ -463,7 +463,7 @@ def _sort_resources( def _sort_prop_classes( - unsorted_prop_classes: list[dict[str, Any]], + unsorted_prop_classes: list[dict[str, Any]], onto_name: str, ) -> list[dict[str, Any]]: """ diff --git a/src/dsp_tools/utils/rosetta.py b/src/dsp_tools/utils/rosetta.py index 55f6d4bea..45ed6cc95 100644 --- a/src/dsp_tools/utils/rosetta.py +++ b/src/dsp_tools/utils/rosetta.py @@ -32,7 +32,7 @@ def _update_possibly_existing_repo(rosetta_folder: Path) -> bool: def _clone_repo( - rosetta_folder: Path, + rosetta_folder: Path, enclosing_folder: Path, ) -> None: """ diff --git a/src/dsp_tools/utils/shared.py b/src/dsp_tools/utils/shared.py index afcfe4b92..6958461bd 100644 --- a/src/dsp_tools/utils/shared.py +++ b/src/dsp_tools/utils/shared.py @@ -23,8 +23,8 @@ def login( - server: str, - user: str, + server: str, + user: str, password: str, ) -> Connection: """ @@ -205,8 +205,8 @@ def _validate_xml_tags_in_text_properties(doc: Union[etree._ElementTree[etree._E def prepare_dataframe( - df: pd.DataFrame, - required_columns: list[str], + df: pd.DataFrame, + required_columns: list[str], location_of_sheet: str, ) -> pd.DataFrame: """ diff --git a/test/e2e/test_00A1_import_scripts.py b/test/e2e/test_00A1_import_scripts.py index 81e81e901..8ac548a3f 100644 --- a/test/e2e/test_00A1_import_scripts.py +++ b/test/e2e/test_00A1_import_scripts.py @@ -72,7 +72,7 @@ def test_import_scripts(self) -> None: def _derandomize_xsd_id( - string: str, + string: str, multiple_occurrences: bool = False, ) -> str: """ From f7f98e7b352296259f5b7a478f99f201bd0f08b0 Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum Date: 
Thu, 8 Jun 2023 16:27:13 +0200 Subject: [PATCH 3/4] markdownlint: reduce line length to 125 --- .markdownlint.yml | 7 +++---- README.md | 3 ++- docs/cli-commands.md | 2 +- docs/developers/git-submodules.md | 3 ++- docs/file-formats/excel2json.md | 4 ++-- docs/file-formats/json-project/caveats.md | 3 ++- docs/file-formats/xml-data-file.md | 9 ++++++--- docs/internal/fast-xmlupload.md | 6 ++++-- src/dsp_tools/import_scripts | 2 +- 9 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.markdownlint.yml b/.markdownlint.yml index bdfcae69e..15fdc30fa 100644 --- a/.markdownlint.yml +++ b/.markdownlint.yml @@ -20,16 +20,15 @@ MD012: false # MD013/line-length - Line length MD013: - line_length: 150 - heading_line_length: 150 - code_block_line_length: 150 + line_length: 125 + heading_line_length: 125 + code_block_line_length: 125 # Include code blocks code_blocks: true # Include tables tables: false # Include headings headings: true - # Include headings headers: true # Strict length checking strict: false diff --git a/README.md b/README.md index f44adb088..9f3ea24bb 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,8 @@ Find more information in the ## Publishing/distribution Publishing is automated with GitHub Actions and should _not_ be done manually. -Please follow the [Pull Request Guidelines](https://docs.dasch.swiss/latest/developers/dsp/contribution/#pull-request-guidelines). +Please follow the +[Pull Request Guidelines](https://docs.dasch.swiss/latest/developers/dsp/contribution/#pull-request-guidelines). If done correctly, when merging a pull request into `main`, the `release-please` action will create or update a release PR. This PR will follow semantic versioning and update the change log. 
diff --git a/docs/cli-commands.md b/docs/cli-commands.md index ab29970b3..ee1c1d4df 100644 --- a/docs/cli-commands.md +++ b/docs/cli-commands.md @@ -105,7 +105,7 @@ The following options are available: - `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server where DSP-TOOLS sends the data to - `-u` | `--user` (optional, default: `root@example.com`): username (e-mail) used for authentication with the DSP-API - `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP-API -- `-S` | `--sipi` (optional, default: `http://0.0.0.0:1024`): URL of the SIPI server where DSP-TOOLS sends the multimedia files to +- `-S` | `--sipi` (optional, default: `http://0.0.0.0:1024`): URL of SIPI server where DSP-TOOLS sends multimedia files to - `-i` | `--imgdir` (optional, default: `.`): folder from where the paths in the `` tags are evaluated - `-I` | `--incremental` (optional) : The links in the XML file point to IRIs (on the server) instead of IDs (in the same XML file). diff --git a/docs/developers/git-submodules.md b/docs/developers/git-submodules.md index 348e899c5..4de5227e4 100644 --- a/docs/developers/git-submodules.md +++ b/docs/developers/git-submodules.md @@ -2,7 +2,8 @@ # Git submodules -This repository embeds [https://github.com/dasch-swiss/00A1-import-scripts](https://github.com/dasch-swiss/00A1-import-scripts) +This repository embeds +[https://github.com/dasch-swiss/00A1-import-scripts](https://github.com/dasch-swiss/00A1-import-scripts) as a Git submodule in `src/dsp_tools/import_scripts`. That means that `src/dsp_tools/import_scripts` has no contents, but only a reference to a certain commit in the main branch of `00A1-import-scripts`. When you clone DSP-TOOLS from GitHub as usual, `src/dsp_tools/import_scripts` will be empty. 
diff --git a/docs/file-formats/excel2json.md b/docs/file-formats/excel2json.md index 4f3b2122e..fb8314172 100644 --- a/docs/file-formats/excel2json.md +++ b/docs/file-formats/excel2json.md @@ -119,8 +119,8 @@ Only the first worksheet of the Excel file is considered and only XLSX files are The `properties` section can be inserted into the ontology file and then be uploaded onto a DSP server. **An Excel file template can be found [here](../assets/data_model_templates/rosetta%20(rosetta)/properties.xlsx) -or also in the -[`data_model_files` folder of `00A1-import-scripts`](https://github.com/dasch-swiss/00A1-import-scripts/tree/main/data_model_files). +or also in the `data_model_files` folder of +[`00A1-import-scripts`](https://github.com/dasch-swiss/00A1-import-scripts/tree/main/data_model_files). It is recommended to work from the template.** The Excel sheet must have the following structure: diff --git a/docs/file-formats/json-project/caveats.md b/docs/file-formats/json-project/caveats.md index 9ee4f7a6f..3b6b9f4fc 100644 --- a/docs/file-formats/json-project/caveats.md +++ b/docs/file-formats/json-project/caveats.md @@ -63,7 +63,8 @@ subclassed and used in a resource class. [<link> tag](../xml-data-file.md#link) - `hasGeometry`: Defines a geometry value (a JSON describing a polygon, circle or rectangle). - must be used directly in the XML data file in the [<region> tag](../xml-data-file.md#region) -- `isRegionOf`: A special variant of `hasLinkTo`. It means that the given resource class is a region of interest in an image. +- `isRegionOf`: A special variant of `hasLinkTo`. + It means that the given resource class is a region of interest in an image. - must be used directly in the XML data file in the [<region> tag](../xml-data-file.md#region) - `isAnnotationOf`: A special variant of `hasLinkTo`. It means that the given resource class is an annotation to another resource class. 
diff --git a/docs/file-formats/xml-data-file.md b/docs/file-formats/xml-data-file.md index 0b368361e..10cb9b912 100644 --- a/docs/file-formats/xml-data-file.md +++ b/docs/file-formats/xml-data-file.md @@ -74,8 +74,10 @@ A group can have exactly one of these rights: - `RV` _restricted view permission_: Same as `V`, but if it is applied to an image, the image is shown with a reduced resolution or with a watermark overlay. - `V` _view permission_: The user can view a resource or a value, but cannot modify it. -- `M` _modify permission_: The user can modify the element, but cannot mark it as deleted. The original resource or value will be preserved. -- `D` _delete permission_: The user is allowed to mark an element as deleted. The original resource or value will be preserved. +- `M` _modify permission_: The user can modify the element, but cannot mark it as deleted. + The original resource or value will be preserved. +- `D` _delete permission_: The user is allowed to mark an element as deleted. + The original resource or value will be preserved. - `CR` _change right permission_: The user can change the permission of a resource or value. The user is also allowed to permanently delete (erase) a resource. @@ -154,7 +156,8 @@ A `` element contains all necessary information to create a resource. - `ark` (optional): a version 0 ARK, used when migrating existing resources. It is not possible to use `iri` and `ark` in the same resource. When `ark` is used, it overrides `iri` (DaSCH-internal only). - `creation_date` (optional): the creation date of the resource, used when migrating existing resources. 
- It must be formatted according to the constraints of [xsd:dateTimeStamp](https://www.w3.org/TR/xmlschema11-2/#dateTimeStamp), + It must be formatted according to the constraints of + [xsd:dateTimeStamp](https://www.w3.org/TR/xmlschema11-2/#dateTimeStamp), which means that the timezone is required, e.g.: `2005-10-23T13:45:12.502951+02:00` (DaSCH-internal only) A complete `` element may look as follows: diff --git a/docs/internal/fast-xmlupload.md b/docs/internal/fast-xmlupload.md index cb07b1e68..4175e547c 100644 --- a/docs/internal/fast-xmlupload.md +++ b/docs/internal/fast-xmlupload.md @@ -60,7 +60,8 @@ The following options are available: - `--input-dir` (mandatory): path to the input directory where the files should be read from - `--output-dir` (mandatory): path to the output directory where the processed/transformed files should be written to -- `--nthreads` (optional, default computed by the concurrent library, dependent on the machine): number of threads to use for processing +- `--nthreads` (optional, default computed by the concurrent library, dependent on the machine): + number of threads to use for processing All files referenced in the `` tags of the XML are expected to be in the input directory @@ -89,7 +90,8 @@ The following options are available: - `-f` | `--pkl-file` (mandatory): path to the pickle file that was written by the processing step - `-d` | `--processed-dir` (mandatory): path to the directory where the processed files are located (same as `--output-dir` in the processing step) -- `-n` | `--nthreads` (optional, default 4): number of threads to use for uploading (optimum depends on the number of CPUs on the server) +- `-n` | `--nthreads` (optional, default 4): number of threads to use for uploading + (optimum depends on the number of CPUs on the server) - `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server - `-S` | `--sipi-url` (optional, default: `0.0.0.0:1024`): URL of the SIPI server - `-u` | `--user` 
(optional, default: `root@example.com`): username (e-mail) used for authentication with the DSP-API diff --git a/src/dsp_tools/import_scripts b/src/dsp_tools/import_scripts index 438199bf3..119877793 160000 --- a/src/dsp_tools/import_scripts +++ b/src/dsp_tools/import_scripts @@ -1 +1 @@ -Subproject commit 438199bf3a19aa1b765ba0d6016440ece3cf2d0f +Subproject commit 1198777939d3e171d5034b75192329a84a78bbcf From 1b607617145fb6d6a894375b75d2d853401414c5 Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum Date: Thu, 8 Jun 2023 17:14:39 +0200 Subject: [PATCH 4/4] reduce line length of markdown to 120 --- .markdownlint.yml | 6 +-- docs/cli-commands.md | 3 +- docs/developers/git-submodules.md | 7 ++- docs/developers/mkdocs.md | 3 +- docs/excel2xml-module.md | 42 ++++++++++------ docs/file-formats/json-project/ontologies.md | 30 +++++++----- docs/file-formats/json-project/overview.md | 17 ++++--- docs/file-formats/xml-data-file.md | 50 +++++++++++++------- 8 files changed, 101 insertions(+), 57 deletions(-) diff --git a/.markdownlint.yml b/.markdownlint.yml index 15fdc30fa..c06cdffb2 100644 --- a/.markdownlint.yml +++ b/.markdownlint.yml @@ -20,9 +20,9 @@ MD012: false # MD013/line-length - Line length MD013: - line_length: 125 - heading_line_length: 125 - code_block_line_length: 125 + line_length: 120 + heading_line_length: 120 + code_block_line_length: 120 # Include code blocks code_blocks: true # Include tables diff --git a/docs/cli-commands.md b/docs/cli-commands.md index ee1c1d4df..b926a7e07 100644 --- a/docs/cli-commands.md +++ b/docs/cli-commands.md @@ -105,7 +105,8 @@ The following options are available: - `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server where DSP-TOOLS sends the data to - `-u` | `--user` (optional, default: `root@example.com`): username (e-mail) used for authentication with the DSP-API - `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP-API -- `-S` | `--sipi` (optional, default: 
`http://0.0.0.0:1024`): URL of SIPI server where DSP-TOOLS sends multimedia files to +- `-S` | `--sipi` (optional, default: `http://0.0.0.0:1024`): + URL of the SIPI server where DSP-TOOLS sends the multimedia files to - `-i` | `--imgdir` (optional, default: `.`): folder from where the paths in the `` tags are evaluated - `-I` | `--incremental` (optional) : The links in the XML file point to IRIs (on the server) instead of IDs (in the same XML file). diff --git a/docs/developers/git-submodules.md b/docs/developers/git-submodules.md index 4de5227e4..ac1b990cf 100644 --- a/docs/developers/git-submodules.md +++ b/docs/developers/git-submodules.md @@ -81,8 +81,11 @@ confused that the path to the submodule changed. If this doesn't help, it might ## Actively working with the contents of the submodule -After retrieving the contents of a submodule as described in the paragraph above, it is in "detached HEAD" state. Before -committing to it, the `main` branch needs to be checked out. The order how to proceed is the following: +After retrieving the contents of a submodule as described in the paragraph above, +it is in "detached HEAD" state. +Before committing to it, +the `main` branch needs to be checked out. +The order how to proceed is the following: ```bash cd src/dsp_tools/import_scripts diff --git a/docs/developers/mkdocs.md b/docs/developers/mkdocs.md index bbeec037c..e92b13fdc 100644 --- a/docs/developers/mkdocs.md +++ b/docs/developers/mkdocs.md @@ -79,7 +79,8 @@ without anyone noticing. (see [here](https://facelessuser.github.io/pymdown-extensions/extras/slugs/)). - markdown-link-validator uses [uslug](https://www.npmjs.com/package/uslug) to create the slugs (see [here](https://github.com/webhintio/markdown-link-validator/blob/main/src/lib/mdfile.ts)). 
-- VS Code targets the CommonMark Markdown specification using the [markdown-it](https://github.com/markdown-it/markdown-it) library +- VS Code targets the CommonMark Markdown specification using the + [markdown-it](https://github.com/markdown-it/markdown-it) library (see [here](https://code.visualstudio.com/docs/languages/markdown#_does-vs-code-support-github-flavored-markdown)). Another useful reading is [here](https://github.com/yzhang-gh/vscode-markdown/issues/807). diff --git a/docs/excel2xml-module.md b/docs/excel2xml-module.md index 535549da9..a3473bbd8 100644 --- a/docs/excel2xml-module.md +++ b/docs/excel2xml-module.md @@ -26,16 +26,19 @@ This can be done with the [CLI command `excel2xml`](./cli-commands.md#excel2xml) ## Module `excel2xml`: Convert a data source to XML -To demonstrate the usage of the `excel2xml` module, there is a GitHub repository named `00A1-import-scripts`. It -contains: +To demonstrate the usage of the `excel2xml` module, +there is a GitHub repository named `00A1-import-scripts`. +It contains: - a sample JSON project file - sample data that fits the data model of the JSON project file - a sample Python script that demonstrates how to use the module `excel2xml`. -Navigate to [https://github.com/dasch-swiss/00A1-import-scripts](https://github.com/dasch-swiss/00A1-import-scripts) and -follow the steps described there. The README will teach you some basics that will be necessary to work with `excel2xml`. -Once you are familiar with the basics, return to this page to learn how the sample Python script works. +Navigate to [https://github.com/dasch-swiss/00A1-import-scripts](https://github.com/dasch-swiss/00A1-import-scripts) +and follow the steps described there. +The README will teach you some basics that will be necessary to work with `excel2xml`. +Once you are familiar with the basics, +return to this page to learn how the sample Python script works. 
This is the simplified pattern how the Python script works: @@ -93,10 +96,13 @@ here](./file-formats/xml-data-file.md#using-permissions-with-the-permissions-att ## 4. Create list mappings -Let's assume that your data source has a column containing list values named after the "label" of the JSON project list, -instead of the "name" which is needed for the `dsp-tools xmlupload`. You need a way to get the names from the labels. -If your data source uses the labels correctly, this is an easy task: The method `create_json_list_mapping()` creates a -dictionary that maps the labels to the names: +Let's assume that your data source has a column +containing list values named after the "label" of the JSON project list, +instead of the "name" which is needed for the `dsp-tools xmlupload`. +You need a way to get the names from the labels. +If your data source uses the labels correctly, +this is an easy task: +The method `create_json_list_mapping()` creates a dictionary that maps the labels to the names: The list "category" in `00A1-import-scripts/import_project.json` looks as follows: @@ -140,10 +146,13 @@ If you pass this list to `create_json_list_mapping()`, it creates the following ``` -If, however, your data source has spelling variants, you need the more sophisticated approach of -`create_json_excel_list_mapping()`: This method creates a dictionary that maps the list values in your data source to their -correct JSON project node name. This happens based on string similarity. Please carefully check the result if there are -no false matches! +If, however, your data source has spelling variants, +you need the more sophisticated approach of `create_json_excel_list_mapping()`: +This method creates a dictionary +that maps the list values in your data source +to their correct JSON project node name. +This happens based on string similarity. +Please carefully check the result if there are no false matches! 
The column "Category" in `00A1-import-scripts/data_raw.csv` has spelling mistakes: ![column category](./assets/images/img-excel2xml-raw-data-category.png) @@ -254,8 +263,11 @@ For `make_boolean_prop(cell)`, the following formats are supported: - true: True, "true", "True", "1", 1, "yes", "Yes" - false: False, "false", "False", "0", 0, "no", "No" -N/A-like values will raise an Error. So if your cell is empty, this method will not count it as false, but will raise an -Error. If you want N/A-like values to be counted as false, you may use a construct like this: +N/A-like values will raise an Error. +So if your cell is empty, this method will not count it as false, +but will raise an Error. +If you want N/A-like values to be counted as false, +you may use a construct like this: ```python if excel2xml.check_notna(cell): diff --git a/docs/file-formats/json-project/ontologies.md b/docs/file-formats/json-project/ontologies.md index 872ec0cca..a6680153a 100644 --- a/docs/file-formats/json-project/ontologies.md +++ b/docs/file-formats/json-project/ontologies.md @@ -466,16 +466,18 @@ Example: `"object": "ListValue"` -Represents a node of a (possibly hierarchical) list. See the [xmlupload documentation](../xml-data-file.md#list-prop) +Represents a node of a (possibly hierarchical) list. +See the [xmlupload documentation](../xml-data-file.md#list-prop) for more information. *gui_elements / gui_attributes*: -- `List`: A GUI element for *ListValue*. A dropdown to select a list node. This GUI element should be chosen for - hierarchical lists or flat lists that could be expanded to hierarchical lists in the future. +- `List`: A GUI element for *ListValue*. A dropdown to select a list node. - *gui_attributes*: - `hlist=` (required): The name of a list defined in the ["lists" section](./overview.md#lists). -- `Radio` and `Pulldown` are allowed, too, but they don't have a different behaviour than `List`. It is recommended to use `List`. 
+- `Radio` and `Pulldown` are allowed, too, + but they don't have a different behaviour than `List`. + It is recommended to use `List`. Example: @@ -713,14 +715,18 @@ Example: `"object": ""` -A special case of linked resources are resources in a part-whole relation, i.e. resources that are composed of -other resources. A `isPartOf` property has to be added to the resource that is part of another resource. In case of -resources that are of type `StillImageRepresentation`, an additional property derived from `seqnum` with object `IntValue` -is required. When defined, the user is able to leaf through the parts of a compound object, p.ex. to leaf through pages -of a book. - -The DSP base properties `isPartOf` and `seqnum` can be used to derive a custom property from them, or they can be used -directly as cardinalities in a resource. The example belows shows both possibilities. +A special case of linked resources are resources in a part-whole relation, +i.e. resources that are composed of other resources. +An `isPartOf` property has to be added to the resource that is part of another resource. +In case of resources that are of type `StillImageRepresentation`, +an additional property derived from `seqnum` with object `IntValue` is required. +When defined, the user is able to leaf through the parts of a compound object, +e.g. to leaf through pages of a book. + +The DSP base properties `isPartOf` and `seqnum` +can be used to derive a custom property from them, +or they can be used directly as cardinalities in a resource. +The example below shows both possibilities.
*gui_elements/gui_attributes*: diff --git a/docs/file-formats/json-project/overview.md b/docs/file-formats/json-project/overview.md index 0e2e8ac70..14e63eb6a 100644 --- a/docs/file-formats/json-project/overview.md +++ b/docs/file-formats/json-project/overview.md @@ -2,12 +2,15 @@ # JSON project definition format -This document describes the structure of a JSON project definition file that can be uploaded to a DSP server +This document describes the structure of a JSON project definition file +that can be uploaded to a DSP server with the [`create`](../../cli-commands.md#create) command. -A project on a DSP server is like a container for data. It defines some basic metadata, the data model(s) and optionally -the user(s) who will be able to access the data. After the creation of a project, data can be uploaded that conforms -with the data model(s). +A project on a DSP server is like a container for data. +It defines some basic metadata, the data model(s) +and optionally the user(s) who will be able to access the data. +After the creation of a project, +data can be uploaded that conforms with the data model(s). This documentation is divided into the following parts: @@ -140,8 +143,10 @@ the DaSCH. `"shortname": ""` -The shortname has to be unique. It should be in the form of a [xsd:NCNAME](https://www.w3.org/TR/xmlschema11-2/#NCName). -This means a string without blanks or special characters but `-` and `_` are allowed (although not as first character). +The shortname has to be unique. +It should be in the form of a [xsd:NCNAME](https://www.w3.org/TR/xmlschema11-2/#NCName). +This means a string without blanks or special characters, +but `-` and `_` are allowed (although not as first character).
diff --git a/docs/file-formats/xml-data-file.md b/docs/file-formats/xml-data-file.md index 10cb9b912..548952f9e 100644 --- a/docs/file-formats/xml-data-file.md +++ b/docs/file-formats/xml-data-file.md @@ -3,10 +3,13 @@ # The XML file format for importing data With the [`xmlupload`](../cli-commands.md#xmlupload) command, -data can be imported into a DSP repository (on a DSP server) from an XML file. The import file is a -standard XML file as described on this page. After a successful upload of the data, an output file is written (called -`id2iri_mapping_[timestamp].json`) with the mapping from the internal IDs used inside the XML to their corresponding IRIs which -uniquely identify them inside DSP. This file should be kept if data is later added with the +data can be imported into a DSP repository (on a DSP server) from an XML file. +The import file is a standard XML file as described on this page. +After a successful upload of the data, +an output file is written (called `id2iri_mapping_[timestamp].json`) +with the mapping from the internal IDs used inside the XML +to their corresponding IRIs which uniquely identify them inside DSP. +This file should be kept if data is later added with the `--incremental` [option](../incremental-xmlupload.md). The import file must start with the standard XML header: @@ -118,9 +121,12 @@ permission `special-permission` can only be viewed by `ProjectAdmin`s: ### Using permissions with the `permissions` attribute -Once defined, the permission IDs can be used as `permissions` attribute in the `<resource>` and `<xyz-prop>` tags. It is -important to note that a resource doesn't inherit its permissions to its properties. Each property must have its own -permissions. So, in the following example, the bitstreams don't inherit the permissions from their resource: +Once defined, the permission IDs can be used as `permissions` attribute +in the `<resource>` and `<xyz-prop>` tags. +It is important to note that a resource doesn't pass its permissions on to its properties.
+Each property must have its own permissions. +So, in the following example, +the bitstreams don't inherit the permissions from their resource: ```xml @@ -148,10 +154,15 @@ A `` element contains all necessary information to create a resource. - `label` (required): a human-readable, preferably meaningful short name of the resource - `restype` (required): the resource type as defined within the ontology -- `id` (required): a unique, arbitrary string providing a unique ID to the resource in order to be referencable by other - resources; the ID is only used during the import process and later replaced by the IRI used internally by DSP -- `permissions` (optional, but if omitted, users who are lower than a `ProjectAdmin` have no permissions at all, not - even view rights): a reference to a permission ID +- `id` (required): an arbitrary string providing a unique ID to the resource + in order to be referencable by other resources; + the ID is only used during the import process + and later replaced by the IRI used internally by DSP +- `permissions` + (optional, but if omitted, + users who are lower than a `ProjectAdmin` have no permissions at all, + not even view rights): + a reference to a permission ID - `iri` (optional): a custom IRI, used when migrating existing resources (DaSCH-internal only) - `ark` (optional): a version 0 ARK, used when migrating existing resources. It is not possible to use `iri` and `ark` in the same resource. When `ark` is used, it overrides `iri` (DaSCH-internal only). @@ -347,11 +358,14 @@ Notes: - Internally, a date is always represented as a start and end date. - If start and end date match, it's an exact date. - If start and end date don't match, it's a range. -- If the end date is omitted, it's a range from the earliest possible beginning of the start date to the latest possible -end of the start date. 
For example: +- If the end date is omitted, + it's a range from the earliest possible beginning of the start date + to the latest possible end of the start date. + For example: - "1893" will be expanded to a range from January 1st 1893 to December 31st 1893. - "1893-01" will be expanded to a range from January 1st 1893 to January 31st 1893. - - "1893-01-01" will be expanded to the exact date January 1st 1893 to January 1st 1893 (technically also a range). + - "1893-01-01" will be expanded to the exact date January 1st 1893 + (technically also a range, from January 1st 1893 to January 1st 1893). Attributes: @@ -840,9 +854,11 @@ Example of a property with a public and a hidden URI: ## DSP base resources and base properties to be used directly in the XML file -There is a number of base resources and base properties that must not be subclassed in a project ontology. They are -directly available in the XML data file. Please have in mind that built-in names of the knora-base ontology must be used -without prepended colon. +There are a number of base resources and base properties +that must not be subclassed in a project ontology. +They are directly available in the XML data file. +Please keep in mind that built-in names of the knora-base ontology +must be used without a prepended colon. See also [the related part of the JSON project documentation](./json-project/caveats.md#dsp-base-resources-and-base-properties-to-be-used-directly-in-the-xml-file)