diff --git a/docs/developers/start-stack-command.md b/docs/developers/start-stack-command.md index 979d467a0..3496e442a 100644 --- a/docs/developers/start-stack-command.md +++ b/docs/developers/start-stack-command.md @@ -8,12 +8,16 @@ in the version that is running on [https://admin.dasch.swiss](https://admin.dasc In addition to the containers, a number of files from the DSP-API GitHub repository is necessary. The version of the docker images and these files must be the same. -The version is hardcoded at the following places in the code: +The version is configured in the following files in `src/dsp_tools/resources/start-stack/`: -- `src/dsp_tools/docker/docker-compose.yml`: +- `docker-compose.yml`: The 4 variables `services/{app,db,sipi,api}/image` must point to the DockerHub image of the last deployed version. -- `src/dsp_tools/utils/stack_handling.py`: - The variable `commit_of_used_api_version` + The versions can be found in the + [ops-deploy repo](https://github.com/dasch-swiss/ops-deploy/blob/main/roles/dsp-deploy/files/RELEASE.json) +- `start-stack-config.yml`: + The variable `DSP-API commit` must be the commit hash of DSP-API of the version that is running on [https://admin.dasch.swiss](https://admin.dasch.swiss/help). + Just take the commit hash of the latest DSP-API release + from the [DSP-API GitHub repo](https://github.com/dasch-swiss/dsp-api/commits/main) diff --git a/poetry.lock b/poetry.lock index f0fc8eef8..ed9c86618 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1564,6 +1564,17 @@ files = [ {file = "types_openpyxl-3.1.0.13-py3-none-any.whl", hash = "sha256:22bde5f20327783a3ac742ec892916c694604bc32d3c87031afd97b824fde0b1"}, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.10" +description = "Typing stubs for PyYAML" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.10.tar.gz", hash = "sha256:ebab3d0700b946553724ae6ca636ea932c1b0868701d4af121630e78d695fc97"}, + {file = "types_PyYAML-6.0.12.10-py3-none-any.whl", hash = "sha256:662fa444963eff9b68120d70cda1af5a5f2aa57900003c2006d7626450eaae5f"}, +] + [[package]] name = "types-regex" version = "2023.6.3.0" @@ -1843,4 +1854,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0816bfab0201eb376327dc2b8121353da6bfda6df2e3792dee8b951b9f3cabe0" +content-hash = "e47983c398a8642fcf4d58b9dd86cee3dad6a693659e3dd895e94e74ca4c4ded" diff --git a/pyproject.toml b/pyproject.toml index f6316e189..491fc7f84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ networkx = "^3.1.0" pandas = { version = "^2.0.1", extras = ["excel"] } # extra package that contains xlrd that is necessary for reading old .xls Excel files regex = "^2023.5.5" docker = "^6.1.2" +pyyaml = "^6.0" [tool.poetry.group.dev.dependencies] @@ -50,6 +51,7 @@ types-jsonschema = "^4.17.0.8" types-openpyxl = "^3.1.0.7" types-regex = "^2023.5.5.0" pre-commit = "^3.3.3" +types-pyyaml = "^6.0.12.10" [tool.poetry.scripts] @@ -63,7 +65,8 @@ check-links = """ markdown-link-validator \ ./docs \ -i \\.\\/assets\\/.+ \ - -i .+github\\.com\\/dasch\\-swiss\\/dsp-tools\\/settings\ + -i .+github\\.com\\/dasch\\-swiss\\/dsp-tools\\/settings \ + -i .+github\\.com\\/dasch\\-swiss\\/ops-deploy\\/.+ """ diff --git a/src/dsp_tools/dsp_tools.py b/src/dsp_tools/dsp_tools.py index 4b7ebbb9b..5805192b1 100644 --- a/src/dsp_tools/dsp_tools.py +++ b/src/dsp_tools/dsp_tools.py @@ -24,7 +24,7 @@ from dsp_tools.utils.project_validate import validate_project from dsp_tools.utils.rosetta import upload_rosetta from dsp_tools.utils.shared import validate_xml_against_schema -from dsp_tools.utils.stack_handling import start_stack, stop_stack +from dsp_tools.utils.stack_handling import StackHandling from dsp_tools.utils.xml_upload import xml_upload logger = get_logger(__name__) @@ -433,13 +433,15 @@ def call_requested_action( default_ontology=args.ontology_name, ) elif args.action == "start-stack": - success = start_stack( + stack_handler = StackHandling() + success = stack_handler.start_stack( max_file_size=args.max_file_size, enforce_docker_system_prune=args.prune, suppress_docker_system_prune=args.no_prune, ) elif args.action == "stop-stack": - success = stop_stack() + stack_handler = StackHandling() + success = stack_handler.stop_stack() elif args.action == "template": success = generate_template_repo() elif args.action == "rosetta": diff --git a/src/dsp_tools/resources/start-stack/start-stack-config.yml b/src/dsp_tools/resources/start-stack/start-stack-config.yml new file mode 100644 index 000000000..74fbb25dc --- /dev/null +++ b/src/dsp_tools/resources/start-stack/start-stack-config.yml @@ -0,0 +1,4 @@ +--- + +# take commit hash of latest DSP-API release from https://github.com/dasch-swiss/dsp-api/commits/main +DSP-API commit: e3a19dd975425ca75fca9f5841952fa9429837a7 diff --git a/src/dsp_tools/utils/stack_handling.py b/src/dsp_tools/utils/stack_handling.py index ac3f24a1c..a995d7e64 100644 --- a/src/dsp_tools/utils/stack_handling.py +++ b/src/dsp_tools/utils/stack_handling.py @@ -7,148 +7,295 @@ from typing import Optional import requests +import yaml -from dsp_tools.models.exceptions import BaseError +from dsp_tools.models.exceptions import UserError +from dsp_tools.utils.logging import get_logger -docker_path_of_user = Path.home() / Path(".dsp-tools/start-stack") -docker_path_of_user.mkdir(parents=True, exist_ok=True) - -def start_stack( - max_file_size: Optional[int] = None, - enforce_docker_system_prune: bool = False, - suppress_docker_system_prune: bool = False, -) -> bool: +class StackHandling: + """ + This class contains functions to start and stop the Docker containers of DSP-API and DSP-APP. """ - Start the Docker containers of DSP-API and DSP-APP, and load some basic data models and data. After startup, ask - user if Docker should be pruned or not. - Args: - max_file_size: max. multimedia file size allowed by SIPI, in MB (max: 100'000) - enforce_docker_system_prune: if True, prune Docker without asking the user - suppress_docker_system_prune: if True, don't prune Docker (and don't ask) + docker_path_of_user: Path + url_prefix: str + enforce_docker_system_prune: bool + suppress_docker_system_prune: bool - Raises: - BaseError if the stack cannot be started with the parameters passed by the user + def __init__(self) -> None: + self.logger = get_logger(__name__) + self.docker_path_of_user = Path.home() / Path(".dsp-tools/start-stack") + self.docker_path_of_user.mkdir(parents=True, exist_ok=True) + self.url_prefix = self._get_url_prefix() - Returns: - True if everything went well, False otherwise - """ - # validate input - if max_file_size is not None: - if not 1 <= max_file_size <= 100_000: - raise BaseError("max_file_size must be between 1 and 100000") - if enforce_docker_system_prune and suppress_docker_system_prune: - raise BaseError('The arguments "--prune" and "--no-prune" are mutually exclusive') - - # copy contents of src/dsp_tools/resources/start-stack to ~/.dsp-tools/start-stack - docker_path_of_distribution = importlib.resources.files("dsp_tools").joinpath("resources/start-stack") - for file in docker_path_of_distribution.iterdir(): - with importlib.resources.as_file(file) as f: - file_path = Path(f) - shutil.copy(file_path, docker_path_of_user / file.name) - - # get sipi.docker-config.lua - # take commit hash of latest DSP-API release from https://github.com/dasch-swiss/dsp-api/commits/main - commit_of_used_api_version = "e3a19dd975425ca75fca9f5841952fa9429837a7" - url_prefix = f"https://github.com/dasch-swiss/dsp-api/raw/{commit_of_used_api_version}/" - docker_config_lua_text = requests.get(f"{url_prefix}sipi/config/sipi.docker-config.lua", timeout=5).text - if max_file_size: - max_post_size_regex = r"max_post_size ?= ?[\'\"]\d+M[\'\"]" - if not re.search(max_post_size_regex, docker_config_lua_text): - raise BaseError("Unable to set max_file_size. Please try again without this flag.") - docker_config_lua_text = re.sub( - max_post_size_regex, - f"max_post_size = '{max_file_size}M'", - docker_config_lua_text, + def _validate_input( + self, + max_file_size: Optional[int], + enforce_docker_system_prune: bool, + suppress_docker_system_prune: bool, + ) -> None: + """ + Validate the input parameters passed by the user. + Raises a UserError if one of the parameters is invalid. + """ + if max_file_size is not None: + if not 1 <= max_file_size <= 100_000: + raise UserError("max_file_size must be between 1 and 100000") + if enforce_docker_system_prune and suppress_docker_system_prune: + raise UserError('The arguments "--prune" and "--no-prune" are mutually exclusive') + + def _get_url_prefix(self) -> str: + """ + The start-stack command needs some files from the DSP-API repository. + By default, start-stack starts the latest deployed version of DSP-API. + Since the last deployment, the DSP-API repository may have been updated. + For this reason, we need to know the commit hash of the DSP-API version that is currently deployed, + so that the files can be retrieved from the correct commit. + + This function reads the commit hash of DSP-API + that is configured in start-stack-config.yml, + and constructs the URL prefix necessary to retrieve the files from the DSP-API repository. + + If something goes wrong, + the URL prefix falls back to pointing to the main branch of the DSP-API repository. + + Returns: + URL prefix used to retrieve files from the DSP-API repository + """ + url_prefix_base = "https://github.com/dasch-swiss/dsp-api/raw/" + config_file = Path("src/dsp_tools/resources/start-stack/start-stack-config.yml") + if not config_file.is_file(): + return url_prefix_base + "main/" + + with open("src/dsp_tools/resources/start-stack/start-stack-config.yml", "r", encoding="utf-8") as f: + try: + start_stack_config = yaml.safe_load(f) + except yaml.YAMLError: + start_stack_config = {} + commit_of_used_api_version = start_stack_config.get("DSP-API commit", "main") + url_prefix = f"https://github.com/dasch-swiss/dsp-api/raw/{commit_of_used_api_version}/" + return url_prefix + + def _copy_resources_to_home_dir(self) -> None: + """ + On most systems, Docker is not allowed to access files outside of the user's home directory. + For this reason, copy the contents of src/dsp_tools/resources/start-stack to ~/.dsp-tools/start-stack. + """ + docker_path_of_distribution = importlib.resources.files("dsp_tools").joinpath("resources/start-stack") + for file in docker_path_of_distribution.iterdir(): + with importlib.resources.as_file(file) as f: + file_path = Path(f) + shutil.copy(file_path, self.docker_path_of_user / file.name) + + def _get_sipi_docker_config_lua( + self, + max_file_size: Optional[int], + ) -> None: + """ + Retrieve the config file sipi.docker-config.lua from the DSP-API repository, + and set the max_file_size parameter if necessary. + + Args: + max_file_size: new value for max_file_size to inject into sipi.docker-config.lua + + Raises: + UserError: if max_file_size is set but cannot be injected into sipi.docker-config.lua + """ + docker_config_lua_text = requests.get(f"{self.url_prefix}sipi/config/sipi.docker-config.lua", timeout=5).text + if max_file_size: + max_post_size_regex = r"max_post_size ?= ?[\'\"]\d+M[\'\"]" + if not re.search(max_post_size_regex, docker_config_lua_text): + raise UserError("Unable to set max_file_size. Please try again without this flag.") + docker_config_lua_text = re.sub( + max_post_size_regex, + f"max_post_size = '{max_file_size}M'", + docker_config_lua_text, + ) + with open(self.docker_path_of_user / "sipi.docker-config.lua", "w", encoding="utf-8") as f: + f.write(docker_config_lua_text) + + def _start_up_fuseki(self) -> None: + """ + Start up the Docker container of the fuseki database. + + Raises: + UserError: if the database cannot be started + """ + completed_process = subprocess.run( + "docker compose up db -d", + shell=True, + cwd=self.docker_path_of_user, + check=False, ) - with open(docker_path_of_user / "sipi.docker-config.lua", "w", encoding="utf-8") as f: - f.write(docker_config_lua_text) - - # start up the fuseki database - completed_process = subprocess.run("docker compose up db -d", shell=True, cwd=docker_path_of_user, check=False) - if not completed_process or completed_process.returncode != 0: - raise BaseError("Cannot start the API: Error while executing 'docker compose up db -d'") - - # wait until fuseki is up (same behaviour as dsp-api/webapi/scripts/wait-for-db.sh) - for _ in range(360): - try: - response = requests.get(url="http://0.0.0.0:3030/$/server", auth=("admin", "test"), timeout=5) - if response.ok: - break - except Exception: # pylint: disable=broad-exception-caught + if not completed_process or completed_process.returncode != 0: + msg = "Cannot start the API: Error while executing 'docker compose up db -d'" + self.logger.error(f"{msg}. completed_process = '{completed_process}'") + raise UserError(msg) + + def _wait_for_fuseki(self) -> None: + """ + Wait up to 6 minutes, until the fuseki database is up and running. + This function imitates the behaviour of the script dsp-api/webapi/scripts/wait-for-db.sh. + """ + for _ in range(6 * 60): + try: + response = requests.get(url="http://0.0.0.0:3030/$/server", auth=("admin", "test"), timeout=5) + if response.ok: + break + except Exception: # pylint: disable=broad-exception-caught + time.sleep(1) time.sleep(1) - time.sleep(1) - - # inside fuseki, create the "knora-test" repository - # (same behaviour as dsp-api/webapi/target/docker/stage/opt/docker/scripts/fuseki-init-knora-test.sh) - repo_template = requests.get(f"{url_prefix}webapi/scripts/fuseki-repository-config.ttl.template", timeout=5).text - repo_template = repo_template.replace("@REPOSITORY@", "knora-test") - response = requests.post( - url="http://0.0.0.0:3030/$/datasets", - files={"file": ("file.ttl", repo_template, "text/turtle; charset=utf8")}, - auth=("admin", "test"), - timeout=5, - ) - if not response.ok: - raise BaseError( - "Cannot start DSP-API: Error when creating the 'knora-test' repository. " - "Is DSP-API perhaps running already?" - ) - # load some basic ontos and data into the repository - # (same behaviour as dsp-api/webapi/target/docker/stage/opt/docker/scripts/fuseki-init-knora-test.sh) - graph_prefix = "http://0.0.0.0:3030/knora-test/data?graph=" - ttl_files = [ - ("knora-ontologies/knora-admin.ttl", "http://www.knora.org/ontology/knora-admin"), - ("knora-ontologies/knora-base.ttl", "http://www.knora.org/ontology/knora-base"), - ("knora-ontologies/standoff-onto.ttl", "http://www.knora.org/ontology/standoff"), - ("knora-ontologies/standoff-data.ttl", "http://www.knora.org/data/standoff"), - ("knora-ontologies/salsah-gui.ttl", "http://www.knora.org/ontology/salsah-gui"), - ("test_data/all_data/admin-data.ttl", "http://www.knora.org/data/admin"), - ("test_data/all_data/permissions-data.ttl", "http://www.knora.org/data/permissions"), - ("test_data/ontologies/anything-onto.ttl", "http://www.knora.org/ontology/0001/anything"), - ("test_data/all_data/anything-data.ttl", "http://www.knora.org/data/0001/anything"), - ] - for ttl_file, graph in ttl_files: - ttl_text = requests.get(url_prefix + ttl_file, timeout=5).text + def _create_knora_test_repo(self) -> None: + """ + Inside fuseki, create the "knora-test" repository. + This function imitates the behaviour of the script dsp-api/webapi/scripts/fuseki-init-knora-test.sh. + + Raises: + UserError: in case of failure + """ + repo_template = requests.get( + f"{self.url_prefix}webapi/scripts/fuseki-repository-config.ttl.template", + timeout=5, + ).text + repo_template = repo_template.replace("@REPOSITORY@", "knora-test") response = requests.post( - url=graph_prefix + graph, - files={"file": ("file.ttl", ttl_text, "text/turtle; charset: utf-8")}, + url="http://0.0.0.0:3030/$/datasets", + files={"file": ("file.ttl", repo_template, "text/turtle; charset=utf8")}, auth=("admin", "test"), timeout=5, ) if not response.ok: - raise BaseError(f"Cannot start DSP-API: Error when creating graph '{graph}'") - - # startup all other components - subprocess.run("docker compose up -d", shell=True, cwd=docker_path_of_user, check=True) - print("DSP-API is now running on http://0.0.0.0:3333/ and DSP-APP on http://0.0.0.0:4200/") - - # docker system prune - if enforce_docker_system_prune: - prune_docker = "y" - elif suppress_docker_system_prune: - prune_docker = "n" - else: - prune_docker = None - while prune_docker not in ["y", "n"]: - prune_docker = input( - "Allow dsp-tools to execute 'docker system prune'? This is necessary to keep your Docker clean. " - "If you are unsure what that means, just type y and press Enter. [y/n]" + msg = ( + "Cannot start DSP-API: Error when creating the 'knora-test' repository. " + "Is DSP-API perhaps running already?" ) - if prune_docker == "y": - subprocess.run("docker system prune -f", shell=True, cwd=docker_path_of_user, check=False) + self.logger.error(f"{msg}. response = {response}") + raise UserError(msg) - return True + def _load_data_into_repo(self) -> None: + """ + Load some basic ontologies and data into the repository. + This function imitates the behaviour of the script + dsp-api/webapi/target/docker/stage/opt/docker/scripts/fuseki-init-knora-test.sh. + Raises: + UserError: if one of the graphs cannot be created + """ + graph_prefix = "http://0.0.0.0:3030/knora-test/data?graph=" + ttl_files = [ + ("knora-ontologies/knora-admin.ttl", "http://www.knora.org/ontology/knora-admin"), + ("knora-ontologies/knora-base.ttl", "http://www.knora.org/ontology/knora-base"), + ("knora-ontologies/standoff-onto.ttl", "http://www.knora.org/ontology/standoff"), + ("knora-ontologies/standoff-data.ttl", "http://www.knora.org/data/standoff"), + ("knora-ontologies/salsah-gui.ttl", "http://www.knora.org/ontology/salsah-gui"), + ("test_data/all_data/admin-data.ttl", "http://www.knora.org/data/admin"), + ("test_data/all_data/permissions-data.ttl", "http://www.knora.org/data/permissions"), + ("test_data/ontologies/anything-onto.ttl", "http://www.knora.org/ontology/0001/anything"), + ("test_data/all_data/anything-data.ttl", "http://www.knora.org/data/0001/anything"), + ] + for ttl_file, graph in ttl_files: + ttl_text = requests.get(self.url_prefix + ttl_file, timeout=5).text + response = requests.post( + url=graph_prefix + graph, + files={"file": ("file.ttl", ttl_text, "text/turtle; charset: utf-8")}, + auth=("admin", "test"), + timeout=5, + ) + if not response.ok: + self.logger.error(f"Cannot start DSP-API: Error when creating graph '{graph}'. response = {response}") + raise UserError(f"Cannot start DSP-API: Error when creating graph '{graph}'") -def stop_stack() -> bool: - """ - Shut down the Docker containers of your local DSP stack and delete all data that is in it. + def _initialize_fuseki(self) -> None: + """ + Create the "knora-test" repository and load some basic ontologies and data into it. + """ + self._create_knora_test_repo() + self._load_data_into_repo() - Returns: - True if everything went well, False otherwise - """ - subprocess.run("docker compose down --volumes", shell=True, cwd=docker_path_of_user, check=True) - return True + def _start_remaining_docker_containers(self) -> None: + """ + Start the other Docker containers that are not running yet. + (Fuseki is already running at this point.) + """ + subprocess.run("docker compose up -d", shell=True, cwd=self.docker_path_of_user, check=True) + print("DSP-API is now running on http://0.0.0.0:3333/ and DSP-APP on http://0.0.0.0:4200/") + + def _execute_docker_system_prune(self) -> None: + """ + Depending on the CLI parameters or the user's input, + execute "docker system prune" or not. + """ + if self.enforce_docker_system_prune: + prune_docker = "y" + elif self.suppress_docker_system_prune: + prune_docker = "n" + else: + prune_docker = None + while prune_docker not in ["y", "n"]: + prune_docker = input( + "Allow dsp-tools to execute 'docker system prune'? This is necessary to keep your Docker clean. " + "If you are unsure what that means, just type y and press Enter. [y/n]" + ) + if prune_docker == "y": + subprocess.run("docker system prune -f", shell=True, cwd=self.docker_path_of_user, check=False) + + def _start_docker_containers(self) -> None: + """ + Start the fuseki Docker container, + wait until it is up and running, + load some basic ontologies and data into it, + start the other Docker containers, + and execute "docker system prune" if necessary. + """ + self._start_up_fuseki() + self._wait_for_fuseki() + self._initialize_fuseki() + self._start_remaining_docker_containers() + self._execute_docker_system_prune() + + def start_stack( + self, + max_file_size: Optional[int] = None, + enforce_docker_system_prune: bool = False, + suppress_docker_system_prune: bool = False, + ) -> bool: + """ + Start the Docker containers of DSP-API and DSP-APP, and load some basic data models and data. After startup, ask + user if Docker should be pruned or not. + + Args: + max_file_size: max. multimedia file size allowed by SIPI, in MB (max: 100'000) + enforce_docker_system_prune: if True, prune Docker without asking the user + suppress_docker_system_prune: if True, don't prune Docker (and don't ask) + + Raises: + UserError if the stack cannot be started with the parameters passed by the user + + Returns: + True if everything went well, False otherwise + """ + self._validate_input( + max_file_size=max_file_size, + enforce_docker_system_prune=enforce_docker_system_prune, + suppress_docker_system_prune=suppress_docker_system_prune, + ) + self.enforce_docker_system_prune = enforce_docker_system_prune + self.suppress_docker_system_prune = suppress_docker_system_prune + + self._copy_resources_to_home_dir() + self._get_sipi_docker_config_lua(max_file_size=max_file_size) + self._start_docker_containers() + + return True + + def stop_stack(self) -> bool: + """ + Shut down the Docker containers of your local DSP stack and delete all data that is in it. + + Returns: + True if everything went well, False otherwise + """ + subprocess.run("docker compose down --volumes", shell=True, cwd=self.docker_path_of_user, check=True) + return True