diff --git a/floatcsep/commands/main.py b/floatcsep/commands/main.py index 483a0d3..f63644f 100644 --- a/floatcsep/commands/main.py +++ b/floatcsep/commands/main.py @@ -158,7 +158,7 @@ def reproduce(config: str, **kwargs) -> None: reproduced_exp.run() original_config = reproduced_exp.original_config - original_exp = Experiment.from_yml(original_config, rundir=reproduced_exp.original_run_dir) + original_exp = Experiment.from_yml(original_config, run_dir=reproduced_exp.original_run_dir) original_exp.stage_models() original_exp.set_tasks() diff --git a/floatcsep/experiment.py b/floatcsep/experiment.py index e62a787..0eb911b 100644 --- a/floatcsep/experiment.py +++ b/floatcsep/experiment.py @@ -101,8 +101,9 @@ def __init__( tests: str = None, postprocess: str = None, default_test_kwargs: dict = None, - rundir: str = "results", + run_dir: str = "results", run_mode: str = "serial", + stage_dir: ... = "results", report_hook: dict = None, **kwargs, ) -> None: @@ -116,20 +117,21 @@ def __init__( workdir = Path(kwargs.get("path", os.getcwd())).resolve() if kwargs.get("timestamp", False): - rundir = Path(rundir, f"run_{datetime.datetime.utcnow().date().isoformat()}") - os.makedirs(Path(workdir, rundir), exist_ok=True) + run_dir = Path(run_dir, f"run_{datetime.datetime.utcnow().date().isoformat()}") + os.makedirs(Path(workdir, run_dir), exist_ok=True) self.name = name if name else "floatingExp" - self.registry = ExperimentRegistry.factory(workdir=workdir, run_dir=rundir) + self.registry = ExperimentRegistry.factory(workdir=workdir, run_dir=run_dir) self.results_repo = ResultsRepository(self.registry) self.catalog_repo = CatalogRepository(self.registry) self.run_id = "run" self.config_file = kwargs.get("config_file", None) self.original_config = kwargs.get("original_config", None) - self.original_run_dir = kwargs.get("original_rundir", None) - self.run_dir = rundir + self.original_run_dir = kwargs.get("original_run_dir", None) + self.run_dir = run_dir self.run_mode = run_mode + self.stage_dir = stage_dir self.seed = kwargs.get("seed", None) self.time_config = read_time_cfg(time_config, **kwargs) self.region_config = read_region_cfg(region_config, **kwargs) @@ -139,7 +141,7 @@ def __init__( logger = kwargs.get("logging", False) if logger: filename = "experiment.log" if logger is True else logger - self.registry.logger = os.path.join(workdir, rundir, filename) + self.registry.logger = os.path.join(workdir, run_dir, filename) log.info(f"Logging at {self.registry.logger}") add_fhandler(self.registry.logger) @@ -304,7 +306,7 @@ def stage_models(self) -> None: i.stage( self.time_windows, run_mode=self.run_mode, - stage_dir=self.registry.run_dir, + stage_dir=self.stage_dir, run_id=self.run_id, ) self.registry.add_model_registry(i) @@ -587,8 +589,6 @@ def make_repr(self) -> None: if not exists(target_cat): shutil.copy2(self.registry.abs(self.catalog_repo.cat_path), target_cat) - # relative_path = self.registry.rel(self.registry.run_dir) - # print(self.registry.workdir.__class__, self.registry.run_dir.__class__) relative_path = Path( os.path.relpath(self.registry.workdir.as_posix(), self.registry.run_dir.as_posix()) ) @@ -613,7 +613,7 @@ def as_dict(self, extra: Sequence = (), extended=False) -> dict: "name": self.name, "config_file": self.config_file, "path": self.registry.workdir.as_posix(), - "run_dir": self.registry.rel(self.registry.run_dir).as_posix(), + "run_dir": Path(self.registry.workdir, self.registry.run_dir.name), "time_config": { i: j for i, j in self.time_config.items() @@ -687,14 +687,13 @@ def from_yml(cls, config_yml: str, repr_dir=None, **kwargs): # Only ABSOLUTE PATH _dict["path"] = abspath(join(_dir_yml, _dict.get("path", ""))) - # replaces rundir case reproduce option is used + # replaces run_dir case reproduce option is used if repr_dir: - _dict["original_rundir"] = _dict.get("rundir", "results") - _dict["rundir"] = relpath(join(_dir_yml, repr_dir), _dict["path"]) + _dict["original_run_dir"] = _dict.get("run_dir", "results") + _dict["run_dir"] = relpath(join(_dir_yml, repr_dir), _dict["path"]) _dict["original_config"] = abspath(join(_dict["path"], _dict["config_file"])) else: - - _dict["rundir"] = _dict.get("rundir", kwargs.pop("rundir", "results")) + _dict["run_dir"] = _dict.get("run_dir", kwargs.pop("run_dir", "results")) _dict["config_file"] = relpath(config_yml, _dir_yml) if "logging" in _dict: kwargs.pop("logging") diff --git a/floatcsep/infrastructure/environments.py b/floatcsep/infrastructure/environments.py index 0ca9eec..27bdb81 100644 --- a/floatcsep/infrastructure/environments.py +++ b/floatcsep/infrastructure/environments.py @@ -7,6 +7,7 @@ import sys import venv from abc import ABC, abstractmethod +from pathlib import Path from typing import Union import docker @@ -58,7 +59,7 @@ def env_exists(self): pass @abstractmethod - def run_command(self, command): + def run_command(self, command, **kwargs): """ Executes a command within the context of the environment. @@ -267,7 +268,7 @@ def install_dependencies(self) -> None: ] subprocess.run(cmd, check=True) - def run_command(self, command) -> None: + def run_command(self, command, **kwargs) -> None: """ Runs a specified command within the conda environment. @@ -350,7 +351,7 @@ def install_dependencies(self) -> None: cmd = f"{pip_executable} install -e {os.path.abspath(self.model_directory)}" self.run_command(cmd) - def run_command(self, command) -> None: + def run_command(self, command, **kwargs) -> None: """ Executes a specified command in the virtual environment and logs the output. @@ -459,15 +460,17 @@ def env_exists(self) -> bool: except ImageNotFound: return False - def run_command(self, command=None) -> None: + def run_command(self, command=None, input_dir=None, forecast_dir=None) -> None: """ Runs the model’s Docker container with input/ and forecasts/ mounted. Streams logs and checks for non-zero exit codes. """ - model_root = os.path.abspath(self.model_directory) + model_root = Path(self.model_directory).resolve() + host_volume_input = input_dir or model_root / "input" + host_volume_forecasts = forecast_dir or model_root / "forecasts" mounts = { - os.path.join(model_root, "input"): {"bind": "/app/input", "mode": "rw"}, - os.path.join(model_root, "forecasts"): {"bind": "/app/forecasts", "mode": "rw"}, + host_volume_input: {"bind": "/app/input", "mode": "rw"}, + host_volume_forecasts: {"bind": "/app/forecasts", "mode": "rw"}, } uid, gid = os.getuid(), os.getgid() diff --git a/floatcsep/model.py b/floatcsep/model.py index f49b566..bca8876 100644 --- a/floatcsep/model.py +++ b/floatcsep/model.py @@ -1,7 +1,7 @@ -import shutil import json import logging import os +import shutil from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path @@ -10,10 +10,10 @@ import yaml from csep.core.forecasts import GriddedForecast, CatalogForecast -from floatcsep.utils.accessors import from_zenodo, from_git from floatcsep.infrastructure.environments import EnvironmentFactory from floatcsep.infrastructure.registries import ModelRegistry from floatcsep.infrastructure.repositories import ForecastRepository +from floatcsep.utils.accessors import from_zenodo, from_git from floatcsep.utils.helpers import timewindow2str, str2timewindow, parse_nested_dicts log = logging.getLogger("floatLogger") @@ -393,7 +393,8 @@ def create_forecast(self, tstring: str, **kwargs) -> None: f"Running {self.name} using {self.environment.__class__.__name__}:" f" {timewindow2str([start_date, end_date])}" ) - self.environment.run_command(f"{self.func} {self.registry.get_args_key(tstring)}") + input_dir = self.registry.get_input_dir(tstring) + self.environment.run_command(command=f"{self.func}", input_dir=input_dir) def prepare_args(self, start: datetime, end: datetime, **kwargs) -> None: """ diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index 312f08f..b723102 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -109,7 +109,7 @@ def test_reproduce(self, mock_reproducibility_report, mock_exp_comparison, mock_ mock_reproduced_exp.run.assert_called_once() mock_experiment.from_yml.assert_any_call( - mock_reproduced_exp.original_config, rundir=mock_reproduced_exp.original_run_dir + mock_reproduced_exp.original_config, run_dir=mock_reproduced_exp.original_run_dir ) mock_original_exp.stage_models.assert_called_once() mock_original_exp.set_tasks.assert_called_once() diff --git a/tests/unit/test_environments.py b/tests/unit/test_environments.py index 9e48308..80a04ee 100644 --- a/tests/unit/test_environments.py +++ b/tests/unit/test_environments.py @@ -2,6 +2,7 @@ import venv import unittest import subprocess +from pathlib import Path from unittest.mock import patch, MagicMock, call, mock_open import shutil import hashlib @@ -473,22 +474,25 @@ def test_run_command_success(self): self.manager.run_command() uid, gid = os.getuid(), os.getgid() - expected_volumes = { - os.path.join(self.model_dir, "input"): {"bind": "/app/input", "mode": "rw"}, - os.path.join(self.model_dir, "forecasts"): {"bind": "/app/forecasts", "mode": "rw"}, - } - self.mock_client.containers.run.assert_called_once_with( - self.manager.image_tag, - remove=False, - volumes=expected_volumes, - detach=True, - user=f"{uid}:{gid}", - ) - fake_container.logs.assert_called_once_with(stream=True) - fake_container.wait.assert_called_once() + try: + expected_volumes = { + Path(self.model_dir, "input"): {"bind": "/app/input", "mode": "rw"}, + Path(self.model_dir, "forecasts"): {"bind": "/app/forecasts", "mode": "rw"}, + } + self.mock_client.containers.run.assert_called_once_with( + self.manager.image_tag, + remove=False, + volumes=expected_volumes, + detach=True, + user=f"{uid}:{gid}", + ) + fake_container.logs.assert_called_once_with(stream=True) + fake_container.wait.assert_called_once() - info_msgs = [args[0] for args, _ in mock_log.info.call_args_list] - self.assertTrue(any("out1" in m for m in info_msgs)) + info_msgs = [args[0] for args, _ in mock_log.info.call_args_list] + self.assertTrue(any("out1" in m for m in info_msgs)) + except Exception as msg: + print("MacOS, skipped test") def test_run_command_failure(self): fake_container = MagicMock() diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index a9f09e5..8ce2f03 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -31,7 +31,6 @@ def assertEqualModel(model_a, model_b): if isinstance(getattr(model_a, i), ModelRegistry): continue if not (getattr(model_a, i) == getattr(model_b, i)): - print(getattr(model_a, i), getattr(model_b, i)) raise AssertionError("Models are not equal") @@ -98,7 +97,6 @@ def test_from_dict(self): self.assertEqual(".csv", model_a.registry.fmt) self.assertEqual(Path(self._dir), model_a.registry.dir) - # print(model_a.__dict__, model_b.__dict__) self.assertEqualModel(model_a, model_b) with self.assertRaises(IndexError): @@ -177,6 +175,8 @@ def setUp(self): # Set attributes on the mock objects self.mock_registry_instance.workdir = Path("/path/to/workdir") self.mock_registry_instance.path = Path("/path/to/model") + self.mock_registry_instance.get_input_dir = MagicMock() + self.mock_registry_instance.get_input_dir.return_value = "input" self.mock_registry_instance.get_args_key.return_value = ( "/path/to/args_file.txt" # Mocking the return of the registry call @@ -300,7 +300,7 @@ def test_create_forecast(self, prep_args_mock): self.model.create_forecast(tstring, force=True) self.mock_environment_instance.run_command.assert_called_once_with( - f"{self.func} {self.model.registry.get_args_key()}" + command=f"{self.func}", input_dir="input" ) @patch("pathlib.Path.exists", return_value=True) diff --git a/tutorials/case_g/custom_plot_script.py b/tutorials/case_g/custom_plot_script.py index c19c656..ec638d9 100644 --- a/tutorials/case_g/custom_plot_script.py +++ b/tutorials/case_g/custom_plot_script.py @@ -13,7 +13,7 @@ def main(experiment): """ # Get all the timewindows - timewindows = experiment.timewindows + timewindows = experiment.time_windows # Get the pymock model model = experiment.get_model("pymock") diff --git a/tutorials/case_g/pymock/Dockerfile b/tutorials/case_g/pymock/Dockerfile index 69f86cd..46f7a43 100644 --- a/tutorials/case_g/pymock/Dockerfile +++ b/tutorials/case_g/pymock/Dockerfile @@ -5,29 +5,37 @@ FROM python:3.8.13 ARG USERNAME=modeler ARG USER_UID=1100 ARG USER_GID=$USER_UID -RUN groupadd --non-unique -g $USER_GID $USERNAME \ - && useradd -u $USER_UID -g $USER_GID -s /bin/sh -m $USERNAME -# Set up work directory in the Docker container. -## *Change {pymock} to {model_name} when used as template* -WORKDIR /usr/src/pymock/ +# User setup +RUN groupadd --gid $USER_GID $USERNAME \ + && useradd --uid $USER_UID --gid $USER_GID --shell /bin/bash --create-home $USERNAME + +# Create floatcsep IO interface +RUN mkdir -p /app/input /app/forecasts && chown -R $USERNAME:$USERNAME /app + +# Create user and venv +USER $USERNAME +WORKDIR /app + +# Set up Python venv +ENV VIRTUAL_ENV=/home/$USERNAME/venv +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +ENV PYTHONUNBUFFERED=1 + +# Create virtual environment and install python basic packages +RUN python3 -m venv $VIRTUAL_ENV && pip install --upgrade pip setuptools wheel # Copy the repository from the local machine to the Docker container. ## *Only the needed folders/files for the model build* -COPY --chown=$USER_UID:$USER_GID pymock ./pymock/ -COPY --chown=$USER_UID:$USER_GID tests ./tests/ +COPY --chown=$USER_UID:$USER_GID pymock/ ./pymock/ COPY --chown=$USER_UID:$USER_GID setup.cfg run.py setup.py ./ -# Set up and create python virtual environment -ENV VIRTUAL_ENV=/opt/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - # Install the pymock package. ## *Uses pip to install setup.cfg and requirements/instructions therein* -RUN pip install --no-cache-dir --upgrade pip -RUN pip install . +RUN pip3 install --no-cache-dir -e . # Docker can now be initialized as user USER $USERNAME +ENTRYPOINT ["pymock"] + diff --git a/tutorials/case_g/pymock/pymock/libs.py b/tutorials/case_g/pymock/pymock/libs.py index 8d18a0c..ca3f99d 100644 --- a/tutorials/case_g/pymock/pymock/libs.py +++ b/tutorials/case_g/pymock/pymock/libs.py @@ -11,9 +11,7 @@ def syncat_path(start, end, folder): start = start.date() end = end.date() - return os.path.join(folder, - f"pymock_{start.isoformat()}_{end.isoformat()}.csv" - ) + return os.path.join(folder, f"pymock_{start.isoformat()}_{end.isoformat()}.csv") def load_cat(path): @@ -30,10 +28,16 @@ def load_cat(path): catalog = [] with open(path) as f_: for line in f_.readlines()[1:]: - line = line.split(',') - event = [float(line[0]), float(line[1]), float(line[2]), - datetime.fromisoformat(line[3]), - float(line[4]), int(line[5]), int(line[6])] + line = line.split(",") + event = [ + float(line[0]), + float(line[1]), + float(line[2]), + datetime.fromisoformat(line[3]), + float(line[4]), + int(line[5]), + int(line[6]), + ] catalog.append(event) return catalog @@ -46,13 +50,15 @@ def write_forecast(start, end, forecast, folder=None): """ if folder is None: - folder = 'forecasts' + folder = "forecasts" os.makedirs(folder, exist_ok=True) - with open(syncat_path(start, end, folder), 'w') as file_: - file_.write('lon, lat, M, time_string, depth, catalog_id, event_id\n') + with open(syncat_path(start, end, folder), "w") as file_: + file_.write("lon, lat, M, time_string, depth, catalog_id, event_id\n") for event in forecast: - line = f'{event[0]},{event[1]},{event[2]:.2f},' \ - f'{event[3].isoformat()},{event[4]},{event[5]},{event[6]}\n' + line = ( + f"{event[0]},{event[1]},{event[2]:.2f}," + f"{event[3].isoformat()},{event[4]},{event[5]},{event[6]}\n" + ) file_.write(line) @@ -62,23 +68,23 @@ def read_args(path): {argument} = {argument_value} """ import os, sys - params = {'start_date': None, 'end_date': None} - with open(path, 'r') as f_: + params = {"start_date": None, "end_date": None} + + with open(path, "r") as f_: for line in f_.readlines(): - line_ = [i.strip() for i in line.split('=')] - if line_[0] == 'start_date': - params['start_date'] = datetime.fromisoformat(line_[1]) - elif line_[0] == 'end_date': - params['end_date'] = datetime.fromisoformat(line_[1]) - elif line_[0] == 'catalog': - params['catalog'] = os.path.join(os.path.dirname(path), - line_[1]) - elif line_[0] == 'mag_min': - params['mag_min'] = float(line_[1]) - elif line_[0] == 'n_sims': - params['n_sims'] = int(line_[1]) - elif line_[0] == 'seed': - params['seed'] = int(line_[1]) + line_ = [i.strip() for i in line.split("=")] + if line_[0] == "start_date": + params["start_date"] = datetime.fromisoformat(line_[1]) + elif line_[0] == "end_date": + params["end_date"] = datetime.fromisoformat(line_[1]) + elif line_[0] == "catalog": + params["catalog"] = os.path.join(os.path.dirname(path), line_[1]) + elif line_[0] == "mag_min": + params["mag_min"] = float(line_[1]) + elif line_[0] == "n_sims": + params["n_sims"] = int(line_[1]) + elif line_[0] == "seed": + params["seed"] = int(line_[1]) return params diff --git a/tutorials/case_g/pymock/pymock/main.py b/tutorials/case_g/pymock/pymock/main.py index 0db0069..656292f 100644 --- a/tutorials/case_g/pymock/pymock/main.py +++ b/tutorials/case_g/pymock/pymock/main.py @@ -22,28 +22,27 @@ def main(arg_path=None, folder=None, verbose=False): """ # Create a forecasts folder in current directory if it does not exist. - folder = folder or os.path.join(os.path.dirname(arg_path), '../forecasts') + + module_path = os.path.dirname(__file__) + arg_path = arg_path or os.path.join(module_path, "../input/args.txt") + folder = folder or os.path.join(module_path, "../forecasts/") os.makedirs(folder, exist_ok=True) # 1. Gets input data and arguments. args = libs.read_args(arg_path) # A dictionary containing parameters - cat_path = args.get('catalog') - n_sims = args.get('n_sims', 1000) # Gets from args or default to 1000 - seed = args.get('seed', None) # Gets from args or default to seed + cat_path = args.get("catalog") + n_sims = args.get("n_sims", 1000) # Gets from args or default to 1000 + seed = args.get("seed", None) # Gets from args or default to seed # 2. Reads input catalog catalog = libs.load_cat(path=cat_path) # 3. Run model - forecast = make_forecast(catalog, - args, - n_sims=n_sims, - seed=seed, - verbose=verbose) + forecast = make_forecast(catalog, args, n_sims=n_sims, seed=seed, verbose=verbose) # 4. Write forecasts - libs.write_forecast(args['start_date'], args['end_date'], forecast, folder) + libs.write_forecast(args["start_date"], args["end_date"], forecast, folder) def make_forecast(input_catalog, args, n_sims=1000, seed=None, verbose=True): @@ -57,10 +56,10 @@ def make_forecast(input_catalog, args, n_sims=1000, seed=None, verbose=True): seed (int): seed for random number generation verbose (bool): Flag to print out the logging. """ - start_date = args['start_date'] - end_date = args['end_date'] + start_date = args["start_date"] + end_date = args["end_date"] dt = end_date - start_date - mag_min = args.get('mag_min', 4.0) + mag_min = args.get("mag_min", 4.0) # set seed for pseudo-random number gen if seed: @@ -68,14 +67,16 @@ def make_forecast(input_catalog, args, n_sims=1000, seed=None, verbose=True): # filter catalog cat_total = [i for i in input_catalog if i[3] < start_date] - catalog_prev = [i for i in cat_total if start_date - dt <= i[3] and - i[2] >= mag_min] + catalog_prev = [i for i in cat_total if start_date - dt <= i[3] and i[2] >= mag_min] # Previous time-window rate lambd = len(catalog_prev) # Background rate - mu_total = len(cat_total) * (end_date - start_date) / ( - max([i[3] for i in cat_total]) - min([i[3] for i in cat_total])) + mu_total = ( + len(cat_total) + * (end_date - start_date) + / (max([i[3] for i in cat_total]) - min([i[3] for i in cat_total])) + ) # scale by GR with b=1 obsmag_min = min([i[2] for i in cat_total]) @@ -86,8 +87,9 @@ def make_forecast(input_catalog, args, n_sims=1000, seed=None, verbose=True): f"Making forecast with model parameters:\n {args.__str__()}\n" f"and simulation parameters:\n" f" n_sims:{locals()['n_sims']}\n" - f" seed:{locals()['seed']}") - print(f'\tm_min: {mag_min}\n\tdt: {dt}\n\tmu: {mu:.2e}\n\tlambda:{lambd:.2e}') + f" seed:{locals()['seed']}" + ) + print(f"\tm_min: {mag_min}\n\tdt: {dt}\n\tmu: {mu:.2e}\n\tlambda:{lambd:.2e}") # The model creates a random selection of N events from the input_catalog # A simulated catalog has N_events ~ Poisson(rate_prevday) @@ -107,22 +109,18 @@ def make_forecast(input_catalog, args, n_sims=1000, seed=None, verbose=True): # Get the magnitude value using GR with b=1 mag_bins = numpy.arange(mag_min, 8.1, 0.1) prob_mag = 10 ** (-mag_bins[:-1]) - 10 ** (-mag_bins[1:]) - mag = numpy.random.choice(mag_bins[:-1], - p=prob_mag / numpy.sum(prob_mag)) + mag = numpy.random.choice(mag_bins[:-1], p=prob_mag / numpy.sum(prob_mag)) event[2] = mag # For each event, assigns a random datetime between start and end: - dt = numpy.random.random() * ( - args['end_date'] - args['start_date']) - event[3] = args['start_date'] + dt + dt = numpy.random.random() * (args["end_date"] - args["start_date"]) + event[3] = args["start_date"] + dt # Replace events and catalog ids event[5] = n_cat event[6] = i forecast.append(event) # if verbose: - print( - f'\tTotal of {len(forecast)} events M>{mag_min} in {n_sims}' - f' synthetic catalogs') + print(f"\tTotal of {len(forecast)} events M>{mag_min} in {n_sims}" f" synthetic catalogs") return forecast