diff --git a/configs/components/oifs/oifs.yaml b/configs/components/oifs/oifs.yaml index e286569fb..44cf6a54f 100644 --- a/configs/components/oifs/oifs.yaml +++ b/configs/components/oifs/oifs.yaml @@ -1328,7 +1328,7 @@ choose_eternal_run_number: ${oifs.nx} ${oifs.ensemble_id}; ${general.esm_function_dir}/components/oifs/change_rcf_date.sh - ${thisrun_restart_in_dir}/ + ${thisrun_work_dir}/ ${pseudo_initial_date!syear!smonth!sday} ${oifs.time_step} ${oifs.seconds_since_initial} @@ -1347,7 +1347,7 @@ choose_general.standalone: True: slice_icml: " ${general.esm_function_dir}/components/oifs/slice_icmcl_file.sh - ${thisrun_input_dir}/ + ${thisrun_work_dir}/ ${icmcl_dir}/${icmcl_file} ${oifs.input_expid} ${start_date!syear!smonth!sday} diff --git a/configs/defaults/general.yaml b/configs/defaults/general.yaml index 4d7048319..b1a124fad 100644 --- a/configs/defaults/general.yaml +++ b/configs/defaults/general.yaml @@ -1,2 +1,6 @@ use_database: false profile: False +intermediate_movements: +- "config" +- "bin" +- "input" diff --git a/setup.cfg b/setup.cfg index b8a6401eb..26924f1a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 6.30.0 +current_version = 6.31.0 commit = True tag = True diff --git a/setup.py b/setup.py index b5c195bd9..d01b24782 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,6 @@ test_suite="tests", tests_require=test_requirements, url="https://github.com/esm-tools/esm_tools", - version="6.30.0", + version="6.31.0", zip_safe=False, ) diff --git a/src/esm_archiving/__init__.py b/src/esm_archiving/__init__.py index 863313531..d7cdcbbc1 100644 --- a/src/esm_archiving/__init__.py +++ b/src/esm_archiving/__init__.py @@ -4,7 +4,7 @@ __author__ = """Paul Gierz""" __email__ = "pgierz@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_archiving import (archive_mistral, check_tar_lists, delete_original_data, determine_datestamp_location, diff --git a/src/esm_calendar/__init__.py 
b/src/esm_calendar/__init__.py index f5715dfb0..9c96ba9f9 100644 --- a/src/esm_calendar/__init__.py +++ b/src/esm_calendar/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_calendar import * diff --git a/src/esm_cleanup/__init__.py b/src/esm_cleanup/__init__.py index 7eeeb991b..eaa8d55b9 100644 --- a/src/esm_cleanup/__init__.py +++ b/src/esm_cleanup/__init__.py @@ -2,4 +2,4 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" diff --git a/src/esm_database/__init__.py b/src/esm_database/__init__.py index 60e2323e0..c20fe754b 100644 --- a/src/esm_database/__init__.py +++ b/src/esm_database/__init__.py @@ -2,4 +2,4 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" diff --git a/src/esm_environment/__init__.py b/src/esm_environment/__init__.py index 719dc84a9..b8c44c88f 100644 --- a/src/esm_environment/__init__.py +++ b/src/esm_environment/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_environment import * diff --git a/src/esm_master/__init__.py b/src/esm_master/__init__.py index b972c55c6..b828a2520 100644 --- a/src/esm_master/__init__.py +++ b/src/esm_master/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from . 
import database diff --git a/src/esm_motd/__init__.py b/src/esm_motd/__init__.py index 394c96b37..408274776 100644 --- a/src/esm_motd/__init__.py +++ b/src/esm_motd/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_motd import * diff --git a/src/esm_parser/__init__.py b/src/esm_parser/__init__.py index 8024be907..911f39b4b 100644 --- a/src/esm_parser/__init__.py +++ b/src/esm_parser/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_parser import * diff --git a/src/esm_plugin_manager/__init__.py b/src/esm_plugin_manager/__init__.py index 6324f5b07..f8d56d300 100644 --- a/src/esm_plugin_manager/__init__.py +++ b/src/esm_plugin_manager/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi, Paul Gierz, Sebastian Wahl""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_plugin_manager import * diff --git a/src/esm_profile/__init__.py b/src/esm_profile/__init__.py index a09e289c1..42b9ae3f7 100644 --- a/src/esm_profile/__init__.py +++ b/src/esm_profile/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .esm_profile import * diff --git a/src/esm_runscripts/__init__.py b/src/esm_runscripts/__init__.py index 70841a11a..5fac48d2c 100644 --- a/src/esm_runscripts/__init__.py +++ b/src/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .batch_system import * from .chunky_parts import * diff --git a/src/esm_runscripts/filelists.py b/src/esm_runscripts/filelists.py index 1315347dd..40ec350ea 100644 --- a/src/esm_runscripts/filelists.py +++ b/src/esm_runscripts/filelists.py @@ -901,11 +901,51 @@ def resolve_symlinks(config, file_source): def 
copy_files(config, filetypes, source, target): + """ + This function has a misleading name. It is not only used for copying, but also + for moving or linking, depending on what was specified for the particular file + or file type vie the ``file_movements``. + + Note: when the ``target`` is ``thisrun`` (intermediate folders) check whether the + type of file is included in ``intermediate_movements``. If it's not, instead of + moving the file to the intermediate folder it moves it to ``work``. This is an + ugly fix to provide a fast solution to the problem that files are + copied/moved/linked twice unnecessarily, and this affects inmensely the performance + of high resolution simulations. A better fix is not made because ``filelists`` are + being entirely reworked, but the fix cannot wait. + + Note + ---- + Relevant variables in this function: + + intermediate_movements : list + List of file types that will be considered in the intermediate step (copy from source to intermediate and then to work, rather than directly to work) + + Parameters + ---------- + config : dict + The general configuration + filetypes : list + List of file types to be copied/linked/moved + source : str + Specifies the source type, to be chosen between ``init``, ``thisrun``, + ``work``. + target : str + Specifies the target type, to be chosen between ``init``, ``thisrun``, + ``work``. + """ logger.debug("\n::: Copying files") helpers.print_datetime(config) + successful_files = [] missing_files = {} + # See the default intermediate movements list in `configs/defaults/general.yaml` + intermediate_movements = config["general"].get( + "intermediate_movements", + [], + ) + if source == "init": text_source = "sources" elif source == "thisrun": @@ -918,11 +958,27 @@ def copy_files(config, filetypes, source, target): elif target == "work": text_target = "targets" + # Loop through the different filetypes (input, forcing, restart_in/out, ...) 
+ files_to_be_moved = [] for filetype in [filetype for filetype in filetypes if not filetype == "ignore"]: + # Loop through the components for model in config["general"]["valid_model_names"] + ["general"]: + # If there is a source of this file type in the model if filetype + "_" + text_source in config[model]: + this_text_target = text_target + this_intermediate_movements = config[model].get( + "intermediate_movements", intermediate_movements + ) + skip_intermediate = False + if filetype not in intermediate_movements: + if text_target == "intermediate": + this_text_target = "targets" + skip_intermediate = True + elif text_source == "intermediate": + continue sourceblock = config[model][filetype + "_" + text_source] - targetblock = config[model][filetype + "_" + text_target] + targetblock = config[model][filetype + "_" + this_text_target] + # Loop through categories (file keys) for category in sourceblock: movement_method = get_method( get_movement(config, model, category, filetype, source, target) @@ -933,6 +989,7 @@ def copy_files(config, filetypes, source, target): logger.debug(f"- source: {file_source}") logger.debug(f"- target: {file_target}") helpers.print_datetime(config) + # Skip movement if file exist if file_source == file_target: logger.debug( f"Source and target paths are identical, skipping {file_source}", @@ -963,8 +1020,21 @@ def copy_files(config, filetypes, source, target): ) helpers.print_datetime(config) continue + files_to_be_moved.append({ + "movement_method": movement_method, + "file_source": file_source, + "file_target": file_target, + }) + + # To avoid overwriting in general experiment folder + if skip_intermediate == True: + file_target = avoid_overwriting( + config, file_source, file_target + ) + + # Execute movement movement_method(file_source, file_target) - # shutil.copy2(file_source, file_target) + successful_files.append(file_source) except IOError: logger.error( @@ -988,6 +1058,57 @@ def copy_files(config, filetypes, source, target): 
def avoid_overwriting(config, source, target):
    """
    Choose a destination path for a file movement that does not clobber ``target``.

    If ``target`` does not exist, or it is a file that ``filecmp.cmp`` reports as
    equal to ``source``, ``target`` is returned unchanged. If ``target`` exists and
    differs from ``source``, the returned path is ``target`` with the current run
    date stamp appended, and ``target`` itself becomes a symbolic link pointing at
    that date-stamped path:

    * when ``target`` is a regular file it is first renamed to ``target`` plus the
      previous run date stamp;
    * when ``target`` is already a symbolic link only the link is removed (the
      file it points at is left in place).

    A dangling symbolic link at ``target`` (for example left behind when the link
    was created but the movement that should have produced the date-stamped file
    never completed) is replaced in the same way instead of being ignored.

    Note
    ----
    This function does not execute the file movement. The symbolic link is created
    before the date-stamped file exists; the caller is expected to perform the
    movement onto the returned path.

    Parameters
    ----------
    config : dict
        Simulation configuration. ``config["general"]["run_datestamp"]`` is always
        read when a date-stamped path is needed, and
        ``config["general"]["last_run_datestamp"]`` when an existing regular file
        has to be renamed.
    source : str
        Path of the source of the file that will be copied/moved/linked
    target : str
        Path of the target of the file that will be copied/moved/linked

    Returns
    -------
    str
        The path the caller should use as the destination of the movement: either
        ``target`` itself or the date-stamped variant of it.
    """
    # ``isfile``/``isdir`` follow symbolic links, so a broken link looks like
    # "nothing there" to them even though the path is occupied. Detect it
    # explicitly so that it gets replaced rather than silently reused.
    is_dangling_link = os.path.islink(target) and not os.path.exists(target)

    if not is_dangling_link:
        if os.path.isdir(target):
            esm_parser.user_error(
                "File operation not supported",
                f"The target ``{target}`` is a folder, and this should not be happening "
                "here. Please, open an issue in www.github.com/esm-tools/esm_tools"
            )
            return target

        # Nothing at the target path: the movement can go there directly
        if not os.path.isfile(target):
            return target

        # Same file already in place: no need for a date-stamped copy
        if filecmp.cmp(source, target):
            return target

    date_stamped_target = f"{target}_{config['general']['run_datestamp']}"
    if os.path.isfile(date_stamped_target):
        esm_parser.user_error(
            "File movement conflict",
            f"The file ``{date_stamped_target}`` already exists. Skipping movement:\n"
            f"{source} -> {date_stamped_target}"
        )
        # NOTE(review): ``user_error`` presumably aborts the run; the return is kept
        # for the case in which it does not -- confirm against esm_parser.
        return target

    if os.path.islink(target):
        # Only the link is dropped, the file it pointed at (if any) is kept
        os.remove(target)
    else:
        # Keep the previous version of the file under the previous run's stamp
        os.rename(target, f"{target}_{config['general']['last_run_datestamp']}")

    os.symlink(date_stamped_target, target)
    return date_stamped_target
Now the + ``copy_files`` function used within, in all file movements, might escape moving + files to the intermediate folders, and move them directly to ``work`` if the file + type of the file is not included in the variable ``general.intermediate_movements``. + + This is a fast fix, pretty ugly, but works. The reason for not making it better is + that we are reworking the whole file movement logic, so it is not worth the time to + do a partial rework here. + """ + logger.debug("PREPARING EXPERIMENT") # Copy files: logger.debug("\n" "- File lists populated, proceeding with copy...") diff --git a/src/esm_tests/__init__.py b/src/esm_tests/__init__.py index a2c08e5e7..1458ec4af 100644 --- a/src/esm_tests/__init__.py +++ b/src/esm_tests/__init__.py @@ -2,7 +2,7 @@ __author__ = """Miguel Andres-Martinez""" __email__ = "miguel.andres-martinez@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .initialization import * from .read_shipped_data import * diff --git a/src/esm_tools/__init__.py b/src/esm_tools/__init__.py index 8c98e71ad..5ebe92a84 100644 --- a/src/esm_tools/__init__.py +++ b/src/esm_tools/__init__.py @@ -23,7 +23,7 @@ __author__ = """Dirk Barbi, Paul Gierz""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" import functools import inspect diff --git a/src/esm_utilities/__init__.py b/src/esm_utilities/__init__.py index 2de28cb51..471a808e9 100644 --- a/src/esm_utilities/__init__.py +++ b/src/esm_utilities/__init__.py @@ -2,6 +2,6 @@ __author__ = """Paul Gierz""" __email__ = "pgierz@awi.de" -__version__ = "6.30.0" +__version__ = "6.31.0" from .utils import *