Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements on filemovements #1162

Merged
merged 15 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions configs/components/oifs/oifs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,7 @@ choose_eternal_run_number:
${oifs.nx}
${oifs.ensemble_id};
${general.esm_function_dir}/components/oifs/change_rcf_date.sh
${thisrun_restart_in_dir}/
${thisrun_work_dir}/
${pseudo_initial_date!syear!smonth!sday}
${oifs.time_step}
${oifs.seconds_since_initial}
Expand All @@ -1347,7 +1347,7 @@ choose_general.standalone:
True:
slice_icml: "
${general.esm_function_dir}/components/oifs/slice_icmcl_file.sh
${thisrun_input_dir}/
${thisrun_work_dir}/
${icmcl_dir}/${icmcl_file}
${oifs.input_expid}
${start_date!syear!smonth!sday}
Expand Down
4 changes: 4 additions & 0 deletions configs/defaults/general.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
use_database: false
profile: False
intermediate_movements:
- "config"
- "bin"
- "input"
115 changes: 113 additions & 2 deletions src/esm_runscripts/filelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,11 +901,43 @@ def resolve_symlinks(config, file_source):


def copy_files(config, filetypes, source, target):
"""
This function has a misleading name. It is not only used for copying, but also
for moving or linking, depending on what was specified for the particular file
or file type via the ``file_movements``.

Note: when the ``target`` is ``thisrun`` (intermediate folders), check whether the
type of file is included in ``intermediate_movements``. If it is not, instead of
moving the file to the intermediate folder it moves it to ``work``. This is an
ugly fix to provide a fast solution to the problem that files are
copied/moved/linked twice unnecessarily, which immensely affects the performance
of high resolution simulations. A better fix is not made because ``filelists`` are
being entirely reworked, but the fix cannot wait.

mandresm marked this conversation as resolved.
Show resolved Hide resolved
Parameters
----------
config : dict
The general configuration
filetypes : list
List of file types to be copied/linked/moved
source : str
Specifies the source type, to be chosen between ``init``, ``thisrun``,
``work``.
target : str
Specifies the target type, to be chosen between ``init``, ``thisrun``,
``work``.
mandresm marked this conversation as resolved.
Show resolved Hide resolved
"""
logger.debug("\n::: Copying files")
helpers.print_datetime(config)

successful_files = []
missing_files = {}

intermediate_movements = config["general"].get(
"intermediate_movements",
[],
)

mandresm marked this conversation as resolved.
Show resolved Hide resolved
if source == "init":
text_source = "sources"
elif source == "thisrun":
Expand All @@ -918,11 +950,27 @@ def copy_files(config, filetypes, source, target):
elif target == "work":
text_target = "targets"

# Loop through the different filetypes (input, forcing, restart_in/out, ...)
files_to_be_moved = []
for filetype in [filetype for filetype in filetypes if not filetype == "ignore"]:
# Loop through the components
for model in config["general"]["valid_model_names"] + ["general"]:
# If there is a source of this file type in the model
if filetype + "_" + text_source in config[model]:
this_text_target = text_target
this_intermediate_movements = config[model].get(
"intermediate_movements", intermediate_movements
)
skip_intermediate = False
if filetype not in intermediate_movements:
if text_target == "intermediate":
this_text_target = "targets"
skip_intermediate = True
elif text_source == "intermediate":
continue
sourceblock = config[model][filetype + "_" + text_source]
targetblock = config[model][filetype + "_" + text_target]
targetblock = config[model][filetype + "_" + this_text_target]
# Loop through categories (file keys)
for category in sourceblock:
movement_method = get_method(
get_movement(config, model, category, filetype, source, target)
Expand All @@ -933,6 +981,7 @@ def copy_files(config, filetypes, source, target):
logger.debug(f"- source: {file_source}")
logger.debug(f"- target: {file_target}")
helpers.print_datetime(config)
# Skip movement if file exist
if file_source == file_target:
logger.debug(
f"Source and target paths are identical, skipping {file_source}",
Expand Down Expand Up @@ -963,8 +1012,21 @@ def copy_files(config, filetypes, source, target):
)
helpers.print_datetime(config)
continue
files_to_be_moved.append({
"movement_method": movement_method,
"file_source": file_source,
"file_target": file_target,
})

# To avoid overwriting in general experiment folder
if skip_intermediate == True:
file_target = avoid_overwriting(
config, file_source, file_target
)

# Execute movement
movement_method(file_source, file_target)
# shutil.copy2(file_source, file_target)

successful_files.append(file_source)
except IOError:
logger.error(
Expand All @@ -988,6 +1050,55 @@ def copy_files(config, filetypes, source, target):
return config


def avoid_overwriting(config, source, target):
    """
    Work out the path a file should be written to without clobbering an existing,
    different file at ``target``.

    If ``target`` already exists as a file with different content than ``source``,
    the path returned is ``target`` with the current run date stamp appended. In that
    case, the existing ``target`` is renamed with the previous run date stamp (or
    removed, if it is only a link), and a link named ``target`` is created that
    points at the date-stamped path.

    Note: This function does not execute the file movement. The link named
    ``target`` therefore dangles until the caller actually writes the file to the
    returned path.

    Parameters
    ----------
    config : dict
        Simulation configuration. ``config["general"]["run_datestamp"]`` and
        ``config["general"]["last_run_datestamp"]`` are read.
    source : str
        Path of the source of the file that will be copied/moved/linked
    target : str
        Path of the target of the file that will be copied/moved/linked

    Returns
    -------
    target : str
        The path the caller should use as target of the movement: either the
        unchanged ``target`` or the date-stamped variant.
    """
    if os.path.isfile(target):
        # Same file already in place: nothing to protect, keep the target as is
        if filecmp.cmp(source, target):
            return target

        date_stamped_target = f"{target}_{config['general']['run_datestamp']}"
        if os.path.isfile(date_stamped_target):
            # The date-stamped name is taken as well: notify and leave everything
            # untouched
            esm_parser.user_note(
                "File movement conflict",
                f"The file ``{date_stamped_target}`` already exists. Skipping movement:\n"
                f"{source} -> {date_stamped_target}"
            )
            return target

        if os.path.islink(target):
            # ``target`` is only a link (e.g. left by a previous call); drop it so
            # that it can be re-pointed below
            os.remove(target)
        else:
            # ``target`` is a real file from an earlier run: keep it under the
            # previous run's date stamp
            os.rename(target, f"{target}_{config['general']['last_run_datestamp']}")

        os.symlink(date_stamped_target, target)
        target = date_stamped_target

    elif os.path.isdir(target):
        esm_parser.user_error(
            "File operation not supported",
            f"The target ``{target}`` is a folder, and this should not be happening "
            "here. Please, open an issue in www.github.com/esm-tools/esm_tools"
        )

    return target


def filter_allowed_missing_files(config):
"""
Filters the general.files_missing_when_preparing_run dictionary to move any
Expand Down
11 changes: 11 additions & 0 deletions src/esm_runscripts/prepcompute.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,17 @@ def wait_for_iterative_coupling(config):


def copy_files_to_thisrun(config):
"""
This function was used to copy to intermediate folders in the past. Now the
``copy_files`` function used within, in all file movements, might escape moving
files to the intermediate folders, and move them directly to ``work`` if the file
type of the file is not included in the variable ``general.intermediate_movements``.

This is a fast fix, pretty ugly, but works. The reason for not making it better is
that we are reworking the whole file movement logic, so it is not worth the time to
do a partial rework here.
"""

logger.debug("PREPARING EXPERIMENT")
# Copy files:
logger.debug("\n" "- File lists populated, proceeding with copy...")
Expand Down