Skip to content

Commit

Permalink
Merge branch 'development' into add-np-checksum-integration-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stsnel authored May 6, 2024
2 parents db74f1c + 3f69191 commit 8c051c1
Show file tree
Hide file tree
Showing 19 changed files with 487 additions and 214 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/api-documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
25 changes: 22 additions & 3 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,9 +807,28 @@ def removeExternalUser(ctx, username, userzone):
return str(response.status_code)


def rule_group_remove_external_user(rule_args, ctx, rei):
"""Remove external user."""
log.write(ctx, removeExternalUser(ctx, rule_args[0], rule_args[1]))
@rule.make(inputs=[0, 1], outputs=[])
def rule_group_remove_external_user(ctx, username, userzone):
    """Remove external user from EUS.

    :param ctx:      Combined type of a ctx and rei struct
    :param username: Name of user to remove
    :param userzone: Zone of user to remove

    :returns: HTTP status code of remove request, or "0"
              if insufficient permissions.
    """
    user_id = username + "#" + userzone

    # Only rodsadmin may remove external users; refuse otherwise.
    if not user.is_admin(ctx):
        ctx.writeLine("serverLog", "Cannot remove external user "
                      + username + "#" + userzone
                      + " : need admin permissions.")
        return '0'

    status = removeExternalUser(ctx, username, userzone)
    ctx.writeLine("serverLog", "Status code for removing external user "
                  + username + "#" + userzone
                  + " : " + status)
    return status


@rule.make(inputs=[0], outputs=[1])
Expand Down
12 changes: 8 additions & 4 deletions iiVault.r
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ iiIngestObject(*itemParent, *itemName, *itemIsCollection, *buffer, *error) {
} else {
# Copy data object to vault and compute checksum.
*resource = "";
*err = errorcode(rule_resource_vault(*resource));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++verifyChksum=", *status));
*numThreads = "";
*err1 = errorcode(rule_resource_vault(*resource));
*err2 = errorcode(rule_vault_copy_numthreads(*numThreads));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++numThreads=" ++ *numThreads ++ "++++verifyChksum=", *status));
if (*error < 0) {
*buffer.msg = "Failed to copy *sourcePath to *destPath";
}
Expand Down Expand Up @@ -111,8 +113,10 @@ iiCopyObject(*itemParent, *itemName, *itemIsCollection, *buffer, *error) {
}
} else {
*resource = "";
*err = errorcode(rule_resource_research(*resource));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++verifyChksum=", *status));
*numThreads = "";
*err1 = errorcode(rule_resource_research(*resource));
*err2 = errorcode(rule_vault_copy_numthreads(*numThreads));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++numThreads=" ++ *numThreads ++ "++++verifyChksum=", *status));
if (*error < 0) {
*buffer.msg = "Failed to copy *sourcePath to *destPath";
}
Expand Down
80 changes: 79 additions & 1 deletion integration_tests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Integration tests for the development environment."""

__copyright__ = 'Copyright (c) 2019-2023, Utrecht University'
__copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

__all__ = ['rule_run_integration_tests']
Expand All @@ -11,7 +11,82 @@
from util import collection, config, data_object, log, msi, resource, rule, user


def _call_msvc_stat_vault(ctx, resc_name, data_path):
    """Call the stat vault microservice and return its two output arguments as a tuple."""
    arguments = msi.stat_vault(ctx, resc_name, data_path, '', '')['arguments']
    return (arguments[2], arguments[3])


def _call_msvc_stat_vault_check_exc(ctx, resc_name, data_path):
    """Verify whether a call to the stat vault microservice raises an exception.

    :returns: True if the call raised, False if it completed normally.
    """
    try:
        msi.stat_vault(ctx, resc_name, data_path, '', '')
    except Exception:
        return True
    return False


def _call_msvc_json_arrayops(ctx, jsonstr, val, ops, index, argument_index):
    """Return one output argument from the json_arrayops microservice."""
    result = ctx.msi_json_arrayops(jsonstr, val, ops, index)
    return result["arguments"][argument_index]


def _call_msvc_json_objops(ctx, jsonstr, val, ops, argument_index):
    """Return one output argument from the json_objops microservice."""
    result = ctx.msi_json_objops(jsonstr, val, ops)
    return result["arguments"][argument_index]


basic_integration_tests = [
{"name": "msvc.json_arrayops.add",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "add", 0, 0),
"check": lambda x: x == '["a", "b", "c", "d"]'},
{"name": "msvc.json_arrayops.find_exist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "b", "find", 0, 3),
"check": lambda x: x == 1},
{"name": "msvc.json_arrayops.find_notexist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "find", 0, 3),
"check": lambda x: x == -1},
{"name": "msvc.json_arrayops.get",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "get", 1, 1),
"check": lambda x: x == 'b'},
{"name": "msvc.json_arrayops.rm_exist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "b", "rm", 0, 0),
"check": lambda x: x == '["a", "c"]'},
{"name": "msvc.json_arrayops.rm_notexist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "rm", 0, 0),
"check": lambda x: x == '["a", "b", "c"]'},
{"name": "msvc.json_arrayops.size",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "size", 0, 3),
"check": lambda x: x == 3},
{"name": "msvc.json_objops.add_notexist_empty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"e": "f"}'},
{"name": "msvc.json_objops.add_notexist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "f"}'},
{"name": "msvc.json_objops.add_exist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "g"}'},
{"name": "msvc.json_objops.get_exist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
"check": lambda x: str(x) == "(['c'], ['d'])"},
{"name": "msvc.json_objops.get_notexist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "e", ""), 'get', 1),
"check": lambda x: str(x) == "(['e'], [''])"},
{"name": "msvc.json_objops.rm_exist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", "d"), 'rm', 0),
"check": lambda x: x == '{"a": "b"}'},
{"name": "msvc.json_objops.rm_notexist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", "e"), 'rm', 0),
"check": lambda x: x == '{"a": "b", "c": "d"}'},
{"name": "msvc.json_objops.set_notexist_empty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'set', 0),
"check": lambda x: x == '{"e": "f"}'},
{"name": "msvc.json_objops.set_notexist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'set', 0),
"check": lambda x: x == '{"a": "b", "e": "f"}'},
{"name": "msvc.json_objops.set_exist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'set', 0),
"check": lambda x: x == '{"a": "b", "e": "g"}'},
{"name": "msvc.msi_vault_stat.file",
"test": lambda ctx: (_call_msvc_stat_vault(ctx, "dev001_1", "/var/lib/irods/Vault1_1/yoda/licenses/GNU General Public License v3.0.uri"),
_call_msvc_stat_vault(ctx, "dev001_2", "/var/lib/irods/Vault1_2/yoda/licenses/GNU General Public License v3.0.uri")),
Expand Down Expand Up @@ -65,6 +140,9 @@
{"name": "util.data_object.size",
"test": lambda ctx: data_object.size(ctx, "/tempZone/home/research-initial/testdata/lorem.txt"),
"check": lambda x: x == 1003240},
{"name": "util.data_object.get_group_owners",
"test": lambda ctx: data_object.get_group_owners(ctx, "/tempZone/home/research-initial/testdata/lorem.txt"),
"check": lambda x: x == [['research-initial', 'tempZone']]},
{"name": "util.resource.exists.yes",
"test": lambda ctx: resource.exists(ctx, "irodsResc"),
"check": lambda x: x},
Expand Down
2 changes: 1 addition & 1 deletion meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def api_meta_clone_file(ctx, target_coll):
return

try:
msi.data_obj_copy(ctx, source_data, target_data, '', irods_types.BytesBuf())
data_object.copy(ctx, source_data, target_data)
except msi.Error as e:
raise api.Error('copy_failed', 'The metadata file could not be copied', str(e))

Expand Down
6 changes: 3 additions & 3 deletions replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz
minimum_timestamp = int(time.time() - config.async_replication_delay_time)

log.write(ctx, "verbose = {}".format(verbose))
if verbose:
if print_verbose:
log.write(ctx, "async_replication_delay_time = {} seconds".format(config.async_replication_delay_time))
log.write(ctx, "max_rss = {} bytes".format(config.async_replication_max_rss))
log.write(ctx, "dry_run = {}".format(dry_run))
Expand Down Expand Up @@ -153,7 +153,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz
# Skip this one and go to the next data object to be replicated.
continue

# For totalization only count the dataobjects that are within the specified balancing range
# For totalization only count the data objects that are within the specified balancing range
count += 1
data_resc_name = row[4]

Expand Down Expand Up @@ -231,7 +231,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz


def is_replication_blocked_by_admin(ctx):
"""Admin can put the replication process on a hold by adding a file called 'stop_replication' in collection /yoda/flags.
"""Admin can put the replication process on hold by adding a file called 'stop_replication' in collection /yoda/flags.
:param ctx: Combined type of a callback and rei struct
Expand Down
2 changes: 1 addition & 1 deletion revision_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get_revision_strategy(strategy_name):
object can be used to obtain information about the revision strategy.
:param strategy_name: Name of the strategy ("A", B", "Simple"). See
See https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
for an explanation.
:returns: RevisionStrategy object
Expand Down
94 changes: 84 additions & 10 deletions revision_utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,63 @@
# -*- coding: utf-8 -*-
"""Utility functions for revision management."""

__copyright__ = 'Copyright (c) 2019-2023, Utrecht University'
__copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'


import datetime
import hashlib
import os

from revision_strategies import get_revision_strategy
from util import constants, log
from util import constants, log, pathutil


def calculate_end_of_calendar_day(ctx):
"""Calculate the unix timestamp for the end of the current day (Same as start of next day).
def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_store_exists):
    """Determine whether a revision can be created for a given data object.

    :param max_size:              Max size that file can be to create a revision (in bytes)
    :param data_obj_exists:       Whether the data object exists
    :param size:                  Size of the data object
    :param path:                  Path to the given data object (for logging)
    :param groups:                List of groups retrieved for this data object
    :param revision_store_exists: Whether revision store for this group exists

    :returns: 2-tuple containing True / False whether a revision should be created,
              and the message (if this is an error condition)
    """
    # Error conditions: no revision, and a message explaining why.
    if not data_obj_exists:
        return False, "Data object <{}> was not found or path was collection".format(path)

    owner_count = len(groups)
    if owner_count == 0:
        return False, "Cannot find owner of data object <{}>. It may have been removed. Skipping.".format(path)
    if owner_count > 1:
        return False, "Cannot find unique owner of data object <{}>. Skipping.".format(path)

    if not revision_store_exists:
        return False, "Revision store collection does not exist for data object <{}>".format(path)

    _, zone, _, _ = pathutil.info(path)

    # Non-error conditions: skip revision creation silently when the object
    # is too big, lies outside the research space, or is on the block list.
    if int(size) > max_size:
        return False, ""
    if not path.startswith("/{}/home/{}".format(zone, constants.IIGROUPPREFIX)):
        return False, ""
    if pathutil.basename(path) in constants.UUBLOCKLIST:
        return False, ""

    return True, ""

:param ctx: Combined type of a callback and rei struct

def calculate_end_of_calendar_day():
"""Calculate the unix timestamp for the end of the current day (Same as start of next day).
:returns: End of calendar day - Timestamp of the end of the current day
"""
Expand All @@ -26,10 +68,9 @@ def calculate_end_of_calendar_day(ctx):
return int(tomorrow.strftime("%s"))


def get_revision_store_path(ctx, zone, trailing_slash=False):
def get_revision_store_path(zone, trailing_slash=False):
"""Produces the logical path of the revision store
:param ctx: Combined type of a callback and rei struct
:param zone: zone name
:param trailing_slash: Add a trailing slash (default: False)
Expand All @@ -47,7 +88,7 @@ def get_deletion_candidates(ctx, revision_strategy, revisions, initial_upper_tim
:param ctx: Combined type of a callback and rei struct
:param revision_strategy: Revision strategy object
:param revisions: List of revisions for a particular data object. Each revision is represented by a 3-tupel
:param revisions: List of revisions for a particular data object. Each revision is represented by a 3-tuple
(revision ID, modification time in epoch time, original path)
:param initial_upper_time_bound: Initial upper time bound for first bucket
:param verbose: Whether to print additional information for troubleshooting (boolean)
Expand Down Expand Up @@ -144,7 +185,7 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, verb
:param ctx: Combined type of a callback and rei struct
:param revisions_list: List of versioned data objects. Each versioned data object is represented as a list of revisions,
with each revision represented as a 3-tupel (revision ID, modification time in epoch time, original
with each revision represented as a 3-tuple (revision ID, modification time in epoch time, original
path)
:param revision_strategy_name: Select a revision strategy based on a string ('A', 'B', 'Simple'). See
https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
Expand All @@ -153,11 +194,44 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, verb
:returns: List of versioned data objects, after prefiltered versioned data objects / revisions have been
removed. Each versioned data object is represented as a list of revisions,
with each revision represented as a 3-tupel (revision ID, modification time in epoch time, original
with each revision represented as a 3-tuple (revision ID, modification time in epoch time, original
path)
"""
minimum_bucket_size = get_revision_strategy(revision_strategy_name).get_minimum_bucket_size()
if verbose:
log.write(ctx, "Removing following revisioned data objects in prefiltering for cleanup: "
+ str([object for object in revisions_list if len(object) <= minimum_bucket_size]))
return [object for object in revisions_list if len(object) > min(minimum_bucket_size, 1)]


def get_resc(row):
    """Get the resc id for a data object given the metadata provided (for revision job).

    :param row: Metadata for the data object
    :returns: Resc
    """
    # New-style metadata is "<resc>,<balance_id>"; anything else is
    # pre-v1.9 metadata holding just the resc value.
    parts = row[3].split(',')
    return parts[0] if len(parts) == 2 else row[3]


def get_balance_id(row, path):
    """Get the balance id for a data object given the metadata provided (for revision job).

    :param row:  Metadata for the data object
    :param path: Path to the data object
    :returns: Balance id
    """
    parts = row[3].split(',')
    if len(parts) == 2:
        return int(parts[1])

    # Backwards compatibility with revision metadata created in v1.8 or earlier:
    # derive a balance id in 1..64 from a hash of the path, so that parallel
    # revision jobs partition the data objects consistently between ranges.
    digest = hashlib.md5(path.encode('utf-8')).hexdigest()
    return int(digest, 16) % 64 + 1
Loading

0 comments on commit 8c051c1

Please sign in to comment.