Skip to content

Commit

Permalink
Merge branch 'development' into add-np-checksum-integration-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stsnel authored May 6, 2024
2 parents db74f1c + 3f69191 commit 8c051c1
Show file tree
Hide file tree
Showing 19 changed files with 487 additions and 214 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/api-documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

- name: Set up Python
# setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
uses: MatteoH2O1999/setup-python@v3.2.0
uses: MatteoH2O1999/setup-python@v3.2.1
with:
python-version: ${{ matrix.python-version }}
allow-build: info
Expand Down
25 changes: 22 additions & 3 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,9 +807,28 @@ def removeExternalUser(ctx, username, userzone):
return str(response.status_code)


def rule_group_remove_external_user(rule_args, ctx, rei):
"""Remove external user."""
log.write(ctx, removeExternalUser(ctx, rule_args[0], rule_args[1]))
@rule.make(inputs=[0, 1], outputs=[])
def rule_group_remove_external_user(ctx, username, userzone):
    """Remove external user from EUS.

    :param ctx:      Combined type of a ctx and rei struct
    :param username: Name of user to remove
    :param userzone: Zone of user to remove

    :returns: HTTP status code of remove request, or "0"
              if insufficient permissions.
    """
    user_id = username + "#" + userzone

    # Only rodsadmin may remove external users; refuse otherwise.
    if not user.is_admin(ctx):
        ctx.writeLine("serverLog", "Cannot remove external user "
                      + username + "#" + userzone
                      + " : need admin permissions.")
        return '0'

    status = removeExternalUser(ctx, username, userzone)
    ctx.writeLine("serverLog", "Status code for removing external user "
                  + username + "#" + userzone
                  + " : " + status)
    return status


@rule.make(inputs=[0], outputs=[1])
Expand Down
12 changes: 8 additions & 4 deletions iiVault.r
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ iiIngestObject(*itemParent, *itemName, *itemIsCollection, *buffer, *error) {
} else {
# Copy data object to vault and compute checksum.
*resource = "";
*err = errorcode(rule_resource_vault(*resource));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++verifyChksum=", *status));
*numThreads = "";
*err1 = errorcode(rule_resource_vault(*resource));
*err2 = errorcode(rule_vault_copy_numthreads(*numThreads));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++numThreads=" ++ *numThreads ++ "++++verifyChksum=", *status));
if (*error < 0) {
*buffer.msg = "Failed to copy *sourcePath to *destPath";
}
Expand Down Expand Up @@ -111,8 +113,10 @@ iiCopyObject(*itemParent, *itemName, *itemIsCollection, *buffer, *error) {
}
} else {
*resource = "";
*err = errorcode(rule_resource_research(*resource));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++verifyChksum=", *status));
*numThreads = "";
*err1 = errorcode(rule_resource_research(*resource));
*err2 = errorcode(rule_vault_copy_numthreads(*numThreads));
*error = errorcode(msiDataObjCopy(*sourcePath, *destPath, "destRescName=" ++ *resource ++ "++++numThreads=" ++ *numThreads ++ "++++verifyChksum=", *status));
if (*error < 0) {
*buffer.msg = "Failed to copy *sourcePath to *destPath";
}
Expand Down
80 changes: 79 additions & 1 deletion integration_tests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Integration tests for the development environment."""

__copyright__ = 'Copyright (c) 2019-2023, Utrecht University'
__copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

__all__ = ['rule_run_integration_tests']
Expand All @@ -11,7 +11,82 @@
from util import collection, config, data_object, log, msi, resource, rule, user


def _call_msvc_stat_vault(ctx, resc_name, data_path):
    """Call the stat vault microservice and return its two output arguments as a tuple."""
    arguments = msi.stat_vault(ctx, resc_name, data_path, '', '')['arguments']
    return (arguments[2], arguments[3])


def _call_msvc_stat_vault_check_exc(ctx, resc_name, data_path):
    """Verify whether a call to the stat vault microservice raises an exception.

    :returns: True if the call raised, False if it completed normally.
    """
    try:
        msi.stat_vault(ctx, resc_name, data_path, '', '')
    except Exception:
        return True
    return False


def _call_msvc_json_arrayops(ctx, jsonstr, val, ops, index, argument_index):
    """Return one output argument from the json_arrayops microservice."""
    result = ctx.msi_json_arrayops(jsonstr, val, ops, index)
    return result["arguments"][argument_index]


def _call_msvc_json_objops(ctx, jsonstr, val, ops, argument_index):
    """Return one output argument from the json_objops microservice."""
    result = ctx.msi_json_objops(jsonstr, val, ops)
    return result["arguments"][argument_index]


basic_integration_tests = [
{"name": "msvc.json_arrayops.add",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "add", 0, 0),
"check": lambda x: x == '["a", "b", "c", "d"]'},
{"name": "msvc.json_arrayops.find_exist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "b", "find", 0, 3),
"check": lambda x: x == 1},
{"name": "msvc.json_arrayops.find_notexist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "find", 0, 3),
"check": lambda x: x == -1},
{"name": "msvc.json_arrayops.get",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "get", 1, 1),
"check": lambda x: x == 'b'},
{"name": "msvc.json_arrayops.rm_exist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "b", "rm", 0, 0),
"check": lambda x: x == '["a", "c"]'},
{"name": "msvc.json_arrayops.rm_notexist",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "d", "rm", 0, 0),
"check": lambda x: x == '["a", "b", "c"]'},
{"name": "msvc.json_arrayops.size",
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "size", 0, 3),
"check": lambda x: x == 3},
{"name": "msvc.json_objops.add_notexist_empty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"e": "f"}'},
{"name": "msvc.json_objops.add_notexist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "f"}'},
{"name": "msvc.json_objops.add_exist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "g"}'},
{"name": "msvc.json_objops.get_exist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
"check": lambda x: str(x) == "(['c'], ['d'])"},
{"name": "msvc.json_objops.get_notexist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "e", ""), 'get', 1),
"check": lambda x: str(x) == "(['e'], [''])"},
{"name": "msvc.json_objops.rm_exist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", "d"), 'rm', 0),
"check": lambda x: x == '{"a": "b"}'},
{"name": "msvc.json_objops.rm_notexist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", "e"), 'rm', 0),
"check": lambda x: x == '{"a": "b", "c": "d"}'},
{"name": "msvc.json_objops.set_notexist_empty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'set', 0),
"check": lambda x: x == '{"e": "f"}'},
{"name": "msvc.json_objops.set_notexist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'set', 0),
"check": lambda x: x == '{"a": "b", "e": "f"}'},
{"name": "msvc.json_objops.set_exist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'set', 0),
"check": lambda x: x == '{"a": "b", "e": "g"}'},
{"name": "msvc.msi_vault_stat.file",
"test": lambda ctx: (_call_msvc_stat_vault(ctx, "dev001_1", "/var/lib/irods/Vault1_1/yoda/licenses/GNU General Public License v3.0.uri"),
_call_msvc_stat_vault(ctx, "dev001_2", "/var/lib/irods/Vault1_2/yoda/licenses/GNU General Public License v3.0.uri")),
Expand Down Expand Up @@ -65,6 +140,9 @@
{"name": "util.data_object.size",
"test": lambda ctx: data_object.size(ctx, "/tempZone/home/research-initial/testdata/lorem.txt"),
"check": lambda x: x == 1003240},
{"name": "util.data_object.get_group_owners",
"test": lambda ctx: data_object.get_group_owners(ctx, "/tempZone/home/research-initial/testdata/lorem.txt"),
"check": lambda x: x == [['research-initial', 'tempZone']]},
{"name": "util.resource.exists.yes",
"test": lambda ctx: resource.exists(ctx, "irodsResc"),
"check": lambda x: x},
Expand Down
2 changes: 1 addition & 1 deletion meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def api_meta_clone_file(ctx, target_coll):
return

try:
msi.data_obj_copy(ctx, source_data, target_data, '', irods_types.BytesBuf())
data_object.copy(ctx, source_data, target_data)
except msi.Error as e:
raise api.Error('copy_failed', 'The metadata file could not be copied', str(e))

Expand Down
6 changes: 3 additions & 3 deletions replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz
minimum_timestamp = int(time.time() - config.async_replication_delay_time)

log.write(ctx, "verbose = {}".format(verbose))
if verbose:
if print_verbose:
log.write(ctx, "async_replication_delay_time = {} seconds".format(config.async_replication_delay_time))
log.write(ctx, "max_rss = {} bytes".format(config.async_replication_max_rss))
log.write(ctx, "dry_run = {}".format(dry_run))
Expand Down Expand Up @@ -153,7 +153,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz
# Skip this one and go to the next data object to be replicated.
continue

# For totalization only count the dataobjects that are within the specified balancing range
# For totalization only count the data objects that are within the specified balancing range
count += 1
data_resc_name = row[4]

Expand Down Expand Up @@ -231,7 +231,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz


def is_replication_blocked_by_admin(ctx):
"""Admin can put the replication process on a hold by adding a file called 'stop_replication' in collection /yoda/flags.
"""Admin can put the replication process on hold by adding a file called 'stop_replication' in collection /yoda/flags.
:param ctx: Combined type of a callback and rei struct
Expand Down
2 changes: 1 addition & 1 deletion revision_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get_revision_strategy(strategy_name):
object can be used to obtain information about the revision strategy.
:param strategy_name: Name of the strategy ("A", B", "Simple"). See
See https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
for an explanation.
:returns: RevisionStrategy object
Expand Down
94 changes: 84 additions & 10 deletions revision_utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,63 @@
# -*- coding: utf-8 -*-
"""Utility functions for revision management."""

__copyright__ = 'Copyright (c) 2019-2023, Utrecht University'
__copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'


import datetime
import hashlib
import os

from revision_strategies import get_revision_strategy
from util import constants, log
from util import constants, log, pathutil


def calculate_end_of_calendar_day(ctx):
"""Calculate the unix timestamp for the end of the current day (Same as start of next day).
def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_store_exists):
    """Determine whether a revision can be created for a given data object.

    :param max_size:              Max size that file can be to create a revision (in bytes)
    :param data_obj_exists:       Whether the data object exists
    :param size:                  Size of the data object
    :param path:                  Path to the given data object (for logging)
    :param groups:                List of groups retrieved for this data object
    :param revision_store_exists: Whether revision store for this group exists

    :returns: 2-tuple containing True / False whether a revision should be created,
              and the message (if this is an error condition)
    """
    # Error conditions: no revision, and a message explaining why.
    if not data_obj_exists:
        return False, "Data object <{}> was not found or path was collection".format(path)

    owner_count = len(groups)
    if owner_count == 0:
        return False, "Cannot find owner of data object <{}>. It may have been removed. Skipping.".format(path)
    if owner_count > 1:
        return False, "Cannot find unique owner of data object <{}>. Skipping.".format(path)

    if not revision_store_exists:
        return False, "Revision store collection does not exist for data object <{}>".format(path)

    _, zone, _, _ = pathutil.info(path)

    # Non-error conditions: skip revision creation silently when the object
    # is too big, lies outside the research space, or is on the block list.
    if int(size) > max_size:
        return False, ""
    if not path.startswith("/{}/home/{}".format(zone, constants.IIGROUPPREFIX)):
        return False, ""
    if pathutil.basename(path) in constants.UUBLOCKLIST:
        return False, ""

    return True, ""

:param ctx: Combined type of a callback and rei struct

def calculate_end_of_calendar_day():
"""Calculate the unix timestamp for the end of the current day (Same as start of next day).
:returns: End of calendar day - Timestamp of the end of the current day
"""
Expand All @@ -26,10 +68,9 @@ def calculate_end_of_calendar_day(ctx):
return int(tomorrow.strftime("%s"))


def get_revision_store_path(ctx, zone, trailing_slash=False):
def get_revision_store_path(zone, trailing_slash=False):
"""Produces the logical path of the revision store
:param ctx: Combined type of a callback and rei struct
:param zone: zone name
:param trailing_slash: Add a trailing slash (default: False)
Expand All @@ -47,7 +88,7 @@ def get_deletion_candidates(ctx, revision_strategy, revisions, initial_upper_tim
:param ctx: Combined type of a callback and rei struct
:param revision_strategy: Revision strategy object
:param revisions: List of revisions for a particular data object. Each revision is represented by a 3-tupel
:param revisions: List of revisions for a particular data object. Each revision is represented by a 3-tuple
(revision ID, modification time in epoch time, original path)
:param initial_upper_time_bound: Initial upper time bound for first bucket
:param verbose: Whether to print additional information for troubleshooting (boolean)
Expand Down Expand Up @@ -144,7 +185,7 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, verb
:param ctx: Combined type of a callback and rei struct
:param revisions_list: List of versioned data objects. Each versioned data object is represented as a list of revisions,
with each revision represented as a 3-tupel (revision ID, modification time in epoch time, original
with each revision represented as a 3-tuple (revision ID, modification time in epoch time, original
path)
:param revision_strategy_name: Select a revision strategy based on a string ('A', 'B', 'Simple'). See
https://github.com/UtrechtUniversity/yoda/blob/development/docs/design/processes/revisions.md
Expand All @@ -153,11 +194,44 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, verb
:returns: List of versioned data objects, after prefiltered versioned data objects / revisions have been
removed. Each versioned data object is represented as a list of revisions,
with each revision represented as a 3-tupel (revision ID, modification time in epoch time, original
with each revision represented as a 3-tuple (revision ID, modification time in epoch time, original
path)
"""
minimum_bucket_size = get_revision_strategy(revision_strategy_name).get_minimum_bucket_size()
if verbose:
log.write(ctx, "Removing following revisioned data objects in prefiltering for cleanup: "
+ str([object for object in revisions_list if len(object) <= minimum_bucket_size]))
return [object for object in revisions_list if len(object) > min(minimum_bucket_size, 1)]


def get_resc(row):
    """Get the resc id for a data object given the metadata provided (for revision job).

    :param row: Metadata for the data object
    :returns: Resc
    """
    # New-style metadata is "<resc>,<balance_id>"; anything else is
    # pre-v1.9 metadata holding just the resc value.
    parts = row[3].split(',')
    return parts[0] if len(parts) == 2 else row[3]


def get_balance_id(row, path):
    """Get the balance id for a data object given the metadata provided (for revision job).

    :param row:  Metadata for the data object
    :param path: Path to the data object
    :returns: Balance id
    """
    parts = row[3].split(',')
    if len(parts) == 2:
        return int(parts[1])

    # Backwards compatibility with revision metadata created in v1.8 or earlier:
    # derive a balance id in 1..64 from a hash of the path, so that parallel
    # revision jobs partition the data objects consistently between ranges.
    digest = hashlib.md5(path.encode('utf-8')).hexdigest()
    return int(digest, 16) % 64 + 1
Loading

0 comments on commit 8c051c1

Please sign in to comment.