# This is a proof-of-concept checksum policy implemented in iRODS rules.
#
# Whenever a data object replica is created or modified in any way, a checksum
# is calculated for the object and stored in the catalog.

@include 'json'

#
# CHECKSUMS
#

# Ensures that all of the replicas of the given data object have a checksum.
# A failure is written to the server log; it is not propagated to the caller.
#
# Parameters:
#  *DataPath  (string) the absolute path to the data object
#
_ensureReplicasChecksum(*DataPath) {
    # 'ChksumAll' asks msiDataObjChksum to cover every replica of the object.
    msiAddKeyValToMspStr('ChksumAll', '', *opts);

    if (errormsg(msiDataObjChksum(*DataPath, *opts, *_), *err) < 0) {
        writeLine('serverLog', 'Failed to generate checksums for the replicas of *DataPath (*err)');
    }
}

# Ensures that all of the replicas of the given data object on the given storage
# resource have a checksum. A failure is written to the server log; it is not
# propagated to the caller.
#
# Parameters:
#  *DataPath  (string) the absolute path to the data object
#  *RescHier  (string) the resource hierarchy of the storage resource, if empty
#             all replicas will be checksummed.
#
_ensureReplicasChecksum(*DataPath, *RescHier) {
    if (*RescHier == '') {
        _ensureReplicasChecksum(*DataPath);
    } else {
        msiSplitPath(*DataPath, *collPath, *dataName);

        foreach ( *rec in
            SELECT DATA_REPL_NUM
            WHERE COLL_NAME == *collPath AND DATA_NAME == *dataName AND DATA_RESC_HIER == *RescHier
        ) {
            # msiAddKeyValToMspStr appends to its output string, so start from
            # an empty option string on every pass. Otherwise a second row would
            # carry the previous row's replNum along with its own.
            *opts = '';
            msiAddKeyValToMspStr('replNum', *rec.DATA_REPL_NUM, *opts);

            if (errormsg(msiDataObjChksum(*DataPath, *opts, *_), *err) < 0) {
                writeLine(
                    'serverLog',
                    'Failed to generate checksums for the replicas of *DataPath on *RescHier (*err)'
                );
            }
        }
    }
}

## PEP SUPPORTING FUNCTIONS AND RULES ##

# Tests to see if the given map contains the given key. The lookup is wrapped
# in errorcode() so that a failed lookup yields false instead of an error.
#
#_hasKey : `KEYVALPAIR_MS_T` * string -> bool
_hasKey(*KVMap, *Key) = errorcode(*KVMap.'*Key') == 0

# Retrieves the value of the given key from the given map. If the key isn't
# found, it returns the empty string.
#
#_getValue : `KEYVALPAIR_MS_T` * string -> string
_getValue(*KVMap, *Key) =
    if _hasKey(*KVMap, *Key) then *KVMap.'*Key' else ''

# Given the data object operation input map of an API PEP, decides whether the
# data object(s) still need a checksum, i.e. the map has neither a 'regChksum'
# nor a 'verifyChksum' entry.
#
#_needsChecksum : `KEYVALPAIR_MS_T` -> bool
_needsChecksum(*DataObjOpInp) =
    !(_hasKey(*DataObjOpInp, 'regChksum') || _hasKey(*DataObjOpInp, 'verifyChksum'))

# openType value indicating that a file was created
#
# _FILE_CREATE : string
_FILE_CREATE = '1'

# openType value indicating that a file was opened for writing
#
# _FILE_OPEN_WRITE : string
_FILE_OPEN_WRITE = '3'

# Open flags for mode 'r': read-only
#
# _OPEN_FLAG_R : string
_OPEN_FLAG_R = '0'

# Open flags for mode 'r+' or 'a+' without create: read-write, where writes
# append.
#
# _OPEN_FLAG_RP : string
_OPEN_FLAG_RP = '2'

# Open flags for mode 'w' without create: write-only, and the replica is
# truncated.
#
# _OPEN_FLAG_W : string
_OPEN_FLAG_W = '513'

# Open flags for mode 'w' with create: write-only; the replica need not exist,
# but if it does, it is truncated.
#
# _OPEN_FLAG_W_CREATE : string
_OPEN_FLAG_W_CREATE = '577'

# Open flags for mode 'w+' without create: read-write, and the replica is
# truncated.
#
# _OPEN_FLAG_WP : string
_OPEN_FLAG_WP = '514'

# Open flags for mode 'w+' with create: read-write; the replica need not exist,
# but if it does, it is truncated.
#
# _OPEN_FLAG_WP_CREATE : string
_OPEN_FLAG_WP_CREATE = '578'

# Open flags for mode 'a' without create: write-only, where writes append.
#
# _OPEN_FLAG_A : string
_OPEN_FLAG_A = '1'

# Indicates that a replica's open mode is 'a' create, i.e., write-only, where
# the replica need not exist and writes append.
#
# _OPEN_FLAG_A_CREATE : string
_OPEN_FLAG_A_CREATE = '65'

# Open flags for mode 'a+' with create: read-write; the replica need not exist
# and writes append.
#
# _OPEN_FLAG_AP_CREATE : string
_OPEN_FLAG_AP_CREATE = '66'

# Determines if a data object was truncated on open, i.e. the open flags match
# one of the 'w' / 'w+' modes (with or without create).
#
# Parameters:
#  *OpenFlags  the open flag set
#
# _replTruncated: string -> bool
_replTruncated(*OpenFlags) =
    *OpenFlags == _OPEN_FLAG_W
    || *OpenFlags == _OPEN_FLAG_WP
    || *OpenFlags == _OPEN_FLAG_W_CREATE
    || *OpenFlags == _OPEN_FLAG_WP_CREATE

## PEPS ##

# ALGORITHM:
#
# If neither *BulkOpInp.regChksum nor *BulkOpInp.verifyChksum exist, calculate
# the checksum of the replica on *BulkOpInp.resc_hier for each
# *BulkOpInp.logical_path_* entry.
#
# *BulkOpInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#gafeecbd87f6ba164e8c1d189c42a8c93e
#
# N.B. This can be triggered by `iput -b -r`.
# N.B. `-k` adds `regChksum` to *BulkOpInp.
# N.B. `-K` adds `verifyChksum` to *BulkOpInp.
# N.B. Overwriting a replica that has a checksum clears the checksum.
# N.B. `-X` handled transparently
# N.B. large files are not passed through rcBulkDataObjPut
#
pep_api_bulk_data_obj_put_post(*Instance, *Comm, *BulkOpInp, *BulkOpInpBBuf) {
    if (_needsChecksum(*BulkOpInp)) {
        foreach (*entry in *BulkOpInp) {
            # Only the per-object logical path entries name data objects.
            if (*entry like 'logical_path_*') {
                *objPath = _getValue(*BulkOpInp, *entry);
                *rescHier = _getValue(*BulkOpInp, 'resc_hier');
                _ensureReplicasChecksum(*objPath, *rescHier);
            }
        }
    }
}

# ALGORITHM:
#
# If neither *DataObjCopyInp.regChksum nor *DataObjCopyInp.verifyChksum exist,
# calculate the checksum of *DataObjCopyInp.dst_obj_path on
# *DataObjCopyInp.dst_resc_hier.
#
# *DataObjCopyInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#gaad62fbc609d67726e15e7330bbbdf98d
#
pep_api_data_obj_copy_post(*Instance, *Comm, *DataObjCopyInp, *TransStat) {
    *dstObj = _getValue(*DataObjCopyInp, 'dst_obj_path');

    if (*dstObj == '') {
        # Without a destination path there is nothing to checksum; fail the PEP.
        failmsg(
            -1, 'Could not determine path to created data object, (DataObjCopyInp = *DataObjCopyInp)'
        );
    } else if (_needsChecksum(*DataObjCopyInp)) {
        *dstHier = _getValue(*DataObjCopyInp, 'dst_resc_hier');
        _ensureReplicasChecksum(*dstObj, *dstHier);
    }
}

# data_obj_create and data_obj_close are used together
#
# ALGORITHM:
#
# Always compute checksum. Store the path to the data object and the selected
# resource hierarchy for its replica in temporaryStorage using the keys
# `dataObjClose_objPath` and `dataObjClose_selectedHierarchy`, respectively.
# Also, set the temporaryStorage key `dataObjClose_needsChecksum` to some value.
# `data_obj_close` will use the existence of this key and the other two values
# to compute the checksum of the indicated replica.
#
# *DataObjInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#gab5b8db16a4951cf048e88c8538d8aa56
#
pep_api_data_obj_create_post(*Instance, *Comm, *DataObjInp) {
    # A freshly created replica always needs a checksum once it is closed.
    temporaryStorage.dataObjClose_needsChecksum = 'checksum';
    temporaryStorage.dataObjClose_selectedHierarchy = _getValue(*DataObjInp, 'selected_hierarchy');
    temporaryStorage.dataObjClose_objPath = _getValue(*DataObjInp, 'obj_path');
}

# data_obj_open, data_obj_write, and data_obj_close are used together
#
# How *DataObjInp.open_flags maps to the open mode, and what this means when
# combined with the value of *DataObjInp.openType.
#
# case open_flags)
#   'r':  do nothing
#   'r+': a write is possible
#   'w':
#     no create: truncated
#     create:    created or truncated
#   'w+':
#     no create: truncated
#     create:    created or truncated
#   'a':
#     no create: a write is possible
#     create:    possibly created and a write is possible
#   'a+':
#     no create: indistinct from r+
#     create:    possibly created and a write is possible
#
# CHECKSUM ALGORITHM:
#
# A checksum can only be computed after the replica has been modified, so this
# needs to happen in data_obj_close. Only when a change occurs does a checksum
# need to be computed. A change won't occur if the open mode was 'r'. If the
# mode isn't 'r', store the data object's path and the resource holding its
# replica for use by data_obj_close. A change definitely occurred when a replica
# is created or truncated, so if this happens, store a flag to let
# `data_obj_close` know that it needs to perform a checksum. If a replica is
# written to, it has also been modified, so `data_obj_write` needs to store a
# flag to let `data_obj_close` know this has happened.
#
# *DataObjInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#gab869f78a9d131b1e973d425cd1ebf1f2
#
pep_api_data_obj_open_post(*Instance, *Comm, *DataObjInp) {
    *openFlags = _getValue(*DataObjInp, 'open_flags');

    # A read-only open cannot modify the replica, so nothing is recorded.
    if (*openFlags != _OPEN_FLAG_R) {
        temporaryStorage.dataObjClose_objPath = _getValue(*DataObjInp, 'obj_path');
        temporaryStorage.dataObjClose_selectedHierarchy =
            _getValue(*DataObjInp, 'selected_hierarchy');

        # Creation and truncation both modify the replica even if no write follows.
        if (_getValue(*DataObjInp, 'openType') == _FILE_CREATE || _replTruncated(*openFlags)) {
            temporaryStorage.dataObjClose_needsChecksum = 'checksum';
        }
    }
}

# Records that a write happened so that data_obj_close computes a checksum.
#
# *DataObjWriteInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#gaaa88dd8ad00161d5c48115bebbe6866c
#
pep_api_data_obj_write_post(*Instance, *Comm, *DataObjWriteInp, *DataObjWriteInpBBuf) {
    temporaryStorage.dataObjClose_needsChecksum = 'checksum';
}

# Computes the checksum if an earlier create/open/write PEP flagged the replica
# as modified, then blanks the temporaryStorage entries used for the hand-off.
#
# *DataObjCloseInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#ga9dcea65009d7cc49ed0106f88540f431
#
pep_api_data_obj_close_post(*Instance, *Comm, *DataObjCloseInp) {
    *objPath = _getValue(temporaryStorage, 'dataObjClose_objPath');

    if (*objPath != '') {
        *pending = _getValue(temporaryStorage, 'dataObjClose_needsChecksum');

        if (*pending != '') {
            *rescHier = _getValue(temporaryStorage, 'dataObjClose_selectedHierarchy');
            _ensureReplicasChecksum(*objPath, *rescHier);
        }

        temporaryStorage.dataObjClose_objPath = '';
        temporaryStorage.dataObjClose_needsChecksum = '';
        temporaryStorage.dataObjClose_selectedHierarchy = '';
    }
}

# ALGORITHM:
#
# If neither *DataObjInp.regChksum nor *DataObjInp.verifyChksum exist, calculate
# the checksum of *DataObjInp.obj_path on *DataObjInp.resc_hier.
#
# *DataObjInp:
#   https://docs.irods.org/4.2.10/doxygen/group__data__object.html#ga1b1d0d95bd1cbc6f07860d6f8174371f
#
pep_api_data_obj_put_post(*Instance, *Comm, *DataObjInp, *DataObjInpBBuf, *PORTAL_OPR_OUT) {
    *objPath = _getValue(*DataObjInp, 'obj_path');

    if (*objPath == '') {
        # Without an object path there is nothing to checksum; fail the PEP.
        failmsg(-1, 'Could not determine path to created data object, (DataObjInp = *DataObjInp)');
    } else if (_needsChecksum(*DataObjInp)) {
        *rescHier = _getValue(*DataObjInp, 'resc_hier');
        _ensureReplicasChecksum(*objPath, *rescHier);
    }
}

# ALGORITHM:
#
# If none of PhyPathRegInp.regRepl, PhyPathRegInp.regChksum, or
# PhyPathRegInp.verifyChksum are set, calculate the checksum of the replica of
# PhyPathRegInp.obj_path on PhyPathRegInp.resc_hier.
#
pep_api_phy_path_reg_post(*Instance, *Comm, *PhyPathRegInp) {
    if (!_hasKey(*PhyPathRegInp, 'regRepl')) {
        if (_needsChecksum(*PhyPathRegInp)) {
            *objPath = _getValue(*PhyPathRegInp, 'obj_path');
            *rescHier = _getValue(*PhyPathRegInp, 'resc_hier');
            _ensureReplicasChecksum(*objPath, *rescHier);
        }
    }
}

# replica_open and replica_close work together.
#
# ALGORITHM:
#
# When replica_open_post is called, if *DataObjInp.destRescName is defined, then
# store it and *DataObjInp.obj_path in temporaryStorage. When replica_close_post
# is called, if destRescName and obj_path are in temporaryStorage, and
# *JSON_INPUT.buf.compute_checksum != true, compute the checksum of obj_path.
#
# N.B. These can be triggered by istream.
# N.B. Only `istream write` needs to be considered.
# N.B. This can create new, overwrite existing, modify existing, and append to
#      existing data objects.
# N.B. This can target a specific resource, e.g., `istream write -R`, or
#      existing replica, e.g., `istream write -n`.
# N.B. This can create a checksum, e.g., `istream write -k`.
# N.B. When a data object with a checksum is overwritten, modified or appended
#      to, the checksum is cleared.
#
# *DataObjInp: https://docs.irods.org/4.2.10/doxygen/structDataObjInp.html
#
pep_api_replica_open_post(*Instance, *Comm, *DataObjInp, *JSON_OUTPUT) {
    *objPath = _getValue(*DataObjInp, 'obj_path');

    # Remember which object (and target resource, if any) was opened so that
    # replica_close can checksum it.
    if (*objPath != '') {
        temporaryStorage.replica_rescHier = _getValue(*DataObjInp, 'destRescName');
        temporaryStorage.replica_dataObjPath = *objPath;
    }
}

# Checksums the object recorded by replica_open unless the close request itself
# asked for a checksum (compute_checksum is true), then blanks the recorded
# values.
pep_api_replica_close_post(*Instance, *Comm, *JsonInput) {
    *objPath = _getValue(temporaryStorage, 'replica_dataObjPath');

    if (*objPath != '') {
        # A missing compute_checksum field is treated as false.
        *alreadySummed =
            match json_deserialize(*JsonInput.buf) with
                | json_deserialize_val(*closeArgs, *_) =>
                    match json_getValue(*closeArgs, 'compute_checksum') with
                        | json_empty => false
                        | json_bool(*flag) => *flag;

        if (!*alreadySummed) {
            *rescHier = _getValue(temporaryStorage, 'replica_rescHier');
            _ensureReplicasChecksum(*objPath, *rescHier);
        }

        temporaryStorage.replica_dataObjPath = '';
        temporaryStorage.replica_rescHier = '';
    }
}

# ALGORITHM:
#
# Check to see if JSON_INPUT.buf.options.no_create is false. If it is, check to
# see if neither options.replica_number nor options.leaf_resource_name is set.
# If that's the case, check to see if the data object's 0 replica has a
# checksum. If it doesn't, compute its checksum.
#
# NOTE(review): the rule below queries every replica of the data object, not
# only replica 0 -- confirm which behaviour is intended.
#
pep_api_touch_post(*Instance, *Comm, *JsonInput) {
    *request =
        match json_deserialize(*JsonInput.buf) with
            | json_deserialize_val(*doc, *_) => *doc;

    *objPath =
        match json_getValue(*request, 'logical_path') with
            | json_empty => ''
            | json_str(*text) => *text;

    if (*objPath != '') {
        *touchOpts = json_getValue(*request, 'options');

        # A missing option is treated as "not set" / false.
        *noCreate =
            match json_getValue(*touchOpts, 'no_create') with
                | json_empty => false
                | json_bool(*flag) => *flag;

        *replNumSet =
            match json_getValue(*touchOpts, 'replica_number') with
                | json_empty => false
                | json_num(*num) => true;

        *rescNameSet =
            match json_getValue(*touchOpts, 'leaf_resource_name') with
                | json_empty => false
                | json_str(*_) => true;

        if (!(*noCreate || *replNumSet || *rescNameSet)) {
            msiSplitPath(*objPath, *collPath, *dataName);

            # Checksum each replica of the object that has no checksum yet.
            foreach ( *row in
                SELECT DATA_CHECKSUM, DATA_RESC_HIER
                WHERE COLL_NAME = *collPath AND DATA_NAME = *dataName
            ) {
                if (*row.DATA_CHECKSUM == '') {
                    _ensureReplicasChecksum(*objPath, *row.DATA_RESC_HIER);
                }
            }
        }
    }
}

# N.B. These aren't used by iCommands or any official API, so let's not
#      implement them.
#
pep_api_bulk_data_obj_reg_post(*Instance, *Comm, *BulkDataObjRegInp, *BULK_DATA_OBJ_REG_OUT) {}
pep_api_data_obj_create_and_stat_post(*Instance, *Comm, *DataObjInp, *OpenStat) {}