diff --git a/CHANGELOG b/CHANGELOG index 55366f1e4e5f..1bbaadc4e631 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ devel ----- +* added "random" masking to mask any data type, added wildcard masking + * report run-time openssl version (for dynamically linked executables) * added greeting warning about maintainer mode diff --git a/Documentation/Books/Manual/Programs/Arangodump/Maskings.md b/Documentation/Books/Manual/Programs/Arangodump/Maskings.md index 627c92167c15..341b40ab9953 100644 --- a/Documentation/Books/Manual/Programs/Arangodump/Maskings.md +++ b/Documentation/Books/Manual/Programs/Arangodump/Maskings.md @@ -109,6 +109,8 @@ Path `path` defines which field to obfuscate. There can only be a single path per masking, but an unlimited amount of maskings per collection. +Top-level **system attributes** (`_key`, `_from`, etc.) are +never masked. To mask a top-level attribute value, the path is simply the attribute name, for instance `"name"` to mask the value `"foobar"`: @@ -149,8 +151,11 @@ is not a leaf attribute. If the attribute value is an **array** then the masking is applied to **all array elements individually**. -If you have an attribute name that contains a dot, you need to quote the -name with either a tick or a backtick. For example: +If the path is `"*"` then it matches all leaf attributes. + +If you have an attribute name that contains a dot or a top-level attribute +with a single asterisk as name (`"*": ...`) then you need to quote the name in +ticks or backticks to escape it. For example: "path": "´name.with.dots´" @@ -219,6 +224,8 @@ get masked if there is only a masking configured for the attribute `email` but not its nested attributes. {% endhint %} +You can use the special path `"*"` to match all leaf attributes. + **Examples** Masking `email` with the _Xify Front_ function will convert: @@ -280,7 +287,6 @@ including the elements of the sub-array. 
The document: To mask the email address, you could use the paths `email.address` or `.address`. - Masking Functions ----------------- @@ -298,17 +304,19 @@ The following masking functions are only available in the - [Phone Number](#phone-number) - [Email Address](#email-address) -The masking function: +The masking functions: - [Random String](#random-string) +- [Random](#random) -… is available in the Community Edition as well as the Enterprise Edition. +… are available in the Community Edition as well as the Enterprise Edition. ### Random String -This masking type will replace all values of attributes with key -`name` with an anonymized string. It is not guaranteed that the string -will be of the same length. +This masking type will replace all string values of attributes with key +`name` with an anonymized string. It is not guaranteed that the +string will be of the same length. Attributes whose values are not strings +are not modified. A hash of the original string is computed. If the original string is shorter then the hash will be used. This will result in a longer @@ -391,6 +399,12 @@ A document like: } ``` +### Random + +This masking type works like _Random String_ for attributes with string +values. Attributes with integer, decimal or boolean values are +replaced by random integers, decimals or booleans. 
+ ### Xify Front This masking type replaces the front characters with `x` and diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index a6d24ce8032c..26dc37b4b83f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -237,6 +237,7 @@ add_library(${LIB_ARANGO} STATIC Maskings/Collection.cpp Maskings/Maskings.cpp Maskings/Path.cpp + Maskings/RandomMask.cpp Maskings/RandomStringMask.cpp ProgramOptions/Option.cpp ProgramOptions/ProgramOptions.cpp diff --git a/lib/Maskings/AttributeMasking.cpp b/lib/Maskings/AttributeMasking.cpp index c95e843b922e..79727e1e1d68 100644 --- a/lib/Maskings/AttributeMasking.cpp +++ b/lib/Maskings/AttributeMasking.cpp @@ -25,12 +25,14 @@ #include "Basics/StringUtils.h" #include "Logger/Logger.h" #include "Maskings/RandomStringMask.h" +#include "Maskings/RandomMask.h" using namespace arangodb; using namespace arangodb::maskings; void arangodb::maskings::InstallMaskings() { AttributeMasking::installMasking("randomString", RandomStringMask::create); + AttributeMasking::installMasking("random", RandomMask::create); } std::unordered_map (*)(Path, Maskings*, VPackSlice const&)> AttributeMasking::_maskings; diff --git a/lib/Maskings/Maskings.cpp b/lib/Maskings/Maskings.cpp index fec258bfc0f9..5d84617fbbf9 100644 --- a/lib/Maskings/Maskings.cpp +++ b/lib/Maskings/Maskings.cpp @@ -180,17 +180,14 @@ VPackValue Maskings::maskedItem(Collection& collection, std::vector std::string& buffer, VPackSlice const& data) { static std::string xxxx("xxxx"); - if (path.size() == 1) { - if (path[0] == "_key" || path[0] == "_id" || path[0] == "_rev" || - path[0] == "_from" || path[0] == "_to") { - if (data.isString()) { - velocypack::ValueLength length; - char const* c = data.getString(length); - buffer = std::string(c, length); - return VPackValue(buffer); - } else if (data.isInteger()) { - return VPackValue(data.getInt()); - } + if (path.size() == 1 && path[0].size() >= 1 && path[0][0] == '_') { + if (data.isString()) { + velocypack::ValueLength length; + 
char const* c = data.getString(length); + buffer = std::string(c, length); + return VPackValue(buffer); + } else if (data.isInteger()) { + return VPackValue(data.getInt()); } } diff --git a/lib/Maskings/Path.cpp b/lib/Maskings/Path.cpp index bd51c91e25a6..aa978f3a3008 100644 --- a/lib/Maskings/Path.cpp +++ b/lib/Maskings/Path.cpp @@ -35,6 +35,12 @@ ParseResult Path::parse(std::string const& def) { "path must not be empty"); } + std::vector components; + + if (def == "*") { + return ParseResult(Path(false, true, components)); + } + bool wildcard = false; if (def[0] == '.') { @@ -50,7 +56,6 @@ ParseResult Path::parse(std::string const& def) { U8_NEXT(p, off, len, ch); } - std::vector components; std::string buffer; while (off < len) { @@ -68,7 +73,7 @@ ParseResult Path::parse(std::string const& def) { components.push_back(buffer); buffer.clear(); - } else if (ch == 96 || ch == 180) { // windows does not like U'`' and U'´' + } else if (ch == 96 || ch == 180) { // windows does not like U'`' and U'´' UChar32 quote = ch; U8_NEXT(p, off, len, ch); @@ -116,13 +121,17 @@ ParseResult Path::parse(std::string const& def) { "path '" + def + "' contains no component"); } - return ParseResult(Path(wildcard, components)); + return ParseResult(Path(wildcard, false, components)); } bool Path::match(std::vector const& path) const { size_t cs = _components.size(); size_t ps = path.size(); + if (_any) { + return true; + } + if (!_wildcard) { if (ps != cs) { return false; diff --git a/lib/Maskings/Path.h b/lib/Maskings/Path.h index 6e9816b46ea0..cfb2ae306c53 100644 --- a/lib/Maskings/Path.h +++ b/lib/Maskings/Path.h @@ -36,13 +36,14 @@ class Path { public: - Path() : _wildcard(false) {} + Path() : _wildcard(false), _any(false) {} - Path(bool wildcard, std::vector const& components) - : _wildcard(wildcard), _components(components) {} + Path(bool wildcard, bool any, std::vector const& components) + : _wildcard(wildcard), _any(any), _components(components) {} bool match(std::vector const& path) const; private: bool _wildcard; + bool 
_any; std::vector _components; }; } // namespace maskings diff --git a/lib/Maskings/RandomMask.cpp b/lib/Maskings/RandomMask.cpp new file mode 100644 index 000000000000..a79da86b778b --- /dev/null +++ b/lib/Maskings/RandomMask.cpp @@ -0,0 +1,55 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Frank Celler +//////////////////////////////////////////////////////////////////////////////// + +#include "RandomMask.h" + +#include "Maskings/Maskings.h" +#include "Random/RandomGenerator.h" + +using namespace arangodb; +using namespace arangodb::maskings; + +ParseResult RandomMask::create(Path path, Maskings* maskings, + VPackSlice const&) { + return ParseResult(AttributeMasking(path, new RandomMask(maskings))); +} + +VPackValue RandomMask::mask(bool value, std::string&) const { + int64_t result = + RandomGenerator::interval(static_cast(0), static_cast(1)); + + return VPackValue(result % 2 == 0); +} + +VPackValue RandomMask::mask(int64_t, std::string&) const { + int64_t result = RandomGenerator::interval(static_cast(-1000), + static_cast(1000)); + + return VPackValue(result); +} + +VPackValue RandomMask::mask(double, std::string&) const { + int64_t result = RandomGenerator::interval(static_cast(-1000), + 
static_cast(1000)); + + return VPackValue(1.0 * result / 100); +} diff --git a/lib/Maskings/RandomMask.h b/lib/Maskings/RandomMask.h new file mode 100644 index 000000000000..910d2816bdfd --- /dev/null +++ b/lib/Maskings/RandomMask.h @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Frank Celler +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGODB_MASKINGS_ATTRIBUTE_RANDOM_MASK_H +#define ARANGODB_MASKINGS_ATTRIBUTE_RANDOM_MASK_H 1 + +#include "Maskings/RandomStringMask.h" + +namespace arangodb { +namespace maskings { +class RandomMask : public RandomStringMask { + public: + static ParseResult create(Path, Maskings*, VPackSlice const& def); + + public: + VPackValue mask(bool, std::string& buffer) const override; + VPackValue mask(int64_t, std::string& buffer) const override; + VPackValue mask(double, std::string& buffer) const override; + + private: + explicit RandomMask(Maskings* maskings) : RandomStringMask(maskings) {} +}; +} // namespace maskings +} // namespace arangodb + +#endif diff --git a/lib/Maskings/RandomStringMask.cpp b/lib/Maskings/RandomStringMask.cpp index 9bff26354acf..3e8c347c8ce3 100644 --- a/lib/Maskings/RandomStringMask.cpp +++ 
b/lib/Maskings/RandomStringMask.cpp @@ -26,8 +26,6 @@ #include "Basics/fasthash.h" #include "Maskings/Maskings.h" -static std::string const xxxx("xxxx"); - using namespace arangodb; using namespace arangodb::maskings; diff --git a/lib/Maskings/RandomStringMask.h b/lib/Maskings/RandomStringMask.h index fdf745263a51..8a8c26e7677f 100644 --- a/lib/Maskings/RandomStringMask.h +++ b/lib/Maskings/RandomStringMask.h @@ -39,7 +39,7 @@ class RandomStringMask : public MaskingFunction { VPackValue mask(int64_t, std::string& buffer) const override; VPackValue mask(double, std::string& buffer) const override; - private: + protected: explicit RandomStringMask(Maskings* maskings) : MaskingFunction(maskings) {} }; } // namespace maskings diff --git a/tests/js/server/shell/shell-database-mmfiles-noncluster.js b/tests/js/server/shell/shell-database-mmfiles-noncluster.js index cca308f07e6c..d1db16912207 100644 --- a/tests/js/server/shell/shell-database-mmfiles-noncluster.js +++ b/tests/js/server/shell/shell-database-mmfiles-noncluster.js @@ -72,7 +72,8 @@ function DatabaseSuite () { "icu-version", "jemalloc", "maintainer-mode", - "openssl-version", + "openssl-version-run-time", + "openssl-version-compile-time", "platform", "reactor-type", "rocksdb-version", diff --git a/tests/js/server/shell/shell-database-rocksdb-noncluster.js b/tests/js/server/shell/shell-database-rocksdb-noncluster.js index 859e8370e961..67646737a9b5 100644 --- a/tests/js/server/shell/shell-database-rocksdb-noncluster.js +++ b/tests/js/server/shell/shell-database-rocksdb-noncluster.js @@ -72,7 +72,8 @@ function DatabaseSuite () { "icu-version", "jemalloc", "maintainer-mode", - "openssl-version", + "openssl-version-run-time", + "openssl-version-compile-time", "platform", "reactor-type", "rocksdb-version",