Skip to content

Commit

Permalink
Add redactor utility
Browse files Browse the repository at this point in the history
Redaction keeps sensitive data contained within result sets and prevents
it from leaking into logs or the web UI. This utility will be used in
future patches.

Change-Id: I763db090ab5389c188dd291729b07215c1208560
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5947
Reviewed-by: Casey Ching <casey@cloudera.com>
Tested-by: jenkins
  • Loading branch information
casey authored and jenkins committed Feb 6, 2015
1 parent b9fb6a8 commit 803a95c
Show file tree
Hide file tree
Showing 7 changed files with 845 additions and 0 deletions.
4 changes: 4 additions & 0 deletions be/src/util/CMakeLists.txt
Expand Up @@ -57,6 +57,7 @@ add_library(Util
# TODO: not supported on RHEL 5
# perf-counters.cc
progress-updater.cc
redactor.cc
runtime-profile.cc
simple-logger.cc
symbols-util.cc
Expand Down Expand Up @@ -113,3 +114,6 @@ ADD_BE_TEST(symbols-util-test)
#ADD_BE_TEST(perf-counters-test)
ADD_BE_TEST(webserver-test)
ADD_BE_TEST(pretty-printer-test)
ADD_BE_TEST(redactor-config-parser-test)
ADD_BE_TEST(redactor-test)
ADD_BE_TEST(redactor-unconfigured-test)
150 changes: 150 additions & 0 deletions be/src/util/redactor-config-parser-test.cc
@@ -0,0 +1,150 @@
// Copyright 2015 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "redactor.cc"

#include <cstdio> // tmpnam

#include <gtest/gtest.h>

#include "redactor-test-utils.h"

namespace impala {

using std::string;

// Loading rules from a path that names no file must surface the OS-level
// "No such file" error in the returned message.
TEST(ParserTest, FileNotFound) {
  // tmpnam only generates a name; nothing is created at that path here.
  const string missing_path(tmpnam(NULL));
  const string load_error = SetRedactionRulesFromFile(missing_path);
  ASSERT_ERROR_MESSAGE_CONTAINS(load_error, "No such file");
}

// Covers two "nothing to parse" inputs: a zero-length rules file and a rules file
// that holds only whitespace.
TEST(ParserTest, EmptyFile) {
// A zero-length file is expected to load without an error message, leave the global
// rule list empty, and therefore leave sample text unchanged by redaction.
TempRulesFile rules_file("");
string error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_EQ("", error);
ASSERT_EQ(0, g_rules->size());
ASSERT_UNREDACTED("foo33");

// Rewriting the same file with only whitespace is expected to produce a parse
// error rather than being treated like the zero-length case above.
rules_file.OverwriteContents(" \t\n ");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "Text only contains white space");
}

// A rule may carry a "description" property next to "search" and "replace"; the
// file must still load cleanly and the rule must still be applied.
TEST(ParserTest, DescriptionPropertyIgnored) {
  const string rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"foo\", \"replace\": \"bar\", \"description\": \"def\"}"
      " ]"
      "}";
  TempRulesFile config(rules_json);
  string error = SetRedactionRulesFromFile(config.name());
  ASSERT_EQ("", error);
  // The rule itself is in effect: "foo" is rewritten to "bar".
  ASSERT_REDACTED_EQ("foo", "bar");
}

// Feeds a series of malformed or structurally wrong rule files through the parser and
// checks that each one is rejected with the expected error text. The same temp file is
// reused and overwritten for every case.
TEST(ParserTest, InvalidJson) {
// Case 1: the members are not wrapped in an enclosing object, so the document has
// neither an object nor an array at its root.
TempRulesFile rules_file(
"\"version\": 100,"
"\"rules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
"]");
string error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "either an object or array at root");

// Case 2: the config object is wrapped in an array; the root must be an object.
rules_file.OverwriteContents(
"[{"
" \"version\": 1.0,"
" \"rules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}]");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "root element must be a JSON Object");

// Case 3: misspelled top-level property ("ules" instead of "rules").
rules_file.OverwriteContents(
"{"
" \"version\": 1,"
" \"ules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "unexpected property 'ules'");

// Case 4: misspelled property inside a rule ("earch" instead of "search").
rules_file.OverwriteContents(
"{"
" \"version\": 1,"
" \"rules\": ["
" {\"earch\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "unexpected property 'earch'");

// Case 5: an object whose content is not a quoted member name at all.
rules_file.OverwriteContents("{!@#$}");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "Name of an object member must be a string");
}

// Checks the validation of the top-level "version" property: it must be present, must
// be an integer, and must be a supported value. The same temp file is overwritten for
// each case.
TEST(ParserTest, BadVersion) {
// Case 1: an integer version other than 1 is rejected.
TempRulesFile rules_file(
"{"
" \"version\": 100,"
" \"rules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}");
string error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "only version 1");

// Case 2: a non-integer literal (1.0) is rejected even though its value equals 1.
rules_file.OverwriteContents(
"{"
" \"version\": 1.0,"
" \"rules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "must be an integer");

// Case 3: the version property is omitted entirely.
rules_file.OverwriteContents(
"{"
" \"rules\": ["
" {\"search\": \"[0-9]\", \"replace\": \"#\"}"
" ]"
"}");
error = SetRedactionRulesFromFile(rules_file.name());
ASSERT_ERROR_MESSAGE_CONTAINS(error, "version is required");
}

// A rule whose "search" pattern is not a valid regular expression (here an
// unterminated character class) must be reported when the file is loaded.
TEST(ParserTest, BadRegex) {
  const string rules_json =
      "{"
      " \"version\": 1,"
      " \"rules\": ["
      " {\"search\": \"[0-9\", \"replace\": \"#\"}"
      " ]"
      "}";
  TempRulesFile config(rules_json);
  const string load_error = SetRedactionRulesFromFile(config.name());
  ASSERT_ERROR_MESSAGE_CONTAINS(load_error, "missing ]");
}

}

// Test binary entry point: lets gtest consume its command-line flags, then runs
// every registered test and uses the result as the process exit status.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}
115 changes: 115 additions & 0 deletions be/src/util/redactor-test-utils.h
@@ -0,0 +1,115 @@
// Copyright 2015 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdlib> // rand
#include <cstdio> // file stuff
#include <pthread.h>
#include <time.h>

#include <gtest/gtest.h>

#ifndef IMPALA_REDACTOR_TEST_UTILS_H
#define IMPALA_REDACTOR_TEST_UTILS_H

namespace impala {

// Utility class for creating a redaction config file that will be automatically deleted
// upon test completion.
//
// The path comes from tmpnam(NULL), which returns a pointer into a static buffer that
// the next tmpnam call overwrites. The path is therefore copied into this object so that
// several instances (or other tmpnam callers) cannot change which file an instance
// refers to.
class TempRulesFile {
 public:
  // Picks a temporary path and writes 'contents' to it. Aborts the process if no path
  // can be generated or the file cannot be written.
  TempRulesFile(const std::string& contents) {
    const char* generated_name = tmpnam(NULL);
    if (generated_name == NULL) {
      std::cout << "Error generating temp file name" << std::endl;
      abort();
    }
    name_ = generated_name;
    OverwriteContents(contents);
  }

  // Deletes the file; a failure to delete is ignored.
  ~TempRulesFile() { remove(name_.c_str()); }

  // Replaces the whole file with 'contents' (the file is reopened in "w" mode, which
  // truncates it). Aborts the process on any open, write, or close error.
  void OverwriteContents(const std::string& contents) {
    FILE* handle = fopen(name_.c_str(), "w");
    if (handle == NULL) {
      std::cout << "Error creating temp file; " << strerror(errno) << std::endl;
      abort();
    }
    int status = fputs(contents.c_str(), handle);
    if (status < 0) {
      std::cout << "Error writing to temp file; " << strerror(errno) << std::endl;
      abort();
    }
    status = fclose(handle);
    if (status != 0) {
      std::cout << "Error closing temp file; " << strerror(errno) << std::endl;
      abort();
    }
  }

  // Returns the absolute path to the file. The pointer stays valid for the lifetime of
  // this object.
  const char* name() const { return name_.c_str(); }

 private:
  // Not copyable: a copy's destructor would delete the file out from under the
  // original. Declared but intentionally left undefined.
  TempRulesFile(const TempRulesFile&);
  TempRulesFile& operator=(const TempRulesFile&);

  // Owned copy of the path generated in the constructor.
  std::string name_;
};

// Produces a seed by adding the nanosecond field of the current CLOCK_REALTIME
// reading to the calling thread's id; the sum is narrowed to unsigned int on return.
unsigned int RandSeed() {
  struct timespec current_time;
  clock_gettime(CLOCK_REALTIME, &current_time);
  const pthread_t caller_thread = pthread_self();
  return current_time.tv_nsec + caller_thread;
}

// Randomly fills the contents of 'string' up to the given length.
void RandomlyFillString(char* string, const int length) {
ASSERT_GT(length, 0);
unsigned int rand_seed = RandSeed();
int char_count = static_cast<int>('~') - static_cast<int>(' ') + 1;
for (int i = 0; i < length - 1; ++i) {
string[i] = ' ' + rand_r(&rand_seed) % char_count;
}
string[length - 1] = '\0';
}

// Raises a fatal gtest failure unless 'expected' occurs somewhere inside 'message'.
// On failure both the expected substring and the full message are printed. Because
// this is a function, a fatal failure only returns from here; the macro
// ASSERT_ERROR_MESSAGE_CONTAINS in this file wraps the call to also stop the caller.
void AssertErrorMessageContains(const std::string& message, const char* expected) {
ASSERT_TRUE(message.find(expected) != std::string::npos)
<< "Expected substring <<" << expected << ">> is not in <<" << message << ">>";
}

// Copies 'message' into a std::string, passes it to Redact() for in-place
// modification, and raises a fatal gtest failure unless the result equals 'expected'.
void AssertRedactedEquals(const char* message, const char* expected) {
// Redact takes a pointer to the string, so work on a private copy of the input.
std::string temp(message);
Redact(&temp);
ASSERT_EQ(expected, temp);
}

// Asserts that redaction leaves 'message' exactly as it was, i.e. the redacted
// output is expected to equal the input.
void AssertUnredacted(const char* message) {
  const char* const expected_output = message;
  AssertRedactedEquals(message, expected_output);
}

// Putting these assertion utilities above into functions messes up failure messages
// such that failures appear to be coming from this file instead of from the file
// that called the utility assertion. Using a "SCOPED_TRACE" adds the location of the
// caller to the error message.
//
// SCOPED_ASSERT runs 'assertion' inside its own brace scope with a trace active and
// then returns from the enclosing function if a fatal failure has been recorded, so a
// failed helper assertion stops the calling test just like a direct ASSERT_* would.
// HasFatalFailure() is called unqualified, so the macro has to be expanded where that
// name resolves (in this file's users, inside gtest TEST bodies).
// NOTE(review): the expansion is a bare { } block rather than do { } while (0), so
// using it as the unbraced body of an if that has an else will not parse as intended.
#define SCOPED_ASSERT(assertion) { \
SCOPED_TRACE(""); \
assertion; \
if (HasFatalFailure()) return; \
}

// Fails the calling test unless the string 'error' contains the substring 'expected'.
#define ASSERT_ERROR_MESSAGE_CONTAINS(error, expected) \
SCOPED_ASSERT(AssertErrorMessageContains(error, expected))

// Fails the calling test unless redacting 'actual' yields exactly 'expected'.
#define ASSERT_REDACTED_EQ(actual, expected) \
SCOPED_ASSERT(AssertRedactedEquals(actual, expected))

// Fails the calling test unless redacting 'string' leaves it unchanged.
#define ASSERT_UNREDACTED(string) SCOPED_ASSERT(AssertUnredacted(string))

}

#endif

0 comments on commit 803a95c

Please sign in to comment.