Skip to content

Commit

Permalink
[location_awareness] Assign locations to registering tablet servers
Browse files Browse the repository at this point in the history
This patch introduces a location field maintained by the master for
each tablet server. The master determines the value of this field
whenever a tablet server registers. It does this by using an
external command, specified with the flag --location_mapping_cmd,
to produce a location from the hostname or IP address of the tablet
server.

To help with ad hoc testing, I also added the location field to the
/tablet-servers web page, and fixed a small oversight where the
table of tablet servers wasn't sorted, so its order changed depending
on the order tablet servers first registered in.

I also altered the DATA_FILES CMake function so that data files copied
to the build directory or to slaves by dist-test are executable as well
as readable. This was necessary for the new TSDescriptor test which
tests location assignment.

Change-Id: I5eb98823ab7b3b8141b8630196c29c1ebf8e6878
Reviewed-on: http://gerrit.cloudera.org:8080/11115
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
  • Loading branch information
wdberkeley committed Sep 4, 2018
1 parent a58867c commit dcc39d5
Show file tree
Hide file tree
Showing 9 changed files with 342 additions and 12 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -843,10 +843,10 @@ function(ADD_KUDU_TEST REL_TEST_NAME)
file(REMOVE ${DST_DIR}/${DATA_FILE})
endif()
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${DATA_FILE}
# Copy with read-only permissions since tests should not
# modify the data files in place.
# Copy with read and execute permissions, since tests should not modify
# the data files in place, but data files may be scripts used by tests.
DIRECTORY_PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
FILE_PERMISSIONS OWNER_READ GROUP_READ
FILE_PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
DESTINATION ${DST_DIR})
endforeach()

Expand Down
1 change: 1 addition & 0 deletions src/kudu/master/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ ADD_KUDU_TEST(hms_notification_log_listener-test)
ADD_KUDU_TEST(master-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(mini_master-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(sys_catalog-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(ts_descriptor-test DATA_FILES testdata/first_argument.sh)

# Actual master executable
add_executable(kudu-master master_main.cc)
Expand Down
37 changes: 37 additions & 0 deletions src/kudu/master/master-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ DECLARE_bool(raft_prepare_replacement_before_eviction);
DECLARE_double(sys_catalog_fail_during_write);
DECLARE_int32(diagnostics_log_stack_traces_interval_ms);
DECLARE_int32(master_inject_latency_on_tablet_lookups_ms);
DECLARE_string(location_mapping_cmd);

namespace kudu {
namespace master {
Expand Down Expand Up @@ -442,6 +443,42 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) {
"they must be run with the same scheme", kTsUUID);
ASSERT_STR_MATCHES(s.ToString(), msg);
}

// Ensure that the TS doesn't register if location mapping fails.
{
// Set a command that always fails.
FLAGS_location_mapping_cmd = "false";

// Set a new UUID so registration is for the first time.
auto new_common = common;
new_common.mutable_ts_instance()->set_permanent_uuid("lmc-fail-ts");

TSHeartbeatRequestPB hb_req;
TSHeartbeatResponsePB hb_resp;
RpcController rpc;
hb_req.mutable_common()->CopyFrom(new_common);
hb_req.mutable_registration()->CopyFrom(fake_reg);
hb_req.mutable_replica_management_info()->CopyFrom(rmi);

// Registration should fail.
Status s = proxy_->TSHeartbeat(hb_req, &hb_resp, &rpc);
ASSERT_TRUE(s.IsRemoteError());
ASSERT_STR_CONTAINS(s.ToString(), "failed to run location mapping command");

// Make sure the tablet server isn't returned to clients.
ListTabletServersRequestPB list_ts_req;
ListTabletServersResponsePB list_ts_resp;
rpc.Reset();
ASSERT_OK(proxy_->ListTabletServers(list_ts_req, &list_ts_resp, &rpc));

LOG(INFO) << SecureDebugString(list_ts_resp);
ASSERT_FALSE(list_ts_resp.has_error());
ASSERT_EQ(1, list_ts_resp.servers_size());
ASSERT_EQ("my-ts-uuid", list_ts_resp.servers(0).instance_id().permanent_uuid());

// Reset the flag.
FLAGS_location_mapping_cmd = "";
}
}

Status MasterTest::CreateTable(const string& table_name,
Expand Down
18 changes: 14 additions & 4 deletions src/kudu/master/master_path_handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <vector>

#include <boost/bind.hpp> // IWYU pragma: keep
#include <boost/optional/optional.hpp>
#include <glog/logging.h>

#include "kudu/common/common.pb.h"
Expand Down Expand Up @@ -90,9 +91,17 @@ MasterPathHandlers::~MasterPathHandlers() {
void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*/,
Webserver::WebResponse* resp) {
EasyJson* output = resp->output;
vector<std::shared_ptr<TSDescriptor>> descs;
vector<shared_ptr<TSDescriptor>> descs;
master_->ts_manager()->GetAllDescriptors(&descs);

// Sort by UUID so the order remains consistent betweeen restarts.
std::sort(descs.begin(), descs.end(),
[](const shared_ptr<TSDescriptor>& left,
const shared_ptr<TSDescriptor>& right) {
DCHECK(left && right);
return left->permanent_uuid() < right->permanent_uuid();
});

(*output)["num_ts"] = std::to_string(descs.size());

// In mustache, when conditionally rendering a section of the template based
Expand All @@ -106,7 +115,7 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*
output->Set("live_tservers", EasyJson::kArray);
output->Set("dead_tservers", EasyJson::kArray);
map<string, array<int, 2>> version_counts;
for (const std::shared_ptr<TSDescriptor>& desc : descs) {
for (const auto& desc : descs) {
string ts_key = desc->PresumedDead() ? "dead_tservers" : "live_tservers";
EasyJson ts_json = (*output)[ts_key].PushBack(EasyJson::kObject);

Expand All @@ -121,6 +130,7 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*
}
ts_json["time_since_hb"] = StringPrintf("%.1fs", desc->TimeSinceHeartbeat().ToSeconds());
ts_json["registration"] = pb_util::SecureShortDebugString(reg);
ts_json["location"] = desc->location().get_value_or("<none>");
version_counts[reg.software_version()][desc->PresumedDead() ? 1 : 0]++;
has_no_live_ts &= desc->PresumedDead();
has_no_dead_ts &= !desc->PresumedDead();
Expand Down Expand Up @@ -599,9 +609,9 @@ void MasterPathHandlers::HandleDumpEntities(const Webserver::WebRequest& /*req*/

jw.String("tablet_servers");
jw.StartArray();
vector<std::shared_ptr<TSDescriptor> > descs;
vector<shared_ptr<TSDescriptor> > descs;
master_->ts_manager()->GetAllDescriptors(&descs);
for (const std::shared_ptr<TSDescriptor>& desc : descs) {
for (const auto& desc : descs) {
jw.StartObject();

jw.String("uuid");
Expand Down
20 changes: 20 additions & 0 deletions src/kudu/master/testdata/first_argument.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# A script that prints the first argument and ignores all the others.
echo "$1"
135 changes: 135 additions & 0 deletions src/kudu/master/ts_descriptor-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "kudu/master/ts_descriptor.h"

#include <memory>
#include <string>
#include <vector>

#include <boost/optional/optional.hpp>
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include "kudu/common/common.pb.h"
#include "kudu/common/wire_protocol.pb.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/path_util.h"
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"

DECLARE_string(location_mapping_cmd);

using std::shared_ptr;
using std::string;
using std::vector;
using strings::Substitute;

namespace kudu {
namespace master {

// Configures 'instance' and 'registration' with basic info that can be
// used to register a tablet server.
void SetupBasicRegistrationInfo(const string& uuid,
NodeInstancePB* instance,
ServerRegistrationPB* registration) {
CHECK_NOTNULL(instance);
CHECK_NOTNULL(registration);

instance->set_permanent_uuid(uuid);
instance->set_instance_seqno(0);

registration->clear_rpc_addresses();
for (const auto port : { 12345, 67890 }) {
auto* rpc_hostport = registration->add_rpc_addresses();
rpc_hostport->set_host("localhost");
rpc_hostport->set_port(port);
}
registration->clear_http_addresses();
auto* http_hostport = registration->add_http_addresses();
http_hostport->set_host("localhost");
http_hostport->set_port(54321);
registration->set_software_version("1.0.0");
registration->set_https_enabled(false);
}

TEST(TSDescriptorTest, TestRegistration) {
const string uuid = "test";
NodeInstancePB instance;
ServerRegistrationPB registration;
SetupBasicRegistrationInfo(uuid, &instance, &registration);
shared_ptr<TSDescriptor> desc;
ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc));

// Spot check some fields and the ToString value.
ASSERT_EQ(uuid, desc->permanent_uuid());
ASSERT_EQ(0, desc->latest_seqno());
// There is no location as --location_mapping_cmd is unset by default.
ASSERT_EQ(boost::none, desc->location());
ASSERT_EQ("test (localhost:12345)", desc->ToString());
}

TEST(TSDescriptorTest, TestLocationCmd) {
const string kLocationCmdPath = JoinPathSegments(GetTestExecutableDirectory(),
"testdata/first_argument.sh");
// A happy case, using all allowed special characters.
const string location = "/foo-bar0/BAAZ._9-quux";
FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, location);

const string uuid = "test";
NodeInstancePB instance;
ServerRegistrationPB registration;
SetupBasicRegistrationInfo(uuid, &instance, &registration);
shared_ptr<TSDescriptor> desc;
ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc));

ASSERT_EQ(location, desc->location());

// Bad cases where the script returns locations with disallowed characters or
// in the wrong format.
const vector<string> bad_locations = {
"\"\"", // Empty (doesn't begin with /).
"foo", // Doesn't begin with /.
"/foo$", // Contains the illegal character '$'.
};
for (const auto& bad_location : bad_locations) {
FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, bad_location);
ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError());
}

// Bad cases where the script is invalid.
const vector<string> bad_cmds = {
// No command provided.
" ",
// Command not found.
"notfound.sh",
// Command returns no output.
"true",
// Command fails.
"false",
// Command returns too many locations (i.e. contains illegal ' ' character).
Substitute("echo $0 $1", "/foo", "/bar"),
};
for (const auto& bad_cmd : bad_cmds) {
FLAGS_location_mapping_cmd = bad_cmd;
ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError());
}
}
} // namespace master
} // namespace kudu
Loading

0 comments on commit dcc39d5

Please sign in to comment.