Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-28436 Use connection url to specify the connection registry inf… #5770

Merged
merged 2 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions hbase-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-inline</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,77 @@
*/
package org.apache.hadoop.hbase.client;

import static org.apache.hadoop.hbase.HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY;

import java.io.IOException;
import java.net.URI;
import java.util.ServiceLoader;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;

/**
* Factory class to get the instance of configured connection registry.
* The entry point for creating a {@link ConnectionRegistry}.
*/
@InterfaceAudience.Private
final class ConnectionRegistryFactory {

private static final Logger LOG = LoggerFactory.getLogger(ConnectionRegistryFactory.class);

/**
 * All {@link ConnectionRegistryURIFactory} implementations discovered through
 * {@link ServiceLoader}, keyed by their lower-cased scheme.
 */
private static final ImmutableMap<String, ConnectionRegistryURIFactory> CREATORS;
static {
  ImmutableMap.Builder<String, ConnectionRegistryURIFactory> byScheme = ImmutableMap.builder();
  // Register every discovered implementation under its lower-cased scheme.
  ServiceLoader.load(ConnectionRegistryURIFactory.class)
    .forEach(impl -> byScheme.put(impl.getScheme().toLowerCase(), impl));
  // buildOrThrow raises IllegalArgumentException when two factories share the same key
  CREATORS = byScheme.buildOrThrow();
}

// Private constructor: this final class only exposes static factory methods.
private ConnectionRegistryFactory() {
}

/** Returns The connection registry implementation to use. */
static ConnectionRegistry getRegistry(Configuration conf, User user) {
/**
* Returns the connection registry implementation to use, for the given connection url
* {@code uri}.
* <p/>
* We use {@link ServiceLoader} to load different implementations, and use the scheme of the given
* {@code uri} to select. And if there is no protocol specified, or we can not find a
* {@link ConnectionRegistryURIFactory} implementation for the given scheme, we will fallback to
* use the old way to create the {@link ConnectionRegistry}. Notice that, if fallback happens, the
* specified connection url {@code uri} will not take effect, we will load all the related
* configurations from the given Configuration instance {@code conf}
*/
static ConnectionRegistry create(URI uri, Configuration conf, User user) throws IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I did a search on URI vs. URL in Java and I conclude that URI is the correct object type for our use-case.

if (StringUtils.isBlank(uri.getScheme())) {
LOG.warn("No scheme specified for {}, fallback to use old way", uri);
ndimiduk marked this conversation as resolved.
Show resolved Hide resolved
return create(conf, user);
}
ConnectionRegistryURIFactory creator = CREATORS.get(uri.getScheme().toLowerCase());
if (creator == null) {
LOG.warn("No creator registered for {}, fallback to use old way", uri);
return create(conf, user);
}
return creator.create(uri, conf, user);
}

/**
* Returns the connection registry implementation to use.
* <p/>
* This is used when we do not have a connection url, we will use the old way to load the
* connection registry, by checking the
* {@literal HConstants#CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY} configuration.
*/
static ConnectionRegistry create(Configuration conf, User user) {
Class<? extends ConnectionRegistry> clazz =
conf.getClass(CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY, RpcConnectionRegistry.class,
ConnectionRegistry.class);
conf.getClass(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
RpcConnectionRegistry.class, ConnectionRegistry.class);
return ReflectionUtils.newInstance(clazz, conf, user);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.security.User;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Factory abstraction for building a {@link ConnectionRegistry} from a connection URI. Each
 * implementation advertises the URI scheme it handles through {@link #getScheme()}.
 */
@InterfaceAudience.Private
public interface ConnectionRegistryURIFactory {

  /**
   * Returns the URI scheme handled by this implementation. {@link ConnectionRegistryFactory}
   * uses this value as the key when registering the factory.
   */
  String getScheme();

  /**
   * Builds a {@link ConnectionRegistry} from the supplied parameters.
   * @param uri  the connection URI
   * @param conf the configuration to use
   * @param user the user to create the registry for
   * @return the newly created connection registry
   * @throws IOException if the registry can not be created
   */
  ConnectionRegistry create(URI uri, Configuration conf, User user) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.security.User;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Connection registry creator implementation for creating {@link RpcConnectionRegistry}.
*/
@InterfaceAudience.Private
public class RpcConnectionRegistryCreator implements ConnectionRegistryURIFactory {

private static final Logger LOG = LoggerFactory.getLogger(RpcConnectionRegistryCreator.class);

/**
 * Creates an {@link RpcConnectionRegistry} whose bootstrap nodes are taken from the authority
 * part of the given {@code uri}.
 * @param uri  the connection URI; its scheme is expected to match {@link #getScheme()}
 * @param conf the base configuration; a copy is modified, not this instance
 * @param user the user to create the registry for
 * @return a new {@link RpcConnectionRegistry}
 * @throws IOException if the registry can not be created
 */
@Override
public ConnectionRegistry create(URI uri, Configuration conf, User user) throws IOException {
  // URI schemes are case-insensitive, and ConnectionRegistryFactory selects this factory by the
  // lower-cased scheme, so the sanity check must ignore case as well.
  assert getScheme().equalsIgnoreCase(uri.getScheme());
  String bootstrapNodes = uri.getAuthority();
  LOG.debug("connect to hbase cluster with rpc bootstrap servers='{}'", bootstrapNodes);
  // Apply the override on a copy of the given configuration.
  Configuration c = new Configuration(conf);
  c.set(RpcConnectionRegistry.BOOTSTRAP_NODES, bootstrapNodes);
  return new RpcConnectionRegistry(c, user);
}

@Override
public String getScheme() {
return "hbase+rpc";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part gets messy. I think the convention that has arisen around URIs with a '+' in their scheme section is used to indicate a protocol+transport. What does that mean for us? What do we even call our current RPC implementation of "Hadoop RPC plus protobuf cell back-channel", just "HBase RPC", so hbaserpc, which maybe is short for hbaserpc+tcp? Say down the road we support an hbase rpc over HTTP or over GRPC (which itself supports http/2 and http/3 as transports, as well as grpc-web) or over UDP, what then? What do we call our existing Thrift ("hbase+thrift"?) and REST gateways ("rest+http(s)"?)?

By contrast, the zookeeper client isn't a protocol at all. It's just a location of an expected service type. So then we can call it just "zookeeper" or "zk" for short.

I'm not saying we expect to have all these transport mechanisms, but we should think through what we want this part of our public API to look like and give precise meaning to the scheme section of the URI.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just followed the similar way with what we have in phoenix.

https://phoenix.apache.org/classpath_and_url.html

Agree that we should take care of the scheme part. Maybe we could start a discussion thread again on the mailing list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO having the first part be just hbase to denote that you're talking to hbase makes the most sense. Then the second part can be whatever. Based on what technology we have today and for years past and the foreseeable future, zk and rpc make sense. In this case, rpc doesn't even mean "talk to hbase over rpc"; it means "use RpcConnectionRegistry". I could see that being confusing; we could consider calling it bootstrap or something.

The suffix tells our code whether to parse the authority as a list of zk servers or as a list of bootstrap nodes. That's unrelated to grpc vs custom rpc, etc.

If someone wanted to create a grpc or http2 or w/e protocol, they'd still be talking to hbase and they'd still need some way to bootstrap the connection. So I bet the scheme would stay the same and which communication protocol to use would be a query parameter.

We could argue what's most to spec but since the +suffix stuff is largely not spec'd I think we should consider user intuitiveness and how our code uses it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose the way to think about this connection URI is that it is just for locating and initiating a connection to a cluster. Even if we were to pursue a more complete specification as I've advocated, a client still has non-trivial reliance on other configuration parameters. It's unrealistic to expect that we could roll up those details in the scheme portion of the URI. You've convinced me.

Given the menu of options that we have available today, what you have implemented here seems fine. Can we add "hbase" without the "+..." part and let that be the default bootstrap mechanism for the current hbase version? In 2.6 that would be an alias for hbase+zk, for 3.0 that would be an alias for hbase+rpc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the scheme part should contain the 'zk' or 'rpc' information, so we could know how to decode the other part of the URI.

And for other parameters, we could add them as the parameter of the URI in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe another choice is to introduce a parameter like 'registry'. So the URI could be

hbase://zk1:2181,zk2:2181,zk3:2181/xxx?registry=zk

or

hbase://rs1:16010/?registry=rpc

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally I don't think that option works well because the point of the scheme is to tell the code how to parse the authority. For rpc vs zk, we need different parsing. It seems odd (even if possible) to put that in the query param. To me we could put things in the query param that affect anything else about the connection other than how to parse the uri

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC the standard HBase client libraries don't support any protocol other than HBase RPC.
If they were ever to support, say, REST and/or Thrift, then we could still add new variants for them as needed.
However, the current API probably has a lot of built-in assumptions about using RPC, and even if that were solved, achieving and maintaining parity for the alternate protocols would be a monumental job, and we can make up new protocol variants for those when needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One other issue we run into with Phoenix quite a bit is that the cluster configurations still have to match in several aspects, like timeouts, TLS/SASL settings, etc.; otherwise the client either cannot even connect, or experiences errors due to timeout / buffer size etc. mismatches.

I think that some of that may also be a problem when configuring replication.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One other issue we run into with Phoenix quite a bit is that the cluster configurations still have to match in several aspects, like timeouts, TLS/SASL settings, etc.; otherwise the client either cannot even connect, or experiences errors due to timeout / buffer size etc. mismatches.

I think that some of that may also be a problem when configuring replication.

For replication in hbase, there is a configuration map in the peer configuration, so we could add configurations specific to connecting to the peer cluster.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.security.User;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Connection registry creator implementation for creating {@link ZKConnectionRegistry}.
 * <p/>
 * Handles connection URIs with the {@code hbase+zk} scheme. The authority part of the URI is used
 * as the zookeeper quorum and the path part is used as the parent znode.
 */
@InterfaceAudience.Private
public class ZKConnectionRegistryCreator implements ConnectionRegistryURIFactory {

  private static final Logger LOG = LoggerFactory.getLogger(ZKConnectionRegistryCreator.class);

  /**
   * Creates a {@link ZKConnectionRegistry} using the quorum and parent znode taken from the
   * given {@code uri}.
   * @param uri  the connection URI; its scheme is expected to match {@link #getScheme()}
   * @param conf the base configuration; a copy is modified, not this instance
   * @param user the user to create the registry for
   * @return a new {@link ZKConnectionRegistry}
   * @throws IOException if the registry can not be created
   */
  @Override
  public ConnectionRegistry create(URI uri, Configuration conf, User user) throws IOException {
    // URI schemes are case-insensitive, and ConnectionRegistryFactory selects this factory by the
    // lower-cased scheme, so the sanity check must ignore case as well.
    assert getScheme().equalsIgnoreCase(uri.getScheme());
    String quorum = uri.getAuthority();
    String parentZNode = uri.getPath();
    LOG.debug("connect to hbase cluster with zk quorum='{}' and parent='{}'", quorum,
      parentZNode);
    // Apply the overrides on a copy of the given configuration.
    Configuration c = new Configuration(conf);
    c.set(HConstants.CLIENT_ZOOKEEPER_QUORUM, quorum);
    c.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parentZNode);
    return new ZKConnectionRegistry(c, user);
  }

  /**
   * Returns the URI scheme handled by this factory, {@code hbase+zk}.
   */
  @Override
  public String getScheme() {
    return "hbase+zk";
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.hadoop.hbase.client.RpcConnectionRegistryCreator
org.apache.hadoop.hbase.client.ZKConnectionRegistryCreator