Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

FLUME-286: DFO mode does not detect network failure

  • Loading branch information...
commit d9b316c534f0f85b4942ede8d764a5453123e65f 1 parent ccfaf6b
Jonathan Hsieh jmhsieh authored
7 RELEASENOTES
View
@@ -15,6 +15,13 @@ configurations or node maps written by masters from <0.9.2
installations. There is no upgrade path yet to preserve both node maps
and configurations, but one is planned for the 0.9.2 release.
+The bug that caused FLUME-286 has been fixed for Thrift RPCs but not
+for Avro-based RPC mechanisms. Previous to the fix, the Thrift
+version would not recover when a downstream RPC server had a network
+partion such as a wire cut or power down. Unlike killing a server,
+these situations provided no failure feedback. The Avro version
+currently does not properly detect or recover from these situations so
+the DFO mode can lose data. This is reported as issue FLUME-313.
Flume 0.9.1 Update 1 (CDH3b3) Release Notes
===========================================
13 conf/flume-conf.xml
View
@@ -290,5 +290,18 @@ configuration values placed in flume-site.xml. -->
<value>/tmp/flume-${user.name}-zk</value>
<description>The base directory in which the ZBCS stores data.</description>
</property>
+
+
+ <!-- ================================================== -->
+ <!-- Thrift RPC settings ============================== -->
+ <!-- ================================================== -->
+
+ <property>
+ <name>flume.thrift.socket.timeout.ms</name>
+ <value>10000</value>
+ <description>Milliseconds with no transmissions before thrift
+ client times out a connection</description>
+ </property>
+
</configuration>
4 src/java/com/cloudera/flume/agent/ThriftMasterRPC.java
View
@@ -36,6 +36,7 @@
import com.cloudera.flume.conf.thrift.ThriftFlumeClientServer;
import com.cloudera.flume.conf.FlumeConfigData;
+import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.thrift.ThriftFlumeClientServer.Client;
import com.cloudera.flume.handlers.endtoend.AckListener;
import com.cloudera.flume.handlers.endtoend.CollectorAckListener;
@@ -65,7 +66,8 @@
ThriftMasterRPC(String masterHostname, int masterPort) throws IOException {
Preconditions.checkState(masterClient == null,
"client already initialized -- double init not allowed");
- TTransport masterTransport = new TSocket(masterHostname, masterPort);
+ int timeout = FlumeConfiguration.get().getThriftSocketTimeoutMs();
+ TTransport masterTransport = new TSocket(masterHostname, masterPort, timeout);
TProtocol protocol = new TBinaryProtocol(masterTransport);
try {
masterTransport.open();
20 src/java/com/cloudera/flume/conf/FlumeConfiguration.java
View
@@ -52,8 +52,11 @@
/**
* Returns the 'FLUME_HOME' location. Taken in order of precedence:
+ *
* - Java system property 'flume.home'
+ *
* - $FLUME_HOME in the environment.
+ *
* - null if neither of these are set.
*/
public static String getFlumeHome() {
@@ -71,9 +74,13 @@ public static String getFlumeHome() {
/**
* Returns the 'FLUME_CONF_DIR' location. Taken in order of precedence:
+ *
* - Java system property 'flume.conf.dir'
+ *
* - $FLUME_CONF_DIR in the environment
+ *
* - getFlumeHome()/conf
+ *
* - ./conf
*/
public static String getFlumeConfDir() {
@@ -168,6 +175,7 @@ protected FlumeConfiguration(boolean loadDefaults) {
public static final String POLLER_QUEUESIZE = "flume.poller.queuesize";
public static final String THRIFT_QUEUESIZE = "flume.thrift.queuesize";
public static final String THRIFT_CLOSE_MAX_SLEEP = "flume.thrift.close.maxsleep";
+ public static final String THRIFT_SOCKET_TIMEOUT_MS = "flume.thrift.socket.timeout.ms";
public static final String INSISTENTOPEN_INIT_BACKOFF = "flume.inisistentOpen.init.backoff";
public static final String HISTORY_DEFAULTPERIOD = "flume.countHistory.period";
public static final String HISTORY_MAXLENGTH = "flume.history.maxlength";
@@ -550,6 +558,10 @@ public int getThriftQueueSize() {
return getInt(THRIFT_QUEUESIZE, 1000);
}
+ public int getThriftSocketTimeoutMs() {
+ return getInt(THRIFT_SOCKET_TIMEOUT_MS, 10000);
+ }
+
/**
* Initial backoff in mills after a failed open attempt in an insistentOpen
* decorator
@@ -702,7 +714,6 @@ public int getReportServerPort() {
return getInt(REPORT_SERVER_PORT, DEFAULT_REPORT_SERVER_PORT);
}
-
/**
* This returns the type of RPC mechanism (Thrift or Avro) chosen for the
* FlumeReportServer.
@@ -716,11 +727,12 @@ public String getReportServerRPC() {
}
}
// defaulting to Thrift with a polite warning
- LOG.warn("flume.report.server.rpc.type incorrectly defined, should be either"
- + " \"THRIFT\" or \"AVRO\". Defaulting to \"THRIFT\"");
+ LOG.warn("flume.report.server.rpc.type incorrectly defined, "
+ + "should be either \"THRIFT\" or \"AVRO\". "
+ + "Defaulting to \"THRIFT\"");
return RPC_TYPE_THRIFT;
}
-
+
/**
* If MASTER_HEARTBEAT_PORT is set, we use that as our heartbeat port. If not,
* we look at the list of server:port pairs in MASTER_HEARTBEAT_SERVERS, in
5 src/java/com/cloudera/flume/handlers/thrift/ThriftEventSink.java
View
@@ -94,13 +94,14 @@ public void close() throws IOException {
public void open() throws IOException {
try {
+ int timeout = FlumeConfiguration.get().getThriftSocketTimeoutMs();
if (nonblocking) {
// non blocking must use "Framed transport"
- transport = new TSocket(host, port);
+ transport = new TSocket(host, port, timeout);
stats = new TStatsTransport(transport);
transport = new TFramedTransport(stats);
} else {
- transport = new TSocket(host, port);
+ transport = new TSocket(host, port, timeout);
stats = new TStatsTransport(transport);
transport = stats;
}
Please sign in to comment.
Something went wrong with that request. Please try again.