Conversation
src/brpc/channel.h
Outdated
| // Maximum: INT_MAX | ||
| int max_retry; | ||
|
|
||
| // When the error rate of an endpoint is too high, deactivate the node. |
There was a problem hiding this comment.
deactivate -> isolate, an endpoint -> a server node
src/brpc/channel.h
Outdated
| int max_retry; | ||
|
|
||
| // When the error rate of an endpoint is too high, deactivate the node. | ||
| // Note that this deactive is GLOBAL, and the endpoint will become |
There was a problem hiding this comment.
deactive -> isolation, endpoint -> node.
The node will become unavailable for all channels running in this process during the isolation.
src/brpc/socket.h
Outdated
| __attribute__ ((__format__ (__printf__, 3, 4))); | ||
| static int SetFailed(SocketId id); | ||
|
|
||
| void FeedbackCircuitBreaker(int error_code, int64_t latency_us) { |
There was a problem hiding this comment.
假如把CircuitBreaker放在SharedPart里的话,由于SharedPart的定义在cpp文件里,在Controller::Call::OnComplete里就拿不到CircuitBreaker了,会提示编译错误说SharedPart是incomplete type。
这样的话反而只能封装接口了
src/brpc/socket.h
Outdated
| butil::Mutex _stream_mutex; | ||
| std::set<StreamId> *_stream_set; | ||
|
|
||
| CircuitBreaker _circuit_breaker; |
There was a problem hiding this comment.
放的位置不对,应该在SharedPart里面。这是为了让连接池模式下(Controller::Call::peer_id代表的)主连接和(Controller::Call::sending_sock代表的)实际连接方便地共享状态。注意:其他连接方式当然也会用到SharedPart,只是主连接就是sending_sock
src/brpc/circuit_breaker.h
Outdated
| return true; | ||
| } | ||
|
|
||
| butil::DoublyBufferedData<ErrRecorderList> _recorders; |
There was a problem hiding this comment.
这个类有点over-design了,DBD和ErrRecorderList都是不必要的。用户就是实现一个接口OnCallEnd,这个对象会new出来设置在Socket.SharedPart中,健康检查前(Socket.WaitAndReset)中被delete掉。长短两个窗口就是inline地写在代码中(当然一些函数可以共用),而不需要用vector管理。
There was a problem hiding this comment.
“这个对象会new出来设置在Socket.SharedPart中” 是指CircuitBreaker对象需要new出来放在SharedPart中吗,我觉得一直放在SharedPart里,然后在WaitAndReset里调一下CircuitBreaker::Reset就可以了吧
src/brpc/circuit_breaker.h
Outdated
| #ifndef BRPC_CIRCUIT_BREAKER_H | ||
| #define BRPC_CIRCUIT_BREAKER_H | ||
|
|
||
| #include "butil/containers/doubly_buffered_data.h" |
25504df to
7f80dc4
Compare
src/brpc/controller.h
Outdated
| // Some of them are copied from `Channel' which might be destroyed | ||
| // after CallMethod. | ||
| int _max_retry; | ||
| bool _enable_circuit_breaker; |
There was a problem hiding this comment.
建议加到Controller::FLAGS_xxx里去。
src/brpc/socket.cpp
Outdated
| void Socket::FeedbackCircuitBreaker(int error_code, int64_t latency_us) { | ||
| if (!GetOrNewSharedPart()->circuit_breaker.OnCallEnd(error_code, latency_us)) { | ||
| LOG(ERROR) | ||
| << "Socket[" << *this << "] deactivted by circuit breaker"; |
src/brpc/controller.cpp
Outdated
| } | ||
| if (enable_circuit_breaker) { | ||
| SocketUniquePtr sock; | ||
| if (Socket::Address(peer_id, &sock) == 0) { |
There was a problem hiding this comment.
上面就有sending_sock,不用address了。现在放sharedpart里,sending_sock是和peer_id对应的(mainsock)共享的,所以就能访问到。注意sending_sock会被置空,这块代码位置可以调整一下。
src/brpc/circuit_breaker.cpp
Outdated
|
|
||
| int64_t CircuitBreaker::EmaErrorRecorder::UpdateLatency(int64_t latency) { | ||
| while (true) { | ||
| int64_t ema_latency = _ema_latency.load(butil::memory_order_relaxed); |
There was a problem hiding this comment.
load放循环外。失败的case会更新ema_latency的。另外这里写成do while更好。
src/brpc/circuit_breaker.cpp
Outdated
| //Ordinary response | ||
| while (true) { | ||
| int64_t ema_error_cost = | ||
| _ema_error_cost.load(butil::memory_order_relaxed); |
src/brpc/circuit_breaker.cpp
Outdated
| int sample_count = _sample_count.fetch_add(1, butil::memory_order_relaxed); | ||
| bool init_completed = _init_completed.load(butil::memory_order_acquire); | ||
| if (!init_completed && sample_count >= _window_size) { | ||
| _init_completed.store(true, butil::memory_order_release); |
There was a problem hiding this comment.
没看出这个init_completed意义在哪,每次判下_sample_count和_window_size关系不就行了?
src/brpc/circuit_breaker.cpp
Outdated
| DEFINE_int32(circuit_breaker_min_error_cost_us, 100, | ||
| "The minimum error_cost, when the ema of error cost is less than this " | ||
| "value, it will be set to zero."); | ||
| DEFINE_int32(circuit_breaker_max_failed_latency_mutliple, 2, |
There was a problem hiding this comment.
mutliple -> multiple。这主要是针对超时的吧,2倍够么?
655fbb0 to
6e5a863
Compare
373297e to
2ae2559
Compare
src/brpc/socket.cpp
Outdated
|
|
||
| void Socket::FeedbackCircuitBreaker(int error_code, int64_t latency_us) { | ||
| if (!GetOrNewSharedPart()->circuit_breaker.OnCallEnd(error_code, latency_us)) { | ||
| LOG(ERROR) << "Socket[" << *this << "] deactivted by circuit breaker"; |
There was a problem hiding this comment.
deactivated 改为 isolated 以保持用词统一
src/brpc/controller.h
Outdated
| SocketId peer_id; // main server id | ||
| int64_t begin_time_us; // sent real time. | ||
| SocketId peer_id; // main server id | ||
| int64_t begin_time_us; // sent real time. |
src/brpc/circuit_breaker.cpp
Outdated
| "the average latency of the success requests."); | ||
| DEFINE_int32(circuit_breaker_min_isolation_duration_ms, 100, | ||
| "Minimum isolation duration in milliseconds"); | ||
| DEFINE_int32(circuit_breaker_max_isolation_duration_ms, 300000, |
src/brpc/circuit_breaker.cpp
Outdated
| if (now_time_ms - _last_reset_time_ms < max_isolation_duration_ms) { | ||
| isolation_duration_ms *= 2; | ||
| isolation_duration_ms = | ||
| std::min(isolation_duration_ms, max_isolation_duration_ms); |
src/brpc/circuit_breaker.cpp
Outdated
| DEFINE_int32(circuit_breaker_min_error_cost_us, 500, | ||
| "The minimum error_cost, when the ema of error cost is less than this " | ||
| "value, it will be set to zero."); | ||
| DEFINE_int32(circuit_breaker_max_failed_latency_mutilple, 2, |
There was a problem hiding this comment.
mutilple -> multiple,全部搜一下吧
src/brpc/socket.h
Outdated
| #include "brpc/options.pb.h" // ConnectionType | ||
| #include "brpc/socket_id.h" // SocketId | ||
| #include "brpc/socket_message.h" // SocketMessagePtr | ||
| #include "brpc/circuit_breaker.h" // CircuitBreaker |
src/brpc/socket.cpp
Outdated
| void Socket::FeedbackCircuitBreaker(int error_code, int64_t latency_us) { | ||
| if (!GetOrNewSharedPart()->circuit_breaker.OnCallEnd(error_code, latency_us)) { | ||
| LOG(ERROR) << "Socket[" << *this << "] deactivted by circuit breaker"; | ||
| SetFailed(); |
…health check will not be started.
b877c0d to
036a8a4
Compare
增加了CircuitBreaker:
还存在的问题: