From 9d3fedd640ccfc94c96194382a1981f9c9307582 Mon Sep 17 00:00:00 2001 From: Alex Markuze Date: Thu, 22 Sep 2022 23:06:27 +0300 Subject: [PATCH 1/2] [HApropxy] new haproxy config and resources --- terraform/helm/aptos-node/files/haproxy.cfg | 164 +++++++++++++++++--- terraform/helm/aptos-node/values.yaml | 15 +- 2 files changed, 149 insertions(+), 30 deletions(-) diff --git a/terraform/helm/aptos-node/files/haproxy.cfg b/terraform/helm/aptos-node/files/haproxy.cfg index de9bf905ca269..c48d4814ab78b 100644 --- a/terraform/helm/aptos-node/files/haproxy.cfg +++ b/terraform/helm/aptos-node/files/haproxy.cfg @@ -1,47 +1,161 @@ global log stdout len 10240 format raw local0 - maxconn 500000 - nbthread 16 + + # Config manual: https://cbonte.github.io/haproxy-dconv/2.5/configuration.html + # magic values : terraform/helm/aptos-node/values.yaml + + maxconn 1024 + # This limits the whole HA Proxy impacting both validators and other frontends + # maxconnrate 128 + nbthread 4 + + #4MB for client facing sndbuf/rcvbuf. -- 100Mb/s with 300 mili latency (e.g., us-asia) + #tune.sndbuf.client {{ $.Values.haproxy.limits.validator.tcpBufSize }} + tune.rcvbuf.client {{ $.Values.haproxy.limits.validator.tcpBufSize }} + user nobody +## TCP port defaults defaults log global - option tcplog - maxconn 500000 - timeout queue 1s - timeout connect 10s - timeout server 60s - timeout client 60s - timeout client-fin 5s - -frontend validator + mode tcp + #option tcplog + option dontlog-normal + log-format "%ci:%cp - %sp[%rt] [%t] %ft %Tw/%Tc/%Tt %B [%ts] %ac/%fc/%bc/%sc/%rc %sq/%bq" + maxconn 1024 #Validator network mesh + FN x2 + retries 3 + timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. 
+ #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/ + timeout connect 5s + # enough for 1 successful + 5 unsuccessful HB(10 sec interval) + 20 sec timeout + timeout server 80s + timeout client 80s + + timeout client-fin 3s #How long to hold an interrupted client connection. + timeout server-fin 1s + +frontend fe-{{ include "aptos-validator.fullname" $ }}-validator bind :6180 - default_backend validator + default_backend {{ include "aptos-validator.fullname" $ }}-validator # Deny requests from blocked IPs - tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips } + + acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }} + + stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory + tcp-request connection track-sc0 src #update table with src ip as key, store in sc0 + + #We count the rate-limit manually -- Will be more CPU intensive but will allow whitelisted IPs to enter, plus up to rateLimitSession non-blacklisted IPs. + tcp-request connection track-sc1 int(1) table CONN_RATE + + #tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with: + #parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug! 
+ #<1> Mark Blacklist + tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate + + #A blacklisted connection is silently dropped, so there is no reason to count it for rateLimitSession + tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 } + + # an IP that was blacklisted due to too many unsuccessful tcp attempts + #-1- Enforce Blacklist + tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 } - # Limit to N TCP connections per minute per source IP - stick-table type ip size 500k expire 1m store gpc0_rate(1m) - tcp-request connection track-sc0 src - # TODO: Reject at content phase for now so we get logs, but this should be - # done at connection phase for higher efficiency - tcp-request content reject if { sc_gpc0_rate(0) ge {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }} } - tcp-request content sc-inc-gpc0(0) unless { nbsrv(validator) eq 0 } + #an IP that had a successful connection. + #-2- Allow Whitelist + tcp-request connection accept if { sc0_get_gpc1() ge 1 } -backend validator + #-3- Enforce RateLimit + tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} } + + # This is a successful connection i.e., was sent more than 4K bytes in the last 30 min (the disabled gpt0 variant used 16K) + #tcp-request session sc-set-gpt0(0) int(...) 
if { sc0_kbytes_out gt 16 } + #<2> Mark Whitelist + tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 } + + # -4- Reject sessions from a source whose outbound byte rate is too high (NOTE(review): session rules are evaluated when the session is created; confirm this also breaks an already established connection) + tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} } + +backend {{ include "aptos-validator.fullname" $ }}-validator default-server maxconn 1024 {{ if $.Values.haproxy.config.send_proxy_protocol }}send-proxy-v2{{ end }} server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6180 +frontend fe-{{ include "aptos-validator.fullname" $ }}-validator-fn + bind :6181 + default_backend {{ include "aptos-validator.fullname" $ }}-validator-fn + + # Deny requests from blocked IPs + tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips } + + acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }} + + stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory + tcp-request connection track-sc0 src #update table with src ip as key, store in sc0 + + #We count the rate-limit manually -- Will be more CPU intensive but will allow whitelisted IPs to enter, plus up to rateLimitSession non-blacklisted IPs. + tcp-request connection track-sc1 int(1) table CONN_RATE + + #tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with: + #parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug! 
+ #<1> Mark Blacklist + tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate + + #A blacklisted connection is silently dropped, so there is no reason to count it for rateLimitSession + tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 } + + # an IP that was blacklisted due to too many unsuccessful tcp attempts + #-1- Enforce Blacklist + tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 } + + #an IP that had a successful connection. + #-2- Allow Whitelist + tcp-request connection accept if { sc0_get_gpc1() ge 1 } + + #-3- Enforce RateLimit + tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} } + + # This is a successful connection i.e., was sent more than 4K bytes in the last 30 min (the disabled gpt0 variant used 16K) + #tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 } + #<2> Mark Whitelist + tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 } + + # -4- Reject sessions from a source whose outbound byte rate is too high (NOTE(review): session rules are evaluated when the session is created; confirm this also breaks an already established connection) + tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} } + +backend {{ include "aptos-validator.fullname" $ }}-validator-fn + default-server maxconn 16 + server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6181 + + +#CONN_RATE holds a single entry (key 1): used for determining the global conn rate +backend CONN_RATE + stick-table type integer size 1 expire 10m store gpc1,gpc1_rate(1s) + +################## HTTP: metrics & API +defaults + mode http #NOTE(review): this defaults section has no log global, and a new defaults section does not inherit from the previous one; confirm the HTTP frontends still log + retries 3 + timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/ + timeout connect 5s + timeout server 60s #what makes sense? for silence between nodes? + timeout client 60s + + timeout client-fin 3s #How long to hold an interrupted client connection. 
+ timeout server-fin 1s + + timeout http-request 60s #len of http request + timeout http-keep-alive 2s + + rate-limit sessions 256 + frontend validator-metrics mode http option httplog bind :9102 default_backend validator-metrics - http-request add-header Forwarded "for=%ci" # Deny requests from blocked IPs tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" backend validator-metrics mode http @@ -55,10 +169,10 @@ frontend validator-api option httplog bind :8180 default_backend validator-api - http-request add-header Forwarded "for=%ci" # Deny requests from blocked IPs tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" backend validator-api mode http @@ -87,10 +201,10 @@ frontend {{ $config.name }}-api default_backend {{ $config.name }}-api # add Forwarded header, which behaves differently than X-Forwarded-For # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded - http-request add-header Forwarded "for=%ci" # Deny requests from blocked IPs tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" backend {{ $config.name }}-api mode http @@ -102,10 +216,10 @@ frontend {{ $config.name }}-metrics option httplog bind :{{ add 9103 $index }} default_backend {{ $config.name }}-metrics - http-request add-header Forwarded "for=%ci" # Deny requests from blocked IPs tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" backend {{ $config.name }}-metrics mode http diff --git a/terraform/helm/aptos-node/values.yaml b/terraform/helm/aptos-node/values.yaml index f0c78941115a5..2878d8f2b2bd0 100644 --- a/terraform/helm/aptos-node/values.yaml +++ b/terraform/helm/aptos-node/values.yaml @@ -31,18 +31,23 @@ haproxy: pullPolicy: IfNotPresent resources: limits: - cpu: 1.5 - 
memory: 2Gi + cpu: 4 + memory: 8Gi requests: - cpu: 1.5 - memory: 2Gi + cpu: 4 + memory: 8Gi nodeSelector: {} tolerations: [] affinity: {} limits: validator: - # -- Limit the number of connections per IP address per minute + # -- Limit the number of connections per source IP per minute (compared against conn_rate(1m) in haproxy.cfg) connectionsPerIPPerMin: 2 + # Sustained 100mb/s for 10 sec. + maxBytesOutRate10sec: 134217728 + rateLimitSession: 256 + tcpBufSize: 524288 + config: # -- Whether to send Proxy Protocol v2 send_proxy_protocol: &send_proxy_protocol false From 7eda68e1a338304eaeb3a8ca2a470d1a692bf4c0 Mon Sep 17 00:00:00 2001 From: Alex Markuze Date: Thu, 22 Sep 2022 23:07:23 +0300 Subject: [PATCH 2/2] [Docker] Adding HAProxy to docker compose --- docker/compose/aptos-node/blocked.ips | 1 + docker/compose/aptos-node/docker-compose.yaml | 28 ++- docker/compose/aptos-node/haproxy.cfg | 180 ++++++++++++++++++ 3 files changed, 204 insertions(+), 5 deletions(-) create mode 100644 docker/compose/aptos-node/blocked.ips create mode 100644 docker/compose/aptos-node/haproxy.cfg diff --git a/docker/compose/aptos-node/blocked.ips b/docker/compose/aptos-node/blocked.ips new file mode 100644 index 0000000000000..794ce517b389e --- /dev/null +++ b/docker/compose/aptos-node/blocked.ips @@ -0,0 +1 @@ +# Add Blocked Ips here diff --git a/docker/compose/aptos-node/docker-compose.yaml b/docker/compose/aptos-node/docker-compose.yaml index 7e715b9155f34..08bc161f4bf0b 100644 --- a/docker/compose/aptos-node/docker-compose.yaml +++ b/docker/compose/aptos-node/docker-compose.yaml @@ -3,6 +3,29 @@ version: "3.8" services: + haproxy: + image: haproxytech/haproxy-debian:2.2 # NOTE(review): haproxy.cfg links the 2.5 manual; confirm every directive exists in 2.2 + volumes: + - type: bind + source: ./haproxy.cfg + target: /usr/local/etc/haproxy/haproxy.cfg + - type: bind + source: ./blocked.ips + target: /usr/local/etc/haproxy/blocked.ips + networks: + - shared + expose: + - 6180 + - 6181 + - 9101 + - 8180 + ports: + - "6180:6180" + - "6181:6181" + - "80:8180" # REST API: the validator-api frontend in haproxy.cfg binds :8180, nothing listens on container port 80 + - "9101:9101" + + validator: image: 
"${VALIDATOR_IMAGE_REPO:-aptoslabs/validator}:${IMAGE_TAG:-testnet}" networks: @@ -25,11 +48,6 @@ services: target: /opt/aptos/genesis/validator-identity.yaml command: ["/usr/local/bin/aptos-node", "-f", "/opt/aptos/etc/validator.yaml"] restart: unless-stopped - ports: - - "6180:6180" - - "6181:6181" - - "80:8080" - - "9101:9101" expose: - 6180 - 6181 diff --git a/docker/compose/aptos-node/haproxy.cfg b/docker/compose/aptos-node/haproxy.cfg new file mode 100644 index 0000000000000..477120580aa44 --- /dev/null +++ b/docker/compose/aptos-node/haproxy.cfg @@ -0,0 +1,180 @@ +global + log stdout len 10240 format raw local0 + + # Config manual: https://cbonte.github.io/haproxy-dconv/2.5/configuration.html + # magic values : terraform/helm/aptos-node/values.yaml + + maxconn 1024 + # This limits the whole HA Proxy impacting both validators and other frontends + # maxconnrate 128 + nbthread 4 + + #4MB for client facing sndbuf/rcvbuf. -- 100Mb/s with 300 mili latency (e.g., us-asia) + tune.sndbuf.client 4194304 #tcpBufSize + tune.rcvbuf.client 4194304 #tcpBufSize + + user nobody + +## TCP port defaults +defaults + log global + mode tcp + #option tcplog + option dontlog-normal + log-format "%ci:%cp - %sp[%rt] [%t] %ft %Tw/%Tc/%Tt %B [%ts] %ac/%fc/%bc/%sc/%rc %sq/%bq" + maxconn 1024 #Validator network mesh + FN x2 + retries 3 + timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/ + timeout connect 5s + # enough for 1 successfull + 5 unsuccessfull HB(10 sec interval) + 20 sec timeout + timeout server 80s + timeout client 80s + + timeout client-fin 3s #How long to hold an interrupted client connection. 
+ timeout server-fin 1s + +frontend fe-validator + bind :6180 + default_backend validator + + # Deny requests from blocked IPs + tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips } + + # We deem a connection rate high when an IP is attempting to reconnect more than twice a min + acl ip_high_conn_rate sc0_conn_rate gt 2 + + stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory + tcp-request connection track-sc0 src #update table with src ip as key, store in sc0 + + #We count the rate-limit manually -- Will be more CPU intensive but will allow whitelisted IPs to enter, plus up to rateLimitSession non-blacklisted IPs. + tcp-request connection track-sc1 int(1) table CONN_RATE + + #tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with: + #parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug! + #<1> Mark Blacklist + tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate + + #A blacklisted connection is silently dropped, so there is no reason to count it for rateLimitSession + tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 } + + # an IP that was blacklisted due to too many unsuccessful tcp attempts + #-1- Enforce Blacklist + tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 } + + #an IP that had a successful connection. + #-2- Allow Whitelist + tcp-request connection accept if { sc0_get_gpc1() ge 1 } + + #-3- Enforce RateLimit. Connection attempts by *new* IPs/sec + tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt 256 } #rateLimitSession + + # This is a successful connection i.e., was sent more than 4K bytes in the last 30 min (the disabled gpt0 variant used 16K) + #tcp-request session sc-set-gpt0(0) int(...) 
if { sc0_kbytes_out gt 16 } + #<2> Mark Whitelist + tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 } + + # -4- Reject sessions from a source whose outbound byte rate is too high (NOTE(review): session rules are evaluated when the session is created; confirm this also breaks an already established connection) + # maxBytesOutRate10sec: 100mb/s for 10 sec + tcp-request session reject if { sc0_bytes_out_rate gt 134217728 } + +backend validator + default-server maxconn 1024 + server validator validator:6180 + +frontend fe-fullnode + bind :6181 + default_backend validator-fn + + # Deny requests from blocked IPs + tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips } + + acl ip_high_conn_rate sc0_conn_rate gt 2 + + stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory + tcp-request connection track-sc0 src #update table with src ip as key, store in sc0 + + #We count the rate-limit manually -- Will be more CPU intensive but will allow whitelisted IPs to enter, plus up to rateLimitSession non-blacklisted IPs. + tcp-request connection track-sc1 int(1) table CONN_RATE + + #tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with: + #parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug! + #<1> Mark Blacklist + tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate + + #A blacklisted connection is silently dropped, so there is no reason to count it for rateLimitSession + tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 } + + # an IP that was blacklisted due to too many unsuccessful tcp attempts + #-1- Enforce Blacklist + tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 } + + #an IP that had a successful connection. + #-2- Allow Whitelist + tcp-request connection accept if { sc0_get_gpc1() ge 1 } + + #-3- Enforce RateLimit. 
Connection attempts by *new* IPs/sec + tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt 256 } #rateLimitSession + + # This is a successful connection i.e., was sent more than 4K bytes in the last 30 min (the disabled gpt0 variant used 16K) + #tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 } + #<2> Mark Whitelist + tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 } + + # -4- Reject sessions from a source whose outbound byte rate is too high (NOTE(review): session rules are evaluated when the session is created; confirm this also breaks an already established connection) + # maxBytesOutRate10sec: 100mb/s for 10 sec + tcp-request session reject if { sc0_bytes_out_rate gt 134217728 } + +backend validator-fn + default-server maxconn 16 + server validator validator:6181 + +#CONN_RATE holds a single entry (key 1): used for determining the global conn rate +backend CONN_RATE + stick-table type integer size 1 expire 10m store gpc1,gpc1_rate(1s) + +################## HTTP: metrics & API +defaults + mode http #NOTE(review): this defaults section has no log global, and a new defaults section does not inherit from the previous one; confirm the HTTP frontends still log + retries 3 + timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/ + timeout connect 5s + timeout server 60s #what makes sense? for silence between nodes? + timeout client 60s + + timeout client-fin 3s #How long to hold an interrupted client connection. 
+ timeout server-fin 1s + + timeout http-request 60s #len of http request + timeout http-keep-alive 2s + + rate-limit sessions 256 + +frontend validator-metrics + mode http + option httplog + bind :9101 + default_backend validator-metrics + + # Deny requests from blocked IPs + tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" + +backend validator-metrics + mode http + default-server maxconn 1024 + server validator validator:9101 + +frontend validator-api + mode http + option httplog + bind :8180 + default_backend validator-api + + # Deny requests from blocked IPs + tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips } + http-request add-header Forwarded "for=%ci" + +backend validator-api + mode http + default-server maxconn 1024 + server validator validator:8080