Skip to content

Commit

Permalink
[HApropxy] new haproxy config and resources
Browse files Browse the repository at this point in the history
  • Loading branch information
Markuze authored and perryjrandall committed Sep 28, 2022
1 parent f87b98d commit 74b8581
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 30 deletions.
164 changes: 139 additions & 25 deletions terraform/helm/aptos-node/files/haproxy.cfg
Original file line number Diff line number Diff line change
@@ -1,47 +1,161 @@
global
log stdout len 10240 format raw local0
maxconn 500000
nbthread 16

# Config manual: https://cbonte.github.io/haproxy-dconv/2.5/configuration.html
# magic values : terraform/helm/aptos-node/values.yaml

maxconn 1024
# This limits the whole HA Proxy impacting both validators and other frontends
# maxconnrate 128
nbthread 4

# Client-facing sndbuf/rcvbuf: ~4MB is what 100Mb/s at 300 ms latency (e.g., US-Asia) requires.
# The size actually applied is haproxy.limits.validator.tcpBufSize from values.yaml.
#tune.sndbuf.client {{ $.Values.haproxy.limits.validator.tcpBufSize }}
tune.rcvbuf.client {{ $.Values.haproxy.limits.validator.tcpBufSize }}

user nobody

## TCP port defaults
defaults
log global
option tcplog
maxconn 500000
timeout queue 1s
timeout connect 10s
timeout server 60s
timeout client 60s
timeout client-fin 5s

frontend validator
mode tcp
#option tcplog
option dontlog-normal
log-format "%ci:%cp - %sp[%rt] [%t] %ft %Tw/%Tc/%Tt %B [%ts] %ac/%fc/%bc/%sc/%rc %sq/%bq"
maxconn 1024 #Validator network mesh + FN x2
retries 3
timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/
timeout connect 5s
# enough for 1 successful + 5 unsuccessful HB (10 sec interval) + 20 sec timeout
timeout server 80s
timeout client 80s

timeout client-fin 3s #How long to hold an interrupted client connection.
timeout server-fin 1s

frontend fe-{{ include "aptos-validator.fullname" $ }}-validator
bind :6180
default_backend validator
default_backend {{ include "aptos-validator.fullname" $ }}-validator

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips }

acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }}

stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory
tcp-request connection track-sc0 src #update table with src ip as key, store in sc0

# We count the rate limit manually -- this is more CPU intensive, but it lets whitelisted IPs in and admits up to rateLimitSession non-blacklisted IPs.
tcp-request connection track-sc1 int(1) table CONN_RATE

#tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with:
#parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug!
#<1> Mark Blacklist
tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate

# A blacklisted connection is silently dropped, so there is no reason to count it toward rateLimitSession
tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 }

# An IP that was blacklisted due to too many unsuccessful TCP attempts
#-1- Enforce Blacklist
tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 }

# Limit to N TCP connections per minute per source IP
stick-table type ip size 500k expire 1m store gpc0_rate(1m)
tcp-request connection track-sc0 src
# TODO: Reject at content phase for now so we get logs, but this should be
# done at connection phase for higher efficiency
tcp-request content reject if { sc_gpc0_rate(0) ge {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }} }
tcp-request content sc-inc-gpc0(0) unless { nbsrv(validator) eq 0 }
# An IP that had a successful connection.
#-2- Allow Whitelist
tcp-request connection accept if { sc0_get_gpc1() ge 1 }

backend validator
#-3- Enforce RateLimit
tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} }

# This is a successful connection, i.e., its source IP was sent more than 4 kB (the sc0_kbytes_out threshold used below) within the 30 min stick-table expiry
#tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 }
#<2> Mark Whitelist
tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 }

# -4- Break a long high rate connection
tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} }

# Backend for the validator network frontend (port 6180).
# Caps the server at 1024 concurrent connections and, when
# haproxy.config.send_proxy_protocol is set, prepends a PROXY protocol v2 header.
backend {{ include "aptos-validator.fullname" $ }}-validator
default-server maxconn 1024 {{ if $.Values.haproxy.config.send_proxy_protocol }}send-proxy-v2{{ end }}
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6180

# TCP frontend on port 6181, forwarded to the "-validator-fn" backend.
# The tcp-request rules below are evaluated in order; the numbered markers
# (<1>/<2> = mark, -1-..-4- = enforce) describe the intended pipeline:
#   blocked.ips drop -> blacklist drop -> whitelist accept -> global rate limit -> per-IP byte-rate limit.
frontend fe-{{ include "aptos-validator.fullname" $ }}-validator-fn
bind :6181
default_backend {{ include "aptos-validator.fullname" $ }}-validator-fn

# Silently drop connections from IPs listed in the blocked.ips file
tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips }

# True when the tracked source IP's connection rate (1 min window, see stick-table) exceeds connectionsPerIPPerMin
acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }}

# Per-source-IP table: gpc0 = blacklist flag, gpc1 = whitelist flag, plus connection-rate and bytes-out counters.
stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory
tcp-request connection track-sc0 src #update table with src ip as key, store in sc0

# We count the rate limit manually -- this is more CPU intensive, but it lets whitelisted IPs in and admits up to rateLimitSession non-blacklisted IPs.
# sc1 tracks the constant key 1 in the CONN_RATE table, i.e. a single entry shared by all connections.
tcp-request connection track-sc1 int(1) table CONN_RATE

#tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with:
#parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug!
#<1> Mark Blacklist
tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate

# A blacklisted connection is silently dropped, so there is no reason to count it toward rateLimitSession
tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 }

# An IP that was blacklisted due to too many unsuccessful TCP attempts
#-1- Enforce Blacklist
tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 }

# An IP that had a successful connection.
#-2- Allow Whitelist
tcp-request connection accept if { sc0_get_gpc1() ge 1 }

#-3- Enforce RateLimit
tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} }

# This is a successful connection, i.e., its source IP was sent more than 4 kB (the threshold in the live rule below) within the 30 min stick-table expiry
#tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 }
#<2> Mark Whitelist
tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 }

# -4- Break a long high-rate connection: reject when the IP's bytes-out rate (10 s window) exceeds maxBytesOutRate10sec
tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} }

# Backend for the port-6181 frontend; limits the server to 16 concurrent connections.
# NOTE(review): the server entry reuses the "-validator" name/host (only the port differs) -- confirm this is intended.
backend {{ include "aptos-validator.fullname" $ }}-validator-fn
default-server maxconn 16
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6181


# CONN_RATE holds a single entry (key 1) and is used for determining the global connection rate.
# It has no servers: this backend exists only to host the stick-table that the
# frontends reference via "track-sc1 int(1) table CONN_RATE".
backend CONN_RATE
stick-table type integer size 1 expire 10m store gpc1,gpc1_rate(1s)

################## HTTP: metrics & API
# Defaults applied to the HTTP proxies declared after this point.
defaults
mode http
retries 3
timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/
timeout connect 5s
timeout server 60s #what makes sense? for silence between nodes?
timeout client 60s

timeout client-fin 3s #How long to hold an interrupted client connection.
timeout server-fin 1s

timeout http-request 60s #len of http request
timeout http-keep-alive 2s

# Cap on new sessions per second accepted by each frontend using these defaults
rate-limit sessions 256

frontend validator-metrics
mode http
option httplog
bind :9102
default_backend validator-metrics
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend validator-metrics
mode http
Expand All @@ -55,10 +169,10 @@ frontend validator-api
option httplog
bind :8180
default_backend validator-api
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend validator-api
mode http
Expand Down Expand Up @@ -87,10 +201,10 @@ frontend {{ $config.name }}-api
default_backend {{ $config.name }}-api
# add Forwarded header, which behaves differently than X-Forwarded-For
# see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend {{ $config.name }}-api
mode http
Expand All @@ -102,10 +216,10 @@ frontend {{ $config.name }}-metrics
option httplog
bind :{{ add 9103 $index }}
default_backend {{ $config.name }}-metrics
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend {{ $config.name }}-metrics
mode http
Expand Down
15 changes: 10 additions & 5 deletions terraform/helm/aptos-node/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,23 @@ haproxy:
pullPolicy: IfNotPresent
resources:
limits:
cpu: 1.5
memory: 2Gi
cpu: 4
memory: 8Gi
requests:
cpu: 1.5
memory: 2Gi
cpu: 4
memory: 8Gi
nodeSelector: {}
tolerations: []
affinity: {}
limits:
validator:
# -- Limit the number of connections per IP address per minute
# -- Limit the number of connections per IP address per minute
connectionsPerIPPerMin: 2
# Sustained ~100 Mb/s for 10 sec (134217728 bytes = 128 MiB per 10 sec window).
maxBytesOutRate10sec: 134217728
rateLimitSession: 256
tcpBufSize: 524288

config:
# -- Whether to send Proxy Protocol v2
send_proxy_protocol: &send_proxy_protocol false
Expand Down

0 comments on commit 74b8581

Please sign in to comment.