From 5ef0591d81762188f0f28a01187035bc678b5bd8 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Wed, 12 Sep 2018 15:50:31 -0700 Subject: [PATCH] More puma tuning - Use nakayoshi_fork to reduce copy-on-write memory usage - Adjust jemalloc tuning parameters for lower memory usage - Reduce threads per worker to 5 - Use a forked Puma gem (wjordan/puma) for more stable request balancing and out-of-band GC --- Gemfile | 5 ++++- Gemfile.lock | 12 ++++++++++-- cookbooks/cdo-jemalloc/attributes/default.rb | 12 ++++++------ dashboard/config/puma.rb | 13 ++++++++++++- pegasus/config/puma.rb | 15 ++++++++++++++- 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/Gemfile b/Gemfile index 0985e57c0c8c0..f9051dc7a9e76 100644 --- a/Gemfile +++ b/Gemfile @@ -131,7 +131,10 @@ gem 'open_uri_redirections', require: false, group: [:development, :staging, :te # Ref: https://github.com/tmm1/gctools/pull/17 gem 'gctools', github: 'wjordan/gctools', ref: 'ruby-2.5' -gem 'puma' +# Optimizes copy-on-write memory usage with GC before web-application fork. 
+gem 'nakayoshi_fork' +# Ref: https://github.com/puma/puma/pull/1646 +gem 'puma', github: 'wjordan/puma', ref: 'out_of_band' gem 'unicorn', '~> 5.1.0' gem 'chronic', '~> 0.10.2' diff --git a/Gemfile.lock b/Gemfile.lock index e669c4f59f15e..749e6c4cee21b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -164,6 +164,13 @@ GIT multi_json (>= 1.0.3) omniauth-oauth2 (~> 1.4) +GIT + remote: https://github.com/wjordan/puma.git + revision: 2ab96d67341a0ae1a4baf6bf80790389abc159ac + ref: out_of_band + specs: + puma (3.12.0) + GIT remote: https://github.com/wjordan/sass-rails.git revision: 06e833d92d083fbcb137364ffebc597b2577db23 @@ -553,6 +560,7 @@ GEM multipart-post (2.0.0) mustermann (1.0.1) mysql2 (0.3.21) + nakayoshi_fork (0.0.4) naturally (2.1.0) net-http-persistent (2.9.4) net-scp (1.2.1) @@ -623,7 +631,6 @@ GEM powerpack (0.1.1) progress (3.3.1) public_suffix (3.0.3) - puma (3.12.0) pusher (1.3.1) httpclient (~> 2.7) multi_json (~> 1.0) @@ -953,6 +960,7 @@ DEPENDENCIES minitest-reporters (~> 1.2.0.beta3) mocha mysql2 (~> 0.3.13) + nakayoshi_fork naturally net-http-persistent net-scp @@ -977,7 +985,7 @@ DEPENDENCIES petit! pg phantomjs (~> 1.9.7.1) - puma + puma! 
pusher (~> 1.3.1) rack-cache rack-mini-profiler diff --git a/cookbooks/cdo-jemalloc/attributes/default.rb b/cookbooks/cdo-jemalloc/attributes/default.rb index 27cdc04e3e7c8..031e41e47f583 100644 --- a/cookbooks/cdo-jemalloc/attributes/default.rb +++ b/cookbooks/cdo-jemalloc/attributes/default.rb @@ -2,15 +2,15 @@ default['cdo-jemalloc']['checksum'] = '5396e61cc6103ac393136c309fae09e44d74743c86f90e266948c50f3dbb7268' default['cdo-jemalloc']['lib'] = '/usr/local/lib/libjemalloc.so.2' -# See: https://github.com/jemalloc/jemalloc/blob/dev/TUNING.md +# See: +# https://github.com/jemalloc/jemalloc/blob/dev/TUNING.md +# http://jemalloc.net/jemalloc.3.html # To convert this attributes hash to a malloc_conf string, run: # node['cdo-jemalloc']['malloc_conf'].map {|x| x.join(':')}.join(',') default['cdo-jemalloc']['malloc_conf'] = { - # Enable dynamic thread to arena association based on running CPU. - # This has the potential to improve locality, e.g. when thread to CPU affinity is present. - # - # Suggested: try percpu_arena:percpu or percpu_arena:phycpu if thread migration between processors is expected to be infrequent. - percpu_arena: 'percpu', + # Maximum number of arenas to use for automatic multiplexing of threads and arenas. + # The default is four times the number of CPUs, or one if there is a single CPU. + narenas: 2, # Enabling jemalloc background threads generally improves the tail latency for application threads, # since unused memory purging is shifted to the dedicated background threads. diff --git a/dashboard/config/puma.rb b/dashboard/config/puma.rb index 5a2971b20941e..a4d62b5113e99 100644 --- a/dashboard/config/puma.rb +++ b/dashboard/config/puma.rb @@ -8,7 +8,12 @@ bind "tcp://#{CDO.dashboard_host}:#{CDO.dashboard_port}" end workers CDO.dashboard_workers unless CDO.dashboard_workers.to_i < 2 -threads 8, 16 +threads 1, 5 + +drain_on_shutdown + +# nginx already buffers/queues requests so disable Puma's own queue. 
+queue_requests false pidfile "#{File.expand_path(__FILE__)}.pid" preload_app! @@ -16,12 +21,18 @@ directory deploy_dir('dashboard') before_fork do + PEGASUS_DB.disconnect + DASHBOARD_DB.disconnect ActiveRecord::Base.connection_pool.disconnect! end on_worker_boot do |_index| + ActiveRecord::Base.establish_connection require 'dynamic_config/gatekeeper' require 'dynamic_config/dcdo' Gatekeeper.after_fork DCDO.after_fork end + +require 'gctools/oobgc' +out_of_band {GC::OOB.run} diff --git a/pegasus/config/puma.rb b/pegasus/config/puma.rb index 723ced03288dc..88b1bf02ca534 100644 --- a/pegasus/config/puma.rb +++ b/pegasus/config/puma.rb @@ -7,7 +7,12 @@ end workers CDO.pegasus_workers unless CDO.pegasus_workers.to_i < 2 -threads 8, 16 +threads 1, 5 + +drain_on_shutdown + +# nginx already buffers/queues requests so disable Puma's own queue. +queue_requests false pidfile "#{File.expand_path(__FILE__)}.pid" @@ -16,9 +21,17 @@ stdout_redirect pegasus_dir('log', 'puma_stdout.log'), pegasus_dir('log', 'puma_stderr.log'), true directory deploy_dir('pegasus') +before_fork do + PEGASUS_DB.disconnect + DASHBOARD_DB.disconnect +end + on_worker_boot do |_index| require 'dynamic_config/gatekeeper' require 'dynamic_config/dcdo' Gatekeeper.after_fork DCDO.after_fork end + +require 'gctools/oobgc' +out_of_band {GC::OOB.run}