From d9aaa7c231f25d0d8271eee0e6bbb33e13cb7f0f Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Tue, 30 Aug 2016 15:43:23 -0600 Subject: [PATCH 1/5] TS-4806: Add ability to pass a new stack to thread creation. --- iocore/eventsystem/I_Thread.h | 3 ++- iocore/eventsystem/Thread.cc | 4 ++-- lib/ts/ink_thread.h | 8 ++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/iocore/eventsystem/I_Thread.h b/iocore/eventsystem/I_Thread.h index 5acccee0b0f..528cdbfd238 100644 --- a/iocore/eventsystem/I_Thread.h +++ b/iocore/eventsystem/I_Thread.h @@ -145,7 +145,8 @@ class Thread Thread &operator=(const Thread &); public: - ink_thread start(const char *name, size_t stacksize = DEFAULT_STACKSIZE, ThreadFunction f = NULL, void *a = NULL); + ink_thread start(const char *name, size_t stacksize = DEFAULT_STACKSIZE, ThreadFunction f = NULL, void *a = NULL, + void *stack = NULL); virtual void execute() diff --git a/iocore/eventsystem/Thread.cc b/iocore/eventsystem/Thread.cc index e8c8a4453bb..f9ef1a3d478 100644 --- a/iocore/eventsystem/Thread.cc +++ b/iocore/eventsystem/Thread.cc @@ -87,7 +87,7 @@ spawn_thread_internal(void *a) } ink_thread -Thread::start(const char *name, size_t stacksize, ThreadFunction f, void *a) +Thread::start(const char *name, size_t stacksize, ThreadFunction f, void *a, void *stack) { thread_data_internal *p = (thread_data_internal *)ats_malloc(sizeof(thread_data_internal)); @@ -96,7 +96,7 @@ Thread::start(const char *name, size_t stacksize, ThreadFunction f, void *a) p->me = this; memset(p->name, 0, MAX_THREAD_NAME_LENGTH); ink_strlcpy(p->name, name, MAX_THREAD_NAME_LENGTH); - tid = ink_thread_create(spawn_thread_internal, (void *)p, 0, stacksize); + tid = ink_thread_create(spawn_thread_internal, (void *)p, 0, stacksize, stack); return tid; } diff --git a/lib/ts/ink_thread.h b/lib/ts/ink_thread.h index f824b2f6ff0..448e9f20b8d 100644 --- a/lib/ts/ink_thread.h +++ b/lib/ts/ink_thread.h @@ -127,7 +127,7 @@ ink_thread_key_delete(ink_thread_key 
key) } static inline ink_thread -ink_thread_create(void *(*f)(void *), void *a, int detached = 0, size_t stacksize = 0) +ink_thread_create(void *(*f)(void *), void *a, int detached = 0, size_t stacksize = 0, void *stack = NULL) { ink_thread t; int ret; @@ -137,7 +137,11 @@ ink_thread_create(void *(*f)(void *), void *a, int detached = 0, size_t stacksiz pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); if (stacksize) { - pthread_attr_setstacksize(&attr, stacksize); + if (stack) { + pthread_attr_setstack(&attr, stack, stacksize); + } else { + pthread_attr_setstacksize(&attr, stacksize); + } } if (detached) { From 972ffb5c81507298dd8275291779bc36372c0608 Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Wed, 31 Aug 2016 10:51:46 -0600 Subject: [PATCH 2/5] TS-4806: Normalize stacksize --- iocore/eventsystem/UnixEventProcessor.cc | 11 +++++++++++ lib/ts/ink_thread.h | 1 + 2 files changed, 12 insertions(+) diff --git a/iocore/eventsystem/UnixEventProcessor.cc b/iocore/eventsystem/UnixEventProcessor.cc index fc985ddafd9..2e3d63b57e7 100644 --- a/iocore/eventsystem/UnixEventProcessor.cc +++ b/iocore/eventsystem/UnixEventProcessor.cc @@ -30,6 +30,7 @@ #include #endif #include "ts/ink_defs.h" +#include "ts/hugepages.h" EventType EventProcessor::spawn_event_threads(int n_threads, const char *et_name, size_t stacksize) @@ -81,6 +82,16 @@ EventProcessor::start(int n_event_threads, size_t stacksize) n_ethreads = n_event_threads; n_thread_groups = 1; + // Make sure that our thread stack size is at least the minimum size + stacksize = MAX(stacksize, INK_THREAD_STACK_MIN); + + // Make sure it is a multiple of our page size + if (ats_hugepage_enabled()) { + stacksize = INK_ALIGN(stacksize, ats_hugepage_size()); + } else { + stacksize = INK_ALIGN(stacksize, ats_pagesize()); + } + for (i = 0; i < n_event_threads; i++) { EThread *t = new EThread(REGULAR, i); if (i == 0) { diff --git a/lib/ts/ink_thread.h b/lib/ts/ink_thread.h index 448e9f20b8d..0c19cd532bc 100644 --- 
a/lib/ts/ink_thread.h +++ b/lib/ts/ink_thread.h @@ -49,6 +49,7 @@ #endif #define INK_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER +#define INK_THREAD_STACK_MIN PTHREAD_STACK_MIN typedef pthread_t ink_thread; typedef pthread_cond_t ink_cond; From ebd4a4eabb8809a90fe807e417bf07ebb0220034 Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Wed, 31 Aug 2016 15:01:01 -0600 Subject: [PATCH 3/5] TS-4806: Allocate thread stacks on corresponding NUMA nodes. --- iocore/eventsystem/UnixEventProcessor.cc | 39 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/iocore/eventsystem/UnixEventProcessor.cc b/iocore/eventsystem/UnixEventProcessor.cc index 2e3d63b57e7..2b6970f6081 100644 --- a/iocore/eventsystem/UnixEventProcessor.cc +++ b/iocore/eventsystem/UnixEventProcessor.cc @@ -72,6 +72,7 @@ EventProcessor::start(int n_event_threads, size_t stacksize) { char thr_name[MAX_THREAD_NAME_LENGTH]; int i; + void *stack = NULL; // do some sanity checking. static int started = 0; @@ -146,12 +147,7 @@ EventProcessor::start(int n_event_threads, size_t stacksize) #endif for (i = 0; i < n_ethreads; i++) { ink_thread tid; - if (i > 0) { - snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i); - tid = all_ethreads[i]->start(thr_name, stacksize); - } else { - tid = ink_thread_self(); - } + #if TS_USE_HWLOC if (obj_count > 0) { obj = hwloc_get_obj_by_type(ink_get_topology(), obj_type, i % obj_count); @@ -163,6 +159,37 @@ EventProcessor::start(int n_event_threads, size_t stacksize) #else Debug("iocore_thread", "EThread: %d %s: %d", i, obj_name, obj->logical_index); #endif // HWLOC_API_VERSION + } +#endif // TS_USE_HWLOC + + if (i > 0) { + snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i); +#if TS_USE_HWLOC + if (obj_count > 0) { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + + hwloc_cpuset_to_nodeset(ink_get_topology(), obj->cpuset, nodeset); + + if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) == 1) { + 
stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_BIND, 0); + } else if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) > 1) { + stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_INTERLEAVE, 0); + } else { + stack = NULL; + } + + hwloc_bitmap_free(nodeset); + } +#endif // TS_USE_HWLOC + tid = all_ethreads[i]->start(thr_name, stacksize, NULL, stack); + } else { + // We should stop using this thread like this and create a new one and have the master thread just join all these and block + // indefinitely. + tid = ink_thread_self(); + } + +#if TS_USE_HWLOC + if (obj_count > 0) { hwloc_set_thread_cpubind(ink_get_topology(), tid, obj->cpuset, HWLOC_CPUBIND_STRICT); } else { Warning("hwloc returned an unexpected value -- CPU affinity disabled"); From 638429e3cfa3edccc3cf5d714f94e38a255e1886 Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Wed, 31 Aug 2016 16:00:12 -0600 Subject: [PATCH 4/5] TS-4806: Stop re-using main thread as net thread. 
--- iocore/aio/test_AIO.cc | 9 +++++- iocore/eventsystem/UnixEventProcessor.cc | 40 +++++++++--------------- iocore/eventsystem/test_Buffer.cc | 5 +-- iocore/eventsystem/test_Event.cc | 4 ++- proxy/Main.cc | 8 +++-- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/iocore/aio/test_AIO.cc b/iocore/aio/test_AIO.cc index ae90b52c103..27ef6887aa3 100644 --- a/iocore/aio/test_AIO.cc +++ b/iocore/aio/test_AIO.cc @@ -400,6 +400,10 @@ main(int /* argc ATS_UNUSED */, char *argv[]) RecProcessInit(RECM_STAND_ALONE); ink_event_system_init(EVENT_SYSTEM_MODULE_VERSION); eventProcessor.start(ink_number_of_processors()); + + Thread *main_thread = new EThread; + main_thread->set_specific(); + #if AIO_MODE == AIO_MODE_NATIVE int etype = ET_NET; int n_netthreads = eventProcessor.n_threads_for_type[etype]; @@ -446,5 +450,8 @@ main(int /* argc ATS_UNUSED */, char *argv[]) } } - this_thread()->execute(); + while (!shutdown_event_system) { + sleep(1); + } + delete main_thread; } diff --git a/iocore/eventsystem/UnixEventProcessor.cc b/iocore/eventsystem/UnixEventProcessor.cc index 2b6970f6081..9ed172e50f9 100644 --- a/iocore/eventsystem/UnixEventProcessor.cc +++ b/iocore/eventsystem/UnixEventProcessor.cc @@ -94,11 +94,7 @@ EventProcessor::start(int n_event_threads, size_t stacksize) } for (i = 0; i < n_event_threads; i++) { - EThread *t = new EThread(REGULAR, i); - if (i == 0) { - ink_thread_setspecific(Thread::thread_data_key, t); - Thread::get_hrtime_updated(); - } + EThread *t = new EThread(REGULAR, i); all_ethreads[i] = t; eventthread[ET_CALL][i] = t; @@ -162,31 +158,25 @@ EventProcessor::start(int n_event_threads, size_t stacksize) } #endif // TS_USE_HWLOC - if (i > 0) { - snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i); + snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i); #if TS_USE_HWLOC - if (obj_count > 0) { - hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); - - hwloc_cpuset_to_nodeset(ink_get_topology(), obj->cpuset, nodeset); + if (obj_count 
> 0) { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); - if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) == 1) { - stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_BIND, 0); - } else if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) > 1) { - stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_INTERLEAVE, 0); - } else { - stack = NULL; - } + hwloc_cpuset_to_nodeset(ink_get_topology(), obj->cpuset, nodeset); - hwloc_bitmap_free(nodeset); + if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) == 1) { + stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_BIND, 0); + } else if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) > 1) { + stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_INTERLEAVE, 0); + } else { + stack = NULL; } -#endif // TS_USE_HWLOC - tid = all_ethreads[i]->start(thr_name, stacksize, NULL, stack); - } else { - // We should stop using this thread like this and create a new one and have the master thread just join all these and block - // indefinitely. 
- tid = ink_thread_self(); + + hwloc_bitmap_free(nodeset); } +#endif // TS_USE_HWLOC + tid = all_ethreads[i]->start(thr_name, stacksize, NULL, stack); #if TS_USE_HWLOC if (obj_count > 0) { diff --git a/iocore/eventsystem/test_Buffer.cc b/iocore/eventsystem/test_Buffer.cc index 165b09082e2..befee33d4ee 100644 --- a/iocore/eventsystem/test_Buffer.cc +++ b/iocore/eventsystem/test_Buffer.cc @@ -42,6 +42,9 @@ main(int /* argc ATS_UNUSED */, const char * /* argv ATS_UNUSED */ []) ink_event_system_init(EVENT_SYSTEM_MODULE_VERSION); eventProcessor.start(TEST_THREADS); + Thread *main_thread = new EThread; + main_thread->set_specific(); + for (unsigned i = 0; i < 100; ++i) { MIOBuffer *b1 = new_MIOBuffer(default_large_iobuffer_size); IOBufferReader *b1reader ATS_UNUSED = b1->alloc_reader(); @@ -58,6 +61,4 @@ main(int /* argc ATS_UNUSED */, const char * /* argv ATS_UNUSED */ []) } exit(0); - this_thread()->execute(); - return 0; } diff --git a/iocore/eventsystem/test_Event.cc b/iocore/eventsystem/test_Event.cc index 5fe80bf76d1..ef53b82365f 100644 --- a/iocore/eventsystem/test_Event.cc +++ b/iocore/eventsystem/test_Event.cc @@ -73,6 +73,8 @@ main(int /* argc ATS_UNUSED */, const char * /* argv ATS_UNUSED */ []) process_killer *killer = new process_killer(new_ProxyMutex()); eventProcessor.schedule_in(killer, HRTIME_SECONDS(10)); eventProcessor.schedule_every(alrm, HRTIME_SECONDS(1)); - this_thread()->execute(); + while (!shutdown_event_system) { + sleep(1); + } return 0; } diff --git a/proxy/Main.cc b/proxy/Main.cc index 4e583204113..17c9a79ed32 100644 --- a/proxy/Main.cc +++ b/proxy/Main.cc @@ -1924,8 +1924,7 @@ main(int /* argc ATS_UNUSED */, const char **argv) reinterpret_cast(static_cast(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE))); pmgmt->registerMgmtCallback(MGMT_EVENT_LIFECYCLE_MESSAGE, mgmt_lifecycle_msg_callback, NULL); - // The main thread also becomes a net thread. 
- ink_set_thread_name("[ET_NET 0]"); + ink_set_thread_name("[TS_MAIN]"); Note("traffic server running"); @@ -1944,7 +1943,10 @@ main(int /* argc ATS_UNUSED */, const char **argv) } #endif - this_thread()->execute(); + while (!shutdown_event_system) { + sleep(1); + } + delete main_thread; } From fe7077cb7977ec1a7525cba807fe92ef2e72e89d Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Thu, 1 Sep 2016 15:55:51 -0600 Subject: [PATCH 5/5] TS-4806: Make stacks use huge pages if enabled. --- iocore/eventsystem/UnixEventProcessor.cc | 92 ++++++++++++++++++++---- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/iocore/eventsystem/UnixEventProcessor.cc b/iocore/eventsystem/UnixEventProcessor.cc index 9ed172e50f9..6415cb17564 100644 --- a/iocore/eventsystem/UnixEventProcessor.cc +++ b/iocore/eventsystem/UnixEventProcessor.cc @@ -65,6 +65,64 @@ EventProcessor::spawn_event_threads(int n_threads, const char *et_name, size_t s return new_thread_group_id; } +static void * +alloc_stack(size_t stacksize) +{ + void *stack = NULL; + + if (ats_hugepage_enabled()) { + stack = ats_alloc_hugepage(stacksize); + } + + if (stack == NULL) { + stack = ats_memalign(ats_pagesize(), stacksize); + } + + return stack; +} + +#if TS_USE_HWLOC +static void * +alloc_numa_stack(hwloc_cpuset_t cpuset, size_t stacksize) +{ + hwloc_membind_policy_t mem_policy = HWLOC_MEMBIND_DEFAULT; + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + int num_nodes = 0; + void *stack = NULL; + + // Find the NUMA node set that correlates to our next thread CPU set + hwloc_cpuset_to_nodeset(ink_get_topology(), cpuset, nodeset); + // How many NUMA nodes will we be needing to allocate across? + num_nodes = hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), cpuset, HWLOC_OBJ_NODE); + + if (num_nodes == 1) { + // The preferred memory policy. The thread lives in one NUMA node. 
+ mem_policy = HWLOC_MEMBIND_BIND; + } else if (num_nodes > 1) { + // If we have more than one NUMA node we should interleave over them. + mem_policy = HWLOC_MEMBIND_INTERLEAVE; + } + + if (mem_policy != HWLOC_MEMBIND_DEFAULT) { + // Let's temporarily set the memory binding to our destination NUMA node + hwloc_set_membind_nodeset(ink_get_topology(), nodeset, mem_policy, HWLOC_MEMBIND_THREAD); + } + + // Alloc our stack + stack = alloc_stack(stacksize); + + if (mem_policy != HWLOC_MEMBIND_DEFAULT) { + // Now let's set it back to default for this thread. + hwloc_set_membind_nodeset(ink_get_topology(), hwloc_topology_get_topology_nodeset(ink_get_topology()), HWLOC_MEMBIND_DEFAULT, + HWLOC_MEMBIND_THREAD); + } + + hwloc_bitmap_free(nodeset); + + return stack; +} +#endif // TS_USE_HWLOC + class EventProcessor eventProcessor; int @@ -93,6 +151,8 @@ EventProcessor::start(int n_event_threads, size_t stacksize) stacksize = INK_ALIGN(stacksize, ats_pagesize()); } + Debug("iocore_thread", "Thread stack size set to %zu", stacksize); + for (i = 0; i < n_event_threads; i++) { EThread *t = new EThread(REGULAR, i); all_ethreads[i] = t; @@ -137,6 +197,7 @@ EventProcessor::start(int n_event_threads, size_t stacksize) obj_name = (char *)"Machine"; } + // How many of the above `obj_type` do we have in our topology? obj_count = hwloc_get_nbobjs_by_type(ink_get_topology(), obj_type); Debug("iocore_thread", "Affinity: %d %ss: %d PU: %d", affinity, obj_name, obj_count, ink_number_of_processors()); @@ -146,8 +207,10 @@ EventProcessor::start(int n_event_threads, size_t stacksize) #if TS_USE_HWLOC if (obj_count > 0) { + // Get our `obj` instance with index based on the thread number we are on. 
obj = hwloc_get_obj_by_type(ink_get_topology(), obj_type, i % obj_count); #if HWLOC_API_VERSION >= 0x00010100 + // Pretty print our CPU set int cpu_mask_len = hwloc_bitmap_snprintf(NULL, 0, obj->cpuset) + 1; char *cpu_mask = (char *)alloca(cpu_mask_len); hwloc_bitmap_snprintf(cpu_mask, cpu_mask_len, obj->cpuset); @@ -158,33 +221,34 @@ EventProcessor::start(int n_event_threads, size_t stacksize) } #endif // TS_USE_HWLOC + // Name our thread snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i); #if TS_USE_HWLOC + // Lets create a NUMA local stack if we can if (obj_count > 0) { - hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); - - hwloc_cpuset_to_nodeset(ink_get_topology(), obj->cpuset, nodeset); - - if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) == 1) { - stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_BIND, 0); - } else if (hwloc_get_nbobjs_inside_cpuset_by_type(ink_get_topology(), obj->cpuset, HWLOC_OBJ_NODE) > 1) { - stack = hwloc_alloc_membind_nodeset(ink_get_topology(), stacksize, nodeset, HWLOC_MEMBIND_INTERLEAVE, 0); - } else { - stack = NULL; - } - - hwloc_bitmap_free(nodeset); + stack = alloc_numa_stack(obj->cpuset, stacksize); + } else { + // Lets just alloc a stack even with no NUMA knowledge + stack = alloc_stack(stacksize); } +#else + // Lets just alloc a stack even with no NUMA knowledge + stack = alloc_stack(stacksize); #endif // TS_USE_HWLOC - tid = all_ethreads[i]->start(thr_name, stacksize, NULL, stack); + + // Start our new thread with our new stack. 
+ tid = all_ethreads[i]->start(thr_name, stacksize, NULL, NULL, stack); + stack = NULL; #if TS_USE_HWLOC if (obj_count > 0) { + // Let's bind our new thread to its CPU set hwloc_set_thread_cpubind(ink_get_topology(), tid, obj->cpuset, HWLOC_CPUBIND_STRICT); } else { Warning("hwloc returned an unexpected value -- CPU affinity disabled"); } #else + // Let's ignore tid if we don't link with HWLOC (void)tid; #endif // TS_USE_HWLOC }