Skip to content

Commit

Permalink
Merge pull request #3168 from garlick/rc_allranks
Browse files Browse the repository at this point in the history
broker:  allow late-joining brokers, and execute rc1/rc3 on all broker ranks
  • Loading branch information
mergify[bot] committed Sep 9, 2020
2 parents 8b84b47 + 60b1c22 commit 284ec80
Show file tree
Hide file tree
Showing 46 changed files with 1,529 additions and 2,041 deletions.
4 changes: 0 additions & 4 deletions doc/man1/flux-broker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ OPTIONS
Set the security mode. The mode may be *none*, *plain*, or *curve*
(default: curve). See flux-keygen(1) for more information.

**-g, --shutdown-grace**\ =\ *SECS*
Specify the shutdown grace period, in seconds (default: guess based
on session size).


RESOURCES
=========
Expand Down
11 changes: 8 additions & 3 deletions doc/man7/flux-broker-attributes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,15 @@ log-filename
(rank zero only) If set, session log entries, as filtered by log-forward-level,
are directed to this file.

log-stderr-mode
If set to "leader" (default), broker rank 0 emits forwarded logs from
other ranks to stderr, subject to the constraints of log-forward-level
and log-stderr-level. If set to "local", each broker emits its own
logs to stderr, subject to the constraints of log-stderr-level.

log-stderr-level
(rank zero only) Session log entries at syslog(3) level at or below this
value, and as filtered by log-forward-level, are copied to stderr of the
rank zero broker.
Log entries at syslog(3) level at or below this value are copied to stderr,
subject to log-stderr-mode.

log-level
Log entries at syslog(3) level at or below this value are stored
Expand Down
6 changes: 5 additions & 1 deletion etc/flux.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@ ExecStart=@X_BINDIR@/flux broker \
-Srundir=@X_RUNSTATEDIR@/flux \
-Slocal-uri=local://@X_RUNSTATEDIR@/flux/local \
-Slog-stderr-level=6 \
-Slog-stderr-mode=local \
-Scontent.backing-path=@X_LOCALSTATEDIR@/lib/flux/content.sqlite \
-Sbroker.rc2_none
-Sbroker.rc2_none \
-Sbroker.quorum=0
ExecReload=@X_BINDIR@/flux config reload
Restart=on-success
RestartSec=5s
User=flux
Group=flux
RuntimeDirectory=flux
Expand Down
59 changes: 24 additions & 35 deletions etc/rc1
Original file line number Diff line number Diff line change
@@ -1,46 +1,34 @@
#!/bin/bash -e

# Usage: wait_check pid [pid ...]
# Wait for each listed background process in turn.
# Arguments: one or more process IDs, each passed as a separate argument.
# Returns:   the status of the last wait performed (0 if no pids are given).
wait_check() {
    local pid
    # "$@" keeps every pid intact as its own word regardless of the current
    # IFS; an unquoted $* would be re-split by whatever IFS is in effect.
    for pid in "$@"; do
        wait "$pid"
    done
}

# Allow connector-local more time to start listening on socket in rc1 only
export FLUX_LOCAL_CONNECTOR_RETRY_COUNT=10
# Allow connector-local more time to start listening on socket.
# The retry count is placed in the environment of the flux command itself:
# written as "VAR=30 RANK=$(...)" the line is just two shell assignments,
# and VAR reaches flux only if it already happens to be exported.
RANK=$(FLUX_LOCAL_CONNECTOR_RETRY_COUNT=30 flux getattr rank)

if ! content_backing=$(flux getattr content.backing-module 2>/dev/null); then
content_backing=content-sqlite
fi

declare -a pids
flux exec -r all flux module load barrier & pids+=($!)
flux module load ${content_backing} & pids+=($!)
flux exec -r all flux module load aggregator & pids+=($!)
wait_check ${pids[@]}
unset pids
# Usage: modload {all|<rank>} modname [args ...]
# Load a module on this broker when the target matches it.
# Globals:   RANK (read) - compared numerically against <rank>
# Arguments: $1 - "all", or the single rank on which to load the module
#            $2... - module name and arguments, handed to "flux module load"
# Returns:   status of "flux module load" when it runs, otherwise 0
modload() {
    local where=$1; shift
    if test "$where" = "all" || test "$where" -eq "$RANK"; then
        # "$@" forwards each argument as given; no re-splitting or globbing.
        flux module load "$@"
    fi
}

modload all barrier
modload 0 ${content_backing}
modload all aggregator

declare -a pids
flux module load kvs
flux exec -r all -x 0 flux module load kvs & pids+=($!)
flux exec -r all flux module load kvs-watch & pids+=($!)
wait_check ${pids[@]}
unset pids
modload all kvs
modload all kvs-watch

declare -a pids
flux module load resource
flux module load job-info & pids+=($!)
flux module load cron sync=hb & pids+=($!)
flux module load job-manager & pids+=($!)
wait_check ${pids[@]}
unset pids
modload 0 resource
modload 0 job-info
modload 0 cron sync=hb
modload 0 job-manager

declare -a pids
flux module load job-ingest
flux exec -r all -x 0 flux module load job-ingest & pids+=($!)
flux module load job-exec & pids+=($!)
wait_check ${pids[@]}
unset pids
modload all job-ingest
modload 0 job-exec

core_dir=$(cd ${0%/*} && pwd -P)
all_dirs=$core_dir${FLUX_RC_EXTRA:+":$FLUX_RC_EXTRA"}
Expand All @@ -59,11 +47,12 @@ lookup_sched_module() {
flux module list | awk '$6 == "sched" { print $1 }'
}

if test "${FLUX_SCHED_MODULE}" != "none" -a -z "$(lookup_sched_module)"; then
# Load a scheduler module on rank 0 only, and only when FLUX_SCHED_MODULE is
# not "none" and lookup_sched_module prints nothing.  The checks are chained
# with && instead of test's obsolescent -a operator, so lookup_sched_module
# runs only after the earlier checks have passed.
# NOTE(review): the module expansion is left unquoted, as in the original;
# confirm FLUX_SCHED_MODULE never carries module arguments before quoting it.
if test "$RANK" -eq 0 \
    && test "${FLUX_SCHED_MODULE}" != "none" \
    && test -z "$(lookup_sched_module)"; then
    flux module load ${FLUX_SCHED_MODULE:-sched-simple}
fi

flux admin cleanup-push <<-EOT
test $RANK -ne 0 || flux admin cleanup-push <<-EOT
flux queue stop --quiet
flux job cancelall --user=all --quiet -f --states RUN
flux queue idle --quiet
Expand Down
3 changes: 2 additions & 1 deletion etc/rc1.d/01-enclosing-instance
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

# Inform the enclosing instance (if any) of the URIs for this instance

if parent_uri=$(flux getattr parent-uri 2>/dev/null) \
if test $(flux getattr rank) -eq 0 \
&& parent_uri=$(flux getattr parent-uri 2>/dev/null) \
&& parent_ns=$(flux getattr parent-kvs-namespace 2>/dev/null); then
key_prefix=flux
local_uri=${FLUX_URI}
Expand Down
35 changes: 22 additions & 13 deletions etc/rc3
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
#!/bin/bash

RANK=$(flux getattr rank)

# Usage: modrm {all|<rank>} modname
# Force-remove a module from this broker when the target matches it.
# Globals:   RANK (read) - compared numerically against <rank>
# Arguments: $1 - "all", or the single rank on which to remove the module
#            $2... - handed to "flux module remove -f"
# Returns:   status of "flux module remove" when it runs, otherwise 0
modrm() {
    local where=$1; shift
    if test "$where" = "all" || test "$where" -eq "$RANK"; then
        # "$@" forwards each argument as given; no re-splitting or globbing.
        flux module remove -f "$@"
    fi
}

core_dir=$(cd ${0%/*} && pwd -P)
all_dirs=$core_dir${FLUX_RC_EXTRA:+":$FLUX_RC_EXTRA"}
IFS=:
Expand All @@ -12,22 +22,21 @@ for rcdir in $all_dirs; do
done
shopt -u nullglob

flux module remove -f sched-simple
flux module remove -f resource
flux module remove -f job-exec
flux module remove -f job-manager
flux exec -r all flux module remove -f job-ingest
modrm 0 sched-simple
modrm 0 resource
modrm 0 job-exec
modrm 0 job-manager
modrm all job-ingest

flux module remove -f cron
flux exec -r all flux module remove -f aggregator
flux exec -r all flux module remove -f barrier
modrm 0 cron
modrm all aggregator
modrm all barrier

flux module remove -f job-info
flux exec -r all flux module remove -f kvs-watch
flux exec -r all -x 0 flux module remove -f kvs
modrm 0 job-info
modrm all kvs-watch
modrm all kvs

flux module remove -f kvs
flux content flush

backingmod=$(flux getattr content.backing-module 2>/dev/null) || true
flux module remove -f ${backingmod:-content-sqlite}
modrm 0 ${backingmod:-content-sqlite}
18 changes: 0 additions & 18 deletions src/broker/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ libbroker_la_SOURCES = \
heartbeat.c \
service.h \
service.c \
hello.h \
hello.c \
reduce.h \
reduce.c \
shutdown.h \
shutdown.c \
attr.h \
attr.c \
log.h \
Expand Down Expand Up @@ -77,10 +71,8 @@ flux_broker_LDADD = \
flux_broker_LDFLAGS =

TESTS = test_heartbeat.t \
test_hello.t \
test_attr.t \
test_service.t \
test_reduce.t \
test_liblist.t \
test_pmiutil.t \
test_boot_config.t \
Expand Down Expand Up @@ -113,11 +105,6 @@ test_heartbeat_t_CPPFLAGS = $(test_cppflags)
test_heartbeat_t_LDADD = $(test_ldadd)
test_heartbeat_t_LDFLAGS = $(test_ldflags)

test_hello_t_SOURCES = test/hello.c
test_hello_t_CPPFLAGS = $(test_cppflags)
test_hello_t_LDADD = $(test_ldadd)
test_hello_t_LDFLAGS = $(test_ldflags)

test_attr_t_SOURCES = test/attr.c
test_attr_t_CPPFLAGS = $(test_cppflags)
test_attr_t_LDADD = $(test_ldadd)
Expand All @@ -128,11 +115,6 @@ test_service_t_CPPFLAGS = $(test_cppflags)
test_service_t_LDADD = $(test_ldadd)
test_service_t_LDFLAGS = $(test_ldflags)

test_reduce_t_SOURCES = test/reduce.c
test_reduce_t_CPPFLAGS = $(test_cppflags)
test_reduce_t_LDADD = $(test_ldadd)
test_reduce_t_LDFLAGS = $(test_ldflags)

test_liblist_t_SOURCES = test/liblist.c
test_liblist_t_CPPFLAGS = $(test_cppflags)
test_liblist_t_LDADD = $(test_ldadd)
Expand Down

0 comments on commit 284ec80

Please sign in to comment.