Skip to content

Commit

Permalink
workbench: allow custom supervisord.conf and only the needed Nomad templates
Browse files Browse the repository at this point in the history
  • Loading branch information
fmaste committed May 30, 2023
1 parent 18a7312 commit 632c855
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 94 deletions.
122 changes: 66 additions & 56 deletions nix/workbench/backend/nomad-job.nix
Expand Up @@ -3,12 +3,11 @@
# clusters and SRE infrastructure used for long-running cloud benchmarks. Why?
# To make it easier to improve and debug the almighty workbench!
################################################################################
{ lib
{ pkgs
, lib
, stateDir
, profileData
, containerSpecs
# Needs unix_http_server.file
, supervisorConf
, execTaskDriver
, oneTracerPerNode ? false
}:
Expand Down Expand Up @@ -58,7 +57,8 @@ let
# the container I get (from journald):
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: Error: Cannot open an HTTP server: socket.error reported -2
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: For help, use /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/bin/supervisord -h
task_supervisord_url = "unix://${supervisorConf.value.unix_http_server.file}";
unixHttpServerPort = "/tmp/supervisor.sock";
task_supervisord_url = "unix://${unixHttpServerPort}";
# Location of the supervisord config file inside the container.
# This file can be mounted as a volume or created as a template.
task_supervisord_conf = "${task_statedir}/supervisor/supervisord.conf";
Expand Down Expand Up @@ -452,8 +452,21 @@ let
{
env = false;
destination = "${task_supervisord_conf}";
data = escapeTemplate (__readFile
supervisorConf.INI);
data = escapeTemplate (__readFile (
let supervisorConf = import ./supervisor-conf.nix
{ inherit pkgs lib stateDir;
                            # Include only this task's node
nodeSpecs = if taskName == "tracer"
then {}
else {"${nodeSpec.name}"=nodeSpec;}
;
# Only for the tracer task or also nodes if oneTracerPerNode
withTracer = oneTracerPerNode || taskName == "tracer";
# ''{{ env "NOMAD_TASK_DIR" }}/supervisor.sock''
inherit unixHttpServerPort;
};
in supervisorConf.INI
));
change_mode = "noop";
error_on_missing_key = true;
}
Expand Down Expand Up @@ -521,54 +534,51 @@ let
}
])
++
# Node(s)
(lib.lists.flatten (lib.mapAttrsToList
(_: nodeSpec: [
## Node start.sh script.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/start.sh";
data = escapeTemplate (
let scriptValue = profileData.node-services."${nodeSpec.name}".startupScript.value;
in if execTaskDriver
then (startScriptToGoTemplate
nodeSpec.name
("perf-" + nodeSpec.name)
("node" + (toString nodeSpec.i))
nodeSpec
scriptValue
)
else scriptValue
);
change_mode = "noop";
error_on_missing_key = true;
perms = "744"; # Only for every "start.sh" script. Default: "644"
}
## Node configuration file.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/config.json";
data = escapeTemplate (lib.generators.toJSON {}
profileData.node-services."${nodeSpec.name}".nodeConfig.value);
change_mode = "noop";
error_on_missing_key = true;
}
## Node topology file.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/topology.json";
data = escapeTemplate (
let topology = profileData.node-services."${nodeSpec.name}".topology;
in if execTaskDriver
then (topologyToGoTemplate topology.value)
else (__readFile topology.JSON )
);
change_mode = "noop";
error_on_missing_key = true;
}
])
profileData.node-specs.value
))
# Node
(lib.optionals (taskName != "tracer") [
## Node start.sh script.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/start.sh";
data = escapeTemplate (
let scriptValue = profileData.node-services."${nodeSpec.name}".startupScript.value;
in if execTaskDriver
then (startScriptToGoTemplate
taskName # taskName
serviceName # serviceName
portName # portName (can't have "-")
nodeSpec # nodeSpec
scriptValue # startScript
)
else scriptValue
);
change_mode = "noop";
error_on_missing_key = true;
perms = "744"; # Only for every "start.sh" script. Default: "644"
}
## Node configuration file.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/config.json";
data = escapeTemplate (lib.generators.toJSON {}
profileData.node-services."${nodeSpec.name}".nodeConfig.value);
change_mode = "noop";
error_on_missing_key = true;
}
## Node topology file.
{
env = false;
destination = "${task_statedir}/${nodeSpec.name}/topology.json";
data = escapeTemplate (
let topology = profileData.node-services."${nodeSpec.name}".topology;
in if execTaskDriver
then (topologyToGoTemplate topology.value)
else (__readFile topology.JSON )
);
change_mode = "noop";
error_on_missing_key = true;
}
])
;

# Specifies logging configuration for the stdout and stderr of the
Expand Down Expand Up @@ -687,7 +697,7 @@ let
"tracer" # portName (can't have "-")
0 # portNum
# TODO: Which region?
{region=null;}; # node-specs
{region=null;}; # node-spec
}
]
++
Expand All @@ -706,7 +716,7 @@ let
("perf-node-" + (toString nodeSpec.i)) # serviceName
("node" + (toString nodeSpec.i)) # portName (can't have "-")
nodeSpec.port # portNum
nodeSpec; # node-specs
nodeSpec; # node-spec
})
(profileData.node-specs.value)
)
Expand Down
19 changes: 4 additions & 15 deletions nix/workbench/backend/nomad.nix
Expand Up @@ -15,13 +15,6 @@ let
materialise-profile =
{ profileData }:
let
supervisorConf = import ./supervisor-conf.nix
{ inherit profileData;
inherit pkgs lib stateDir;
# ''{{ env "NOMAD_TASK_DIR" }}/supervisor.sock''
unixHttpServerPort = "/tmp/supervisor.sock";
}
;
# Intermediate / workbench-adhoc container specifications
containerSpecs = rec {
#
Expand Down Expand Up @@ -120,19 +113,17 @@ let
podman = {
# TODO: oneTracerPerGroup
oneTracerPerCluster = import ./nomad-job.nix
{ inherit lib stateDir;
{ inherit pkgs lib stateDir;
inherit profileData;
inherit containerSpecs;
inherit supervisorConf;
# May evolve to a "cloud" flag!
execTaskDriver = false;
oneTracerPerNode = false;
};
oneTracerPerNode = import ./nomad-job.nix
{ inherit lib stateDir;
{ inherit pkgs lib stateDir;
inherit profileData;
inherit containerSpecs;
inherit supervisorConf;
# May evolve to a "cloud" flag!
execTaskDriver = false;
oneTracerPerNode = true;
Expand All @@ -141,19 +132,17 @@ let
exec = {
# TODO: oneTracerPerGroup
oneTracerPerCluster = import ./nomad-job.nix
{ inherit lib stateDir;
{ inherit pkgs lib stateDir;
inherit profileData;
inherit containerSpecs;
inherit supervisorConf;
# May evolve to a "cloud" flag!
execTaskDriver = true;
oneTracerPerNode = false;
};
oneTracerPerNode = import ./nomad-job.nix
{ inherit lib stateDir;
{ inherit pkgs lib stateDir;
inherit profileData;
inherit containerSpecs;
inherit supervisorConf;
# May evolve to a "cloud" flag!
execTaskDriver = true;
oneTracerPerNode = true;
Expand Down
41 changes: 20 additions & 21 deletions nix/workbench/backend/supervisor-conf.nix
@@ -1,7 +1,8 @@
{ pkgs
, lib
, stateDir
, profileData
, nodeSpecs
, withTracer
, unixHttpServerPort ? null
, inetHttpServerPort ? null
}:
Expand Down Expand Up @@ -63,7 +64,7 @@ let
};
}
//
lib.attrsets.optionalAttrs (profileData.value.node.tracer)
lib.attrsets.optionalAttrs withTracer
{
"program:tracer" = {
# "command" below assumes "directory" is set accordingly.
Expand All @@ -82,25 +83,23 @@ let
};
}
//
listToAttrs
(flip mapAttrsToList profileData.node-services
(_: { nodeSpec, service, ... }:
nameValuePair "program:${nodeSpec.value.name}" {
# "command" below assumes "directory" is set accordingly.
directory = "${stateDir}/${nodeSpec.value.name}";
command = "${command}";
stdout_logfile = "${stateDir}/${nodeSpec.value.name}/stdout";
stderr_logfile = "${stateDir}/${nodeSpec.value.name}/stderr";
stopasgroup = false;
killasgroup = false;
autostart = false;
autorestart = false;
# Don't attempt any restart!
startretries = 0;
# Seconds it needs to stay running to consider the start successful
startsecs = 5;
})
)
(builtins.listToAttrs (lib.mapAttrsToList (nodeName: nodeSpec:
lib.attrsets.nameValuePair "program:${nodeName}" {
# "command" below assumes "directory" is set accordingly.
directory = "${stateDir}/${nodeName}";
command = "${command}";
stdout_logfile = "${stateDir}/${nodeName}/stdout";
stderr_logfile = "${stateDir}/${nodeName}/stderr";
stopasgroup = false;
killasgroup = false;
autostart = false;
autorestart = false;
# Don't attempt any restart!
startretries = 0;
# Seconds it needs to stay running to consider the start successful
startsecs = 5;
})
nodeSpecs))
##
## [unix_http_server] Section Settings
##
Expand Down
6 changes: 4 additions & 2 deletions nix/workbench/backend/supervisor.nix
Expand Up @@ -38,8 +38,10 @@ let
materialise-profile =
{ profileData }:
let supervisorConf = import ./supervisor-conf.nix
{ inherit profileData;
inherit pkgs lib stateDir;
{ inherit pkgs lib stateDir;
# Create a `supervisord.conf`
nodeSpecs = profileData.node-specs.value;
withTracer = profileData.value.node.tracer;
inetHttpServerPort = "127.0.0.1:9001";
};
in pkgs.runCommand "workbench-backend-output-${profileData.profileName}-supervisor"
Expand Down

0 comments on commit 632c855

Please sign in to comment.