Skip to content

Commit

Permalink
better organization of scripts (#3)
Browse files Browse the repository at this point in the history
instead of strings in the python files, it will be better
to have these stored as actual bash scripts for easier readability

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch and vsoch committed Jul 17, 2023
1 parent ca27af8 commit 7784a19
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 179 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
The versions coincide with releases on pip. Only major versions will be released as tags on Github.

## [0.0.x](https://github.com/converged-computing/flux-burst-compute-engine/tree/main) (0.0.x)
- better organize templates to be bash scripts for readability (0.0.12)
- add back isolated burst mode (0.0.11)
- support for main purpose, connected burst (0.0.1)
- initial skeleton release of project (0.0.0)
9 changes: 2 additions & 7 deletions fluxburst_compute_engine/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ def generate_bursted_boot_script(self, hosts):
"""
Generate a bursted broker config.
"""
template = templates.bursting_boot_script
with open(self.params.munge_key, "rb") as fd:
content = fd.read()
bytes_string = base64.b64encode(content).decode("utf-8")
Expand All @@ -119,8 +118,7 @@ def generate_bursted_boot_script(self, hosts):
"LEAD_BROKER_ADDRESS": self.params.lead_host,
"LEAD_BROKER_PORT": str(self.params.lead_port),
}
for key, value in replace.items():
template = template.replace(key, value)
template = templates.get_script("burst_boot.sh", replace)
self.params.compute_boot_script = template

def load_encoded_curve_cert(self):
Expand All @@ -142,8 +140,6 @@ def generate_default_boot_script(self, node_count):
"""
Generate a bursted broker config.
"""
template = templates.default_boot_script

# Generate range of hosts, numbered 1-N
hostrange = "001"
if node_count > 1:
Expand All @@ -161,8 +157,7 @@ def generate_default_boot_script(self, node_count):
"NODELIST": hosts,
"CURVECERT": curve_cert,
}
for key, value in replace.items():
template = template.replace(key, value)
template = templates.get_script("default_boot.sh", replace)
self.params.compute_boot_script = template

def generate_resource_hostlist(self):
Expand Down
23 changes: 23 additions & 0 deletions fluxburst_compute_engine/templates/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# HPCIC DevTools Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)

import os
import re

import fluxburst.utils as utils

here = os.path.dirname(os.path.abspath(__file__))


def get_script(name, replace):
    """
    Get a template file by name and replace a set of strings.

    Arguments:
      name: filename of the template, joined onto this package directory.
      replace: mapping of placeholder text to its replacement. Values are
        converted with str(), so numbers (e.g. a port) may be passed
        directly. Empty placeholder keys are ignored.

    Returns the rendered template as a string.

    Raises ValueError if the resolved path is not an existing regular file.
    """
    template_file = os.path.join(here, name)

    # isfile (not exists) so that a directory is reported with the same
    # ValueError as a missing template instead of failing later on read.
    if not os.path.isfile(template_file):
        raise ValueError(f"{template_file} does not exist")

    # The templates contain non-ASCII characters, so decode explicitly as
    # UTF-8 rather than depending on a locale-specific default encoding.
    with open(template_file, encoding="utf-8") as fd:
        template = fd.read()

    substitutions = {key: str(value) for key, value in replace.items() if key}
    if not substitutions:
        return template

    # Substitute in one pass over the original text. Chained str.replace
    # calls would rescan text that was already inserted, so a value that
    # happens to contain another placeholder (e.g. inside a base64 payload)
    # would be corrupted. Longest keys come first so that a placeholder that
    # is a prefix of another cannot shadow it.
    ordered = sorted(substitutions, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in ordered))
    return pattern.sub(lambda match: substitutions[match.group(0)], template)
Original file line number Diff line number Diff line change
@@ -1,174 +1,12 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# HPCIC DevTools Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
#!/bin/sh


default_boot_script = """#!/bin/sh
set -eEu -o pipefail
# This is already built into the image
fluxuser=flux
fluxuid=$(id -u $fluxuser)
# IMPORTANT - this needs to match the local cluster
fluxroot=/usr
echo "Flux username: $fluxuser"
echo "Flux install root: $fluxroot"
export fluxroot
# Prepare NFS
dnf install nfs-utils -y
mkdir -p /var/nfs/home
chown nobody:nobody /var/nfs/home
ip_addr=$(hostname -I)
echo "/var/nfs/home *(rw,no_subtree_check,no_root_squash)" >> /etc/exports
firewall-cmd --add-service={nfs,nfs3,mountd,rpc-bind} --permanent
firewall-cmd --reload
systemctl enable --now nfs-server rpcbind
# TODO we can allow custom logic here if needed
echo "$fluxuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
printf "$fluxuser user identifiers:\n$(id $fluxuser)\n"
export STATE_DIR=/var/lib/flux
mkdir -p ${STATE_DIR}
mkdir -p $fluxroot/etc/flux/system/conf.d
# --cores=IDS Assign cores with IDS to each rank in R, so we assign 0-(N-1) to each host
echo "flux R encode --hosts=NODELIST"
flux R encode --hosts=NODELIST --local > $fluxroot/etc/flux/system/R
printf "\n📦 Resources\n"
cat $fluxroot/etc/flux/system/R
mkdir -p /etc/flux/imp/conf.d/
cat <<EOT >> /etc/flux/imp/conf.d/imp.toml
[exec]
allowed-users = [ "$fluxuser", "root" ]
allowed-shells = [ "$fluxroot/libexec/flux/flux-shell" ]
EOT
printf "\n🦊 Independent Minister of Privilege\n"
cat /etc/flux/imp/conf.d/imp.toml
cat <<EOT >> /tmp/system.toml
[exec]
imp = "$fluxroot/libexec/flux/flux-imp"
# Allow users other than the instance owner (guests) to connect to Flux
# Optionally, root may be given "owner privileges" for convenience
[access]
allow-guest-user = true
allow-root-owner = true
# Point to shared network certificate generated flux-keygen(1).
# Define the network endpoints for Flux's tree based overlay network
# and inform Flux of the hostnames that will start flux-broker(1).
[bootstrap]
curve_cert = "$fluxroot/etc/flux/system/curve.cert"
default_port = 8050
default_bind = "tcp://eth0:%p"
default_connect = "tcp://%h:%p"
hosts = [{host="NODELIST"}]
# Speed up detection of crashed network peers (system default is around 20m)
[tbon]
tcp_user_timeout = "2m"
# Point to resource definition generated with flux-R(1).
# Uncomment to exclude nodes (e.g. mgmt, login), from eligibility to run jobs.
[resource]
path = "$fluxroot/etc/flux/system/R"
# Remove inactive jobs from the KVS after one week.
[job-manager]
inactive-age-limit = "7d"
EOT
mv /tmp/system.toml $fluxroot/etc/flux/system/conf.d/system.toml
echo "🐸 Broker Configuration"
cat $fluxroot/etc/flux/system/conf.d/system.toml
# If we are communicating via the flux uri this service needs to be started
chmod u+s $fluxroot/libexec/flux/flux-imp
chmod 4755 $fluxroot/libexec/flux/flux-imp
chmod 0644 /etc/flux/imp/conf.d/imp.toml
# sudo chown -R $fluxuser:$fluxuser $fluxroot/etc/flux/system/conf.d
cat << "PYTHON_DECODING_SCRIPT" > /tmp/convert_curve_cert.py
#!/usr/bin/env python3
import sys
import base64
string = sys.argv[1]
dest = sys.argv[2]
with open(dest, 'w') as fd:
fd.write(base64.b64decode(string).decode('utf-8'))
PYTHON_DECODING_SCRIPT
python3 /tmp/convert_curve_cert.py "CURVECERT" /tmp/curve.cert
mv /tmp/curve.cert $fluxroot/etc/flux/system/curve.cert
chmod u=r,g=,o= $fluxroot/etc/flux/system/curve.cert
chown $fluxuser:$fluxuser $fluxroot/etc/flux/system/curve.cert
# munge.key gets shipped with image, needs to be same / shared
# /usr/sbin/create-munge-key
service munge start
# The rundir needs to be created first, and owned by user flux
# Along with the state directory and curve certificate
mkdir -p /run/flux
sudo chown -R $fluxuser:$fluxuser /run/flux
# Remove group and other read
chmod o-r $fluxroot/etc/flux/system/curve.cert
chmod g-r $fluxroot/etc/flux/system/curve.cert
chown -R $fluxuid /run/flux ${STATE_DIR} $fluxroot/etc/flux/system/curve.cert
printf "\n✨ Curve certificate generated by helper pod\n"
cat $fluxroot/etc/flux/system/curve.cert
cat << "FIRST_BOOT_UNIT" > /etc/systemd/system/flux-start.service
[Unit]
Description=Flux message broker
Wants=munge.service
[Service]
Type=simple
NotifyAccess=main
TimeoutStopSec=90
KillMode=mixed
ExecStart=/usr/bin/flux start --broker-opts --config /usr/etc/flux/system/conf.d -Stbon.fanout=256 -Srundir=/run/flux -Sbroker.rc2_none -Sstatedir=/var/lib/flux -Slocal-uri=local:///run/flux/local -Stbon.connect_timeout=5s -Stbon.zmqdebug=1 -Slog-stderr-level=7 -Slog-stderr-mode=local
SyslogIdentifier=flux
Restart=always
RestartSec=5s
RestartPreventExitStatus=42
SuccessExitStatus=42
User=flux
Group=flux
PermissionsStartOnly=true
Delegate=yes
[Install]
WantedBy=multi-user.target
FIRST_BOOT_UNIT
systemctl enable flux-start.service
systemctl start flux-start.service
"""

bursting_boot_script = """#!/bin/sh
# Burst boot, requires:
# CURVECERT: base64 encoded curve certificate
# NODELIST: with complete list of nodes
# LOGLEVEL: desired flux log level
# MUNGEKEY: bytes string for the munge key
# LEAD_BROKER_ADDRESS
# LEAD_BROKER_PORT

set -eEu -o pipefail

Expand Down Expand Up @@ -358,4 +196,3 @@

systemctl enable flux-start.service
systemctl start flux-start.service
"""

0 comments on commit 7784a19

Please sign in to comment.