Skip to content

Commit

Permalink
more WIP still not working
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jun 19, 2023
1 parent a3de033 commit 98e23f7
Show file tree
Hide file tree
Showing 10 changed files with 375 additions and 12 deletions.
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ kind: Kustomization
images:
- name: controller
newName: ghcr.io/converged-computing/htcondor-operator
newTag: latest
newTag: test
8 changes: 8 additions & 0 deletions controllers/htcondor/htcondor.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ func (r *HTCondorReconciler) getConfigMap(
data["start-manager"] = managerStart
data["start-execute"] = executeStart
data["start-submit"] = submitStart

// This will be copied into
// /root/secrets/token
//token, err := r.getToken(ctx, cluster)
//if err != nil || token == "" {
// return cm, ctrl.Result{Requeue: true}, err
//}
//data[tokenKey] = token
}

// Create the config map with respective data!
Expand Down
3 changes: 3 additions & 0 deletions controllers/htcondor/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ var startSubmitTemplate string
//go:embed templates/components.sh
var startComponents string

// tokenTemplate holds the embedded contents of templates/token.sh.
//
//go:embed templates/token.sh
var tokenTemplate string

// NodeTemplate populates a node entrypoint
type NodeTemplate struct {
Node api.Node
Expand Down
84 changes: 78 additions & 6 deletions controllers/htcondor/templates/components.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
echo "Sleeping for networking..."
sleep 3

# Copy over our token, and remove newline at top
# mkdir -p /root/secrets
# tr --delete '\n' < /htcondor_operator/token > /root/secrets/token

# Initialization commands
{{ .Node.Commands.Init}} > /dev/null 2>&1

Expand All @@ -20,17 +24,35 @@ mkdir -p ${workdir}
{{end}}

{{define "config"}}

# Setting USE_POOL_PASSWORD=yes does not trigger the right set of conditions
# in /etc/condor/config.d/01-security.conf: with "yes" the basic cluster
# works, but the parallel universe does not.
export USE_POOL_PASSWORD=yes

# Shared logic to write a config across nodes
echo "NEGOTIATOR_INTERVAL=10" >> /etc/condor/condor_config.local
# echo "SEC_PASSWORD_FILE = $(LOCAL_DIR)/lib/condor/pool_password" >> /etc/condor/condor_config.local

# Generate password
mkdir -p /root/secrets
# chmod 0700 /root/secrets
# note that this container setup seems to already be using a USE_POOL_PASSWORD with host auth
# These don't seem to work for this setup
# echo "use security:host_based" >> /etc/condor/config.d/00-insecure.config

# The top one is shown for docker-compose, the second in the config example
mkdir -p /root/secrets
condor_store_cred -p {{.Spec.Config.Password}} -f /root/secrets/pool_password
# condor_store_cred -p {{.Spec.Config.Password}} -f /var/lib/condor/pool_password

# Try updating to allow TOKEN and PASSWORD
echo "SEC_DEFAULT_AUTHENTICATION_METHODS = FS, PASSWORD, TOKEN, IDTOKENS" >> /etc/condor/config.d/01-security.conf
echo "ALLOW_WRITE = *" >> /etc/condor/config.d/01-security.conf

# Austin DANGER POWERS!
echo 'ALLOW_ADVERTISE_STARTD = condor_pool@*/* $(ALLOW_ADVERTISE_STARTD)' >> /etc/condor/config.d/01-security.conf
echo 'ALLOW_ADVERTISE_SCHEDD = condor_pool@*/* $(ALLOW_ADVERTISE_SCHEDD)' >> /etc/condor/config.d/01-security.conf
echo 'ALLOW_ADVERTISE_MASTER = condor_pool@*/* $(ALLOW_ADVERTISE_MASTER)' >> /etc/condor/config.d/01-security.conf

# ALLOW_WRITE = *

# TODO this should be actual cpus, not nodes
export NUM_CPUS={{.Spec.Size}}
{{end}}
Expand All @@ -40,11 +62,61 @@ export NUM_CPUS={{.Spec.Size}}
{{ end }}

{{define "condor-host"}}

export USE_POOL_PASSWORD=yes
export CONDOR_HOST={{ .ClusterName }}-manager-0-0.{{ .Spec.ServiceName }}.{{ .Namespace }}.svc.cluster.local
# export CONDOR_SERVICE_HOST=${CONDOR_HOST}
{{ end }}

{{define "approve-tokens"}}
# This snippet loops forever and approves every pending token request it finds.
# Install jq for now - a hack to parse the JSON list of pending requests
yum install -y jq

# Ideally we can provide this via a config or the condor_token_request_auto_approve that takes a hostname
while true
do
    # The jq filter is quoted so the shell never treats ".[]" as a glob pattern,
    # and the request id is quoted so it is always passed as a single argument.
    for requestid in $(condor_token_request_list -json | jq -r '.[].RequestId'); do
        echo "yes" | condor_token_request_approve -reqid "${requestid}"
    done
    sleep 15
done
{{end}}

{{define "security-config"}}
# Append the security policy below to HTCondor's 01-security.conf.
# The heredoc delimiter is quoted ('EOF') so the shell writes the body verbatim:
# every $(NAME) below is an HTCondor macro reference, not a shell command
# substitution, and the trailing backslashes are line continuations that must
# reach the config file unchanged.
# NOTE(review): ALLOW_ADVERTISE_SCHEDD extends $(ALLOW_ADVERTISE_STARTD) rather
# than $(ALLOW_ADVERTISE_SCHEDD) - confirm this is intended.
tee -a /etc/condor/config.d/01-security.conf <<'EOF'
# Require authentication and integrity checking by default.
use SECURITY : With_Authentication
# Host-based security is fine in a container environment, especially if
# we're also using a pool password or a token.
use SECURITY : Host_Based
# We also want root to be able to do reconfigs, restarts, etc.
ALLOW_ADMINISTRATOR = root@$(FULL_HOSTNAME) condor@$(FULL_HOSTNAME) $(ALLOW_ADMINISTRATOR)
# SEC_DEFAULT_AUTHENTICATION_METHODS = FS, PASSWORD, TOKEN
# ALLOW_ADVERTISE_STARTD = condor_pool@*/* $(ALLOW_ADVERTISE_STARTD)
# ALLOW_ADVERTISE_SCHEDD = condor_pool@*/* $(ALLOW_ADVERTISE_SCHEDD)
# ALLOW_ADVERTISE_MASTER = condor_pool@*/* $(ALLOW_ADVERTISE_MASTER)
# Allow public reads and writes
ALLOW_READ = *
ALLOW_WRITE = *
SEC_READ_AUTHENTICATION = OPTIONAL
ALLOW_ADVERTISE_MASTER = \
$(ALLOW_ADVERTISE_MASTER) \
$(ALLOW_WRITE_COLLECTOR) \
dockerworker@example.net
ALLOW_ADVERTISE_STARTD = \
$(ALLOW_ADVERTISE_STARTD) \
$(ALLOW_WRITE_COLLECTOR) \
dockerworker@example.net
ALLOW_ADVERTISE_SCHEDD = \
$(ALLOW_ADVERTISE_STARTD) \
$(ALLOW_WRITE_COLLECTOR) \
dockersubmit@example.net
EOF
{{end}}
# Ohno, not DNS again!
# 06/18/23 22:49:02 WARNING: Saw slow DNS query, which may impact entire system: getaddrinfo(htcondor-sample-manager-0-0.htc-service.htcondor-operator.svc.cluster.local) took 3.918414 seconds.
6 changes: 6 additions & 0 deletions controllers/htcondor/templates/execute.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ echo "Hello, I am an execute note with $(hostname)"
# Environment variables specific to submit
{{template "condor-host" . }}

# Target nodes for dedicated scheduler (allows for parallel universe for MPI)
# The variable CONDOR_HOST is set in the snippet above
# https://htcondor.readthedocs.io/en/latest/admin-manual/setting-up-special-environments.html#configuration-examples-for-dedicated-resources
echo "DedicatedScheduler = \"DedicatedScheduler@${CONDOR_HOST}\"" >> /etc/condor/condor_config.local
# Single quotes keep $(STARTD_ATTRS) literal: it is an HTCondor macro reference
# and must not be evaluated by the shell as a command substitution.
echo 'STARTD_ATTRS = $(STARTD_ATTRS), DedicatedScheduler' >> /etc/condor/condor_config.local

# https://github.com/htcondor/htcondor/blob/main/build/docker/services/base/start.sh
exec bash -x /start.sh

Expand Down
5 changes: 1 addition & 4 deletions controllers/htcondor/templates/manager.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
#!/bin/sh

echo "Hello, I am a server with $(hostname)"
echo "Hello, I am a config manager node with $(hostname)"

# This script handles shared start logic
{{template "init" .}}

{{template "config" .}}

export USE_POOL_PASSWORD=yes
condor_store_cred -p password -f /htcondor_operator/pool_password

# See https://github.com/htcondor/htcondor/blob/main/build/docker/services/base/start.sh
exec bash -x /start.sh

Expand Down
21 changes: 21 additions & 0 deletions controllers/htcondor/templates/token.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh

# Start the HTCondor services in the background, then keep trying to create
# a token for the execute nodes until the attempt succeeds.

export USE_POOL_PASSWORD=yes

# Make sure the secrets directory exists before storing the pool password in it
mkdir -p /root/secrets
condor_store_cred -p {{.Spec.Config.Password}} -f /root/secrets/pool_password

# Run the service start script in the background so this script can continue
# on to token creation below.
bash -x /start.sh &

# Create a token for the execute nodes.
# And this will bind to /htcondor_operator and be copied to /root/secrets/token
# Retry every 2 seconds until condor_token_create exits with status 0.
until condor_token_create -authz ADVERTISE_MASTER -authz ADVERTISE_STARTD -authz READ -identity dockerworker@example.net -token dockerworker_token
do
    echo "Waiting for cluster to become ready to generate token..."
    sleep 2
done
echo "HTCondor properly configured"

# Marker line printed immediately before the token contents
echo "CUT HERE"
cat /etc/condor/tokens.d/dockerworker_token

0 comments on commit 98e23f7

Please sign in to comment.