compute-3-slurm_client.sh
#!/bin/bash
##############################################################
# Set up the Slurm client on a compute node
##############################################################
# Set up the munge and slurm users
export MUNGEUSER=991
groupadd -g $MUNGEUSER munge
useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGEUSER -g munge -s /sbin/nologin munge
export SLURMUSER=992
groupadd -g $SLURMUSER slurm
useradd -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURMUSER -g slurm -s /bin/bash slurm
# Install munge
yum -y install epel-release
yum -y install munge munge-libs munge-devel
# Create the munge key (it must be identical to /etc/munge/munge.key on the master)
echo "THIS_IS_NOT_A_VERY_SECURE_MUNGE_KEY_BUT_IT_WILL_DO" > /etc/munge/munge.key
chown munge:munge /etc/munge/munge.key
chmod 400 /etc/munge/munge.key
# Correct perms & enable service
chown -R munge: /etc/munge/ /var/log/munge/
chmod 0700 /etc/munge/ /var/log/munge/
systemctl enable munge
systemctl start munge
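# Sanity check for munge (the cross-node test assumes the master is reachable over
# SSH without a password prompt; skip it if that is not the case):
munge -n | unmunge
munge -n | ssh master unmunge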
# Install slurm RPMs (built on master)
yum -y install /nfs/slurm-rpms/slurm-*.rpm
# Write a minimal slurm.conf (its contents must match the slurm.conf on the master)
cat <<EOF > /etc/slurm/slurm.conf
ControlMachine=master
ControlAddr=10.0.4.100
MpiDefault=none
ProctrackType=proctrack/pgid
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
StateSaveLocation=/var/spool/slurmctld
SwitchType=switch/none
TaskPlugin=task/none
FastSchedule=1
SchedulerType=sched/backfill
SelectType=select/linear
AccountingStorageType=accounting_storage/none
ClusterName=dev
JobAcctGatherType=jobacct_gather/none
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdLogFile=/var/log/slurmd.log
#
# COMPUTE NODES
NodeName=compute01 NodeAddr=10.0.4.101 CPUs=1 State=UNKNOWN
NodeName=compute02 NodeAddr=10.0.4.102 CPUs=1 State=UNKNOWN
PartitionName=super Nodes=compute0[1-2] Default=YES MaxTime=INFINITE State=UP
EOF
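# SlurmdSpoolDir may not be created by the RPMs, so create it here as a precaution
# (ownership by the slurm user is an assumption consistent with SlurmUser above).
mkdir -p /var/spool/slurmd
chown slurm: /var/spool/slurmd
chmod 755 /var/spool/slurmd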
# Start slurm
systemctl enable slurmd
systemctl start slurmd
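# Confirm slurmd actually started before querying the controller.
systemctl --no-pager status slurmd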
# Check sinfo works
sinfo
# Mark node ready to rock: resume it if the controller does not report it as idle
if ! sinfo -N -h -n "$(hostname)" | grep -q idle; then
    scontrol update nodename="$(hostname)" State=resume
fi
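# Optional end-to-end smoke test (assumes the controller on the master is up and
# the "super" partition is reachable): run a trivial job on this node via Slurm.
srun -N1 -w "$(hostname)" hostname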