Skip to content

Commit

Permalink
Merge branch 'release/1.4.0' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
Sebastien Varrette authored and Sebastien Varrette committed Mar 30, 2021
2 parents 97cbbd6 + 4b7a6c8 commit 20cf269
Show file tree
Hide file tree
Showing 12 changed files with 503 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -6,7 +6,7 @@

Configure and manage [Slurm](https://slurm.schedmd.com/): A Highly Scalable Resource Manager

Copyright (c) 2017-2020 UL HPC Team <hpc-sysadmins@uni.lu>
Copyright (c) 2017-2021 UL HPC Team <hpc-sysadmins@uni.lu>
. see also http://hpc.uni.lu

## Overview
Expand Down
1 change: 0 additions & 1 deletion docs/overview.md

This file was deleted.

425 changes: 425 additions & 0 deletions docs/overview.md

Large diffs are not rendered by default.

11 changes: 4 additions & 7 deletions manifests/init.pp
Expand Up @@ -135,8 +135,6 @@
# @param enforcepartlimits [String] Default: 'ALL'
# @param epilog [String] Default: ''
# @param epilogslurmctld [String] Default: ''
# @param fastschedule [Integer] Default: 1
# Elligible values in [ 0, 1, 2]
# @param grestypes [Array] Default: []
# list of generic resources to be managed
# @param healthcheckinterval [Integer] Default: 30
Expand Down Expand Up @@ -280,9 +278,9 @@
# @param switchtype [String ] Default: 'none'
# Eligible values in ['nrt', 'none']
# @param taskepilog [String ] Default: ''
# @param taskplugin [String ] Default: 'cgroup'
# @param taskplugin [String or Array] Default: ['affinity','cgroup']
# Eligible values in ['affinity', 'cgroup','none']
# @param taskpluginparams [Array ] Default: ['cpusets']
# @param taskpluginparams [Array ] Default: []
# @param taskprolog [String ] Default: ''
# @param tmpfs [String ] Default: '/tmp'
# @param waittime [Integer] Default: 0
Expand Down Expand Up @@ -374,7 +372,7 @@
# lower bound (in MB) on the memory limits defined by AllowedKmemSpace.
# @param cgroup_minramspace [String] Default: '30M'
# lower bound (in MB) on the memory limits defined by AllowedRAMSpace & AllowedSwapSpace.
# @param cgroup_taskaffinity [Boolean] Default: true
# @param cgroup_taskaffinity [Boolean] Default: false
# This feature requires the Portable Hardware Locality (hwloc) library
#
############################ ####################################
Expand Down Expand Up @@ -558,7 +556,6 @@
String $enforcepartlimits = $slurm::params::enforcepartlimits,
String $epilog = $slurm::params::epilog,
String $epilogslurmctld = $slurm::params::epilogslurmctld,
Integer $fastschedule = $slurm::params::fastschedule,
Integer $getenvtimeout = $slurm::params::getenvtimeout,
Array $grestypes = $slurm::params::grestypes,
Integer $healthcheckinterval = $slurm::params::healthcheckinterval,
Expand Down Expand Up @@ -651,7 +648,7 @@
String $srunprolog = $slurm::params::srunprolog,
String $switchtype = $slurm::params::switchtype,
String $taskepilog = $slurm::params::taskepilog,
String $taskplugin = $slurm::params::taskplugin,
$taskplugin = $slurm::params::taskplugin,
Array $taskpluginparams = $slurm::params::taskpluginparams,
String $taskprolog = $slurm::params::taskprolog,
String $tmpfs = $slurm::params::tmpfs,
Expand Down
28 changes: 20 additions & 8 deletions manifests/pam.pp
Expand Up @@ -98,24 +98,36 @@

# PAM limits
# Ex: Update PAM MEMLOCK limits (required for MPI) + nproc
include ::ulimit
include ::limits

if $ulimits_source != undef {
ulimit::rule { 'slurm':
file { "${limits::limits_dir}/50_slurm.conf":
ensure => $ensure,
owner => 'root',
group => 'root',
mode => '0644',
source => $ulimits_source,

}
# ulimit::rule { 'slurm':
# ensure => $ensure,
# source => $ulimits_source,
# }
}
else
{
$ulimits.each |String $item, $value| {
ulimit::rule { "slurm-${item}":
ensure => $ensure,
ulimit_domain => '*',
ulimit_type => [ 'soft', 'hard' ],
ulimit_item => $item,
ulimit_value => $value,
limits::limits { "*/${item}":
ensure => $ensure,
both => $value
}
# ulimit::rule { "slurm-${item}":
# ensure => $ensure,
# ulimit_domain => '*',
# ulimit_type => [ 'soft', 'hard' ],
# ulimit_item => $item,
# ulimit_value => $value,
# }
}
}
}
18 changes: 9 additions & 9 deletions manifests/params.pp
Expand Up @@ -38,6 +38,7 @@
'libX11-devel',
'libssh2-devel',
'libevent-devel',
'python3'
],
default => []
}
Expand Down Expand Up @@ -170,7 +171,6 @@
$enforcepartlimits = 'ALL'
$epilog = ''
$epilogslurmctld = ''
$fastschedule = 1 # in [ 0, 1, 2]
$grestypes = [] # list of generic resources to be managed
$healthcheckinterval = 30
$healthchecknodestate = 'ANY' # in ['ALLOC', 'ANY','CYCLE','IDLE','MIXED']
Expand Down Expand Up @@ -275,8 +275,8 @@
$srunprolog = ''
$switchtype = 'none' # in ['nrt', 'none']
$taskepilog = ''
$taskplugin = 'cgroup' # in ['affinity', 'cgroup','none']
$taskpluginparams = ['cpusets']
$taskplugin = ['affinity','cgroup'] # in ['affinity', 'cgroup','none']
$taskpluginparams = []
$taskprolog = ''
$tmpfs = '/tmp'
$tresbillingweights = ''
Expand Down Expand Up @@ -359,7 +359,7 @@
$cgroup_maxkmempercent = 100 # upper bound in percent of total Kmem for a job.
$cgroup_minkmemspace = 30 # lower bound (in MB) on the memory limits defined by AllowedKmemSpace.
$cgroup_minramspace = 30 # lower bound (in MB) on the memory limits defined by AllowedRAMSpace & AllowedSwapSpace.
$cgroup_taskaffinity = true # This feature requires the Portable Hardware Locality (hwloc) library
$cgroup_taskaffinity = false # if true, this feature requires the Portable Hardware Locality (hwloc) library

###
### Generic RESource management -- gres.conf
Expand Down Expand Up @@ -429,11 +429,11 @@
default => undef
}
# Which version of Slurm to grab and build
$version = '19.05.3-2'
$version = '20.11.3'

### SLURM Sources
# Checksum for the slurm source archive (empty means no check will be done)
$src_checksum = '6fe2c6196f089f6210d5ba79e99b0656f5a527b4'
$src_checksum = 'dcf328865591b42b6c8f5586b3e396e6eb30dcd7'
$src_checksum_type = 'sha1'
# From where the Slurm sources can be downloaded
$download_baseurl = 'https://download.schedmd.com/slurm/'
Expand Down Expand Up @@ -507,9 +507,9 @@
############################
# See https://pmix.org/code/getting-the-reference-implementation/
$with_pmix = false # Whether or not using PMIx
$pmix_version = '3.1.4'
$pmix_version = '3.2.3'
# Checksum for the pmix source archive (empty means no check will be done)
$pmix_src_checksum = '0f3f575e486d8492441c34276d1d56cbb48b4c37'
$pmix_src_checksum = '97978abcd4da1b2a3d2bf2452247c4d47f8cc6a3'
$pmix_src_checksum_type = 'sha1'
# From where the PMIx sources can be downloaded
$pmix_download_baseurl = 'https://github.com/openpmix/openpmix/releases/download'
Expand Down Expand Up @@ -608,7 +608,7 @@
### SLURM DataBase Configuration (slurmdbd.conf) ###
#####################################################
$dbd_configfile = 'slurmdbd.conf'
$dbd_configfile_mode = '0400'
$dbd_configfile_mode = '0600'
$archivedir = '/tmp'
$archiveevents = false # When purging events also archive them?
$archivejobs = false # When purging jobs also archive them?
Expand Down
48 changes: 29 additions & 19 deletions manifests/pmix.pp
Expand Up @@ -26,21 +26,27 @@
# Top directory of the sources builds (i.e. RPMs, debs...)
# For instance, built RPMs will be placed under
# <builddir>/RPMS/${::architecture}
# @param do_build [Boolean] Default: true
# Do we perform the build of the OpenPMIx packages from sources or not?
# @param do_package_install [Boolean] Default: true
# Do we perform the install of the OpenPMIx packages or not?
#
# @example install version 3.1.4 of PMIx
# @example install version 3.2.3 of PMIx
#
# slurm::pmix { '3.2.3':
# ensure => 'present',
# builddir => "/root/rpmbuild/",
#
# }
#
class slurm::pmix(
String $ensure = $slurm::params::ensure,
String $version = $slurm::params::pmix_version,
String $srcdir = $slurm::params::srcdir,
String $src_checksum = $slurm::params::pmix_src_checksum,
String $src_checksum_type = $slurm::params::pmix_src_checksum_type,
String $builddir = $slurm::params::builddir,
String $ensure = $slurm::params::ensure,
String $version = $slurm::params::pmix_version,
String $srcdir = $slurm::params::srcdir,
String $src_checksum = $slurm::params::pmix_src_checksum,
String $src_checksum_type = $slurm::params::pmix_src_checksum_type,
String $builddir = $slurm::params::builddir,
Boolean $do_build = $slurm::params::do_build,
Boolean $do_package_install = $slurm::params::do_package_install,
) {
include ::slurm::params

Expand All @@ -52,19 +58,23 @@
checksum_type => $src_checksum_type,
}

# Now build them
slurm::pmix::build { $version :
ensure => $ensure,
srcdir => $srcdir,
dir => $builddir,
require => Slurm::Pmix::Download[$version],
if $do_build {
# Now build them
slurm::pmix::build { $version :
ensure => $ensure,
srcdir => $srcdir,
dir => $builddir,
require => Slurm::Pmix::Download[$version],
}
}

# And install it
slurm::pmix::install { $version :
ensure => $ensure,
builddir => $builddir,
require => Slurm::Pmix::Build[$version],
if $do_package_install {
# And install it
slurm::pmix::install { $version :
ensure => $ensure,
builddir => $builddir,
require => Slurm::Pmix::Build[$version],
}
}

}
2 changes: 1 addition & 1 deletion manifests/slurmctld.pp
Expand Up @@ -43,7 +43,7 @@
Class['slurm::install'] -> Class['slurm::config']

if $slurm::manage_firewall {
slurm::firewall { $slurm::slurmctldport:
slurm::firewall { "${slurm::slurmctldport}":
ensure => $slurm::ensure,
}
}
Expand Down
4 changes: 2 additions & 2 deletions manifests/slurmd.pp
Expand Up @@ -30,10 +30,10 @@
Class['slurm::install'] -> Class['slurm::config']

if $slurm::manage_firewall {
slurm::firewall { $slurm::slurmdport:
slurm::firewall { "${slurm::slurmdport}":
ensure => $slurm::ensure,
}
slurm::firewall { $slurm::srunportrange:
slurm::firewall { "${slurm::srunportrange}":
ensure => $slurm::ensure,
}
}
Expand Down
2 changes: 1 addition & 1 deletion manifests/slurmdbd.pp
Expand Up @@ -192,7 +192,7 @@
Class['slurm::install'] -> Class['slurm::config']

if $slurm::manage_firewall {
slurm::firewall { $dbdport:
slurm::firewall { "${dbdport}":
ensure => $slurm::ensure,
}
}
Expand Down
9 changes: 5 additions & 4 deletions metadata.json
@@ -1,6 +1,6 @@
{
"name": "ULHPC-slurm",
"version": "1.3.0",
"version": "1.4.0",
"author": "UL HPC Team",
"summary": "Configure and manage Slurm: A Highly Scalable Resource Manager",
"license": "Apache-2.0",
Expand Down Expand Up @@ -41,8 +41,8 @@
"version_requirement": ">=1.2.2 <3.0.1"
},
{
"name": "svarrette-ulimit",
"version_requirement": ">=1.0.6 <2.0.0"
"name": "saz-limits",
"version_requirement": ">=3.0.4 <4.0.0"
},
{
"name": "crayfishx-firewalld",
Expand All @@ -65,7 +65,8 @@
"operatingsystem": "CentOS",
"operatingsystemrelease": [
"6",
"7"
"7",
"8"
]
}
],
Expand Down
6 changes: 4 additions & 2 deletions templates/slurm.conf.erb
Expand Up @@ -269,7 +269,6 @@ SuspendTime=<%= scope['slurm::suspendtime'] %>
AccountingStorageEnforce=<%= scope['slurm::accountingstorageenforce'].join(',') %>
<% end -%>
AccountingStorageHost=<%= scope['slurm::accountingstoragehost'] %>
AccountingStorageLoc=slurm
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/slurmdbd
Expand Down Expand Up @@ -334,7 +333,6 @@ StateSaveLocation=<%= scope['slurm::statesavelocation'] %>
# - Backfill
# - job priority
###
FastSchedule=<%= scope['slurm::fastschedule'] %>
#SchedulerTimeSlice=30
SchedulerType=sched/<%= scope['slurm::schedulertype'] %>
<% if scope['slurm::schedulerparameters'].empty? -%>
Expand Down Expand Up @@ -434,7 +432,11 @@ SelectTypeParameters=<%= scope['slurm::selecttype_params'].join(',') %>
<% else -%>
TaskEpilog=<%= scope['slurm::taskepilog'] %>
<% end -%>
<% if scope['slurm::taskplugin'].is_a?(Array) -%>
TaskPlugin=task/<%= scope['slurm::taskplugin'].join(',task/') %>
<% else -%>
TaskPlugin=task/<%= scope['slurm::taskplugin'] %>
<% end -%>
<% if scope['slurm::taskpluginparams'].empty? -%>
#TaskPluginParam=
<% else -%>
Expand Down
5 changes: 2 additions & 3 deletions tests/vagrant/puppet/hieradata/defaults.yaml
Expand Up @@ -9,7 +9,6 @@ slurm::builddir: '/vagrant/tests/vagrant/rpmbuild'


# SLURM general settings
#slurm::version: '19.05.3-2'
slurm::uid: 900
slurm::gid: 900
slurm::service_manage: true
Expand Down Expand Up @@ -71,9 +70,9 @@ slurm::prologflags:
- 'X11'
slurm::messagetimeout: 30
slurm::resumetimeout: 600
slurm::taskplugin: 'cgroup'
slurm::taskplugin: ['affinity', 'cgroup']
slurm::taskpluginparams :
- 'cpusets'
- 'none'

###############################
### cgroup.conf settings ###
Expand Down

0 comments on commit 20cf269

Please sign in to comment.