# Module sdenv

Adapted from: http://www.idris.fr/eng/jean-zay/gpu/jean-zay-gpu-torch-multi-eng.html

In [2]:
%%writefile sdenv.py

# sdenv module

import os, hostlist, socket

# Defaults used when the module is loaded outside a SLURM job step (no
# SLURM_PROCID in the environment): a single process running on this host.
# Without them, every name below except job_partition/hostnames would be
# undefined and the self-check at the bottom of the module would fail with
# NameError.
# The host name doubles as the partition name when SLURM provides none.
job_partition = os.uname()[1]
hostnames = [job_partition]
rank = 0
local_rank = 0
node_rank = 0
size = 1
cpus_per_task = 1
job_id = None  # no SLURM job

if 'SLURM_PROCID' in os.environ:
    # Global rank of this task (equivalent to the MPI rank); also exported
    # as the RANK environment variable.
    rank = int(os.environ['SLURM_PROCID'])
    os.environ['RANK'] = str(rank)
    # Rank of this task inside its node.
    local_rank = int(os.environ['SLURM_LOCALID'])
    # Rank of this node inside the job's node list.
    node_rank = int(os.environ['SLURM_NODEID'])
    # Total number of tasks; also exported as WORLD_SIZE.
    size = int(os.environ['SLURM_NTASKS'])
    os.environ['WORLD_SIZE'] = str(size)
    # SLURM only sets SLURM_CPUS_PER_TASK when --cpus-per-task is given,
    # so fall back to one CPU per task instead of raising KeyError.
    cpus_per_task = int(os.environ.get('SLURM_CPUS_PER_TASK', 1))
    job_partition = os.environ['SLURM_JOB_PARTITION']
    job_id = os.environ['SLURM_JOB_ID']
    # Expand the compact node list into one host name per node.
    hostnames = hostlist.expand_hostlist(
        os.environ['SLURM_JOB_NODELIST'])

# GPU IDs available to the job, looked up by queue (partition) name.
# The queue names should be adjusted as needed for the target cluster.
_gpu_count_by_queue = dict.fromkeys(
    ('nvidia_dev', 'nvidia', 'nvidia_small', 'nvidia_scal', 'nvidia_long'),
    2)
_gpu_count_by_queue.update(dict.fromkeys(
    ('sequana_gpu_dev', 'sequana_gpu_shared', 'sdumont18'),
    4))
# Queues that are not listed get an empty list.
gpu_ids = list(range(_gpu_count_by_queue.get(job_partition, 0)))

# Define the dataset directory.
# For simplicity, it is assumed that the scratch/ area mirrors the directory
# structure of the project area, i.e. if there is a "dataset" directory under
# /prj/..., there is also a "dataset" directory at the same relative location
# under /scratch/....
# PWD may be unset depending on how the process was launched, so fall back
# to the actual working directory instead of raising KeyError.
_workdir = os.environ.get('PWD') or os.getcwd()
# Only the leading /prj/ root is swapped; a "/prj/" component deeper in the
# path is part of the user's own directory layout and must be kept.
if _workdir.startswith('/prj/'):
    DSDIR = '/scratch/' + _workdir[len('/prj/'):]
else:
    DSDIR = _workdir
os.environ['DSDIR'] = DSDIR

# Define the MASTER: the first host of the job, resolved to an IP address.
MASTER_ADDR = socket.gethostbyname(hostnames[0])
os.environ['MASTER_ADDR'] = MASTER_ADDR

# 20324 is an arbitrary base port, offset by the lowest GPU ID to avoid port
# conflicts on the same node.
# NOTE: with the GPU lists defined in this module the lowest ID is always 0,
# so the port is effectively fixed at 20324.
# default=0 keeps the module usable when gpu_ids is empty (queue or host not
# listed above); a bare min() would raise ValueError there.
MASTER_PORT = str(20324 + int(min(gpu_ids, default=0)))
os.environ['MASTER_PORT'] = MASTER_PORT

# URL for initializing dist.init_process_group via TCP
MASTER_TCP = f'tcp://{MASTER_ADDR}:{MASTER_PORT}'

# URL for initializing dist.init_process_group via a shared file located in
# the dataset directory.
# NOTE(review): the six-slash "file://////<host>/..." form looks like the
# Windows network-share variant; on Linux "file://" + path is presumably
# what is wanted -- confirm before relying on MASTER_FILE.
MASTER_FILE = 'file://////' + MASTER_ADDR + DSDIR + '/sharedfile.txt'

# Network interface the NCCL backend should use; kept as a module constant
# and exported to the environment in one step.
os.environ['NCCL_SOCKET_IFNAME'] = NCCL_SOCKET_IFNAME = 'ib0'

if __name__ == "__main__":
    # Self-check: every task reports its ranks; the task with rank 0 also
    # prints the detected job configuration.
    print(f'rank: {rank:02}  | node_rank: {node_rank:2} '
          f'| local_rank: {local_rank:2}')
    if rank == 0:
        report = (
            ("01. job_partition: ", job_partition),
            ("02. job_id: ", job_id),
            ("03. size: ", size),
            ("04. cpus_per_task: ", cpus_per_task),
            ("05. hostnames: ", hostnames),
            ("06. gpu_ids: ", gpu_ids),
            ("07. MASTER_ADDR: ", MASTER_ADDR),
            ("08. MASTER_PORT: ", MASTER_PORT),
            ("09. MASTER_TCP: ", MASTER_TCP),
            ("10. DSDIR: ", DSDIR),
        )
        for label, value in report:
            print(label, value)

Overwriting sdenv.py


## Check if it is working

In [31]:
# Copy the module to the directory under /scratch that mirrors the current
# one ($PWD with its leading /prj removed); the batch script below runs
# sdenv.py from that location.
! cp sdenv.py /scratch${PWD#/prj}

In [32]:
%%writefile ptch.srm
#!/bin/bash
#SBATCH --job-name ptch         # SLURM_JOB_NAME
#SBATCH --partition nvidia_dev  # SLURM_JOB_PARTITION
#SBATCH --nodes=4               # SLURM_JOB_NUM_NODES
#SBATCH --ntasks-per-node=3     # SLURM_NTASKS_PER_NODE
#SBATCH --cpus-per-task=2       # SLURM_CPUS_PER_TASK
#SBATCH --time=00:01:00         # Limit execution time
#SBATCH --exclusive             # Exclusive access to nodes

# Batch script that runs sdenv.py once per task and prints the sorted
# output, framed by a summary of the job's SLURM settings.
#
# VARIABLES OF INTEREST IN THE SLURM ENVIRONMENT
# <https://slurm.schedmd.com/sbatch.html>
# SLURM_PROCID
#     The MPI rank (or relative process ID) of the current process.
# SLURM_LOCALID
#     Node local task ID for the process within a job.
# SLURM_NODEID
#     ID of the nodes allocated.

echo '========================================'
echo '- Job ID:' "$SLURM_JOB_ID"
echo '- # of nodes in the job:' "$SLURM_JOB_NUM_NODES"
echo '- # of tasks per node:' "$SLURM_NTASKS_PER_NODE"
echo '- # of tasks:' "$SLURM_NTASKS"
echo '- # of cpus per task:' "$SLURM_CPUS_PER_TASK"
echo '- Dir from which sbatch was invoked:' "${SLURM_SUBMIT_DIR##*/}"
echo -n '- Nodes allocated to the job: '
nodeset -e "$SLURM_JOB_NODELIST"

# go to the work directory from which sbatch was invoked;
# abort rather than run the job from an unexpected directory
cd "$SLURM_SUBMIT_DIR" || { echo "cannot cd to $SLURM_SUBMIT_DIR" >&2; exit 1; }

# load the Python environment
# SCR and BASE are the scratch counterparts of the current directory and of
# $HOME (leading /prj stripped, /scratch prepended)
SCR="/scratch${PWD#/prj}"
BASE="/scratch${HOME#/prj}"
source "$BASE/env2/etc/profile.d/conda.sh"
conda activate "$BASE/env2"
cd "$SCR" || { echo "cannot cd to $SCR" >&2; exit 1; }

# run
echo -n '<1. starting python script > ' && date
echo '-- output -----------------------------'

# one python process per task; sort the lines so the output is deterministic
srun python -u sdenv.py | sort

echo '-- end --------------------------------'
echo -n '<2. quit>                    ' && date

Overwriting ptch.srm


## Submit a job, wait, and print the result

In [33]:
# Submit the batch script, wait until the job has left the queue, then show
# its output file.
import time
# `!` captures the command's output as a list of lines; a[0] is sbatch's
# "Submitted batch job <id>" message.
a = !sbatch ptch.srm
print(a[0])
# Poll the queue every 5 seconds for this user's jobs named "ptch".
while True:
    time.sleep(5)
    b = ! squeue --user $USER --name=ptch
    # Fewer than two output lines: presumably only squeue's header line is
    # left, i.e. the job is no longer queued or running.
    if len(b) < 2: break
# Path prefix of the job's output file in the scratch counterpart of the
# current directory ("/scratch<PWD without leading /prj>/slurm-").
b = !echo /scratch${PWD#/prj}/slurm-
# Append the job ID (the sbatch message minus its fixed prefix) and ".out",
# then print that file.
%cat {b[0]+a[0].replace('Submitted batch job ','')}.out

Submitted batch job 10474508
- Job ID: 10474508
- # of nodes in the job: 4
- # of tasks per node: 3
- # of tasks: 12
- # of cpus per task: 2
- Dir from which sbatch was invoked: pt
- Nodes allocated to the job: sdumont3076 sdumont3077 sdumont3078 sdumont3079
<1. starting python script > Qui Mar 24 21:35:58 -03 2022
-- output -----------------------------
01. job_partition:  nvidia_dev
02. job_id:  10474508
03. size:  12
04. cpus_per_task:  2
05. hostnames:  ['sdumont3076', 'sdumont3077', 'sdumont3078', 'sdumont3079']
06. gpu_ids:  [0, 1]
07. MASTER_ADDR:  172.20.10.70
08. MASTER_PORT:  20324
09. MASTER_TCP:  tcp://172.20.10.70:20324
10. DSDIR:  /scratch/<project>/<username>/pt
rank: 00  | node_rank:  0 | local_rank:  0
rank: 01  | node_rank:  0 | local_rank:  1
rank: 02  | node_rank:  0 | local_rank:  2
rank: 03  | node_rank:  1 | local_rank:  0
rank: 04  | node_rank:  1 | local_rank:  1
rank: 05  | node_rank:  1 | local_rank:  2
rank: 06  | node_rank:  2 | local_rank:  0
rank: 07  | n