# Stencil Numba GPU B715 MPI

In [11]:
%%writefile ng2.py
import numpy as np, math
from time import time
from mpi4py import MPI
from numba import cuda, njit, prange, config

# parameters
n            = 4800    # n x n grid
energy       = 1.0     # energy to be injected per iteration
niters       = 500     # number of iterations
# initialize three heat sources (global grid coordinates)
nsources     = 3       # number of sources of energy
# 64-bit coordinates: np.int16 would overflow for n > 32767
sources      = np.array(
    [ [n//2, n//2], [n//3, n//3], [n*4//5, n*8//9] ], np.int64)

# configure blocks & grids
## set the number of threads in a block
threads_per_block = (16, 16)    # based on trial and error
## calculate the number of thread blocks in the grid; the grid has to cover
## the n x n cells plus a 1-wide border on each side. Only the extent is
## needed, so no (n+2) x (n+2) host array is allocated just to read its shape.
blocks_per_grid_x = math.ceil((n + 2) / threads_per_block[0])
blocks_per_grid_y = math.ceil((n + 2) / threads_per_block[1])
blocks_per_grid   = (blocks_per_grid_x, blocks_per_grid_y)

# computationally intensive core
@cuda.jit
def kernel(A, B):
    """Write one stencil update of B into the interior cells of A.

    Each thread handles one cell (i, j). The outermost row/column on every
    side is left untouched. The upper bound is taken per axis so that
    rectangular arrays are handled as well as square ones.
    """
    last_i = A.shape[0] - 1
    last_j = A.shape[1] - 1
    i, j = cuda.grid(2)
    # threads that fall on the border or outside the array do nothing
    if (i > 0 and j > 0) and (i < last_i and j < last_j) :
        A[i,j]=B[i,j]*.5+(B[i-1,j]+B[i+1,j]+B[i,j-1]+B[i,j+1])*.125

# start of main routine

#---mpi4py---
comm  = MPI.COMM_WORLD            # MPI default communicator
size  = comm.Get_size()           # MPI size
rank  = comm.Get_rank()           # MPI rank
name  = MPI.Get_processor_name()  # core hostname (eg sdumont3170)

#Only 2 processes per node are selected via Slurm. Within a node, color
#rank 0 corresponds to the first process of this node, and color rank 1
#corresponds to the second process of this node, and the other nodes are
#similar. Example:
#  node      rank  color rank
#----------- ----  ----------
#sdumont3170   0        0
#sdumont3170   1        1
#sdumont3171   2        0
#sdumont3171   3        1
#sdumont3172   4        0
#sdumont3172   5        1
#sdumont3173   6        0
#sdumont3173   7        1
#The per-node communicator is requested from MPI itself instead of being
#derived from the digits of the hostname, so it does not depend on how the
#nodes happen to be named.
scomm = comm.Split_type(MPI.COMM_TYPE_SHARED)  # new communicator for the node
crank = scomm.Get_rank()          # get the node color rank

#---numba.cuda---
#In this implementation, Slurm is configured to run only 2 processes on
#each node. For each of these processes (cores), a single GPU is
#associated. Thus, within a node, color rank 0 is associated with GPU 0,
#and color rank 1 is associated with GPU 1.
cuda.select_device(crank)         # 'color rank' 0 = 'gpu id' 0, etc.
cid = cuda.current_context().device.id

# timers exist on rank 0 only; every later use is guarded by `if not rank`
if rank == 0 :
    tt = -time()    # total wall time (completed with `tt += time()` at the end)
    tk = tc = te = 0    # kernel / host<->device copy / energy insertion time

# lay the ranks out on a px x py process grid; rank = ry * px + rx
pdims  = MPI.Compute_dims(size, 2)
px, py = pdims
ry, rx = divmod(rank, px)

# the four neighbours; MPI.PROC_NULL marks a missing neighbour at the border
north = (ry - 1) * px + rx if ry > 0      else MPI.PROC_NULL
south = (ry + 1) * px + rx if ry + 1 < py else MPI.PROC_NULL
west  = rank - 1           if rx > 0      else MPI.PROC_NULL
east  = rank + 1           if rx + 1 < px else MPI.PROC_NULL

# decompose the domain
# NOTE(review): integer division drops the last n % px (n % py) cells when
# n is not a multiple of the process-grid dimension.
bx, by     = n // px, n // py          # block size in x and y
offx, offy = rx * bx + 1, ry * by + 1  # global offset of my first cell

# sources in my area, local to my rank
locnsources = 0
locsources  = np.empty((nsources, 2), np.int64)

# determine which sources are in my patch and store their indices into the
# local working arrays (interior cells are 1..bx by 1..by; index 0 and
# index bx+1 / by+1 are the halo)
for i in range(nsources) :
    locx = sources[i, 0] - offx
    locy = sources[i, 1] - offy
    # half-open ranges: a source sitting on a patch edge belongs to exactly
    # one rank and can never map onto a halo cell or wrap to index -1
    if 0 <= locx < bx and 0 <= locy < by :
        locsources[locnsources, 0] = locx + 1
        locsources[locnsources, 1] = locy + 1
        locnsources += 1

# working arrays with 1-wide halo zones; axis 0 is x (bx), axis 1 is y (by)
anew = np.zeros((bx+2, by+2), np.float64)
aold = np.zeros((bx+2, by+2), np.float64)

# system total heat
rheat = np.zeros(1, np.float64)
bheat = np.zeros(1, np.float64)

# launch grid covering the local patch only (not the whole n x n domain)
local_blocks_per_grid = (math.ceil((bx + 2) / threads_per_block[0]),
                         math.ceil((by + 2) / threads_per_block[1]))

def exchange_halo(a) :
    """Swap the 1-wide boundary of `a` with the existing neighbours, in place.

    For every neighbour that is not MPI.PROC_NULL the outermost interior
    row/column facing it is sent and the halo row/column on that side is
    overwritten with what the neighbour sent. North/south are the
    neighbours in y (axis 1, extent by); west/east are the neighbours in
    x (axis 0, extent bx). Halo cells on a side without a neighbour are
    left unchanged.

    NOTE(review): the lowercase isend/irecv pickle the slices; confirm the
    default receive buffer of the installed mpi4py is large enough for
    the patch sizes in use.
    """
    xs = slice(1, bx + 1)
    ys = slice(1, by + 1)
    # (neighbour rank, boundary to send, halo to fill)
    links = (
        (north, (xs, 1),  (xs, 0)),
        (south, (xs, by), (xs, by + 1)),
        (east,  (bx, ys), (bx + 1, ys)),
        (west,  (1, ys),  (0, ys)),
    )
    pending = []
    for peer, edge, halo in links :
        if peer == MPI.PROC_NULL :
            continue
        rreq = comm.irecv(source=peer, tag=1)
        sreq = comm.isend(a[edge], dest=peer, tag=1)
        pending.append((halo, rreq, sreq))
    # wait for the end of communication (every send AND every receive)
    for halo, rreq, sreq in pending :
        sreq.wait()
        a[halo] = rreq.wait()

# copy the first arrays to the device
if not rank : tc -= time()
anew_global_mem    = cuda.to_device(anew)
aold_global_mem    = cuda.to_device(aold)
if not rank : tc += time()

# main loop: every pass performs two iterations (aold -> anew -> aold)
# NOTE(review): CUDA kernel launches are presumably asynchronous, in which
# case TK measures mostly launch overhead and the wait is accounted in the
# following copy_to_host (TC) - confirm before interpreting the timings.
for _ in range(0, niters, 2) :

    # exchange data with neighbors
    exchange_halo(aold)

    # copy the received array to the device
    if not rank : tc -= time()
    aold_global_mem = cuda.to_device(aold)
    if not rank : tc += time()

    # update grid
    if not rank : tk -= time()
    kernel[local_blocks_per_grid, threads_per_block](
        anew_global_mem, aold_global_mem)
    if not rank : tk += time()

    # copy the result back to the host
    if not rank : tc -= time()
    anew = anew_global_mem.copy_to_host()
    if not rank : tc += time()

    # refresh heat sources
    if not rank : te -= time()
    for i in range(locnsources) :
        anew[locsources[i, 0], locsources[i, 1]] += energy
    if not rank : te += time()

    # exchange data with neighbors
    exchange_halo(anew)

    # copy the received array to the device
    if not rank : tc -= time()
    anew_global_mem = cuda.to_device(anew)
    if not rank : tc += time()

    # update grid
    if not rank : tk -= time()
    kernel[local_blocks_per_grid, threads_per_block](
        aold_global_mem, anew_global_mem)
    if not rank : tk += time()

    # copy the result back to the host
    if not rank : tc -= time()
    aold = aold_global_mem.copy_to_host()
    if not rank : tc += time()

    # refresh heat sources
    if not rank : te -= time()
    for i in range(locnsources) :
        aold[locsources[i, 0], locsources[i, 1]] += energy
    if not rank : te += time()

# end for

# total heat: every rank sums its interior cells (halo excluded) and the
# partial sums are reduced into rheat on rank 0
bheat[0] = aold[1:-1, 1:-1].sum()
comm.Reduce(bheat, rheat)

# show the result; the leading "N." lets `sort` put the lines in order
print(f"3. {name:11s}   {rank:02d}    {crank:02d}   {cid:02d}")
if rank == 0 :
    tt += time()    # close the total-time measurement
    for header in ("1. hostname    rank crank  cid",
                   "2. ----------- ---- ----- ----",
                   "4. ---------------------------") :
        print(header)
    summary = (
        f"5. Heat:{rheat[0]:.4f}",
        f"TT:{tt:.4f}",
        f"TK:{tk:.4f}",
        f"TC:{tc:.4f}",
        f"TE:{te:.4f}",
        f"MPI:{size}",
    )
    print(", ".join(summary))

Overwriting ng2.py


In [124]:
# this cell was only a quick test
%%writefile xxxxng2.py
import numpy as np, math
from time import time
from mpi4py import MPI
from numba import cuda, njit, prange, config

# parameters
n            = 480    # n x n grid
energy       = 1.0     # energy to be injected per iteration
niters       = 500     # number of iterations
# initialize three heat sources (global grid coordinates)
nsources     = 3       # number of sources of energy
# 64-bit coordinates: np.int16 would overflow for n > 32767
sources      = np.array(
    [ [n//2, n//2], [n//3, n//3], [n*4//5, n*8//9] ], np.int64)

# configure blocks & grids
## set the number of threads in a block
threads_per_block = (16, 16)    # based on trial and error
## calculate the number of thread blocks in the grid; the grid has to cover
## the n x n cells plus a 1-wide border on each side. Only the extent is
## needed, so no (n+2) x (n+2) host array is allocated just to read its shape.
blocks_per_grid_x = math.ceil((n + 2) / threads_per_block[0])
blocks_per_grid_y = math.ceil((n + 2) / threads_per_block[1])
blocks_per_grid   = (blocks_per_grid_x, blocks_per_grid_y)

# computationally intensive core
@cuda.jit
def kernel(A, B):
    """Write one stencil update of B into the interior cells of A.

    Each thread handles one cell (i, j). The outermost row/column on every
    side is left untouched. The upper bound is taken per axis so that
    rectangular arrays are handled as well as square ones.
    """
    last_i = A.shape[0] - 1
    last_j = A.shape[1] - 1
    i, j = cuda.grid(2)
    # threads that fall on the border or outside the array do nothing
    if (i > 0 and j > 0) and (i < last_i and j < last_j) :
        A[i,j]=B[i,j]*.5+(B[i-1,j]+B[i+1,j]+B[i,j-1]+B[i,j+1])*.125

# start of main routine

#---mpi4py---
comm  = MPI.COMM_WORLD            # MPI default communicator
size  = comm.Get_size()           # MPI size
rank  = comm.Get_rank()           # MPI rank
name  = MPI.Get_processor_name()  # core hostname (eg sdumont3170)

#Only 2 processes per node are selected via Slurm. Within a node, color
#rank 0 corresponds to the first process of this node, and color rank 1
#corresponds to the second process of this node, and the other nodes are
#similar. Example:
#  node      rank  color rank
#----------- ----  ----------
#sdumont3170   0        0
#sdumont3170   1        1
#sdumont3171   2        0
#sdumont3171   3        1
#sdumont3172   4        0
#sdumont3172   5        1
#sdumont3173   6        0
#sdumont3173   7        1
for i, c in enumerate(name) :     # find first digit in hostname
    if c.isdigit() :
        break
mcol  = int(name[i:])             # extract number from hostname
scomm = comm.Split(color = mcol)  # new communicator for the node
crank = scomm.Get_rank()          # get the node color rank

#---numba.cuda---
#In this implementation, Slurm is configured to run only 2 processes on
#each node. For each of these processes (cores), a single GPU is
#associated. Thus, within a node, color rank 0 is associated with GPU 0,
#and color rank 1 is associated with GPU 1.

# Select the device BEFORE asking for the current context: querying the
# context first creates one on the default device, so `cid` would not
# describe the GPU chosen for this rank and the later selection of a
# different device fails.
cuda.select_device(crank)         # 'color rank' 0 = 'gpu id' 0, etc.
cid = cuda.current_context().device.id

print(size, rank, name, mcol, crank, cid)


Overwriting ng2.py


## Para testes:

In [11]:
%%writefile ng3.srm
#!/bin/bash
#SBATCH --job-name ng3         # Job name
#SBATCH --partition nvidia_dev # Select partition
#SBATCH --ntasks-per-node=2    # Tasks per node
#SBATCH --nodes=2              # Minimum to be allocated
#SBATCH --ntasks=4             # Total tasks
#SBATCH --time=00:01:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes

# Diagnostic job: prints the job description, the host name and the
# nvidia-smi report of the first node. The srun line is left disabled.

echo '========================================'
echo '- Job ID:' "$SLURM_JOB_ID"
echo '- Tasks per node:' "$SLURM_NTASKS_PER_NODE"
echo '- # of nodes in the job:' "$SLURM_JOB_NUM_NODES"
echo '- # of tasks:' "$SLURM_NTASKS"
echo '- Partition:' "$SLURM_JOB_PARTITION"
echo '- Dir from which sbatch was invoked:' "${SLURM_SUBMIT_DIR##*/}"
cd "$SLURM_SUBMIT_DIR"
echo -n '- List of nodes allocated to the job: '
nodeset -e "$SLURM_JOB_NODELIST"

# Environment
echo '-- modules ----------------------------'
echo 'conda activate env2, --stack env3'

# The scratch area mirrors the home directory: /prj/... -> /scratch/...
cd || exit 1
SCR=/scratch${PWD#/prj}
cd "$SCR" || exit 1
source "$SCR/env2/etc/profile.d/conda.sh"
conda activate "$SCR/env2"
conda activate --stack "$SCR/env3"
# stop here rather than run from an unexpected directory
cd "$SCR/b715" || exit 1

# Executable
EXEC="python ng2.py"

hostname
nvidia-smi


# Start
OPT='--mpi=pmi2 --cpu_bind=cores --distribution=block:cyclic'
echo '-- run --------------------------------'
echo '$ srun -n' "$SLURM_NTASKS" "${EXEC##*/}"
echo '-- output -----------------------------'
# srun  $OPT  -n $SLURM_NTASKS  $EXEC  | sort
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

Writing ng3.srm


In [12]:
! cp ng2.py /scratch${PWD#/prj}

In [13]:
%%writefile ng2.srm
#!/bin/bash
#SBATCH --job-name ng2         # Job name
#SBATCH --partition nvidia_dev # Select partition
#SBATCH --ntasks-per-node=2    # Tasks per node
#SBATCH --nodes=2              # Minimum to be allocated
#SBATCH --ntasks=4             # Total tasks
#SBATCH --time=00:01:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes

# Batch job: prints the job description, activates the conda environments
# kept in the scratch area and runs ng2.py on every task through srun.

echo '========================================'
echo '- Job ID:' "$SLURM_JOB_ID"
echo '- Tasks per node:' "$SLURM_NTASKS_PER_NODE"
echo '- # of nodes in the job:' "$SLURM_JOB_NUM_NODES"
echo '- # of tasks:' "$SLURM_NTASKS"
echo '- Partition:' "$SLURM_JOB_PARTITION"
echo '- Dir from which sbatch was invoked:' "${SLURM_SUBMIT_DIR##*/}"
cd "$SLURM_SUBMIT_DIR"
echo -n '- List of nodes allocated to the job: '
nodeset -e "$SLURM_JOB_NODELIST"

# Environment
echo '-- modules ----------------------------'
echo 'conda activate env2, --stack env3'

# The scratch area mirrors the home directory: /prj/... -> /scratch/...
cd || exit 1
SCR=/scratch${PWD#/prj}
cd "$SCR" || exit 1
source "$SCR/env2/etc/profile.d/conda.sh"
conda activate "$SCR/env2"
conda activate --stack "$SCR/env3"
# stop here rather than run a stale or missing ng2.py from another directory
cd "$SCR/b715" || exit 1

# Executable
EXEC="python ng2.py"

# Start
OPT='--mpi=pmi2 --cpu_bind=cores --distribution=block:cyclic'
echo '-- run --------------------------------'
echo '$ srun -n' "$SLURM_NTASKS" "${EXEC##*/}"
echo '-- output -----------------------------'
# $OPT and $EXEC are intentionally unquoted: each holds several words that
# must be split into separate arguments. `sort` orders the numbered lines.
srun  $OPT  -n "$SLURM_NTASKS"  $EXEC  | sort
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

Overwriting ng2.srm


## 1x1=1(1)

In [5]:
! sbatch  --ntasks-per-node=1  --nodes=1  --ntasks=1  ng2.srm

Submitted batch job 10351723


In [7]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T09:46:33  10351723  nvidia_dev   ng2   R  0:02     1   24


In [9]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS


In [10]:
! cat /scratch${PWD#/prj}/slurm-10351723.out

- Job ID: 10351723
- Tasks per node: 1
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3054
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 1 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3054   00    00   00
4. ---------------------------
5. Heat:1500.0000, TT:105.5256, TK:2.3658, TC:103.0791, TE:0.0439, MPI:1
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [11]:
! sbatch  --ntasks-per-node=1  --nodes=1  --ntasks=1  ng2.srm

Submitted batch job 10351740


In [12]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T09:50:19  10351740  nvidia_dev   ng2   R  0:02     1   24


In [14]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS


In [15]:
! cat /scratch${PWD#/prj}/slurm-10351740.out

- Job ID: 10351740
- Tasks per node: 1
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3054
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 1 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3054   00    00   00
4. ---------------------------
5. Heat:1500.0000, TT:105.1317, TK:2.2101, TC:102.8419, TE:0.0428, MPI:1
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [16]:
! sbatch  --ntasks-per-node=1  --nodes=1  --ntasks=1  ng2.srm

Submitted batch job 10351767


In [17]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T09:54:32  10351767  nvidia_dev   ng2   R  0:01     1   24


In [20]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS


In [21]:
! cat /scratch${PWD#/prj}/slurm-10351767.out

- Job ID: 10351767
- Tasks per node: 1
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3054
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 1 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3054   00    00   00
4. ---------------------------
5. Heat:1500.0000, TT:103.7909, TK:0.7349, TC:102.9773, TE:0.0415, MPI:1
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## 2x2=4(2)

In [141]:
! sbatch  ng2.srm

Submitted batch job 10352423


In [142]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T11:52:48  10352423  nvidia_dev   ng2   R  0:01     2   48


In [145]:
! squeue --name ng2 --format "%.19S  %.8i  %.10P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS


# 211218: a 2ª Tesla K40 sumiu...

In [146]:
! cat /scratch${PWD#/prj}/slurm-10352423.out

- Job ID: 10352423
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3170 sdumont3171
-- lscpu ------------------------------
Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                24
On-line CPU(s) list:   0-23
Thread(s) per core:    1
Core(s) per socket:    12
Socket(s):             2
NUMA node(s):          2
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 62
Model name:            Intel(R) Xeon(R) CPU E5-2695 v2 @ 2.40GHz
Stepping:              4
CPU MHz:               2846.044
CPU max MHz:           3200,0000
CPU min MHz:           1200,0000
BogoMIPS:              4799.74
Virtualization:        VT-x
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              30720K
NUMA node0 CPU(s):     0-11
NUMA node1 CPU(s):     12-23
Flags:     

# fila nvidia_small: também está faltando uma K40

In [148]:
! sbatch  --partition=nvidia_small  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10352488


In [155]:
! squeue --name ng2 --format "%.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T12:03:58  10352488  nvidia_sma   ng2   R  0:04     2   48


In [157]:
! squeue -u $(whoami) -h -r | wc -l

0


In [158]:
! squeue --partition=nvidia_small -h -r | wc -l

1


In [160]:
! squeue --name ng2 --format "%.19V %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

         START_TIME     JOBID   PARTITION  NAME  ST  TIME NODES CPUS


In [161]:
! cat /scratch${PWD#/prj}/slurm-10352488.out

- Job ID: 10352488
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3170 sdumont3171
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 4 python ng2.py
-- output -----------------------------
Traceback (most recent call last):
  File "/scratch/ampemi/xxxx.xxxx/b715/ng2.py", line 72, in <module>
    cuda.select_device(crank)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/api.py", line 452, in select_device
    context = devices.get_context(device_id)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/cudadrv/devices.py", line 212, in get_context
    return _runtime.get_or_create_context(devnum)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/cudadrv/devices.py", line 142, in get_or_create_context
    return self._activate_context

# module load não está mais funcionando no SDumont

- cudnn/7.6_cuda-10.0 não funciona.
- mudar o PATH também não funciona.
- fica preso na versão 11.2 do CUDA e em uma única GPU.

In [2]:
! sbatch  --partition=nvidia_small  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10352851


In [6]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T13:14:32  2021-12-18T13:14:37  10352851  nvidia_small   ng2   R  0:06     2   48


In [7]:
! squeue -u $(whoami) -h -r | wc -l

1


In [8]:
! squeue --partition=nvidia_small -h -r | wc -l

2


In [11]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS


In [12]:
! cat /scratch${PWD#/prj}/slurm-10352851.out

- Job ID: 10352851
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3170 sdumont3171
-- nvidia-smi ----------------------
Sat Dec 18 13:14:39 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K40t          On   | 00000000:01:00.0 Off |                    0 |
| N/A   41C    P8    20W / 235W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |

In [32]:
! sbatch  --partition=nvidia_small  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10353148


In [36]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T14:23:17  2021-12-19T18:17:17  10353148  nvidia_small   ng2  PD  0:00     2    4


In [34]:
! squeue -u $(whoami) -h -r | wc -l

1


In [35]:
! squeue --partition=nvidia_small -h -r | wc -l

2


In [43]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T14:23:17  2021-12-19T18:17:17  10353148  nvidia_small   ng2  PD  0:00     2    4


In [None]:
! cat /scratch${PWD#/prj}/slurm-10353148.out

# 211218: agora funcionou, por quê? Voltou a funcionar de repente, sem explicação.

In [45]:
! sbatch  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10353218


In [46]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T14:23:17  2021-12-19T18:17:17  10353148  nvidia_small   ng2  PD  0:00     2    4
2021-12-18T14:39:00  2021-12-18T14:39:00  10353218    nvidia_dev   ng2   R  0:03     2   48


In [47]:
! squeue -u $(whoami) -h -r | wc -l

2


In [44]:
! squeue --partition=nvidia_dev -h -r | wc -l

0


In [55]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T14:23:17  2021-12-19T18:17:17  10353148  nvidia_small   ng2  PD  0:00     2    4


In [53]:
! cat /scratch${PWD#/prj}/slurm-10353218.out

- Job ID: 10353218
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_dev
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3075 sdumont3076
-- nvidia-smi ----------------------
sdumont3075
01:00.0 3D controller: NVIDIA Corporation GK110BGL [Tesla K40t] (rev a1)
	Subsystem: NVIDIA Corporation GK110BGL [Tesla K40t]
81:00.0 3D controller: NVIDIA Corporation GK110BGL [Tesla K40t] (rev a1)
	Subsystem: NVIDIA Corporation GK110BGL [Tesla K40t]
-- nvidia-smi ----------------------
Sat Dec 18 14:39:01 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                   

In [12]:
! sbatch  --partition=nvidia_dev  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng3.srm

Submitted batch job 10355992


In [13]:
! squeue --name=ng3 --format="%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-19T08:55:43  2021-12-19T08:55:43  10355992    nvidia_dev   ng3   R  0:02     2   48


In [14]:
! squeue -u $(whoami) -h -r | wc -l

3


In [15]:
! squeue --partition=nvidia_dev -h -r | wc -l

0


In [16]:
! cat /scratch${PWD#/prj}/slurm-10355992.out

- Job ID: 10355992
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_dev
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3052 sdumont3053
-- modules ----------------------------
conda activate env2, --stack env3
sdumont3052
Sun Dec 19 08:55:52 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K40t          On   | 00000000:01:00.0 Off |                    0 |
| N/A   42C    P8    21W / 235W |      0MiB / 11441MiB |      0%      Default |
|                               

# Retomando as medições

## 2x2=4(2)

In [17]:
! sbatch  --partition=nvidia_dev  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10353622


In [18]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T16:02:43  2021-12-18T16:02:45  10353622    nvidia_dev   ng2   R  0:01     2   48


In [19]:
! squeue -u $(whoami) -h -r | wc -l

1


In [20]:
! squeue --partition=nvidia_dev -h -r | wc -l

1


In [22]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS


In [23]:
! cat /scratch${PWD#/prj}/slurm-10353622.out

- Job ID: 10353622
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_dev
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3075 sdumont3076
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 4 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3075   00    00   00
3. sdumont3075   01    01   01
3. sdumont3076   02    00   00
3. sdumont3076   03    01   01
4. ---------------------------
5. Heat:1500.0000, TT:28.2087, TK:2.3022, TC:25.5090, TE:0.0395, MPI:4
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [24]:
! sbatch  --partition=nvidia_dev  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10353628


In [25]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T16:04:05  2021-12-18T16:04:08  10353628    nvidia_dev   ng2   R  0:11     2   48


In [28]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS


In [29]:
! cat /scratch${PWD#/prj}/slurm-10353628.out

- Job ID: 10353628
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_dev
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3075 sdumont3076
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 4 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3075   00    00   00
3. sdumont3075   01    01   01
3. sdumont3076   02    00   00
3. sdumont3076   03    01   01
4. ---------------------------
5. Heat:1500.0000, TT:26.5606, TK:0.6478, TC:25.5067, TE:0.0403, MPI:4
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [30]:
! sbatch  --partition=nvidia_dev  --ntasks-per-node=2  --nodes=2  --ntasks=4  ng2.srm

Submitted batch job 10353638


In [31]:
! squeue --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T16:06:23  2021-12-18T16:06:23  10353638    nvidia_dev   ng2   R  0:02     2   48


In [44]:
! squeue --job 10353638 --name ng2 --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS


In [45]:
! cat /scratch${PWD#/prj}/slurm-10353638.out

- Job ID: 10353638
- Tasks per node: 2
- # of nodes in the job: 2
- # of tasks: 4
- Partition: nvidia_dev
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3075 sdumont3076
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 4 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3075   00    00   00
3. sdumont3075   01    01   01
3. sdumont3076   02    00   00
3. sdumont3076   03    01   01
4. ---------------------------
5. Heat:1500.0000, TT:26.9706, TK:1.0652, TC:25.5190, TE:0.0393, MPI:4
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## 3x3=9(5)

In [32]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=5  --ntasks=9  ng2.srm

Submitted batch job 10353639


In [3]:
! cat /scratch${PWD#/prj}/slurm-10353639.out

- Job ID: 10353639
- Tasks per node: 2
- # of nodes in the job: 5
- # of tasks: 9
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3137 sdumont3138 sdumont3139 sdumont3140
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 9 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3080   00    00   00
3. sdumont3080   01    01   01
3. sdumont3137   02    00   00
3. sdumont3137   03    01   01
3. sdumont3138   04    00   00
3. sdumont3138   05    01   01
3. sdumont3139   06    00   00
3. sdumont3139   07    01   01
3. sdumont3140   08    00   00
4. ---------------------------
5. Heat:1500.0000, TT:12.5650, TK:1.8618, TC:9.7379, TE:0.0168, MPI:9
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [33]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=5  --ntasks=9  ng2.srm

Submitted batch job 10353640


In [4]:
! cat /scratch${PWD#/prj}/slurm-10353640.out

- Job ID: 10353640
- Tasks per node: 2
- # of nodes in the job: 5
- # of tasks: 9
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3137 sdumont3138 sdumont3139 sdumont3140
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 9 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3080   00    00   00
3. sdumont3080   01    01   01
3. sdumont3137   02    00   00
3. sdumont3137   03    01   01
3. sdumont3138   04    00   00
3. sdumont3138   05    01   01
3. sdumont3139   06    00   00
3. sdumont3139   07    01   01
3. sdumont3140   08    00   00
4. ---------------------------
5. Heat:1500.0000, TT:11.2820, TK:0.5888, TC:9.7094, TE:0.0169, MPI:9
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [34]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=5  --ntasks=9  ng2.srm

Submitted batch job 10353641


In [5]:
! cat /scratch${PWD#/prj}/slurm-10353641.out

- Job ID: 10353641
- Tasks per node: 2
- # of nodes in the job: 5
- # of tasks: 9
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3137 sdumont3138 sdumont3139 sdumont3140
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 9 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3080   00    00   00
3. sdumont3080   01    01   01
3. sdumont3137   02    00   00
3. sdumont3137   03    01   01
3. sdumont3138   04    00   00
3. sdumont3138   05    01   01
3. sdumont3139   06    00   00
3. sdumont3139   07    01   01
3. sdumont3140   08    00   00
4. ---------------------------
5. Heat:1500.0000, TT:11.2837, TK:0.5719, TC:9.7297, TE:0.0169, MPI:9
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## 4x4=16(8)

In [36]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm

Submitted batch job 10353642


In [6]:
! cat /scratch${PWD#/prj}/slurm-10353642.out

- Job ID: 10353642
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3081 sdumont3137 sdumont3138 sdumont3139 sdumont3140 sdumont3170 sdumont3171
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
Traceback (most recent call last):
  File "/scratch/ampemi/xxxx.xxxx/b715/ng2.py", line 68, in <module>
    cuda.select_device(crank)         # 'color rank' 0 = 'gpu id' 0, etc.
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/api.py", line 452, in select_device
    context = devices.get_context(device_id)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/cudadrv/devices.py", line 212, in get_context
    return _runtime.get_or_create_context(devnum)
  File "/scratch/ampemi

In [37]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm

Submitted batch job 10353643


In [7]:
! cat /scratch${PWD#/prj}/slurm-10353643.out

- Job ID: 10353643
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3081 sdumont3137 sdumont3138 sdumont3139 sdumont3140 sdumont3170 sdumont3171
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
Traceback (most recent call last):
  File "/scratch/ampemi/xxxx.xxxx/b715/ng2.py", line 68, in <module>
    cuda.select_device(crank)         # 'color rank' 0 = 'gpu id' 0, etc.
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/api.py", line 452, in select_device
    context = devices.get_context(device_id)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/cudadrv/devices.py", line 212, in get_context
    return _runtime.get_or_create_context(devnum)
  File "/scratch/ampemi

In [None]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm

In [9]:
! cat /scratch${PWD#/prj}/slurm-10353644.out

- Job ID: 10353644
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3080 sdumont3081 sdumont3137 sdumont3138 sdumont3139 sdumont3140 sdumont3170 sdumont3171
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
Traceback (most recent call last):
  File "/scratch/ampemi/xxxx.xxxx/b715/ng2.py", line 68, in <module>
    cuda.select_device(crank)         # 'color rank' 0 = 'gpu id' 0, etc.
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/api.py", line 452, in select_device
    context = devices.get_context(device_id)
  File "/scratch/ampemi/xxxx.xxxx/env3/lib/python3.9/site-packages/numba/cuda/cudadrv/devices.py", line 212, in get_context
    return _runtime.get_or_create_context(devnum)
  File "/scratch/ampemi

## SDumont is having intermittent errors; resubmitting the same job works:

In [17]:
%%bash
sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm
sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm
sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=8  --ntasks=16  ng2.srm

Submitted batch job 10356006
Submitted batch job 10356007
Submitted batch job 10356008


In [23]:
! cat /scratch${PWD#/prj}/slurm-10356006.out

- Job ID: 10356006
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3047 sdumont3048 sdumont3049 sdumont3052 sdumont3137 sdumont3138 sdumont3139 sdumont3140
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3047   00    00   00
3. sdumont3047   01    01   01
3. sdumont3048   02    00   00
3. sdumont3048   03    01   01
3. sdumont3049   04    00   00
3. sdumont3049   05    01   01
3. sdumont3052   06    00   00
3. sdumont3052   07    01   01
3. sdumont3137   08    00   00
3. sdumont3137   09    01   01
3. sdumont3138   10    00   00
3. sdumont3138   11    01   01
3. sdumont3139   12    00   00
3. sdumont3139   13    01   01
3. sdumont3140   14    00   00
3. sdum

In [24]:
! cat /scratch${PWD#/prj}/slurm-10356007.out

- Job ID: 10356007
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3053 sdumont3156 sdumont3157 sdumont3168 sdumont3169 sdumont3172 sdumont3173 sdumont3174
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3053   00    00   00
3. sdumont3053   01    01   01
3. sdumont3156   02    00   00
3. sdumont3156   03    01   01
3. sdumont3157   04    00   00
3. sdumont3157   05    01   01
3. sdumont3168   06    00   00
3. sdumont3168   07    01   01
3. sdumont3169   08    00   00
3. sdumont3169   09    01   01
3. sdumont3172   10    00   00
3. sdumont3172   11    01   01
3. sdumont3173   12    00   00
3. sdumont3173   13    01   01
3. sdumont3174   14    00   00
3. sdum

In [25]:
! cat /scratch${PWD#/prj}/slurm-10356008.out

- Job ID: 10356008
- Tasks per node: 2
- # of nodes in the job: 8
- # of tasks: 16
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3047 sdumont3048 sdumont3049 sdumont3052 sdumont3137 sdumont3138 sdumont3139 sdumont3140
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 16 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3047   00    00   00
3. sdumont3047   01    01   01
3. sdumont3048   02    00   00
3. sdumont3048   03    01   01
3. sdumont3049   04    00   00
3. sdumont3049   05    01   01
3. sdumont3052   06    00   00
3. sdumont3052   07    01   01
3. sdumont3137   08    00   00
3. sdumont3137   09    01   01
3. sdumont3138   10    00   00
3. sdumont3138   11    01   01
3. sdumont3139   12    00   00
3. sdumont3139   13    01   01
3. sdumont3140   14    00   00
3. sdum

## 6x6=36(18)

In [39]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=18  --ntasks=36  ng2.srm

Submitted batch job 10353645


In [None]:
! squeue --job 10353645 --name ng2 --partition nvidia_small --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

In [2]:
! cat /scratch${PWD#/prj}/slurm-10353645.out

- Job ID: 10353645
- Tasks per node: 2
- # of nodes in the job: 18
- # of tasks: 36
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3057 sdumont3058 sdumont3059 sdumont3060 sdumont3061 sdumont3062 sdumont3063 sdumont3148 sdumont3149 sdumont3150 sdumont3151 sdumont3152 sdumont3153 sdumont3154 sdumont3155 sdumont3156 sdumont3157 sdumont3166
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 36 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3057   00    00   00
3. sdumont3057   01    01   01
3. sdumont3058   02    00   00
3. sdumont3058   03    01   01
3. sdumont3059   04    00   00
3. sdumont3059   05    01   01
3. sdumont3060   06    00   00
3. sdumont3060   07    01   01
3. sdumont3061   08    00   00
3. sdumont3061   09    01   01
3. sdumont3062   10    00   00
3. sdumont

In [40]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=18  --ntasks=36  ng2.srm

Submitted batch job 10353647


In [None]:
! squeue --job 10353647 --name ng2 --partition nvidia_small --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

In [3]:
! cat /scratch${PWD#/prj}/slurm-10353647.out

- Job ID: 10353647
- Tasks per node: 2
- # of nodes in the job: 18
- # of tasks: 36
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3057 sdumont3058 sdumont3059 sdumont3060 sdumont3061 sdumont3062 sdumont3063 sdumont3148 sdumont3149 sdumont3150 sdumont3151 sdumont3152 sdumont3153 sdumont3154 sdumont3155 sdumont3156 sdumont3157 sdumont3166
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 36 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3057   00    00   00
3. sdumont3057   01    01   01
3. sdumont3058   02    00   00
3. sdumont3058   03    01   01
3. sdumont3059   04    00   00
3. sdumont3059   05    01   01
3. sdumont3060   06    00   00
3. sdumont3060   07    01   01
3. sdumont3061   08    00   00
3. sdumont3061   09    01   01
3. sdumont3062   10    00   00
3. sdumont

In [41]:
! sbatch  --partition nvidia_small  --ntasks-per-node=2  --nodes=18  --ntasks=36  ng2.srm

Submitted batch job 10353648


In [None]:
! squeue --job 10353648 --name ng2 --partition nvidia_small --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

In [4]:
! cat /scratch${PWD#/prj}/slurm-10353648.out

- Job ID: 10353648
- Tasks per node: 2
- # of nodes in the job: 18
- # of tasks: 36
- Partition: nvidia_small
- Dir from which sbatch was invoked: b715
- List of nodes allocated to the job: sdumont3057 sdumont3058 sdumont3059 sdumont3060 sdumont3061 sdumont3062 sdumont3063 sdumont3148 sdumont3149 sdumont3150 sdumont3151 sdumont3152 sdumont3153 sdumont3154 sdumont3155 sdumont3156 sdumont3157 sdumont3166
-- modules ----------------------------
conda activate env2, --stack env3
-- run --------------------------------
$ srun -n 36 python ng2.py
-- output -----------------------------
1. hostname    rank crank  cid
2. ----------- ---- ----- ----
3. sdumont3057   00    00   00
3. sdumont3057   01    01   01
3. sdumont3058   02    00   00
3. sdumont3058   03    01   01
3. sdumont3059   04    00   00
3. sdumont3059   05    01   01
3. sdumont3060   06    00   00
3. sdumont3060   07    01   01
3. sdumont3061   08    00   00
3. sdumont3061   09    01   01
3. sdumont3062   10    00   00
3. sdumont

In [46]:
! squeue --name ng2 --partition nvidia_small --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
2021-12-18T16:06:37  2021-12-19T18:17:17  10353639  nvidia_small   ng2  PD  0:00     5    9
2021-12-18T16:06:40  2021-12-19T18:17:17  10353640  nvidia_small   ng2  PD  0:00     5    9
2021-12-18T16:06:43  2021-12-19T18:17:17  10353641  nvidia_small   ng2  PD  0:00     5    9
2021-12-18T16:06:57  2021-12-19T18:17:17  10353642  nvidia_small   ng2  PD  0:00     8   16
2021-12-18T16:07:01  2021-12-19T18:17:17  10353643  nvidia_small   ng2  PD  0:00     8   16
2021-12-18T16:07:03  2021-12-19T18:17:17  10353644  nvidia_small   ng2  PD  0:00     8   16
2021-12-18T16:07:04  2021-12-19T18:17:17  10353645  nvidia_small   ng2  PD  0:00    18   36
2021-12-18T16:07:06  2021-12-19T18:17:17  10353647  nvidia_small   ng2  PD  0:00    18   36
2021-12-18T16:07:08  2021-12-19T18:17:17  10353648  nvidia_small   ng2  PD  0:00    18   36


In [1]:
! squeue --name ng2 --partition nvidia_small --format "%.19V  %.19S  %.8i  %.12P %.5j  %.2t %.5M %.5D %.4C"

        SUBMIT_TIME           START_TIME     JOBID     PARTITION  NAME  ST  TIME NODES CPUS
