# Stencil Cython MPI

In [7]:
%%writefile scp2.pyx
#cython: language_level=3
#cython: cdivision=True
#cython: initializedcheck=False
#cython: infer_types=True
#cython: wraparound=False
#cython: boundscheck=False

import numpy as np

cpdef stp(double[:,::1] anew, double[:,::1] aold, Py_ssize_t by, Py_ssize_t bx) :
    for i in range(1, bx+1) :
        for j in range(1, by+1) :
            anew[i,j] =  ( aold[i,j] / 2.0 + 
                         ( aold[i-1,j] + aold[i+1,j] + 
                           aold[i,j-1] + aold[i,j+1] ) / 8.0 )

Writing scp2.pyx


In [10]:
%%writefile setup2.py
from setuptools import setup
from Cython.Build import cythonize

setup(
    ext_modules = cythonize("scp2.pyx", force=True)
)

Writing setup2.py


In [11]:
%%bash
rm scp2.*.so  # clean
python setup2.py build_ext --inplace

[1/1] Cythonizing scp2.pyx
running build_ext
building 'scp2' extension
gcc -pthread -B /scratch/app/anaconda3/2018.12/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/scratch/app/anaconda3/2018.12/include/python3.7m -c scp2.c -o build/temp.linux-x86_64-3.7/scp2.o
gcc -pthread -shared -B /scratch/app/anaconda3/2018.12/compiler_compat -L/scratch/app/anaconda3/2018.12/lib -Wl,-rpath=/scratch/app/anaconda3/2018.12/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/scp2.o -o build/lib.linux-x86_64-3.7/scp2.cpython-37m-x86_64-linux-gnu.so
copying build/lib.linux-x86_64-3.7/scp2.cpython-37m-x86_64-linux-gnu.so -> 


rm: cannot remove ‘scp2.*.so’: No such file or directory


In [12]:
import scp2
print(scp2.stp.__doc__)

None


In [20]:
%%writefile st-cy-par.py
import numpy as np
import time
from mpi4py import MPI   
import scp2

n            = 4800    # nxn grid (4800,1,500)=1500; (100,1,10)=30
energy       = 1.0     # energy to be injected per iteration
niters       = 500     # number of iterations

nsources     = 3       # sources of energy
size         = n + 2
heat         = np.zeros((1), np.float64)     # system total heat
anew         = np.zeros((size, size), np.float64)
aold         = np.zeros((size, size), np.float64)
sources      = np.empty((3,2), np.int32)
sources[:,:] = [ [n//2, n//2], [n//3, n//3], [n*4//5, n*8//9] ]
niters       = (niters+1) // 2

comm = MPI.COMM_WORLD
mpirank = comm.rank
mpisize = comm.size

# cria e inicializa as fontes de calor
nsources = 3
sources = np.zeros((nsources, 2), np.intc)
sources[:,:] = [ [n//2, n//2], [n//3, n//3], [n*4//5, n*8//9] ]

# sources in my area, local to my rank
locnsources = 0
locsources = np.empty((nsources,2), np.intc)

rheat = np.zeros(1, np.double)
bheat = np.zeros(1, np.double)

# determine my coordinates (x,y)
pdims = MPI.Compute_dims(mpisize, 2)
px    = pdims[0]
py    = pdims[1]
rx    = mpirank % px
ry    = mpirank // px

# determine my four neighbors
north = (ry - 1) * px + rx
if (ry - 1) < 0 :
    north = MPI.PROC_NULL
south = (ry + 1) * px + rx
if (ry + 1) >= py :
    south = MPI.PROC_NULL
west = ry * px + rx - 1
if (rx - 1) < 0 :
    west = MPI.PROC_NULL
east = ry * px + rx + 1
if (rx + 1) >= px :
    east = MPI.PROC_NULL

# decompose the domain
bx = n // px            # block size in x
by = n // py            # block size in y
offx = rx * bx + 1      # offset in x
offy = ry * by + 1      # offset in y

# determine which sources are in my patch
for i in range(nsources) :
    locx = sources[i, 0] - offx
    locy = sources[i, 1] - offy
    if(locx >= 0 and locx <= bx and locy >= 0 and locy <= by) :
        locsources[locnsources, 0] = locx + 2 - 1
        locsources[locnsources, 1] = locy + 2 - 1
        locnsources += 1

# working arrays with 1-wide halo zones
anew = np.zeros((bx+2, by+2), np.double)
aold = np.zeros((bx+2, by+2), np.double)

if not mpirank : t0 = time.time()

for iters in range(niters) :
    # exchange data with neighbors
    if north != MPI.PROC_NULL :
        r1=comm.irecv(source=north, tag=1)
        s1=comm.isend(aold[1, 1:bx+1], dest=north, tag=1)
    if south != MPI.PROC_NULL :
        r2=comm.irecv(source=south, tag=1)
        s2=comm.isend(aold[bx, 1:bx+1], dest=south, tag=1)
    if east != MPI.PROC_NULL :
        r3 = comm.irecv(source=east, tag=1)
        s3 = comm.isend(aold[1:bx+1, bx], dest=east, tag=1)
    if west != MPI.PROC_NULL :
        r4 = comm.irecv(source=west, tag=1)
        s4 = comm.isend(aold[1:bx+1, 1], dest=west, tag=1)
    # wait
    if north != MPI.PROC_NULL :
        s1.wait()
        aold[0, 1:bx+1] = r1.wait()
    if south != MPI.PROC_NULL :
        s2.wait()
        aold[bx+1, 1:bx+1] = r2.wait()
    if east != MPI.PROC_NULL :
        s3.wait()
        aold[1:bx+1, bx+1] = r3.wait()
    if west != MPI.PROC_NULL :
        s4.wait
        aold[1:bx+1, 0] = r4.wait()

    # update grid
    scp2.stp(anew, aold, bx, by)

    # refresh heat sources
    for i in range(locnsources) :
        anew[locsources[i, 0]-1, locsources[i, 1]-1] += energy

    # exchange data with neighbors
    if north != MPI.PROC_NULL :
        r1=comm.irecv(source=north, tag=1)
        s1=comm.isend(anew[1, 1:bx+1], dest=north, tag=1)
    if south != MPI.PROC_NULL :
        r2=comm.irecv(source=south, tag=1)
        s2=comm.isend(anew[bx, 1:bx+1], dest=south, tag=1)
    if east != MPI.PROC_NULL :
        r3 = comm.irecv(source=east, tag=1)
        s3 = comm.isend(anew[1:bx+1, bx], dest=east, tag=1)
    if west != MPI.PROC_NULL :
        r4 = comm.irecv(source=west, tag=1)
        s4 = comm.isend(anew[1:bx+1, 1], dest=west, tag=1)
    # wait
    if north != MPI.PROC_NULL :
        s1.wait()
        anew[0, 1:bx+1] = r1.wait()
    if south != MPI.PROC_NULL :
        s2.wait()
        anew[bx+1, 1:bx+1] = r2.wait()
    if east != MPI.PROC_NULL :
        s3.wait()
        anew[1:bx+1, bx+1] = r3.wait()
    if west != MPI.PROC_NULL :
        s4.wait
        anew[1:bx+1, 0] = r4.wait()

    # update grid
    scp2.stp(aold, anew, bx, by)

    # refresh heat sources
    for i in range(locnsources) :
        aold[locsources[i, 0]-1, locsources[i, 1]-1] += energy 

# get final heat in the system
bheat[0] = np.sum(aold[1:-1, 1:-1])
comm.Reduce(bheat, rheat)

if not mpirank :
    t1 = MPI.Wtime() - t0
    print('Heat={:0.4f} | Time={:0.4f} | MPISize={:d} | Dim={:d},{:d} | bx,by={:d},{:d}'
          .format(rheat[0], t1, mpisize, pdims[0], pdims[1], bx, by))

Overwriting st-cy-par.py


In [22]:
%%bash
s='/prj/ampemi/xxxx.xxxx/stnc/Cython'
d='/scratch/ampemi/xxxx.xxxx/stnc/Cython'
cp  $s/scp2.*.so  $s/st-cy-par.py  $d

In [30]:
%%writefile st-cy-par_81.srm
#!/bin/bash
#SBATCH --ntasks=81            # Total tasks(CPUs)
#SBATCH -p cpu_small           # Select partition
#SBATCH -J stcypar             # Job name
#SBATCH --time=00:10:00        # Limit execution time

echo '========================================'
echo '- Job ID:' $SLURM_JOB_ID
echo '- Number of tasks per node:' $SLURM_NTASKS_PER_NODE
echo '- Number of nodes:' $SLURM_JOB_NUM_NODES
echo '- Total tasks:' $SLURM_NTASKS
echo '- Nodes alocated:' $SLURM_JOB_NODELIST
echo '- Directory where sbatch was called ($SLURM_SUBMIT_DIR):'
echo $SLURM_SUBMIT_DIR
cd $SLURM_SUBMIT_DIR
nodeset -e $SLURM_JOB_NODELIST

# Working dir
cd /scratch/ampemi/xxxx.xxxx/stnc/Cython

# Module
module load anaconda3/2018.12

# Executable
EXEC='python st-cy-par.py'

# Run
echo '-- srun -------------------------------'
echo '$ srun --mpi=pmi2 -n' $SLURM_NTASKS $EXEC
srun --mpi=pmi2 -n $SLURM_NTASKS $EXEC
echo '-- END --------------------------------'

Overwriting st-cy-par_81.srm


<hr style="height:10px;border-width:0;background-color:green">

Submit:

In [31]:
%%bash
# 1x1=1, 2x2=4, 3x3=9, 4x4=16, 6x6=36, 7x7=49, 8x8=64, 9x9=81
sbatch st-cy-par_01.srm
sbatch st-cy-par_04.srm
sbatch st-cy-par_09.srm
sbatch st-cy-par_16.srm
sbatch st-cy-par_36.srm
sbatch st-cy-par_49.srm
sbatch st-cy-par_64.srm
sbatch st-cy-par_81.srm

Submitted batch job 781875
Submitted batch job 781876
Submitted batch job 781877
Submitted batch job 781878
Submitted batch job 781879
Submitted batch job 781880
Submitted batch job 781881
Submitted batch job 781882


In [32]:
! squeue -n stcypar

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            781875 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            781876 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            781877 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            781878 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            781879 cpu_small  stcypar xxxx. PD       0:00      2 (Priority)
            781880 cpu_small  stcypar xxxx. PD       0:00      3 (Priority)
            781881 cpu_small  stcypar xxxx. PD       0:00      3 (Priority)
            781882 cpu_small  stcypar xxxx. PD       0:00      4 (Priority)


Output:

In [40]:
%%bash
d='/scratch/ampemi/xxxx.xxxx/stnc/Cython'
cat $d/slurm-781875.out
cat $d/slurm-781876.out
cat $d/slurm-781877.out
cat $d/slurm-781878.out
cat $d/slurm-781879.out
cat $d/slurm-781880.out
cat $d/slurm-781881.out
cat $d/slurm-781882.out

- Job ID: 781875
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 1
- Nodes alocated: sdumont1413
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1413
-- srun -------------------------------
$ srun --mpi=pmi2 -n 1 python st-cy-par.py
Heat=1500.0000 | Time=23.9610 | MPISize=1 | Dim=1,1 | bx,by=4800,4800
-- END --------------------------------
- Job ID: 781876
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 4
- Nodes alocated: sdumont1454
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1454
-- srun -------------------------------
$ srun --mpi=pmi2 -n 4 python st-cy-par.py
Heat=1500.0000 | Time=7.5115 | MPISize=4 | Dim=2,2 | bx,by=2400,2400
-- END --------------------------------
- Job ID: 781877
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 9
- Nodes alocated: sdumont1464
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/x

<hr style="height:10px;border-width:0;background-color:green">

Second measurement:

In [2]:
! squeue -u xxxx.xxxx | grep -c ^

47


In [3]:
%%bash
# 1x1=1, 2x2=4, 3x3=9, 4x4=16, 6x6=36, 7x7=49, 8x8=64, 9x9=81
sbatch st-cy-par_01.srm
sbatch st-cy-par_04.srm
sbatch st-cy-par_09.srm
sbatch st-cy-par_16.srm
sbatch st-cy-par_36.srm
sbatch st-cy-par_49.srm
sbatch st-cy-par_64.srm
sbatch st-cy-par_81.srm

Submitted batch job 788084
Submitted batch job 788085
Submitted batch job 788086
Submitted batch job 788087
Submitted batch job 788088
Submitted batch job 788089
Submitted batch job 788090
Submitted batch job 788091


In [4]:
! squeue -n stcypar

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            788084 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788085 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788086 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788087 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788088 cpu_small  stcypar xxxx. PD       0:00      2 (Priority)
            788089 cpu_small  stcypar xxxx. PD       0:00      3 (Priority)
            788090 cpu_small  stcypar xxxx. PD       0:00      3 (Priority)
            788091 cpu_small  stcypar xxxx. PD       0:00      4 (Priority)


In [4]:
%%bash
d='/scratch/ampemi/xxxx.xxxx/stnc/Cython'
cat $d/slurm-788084.out
cat $d/slurm-788085.out
cat $d/slurm-788086.out
cat $d/slurm-788087.out
cat $d/slurm-788088.out
cat $d/slurm-788089.out
cat $d/slurm-788090.out
cat $d/slurm-788091.out

- Job ID: 788084
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 1
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1149
-- srun -------------------------------
$ srun --mpi=pmi2 -n 1 python st-cy-par.py
Heat=1500.0000 | Time=24.0083 | MPISize=1 | Dim=1,1 | bx,by=4800,4800
-- END --------------------------------
- Job ID: 788085
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 4
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1149
-- srun -------------------------------
$ srun --mpi=pmi2 -n 4 python st-cy-par.py
Heat=1500.0000 | Time=7.4499 | MPISize=4 | Dim=2,2 | bx,by=2400,2400
-- END --------------------------------
- Job ID: 788086
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 9
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/x

<hr style="height:10px;border-width:0;background-color:green">

Third measurement:

In [6]:
%%bash
# 1x1=1, 2x2=4, 3x3=9, 4x4=16, 6x6=36, 7x7=49, 8x8=64, 9x9=81
sbatch st-cy-par_01.srm
sbatch st-cy-par_04.srm
sbatch st-cy-par_09.srm
sbatch st-cy-par_16.srm
sbatch st-cy-par_36.srm
sbatch st-cy-par_49.srm
sbatch st-cy-par_64.srm
sbatch st-cy-par_81.srm

Submitted batch job 788094
Submitted batch job 788095
Submitted batch job 788096
Submitted batch job 788097
Submitted batch job 788098
Submitted batch job 788099
Submitted batch job 788100
Submitted batch job 788101


In [7]:
! squeue -n stcypar

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            788091 cpu_small  stcypar xxxx. PD       0:00      4 (Resources)
            788089 cpu_small  stcypar xxxx. PD       0:00      3 (Resources)
            788090 cpu_small  stcypar xxxx. PD       0:00      3 (Resources)
            788084 cpu_small  stcypar xxxx. PD       0:00      1 (Resources)
            788085 cpu_small  stcypar xxxx. PD       0:00      1 (Resources)
            788086 cpu_small  stcypar xxxx. PD       0:00      1 (Resources)
            788087 cpu_small  stcypar xxxx. PD       0:00      1 (Resources)
            788088 cpu_small  stcypar xxxx. PD       0:00      2 (Resources)
            788094 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788095 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788096 cpu_small  stcypar xxxx. PD       0:00      1 (Priority)
            788097 cpu_small  stcypar xxxx. PD       0:00      1 (Prior

In [2]:
%%bash
d='/scratch/ampemi/xxxx.xxxx/stnc/Cython'
cat $d/slurm-788094.out
cat $d/slurm-788095.out
cat $d/slurm-788096.out
cat $d/slurm-788097.out
cat $d/slurm-788098.out
cat $d/slurm-788099.out
cat $d/slurm-788100.out
cat $d/slurm-788101.out

- Job ID: 788094
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 1
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1149
-- srun -------------------------------
$ srun --mpi=pmi2 -n 1 python st-cy-par.py
Heat=1500.0000 | Time=23.9768 | MPISize=1 | Dim=1,1 | bx,by=4800,4800
-- END --------------------------------
- Job ID: 788095
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 4
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/xxxx.xxxx/stnc/Cython
sdumont1149
-- srun -------------------------------
$ srun --mpi=pmi2 -n 4 python st-cy-par.py
Heat=1500.0000 | Time=7.4049 | MPISize=4 | Dim=2,2 | bx,by=2400,2400
-- END --------------------------------
- Job ID: 788096
- Number of tasks per node:
- Number of nodes: 1
- Total tasks: 9
- Nodes alocated: sdumont1149
- Directory where sbatch was called ($SLURM_SUBMIT_DIR):
/prj/ampemi/x

<hr style="height:10px;border-width:0;background-color:red">

## Version

In [1]:
! cython --version

Cython version 0.29.2


In [2]:
! gcc --version

gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-36)
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [57]:
%%writefile setup.py
from setuptools import setup
from Cython.Build import cythonize

setup(
    ext_modules = cythonize("scp.pyx", force=True)
)

Overwriting setup.py
