# FFTW C2C Cython MPI

In [41]:
#=--------------------------------------------------------------------=#

To avoid `srun` errors, create an additional conda environment that is stacked on top of the existing one and holds the updated packages (`.../env3` is only an example path; any name can be used):

    conda create --prefix .../env3 python=3.9

Stack and install packages

    conda activate --stack .../env3
    conda install -c conda-forge cython pyfftw mpi4py mpi4py-fft

Activate the stacked environment each time before using it

    conda activate --stack .../env3

In [2]:
! python --version

Python 3.9.4


In [4]:
! cython --version

Cython version 0.29.23


In [11]:
%%writefile cc2cp.pyx
#cython: boundscheck=False, wraparound=False, cdivision=True
#cython: initializedcheck=False, language_level=3, infer_types=True
import numpy as np, time as tm
from mpi4py_fft import PFFT, newDistArray
from mpi4py import MPI

def ffp():
    """Run a distributed 3-D complex-to-complex forward FFT and time it.

    A 576 x 576 x 576 complex128 array distributed over ``MPI.COMM_WORLD`` is
    filled with ``sin(i + j + k + 3)``, where ``(i, j, k)`` are *global* grid
    indices, and its imaginary part is set to zero. An unnormalised forward
    transform is applied and the sum of all coefficients is reduced onto
    rank 0 as a checksum.

    Returns
    -------
    tuple
        ``(rs, L, size, rank, t0, t1, t2)`` where

        * ``rs``   -- 0-d complex128 array with the reduced checksum; it is
          only filled in on rank 0 (the root of the reduction) and stays 0
          on every other rank,
        * ``L``    -- grid size along each axis,
        * ``size`` -- number of MPI processes in the communicator,
        * ``rank`` -- rank of the calling process,
        * ``t0``, ``t1``, ``t2`` -- ``time.time()`` stamps taken before the
          data set-up, after it, and after the FFT plus reduction.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    t0 = tm.time()    # time measurement

    # data
    L = M = N = 576
    NA = np.array([N, M, L], dtype=int)
    f = PFFT(comm, NA, dtype=np.complex128, backend='pyfftw')
    u = newDistArray(f, False)

    # `u` is only this rank's block of the global grid, so the loop indices
    # below are local. Add the block's starting position in the global array;
    # otherwise every rank would fill its block as if it began at the origin
    # and the input field would depend on the number of ranks. The checksum
    # cannot reveal that mistake: the sum of all DFT coefficients depends only
    # on the element at the global origin.
    i0, j0, k0 = u.substart
    base = int(i0) + int(j0) + int(k0) + 3
    # The explicit loops are kept on purpose: their run time is what the
    # caller reports as T1.
    for k in range(u.shape[2]):
        for j in range(u.shape[1]):
            for i in range(u.shape[0]):
                u[i, j, k] = base + i + j + k
    u.real = np.sin(u.real)
    u.imag = 0

    t1 = tm.time()    # time measurement

    # FFT
    u_hat = f.forward(u, normalize=False)
    # checksum: local sum on every rank, then a sum-reduction onto rank 0
    rs = np.array(0, dtype=np.complex128)
    s = np.array(np.sum(u_hat), dtype=np.complex128)
    comm.Reduce([s, MPI.DOUBLE_COMPLEX], [rs, MPI.DOUBLE_COMPLEX],
                op=MPI.SUM, root=0)

    t2 = tm.time()    # time measurement

    return rs, L, size, rank, t0, t1, t2

Overwriting cc2cp.pyx


In [12]:
%%writefile setup.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
# The single extension module: cc2cp.pyx compiled with -O3.
cc2cp_extension = Extension(
    'cc2cp',
    sources=['cc2cp.pyx'],
    extra_compile_args=['-O3'],
)

# Cython's build_ext replaces the default command so the .pyx source is
# translated to C before it is compiled.
setup(
    name='cc2cp',
    ext_modules=[cc2cp_extension],
    cmdclass={'build_ext': build_ext},
)

Overwriting setup.py


In [13]:
%%bash
# Remove any previously built shared library and the build directory so the
# extension is rebuilt from scratch.
rm -rf cc2cp*.so build
# Build the extension described in setup.py; --inplace puts the resulting .so
# in the current directory instead of under build/.
python setup.py build_ext --inplace

running build_ext
cythoning cc2cp.pyx to cc2cp.c
building 'cc2cp' extension
creating build
creating build/temp.linux-x86_64-3.9
/scratch/ampemi/eduardo.miranda2/env2/bin/x86_64-conda_cos6-linux-gnu-cc -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /scratch/ampemi/eduardo.miranda2/env3/include -fno-semantic-interposition -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /scratch/ampemi/eduardo.miranda2/env3/include -fno-semantic-interposition -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /scratch/ampemi/eduardo.miranda2/env2/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /scratch/ampemi/eduardo.miranda2/env2/include -fPIC -I/scratch/ampemi/eduardo.miranda2/env3/include/python3.9 -c cc2cp.c -o build/temp.

In [8]:
! ls cc2cp*.so

cc2cp.cpython-39-x86_64-linux-gnu.so


In [14]:
%%writefile cc2cp_c.py
import numpy as np
import time as tm
import cc2cp

# Time the whole extension call from the outside; the difference from the
# inner total (TT) is the overhead around ffp().
outer_start = tm.time()
checksum, grid, ntasks, rank, t0, t1, t2 = cc2cp.ffp()
outer_stop = tm.time()

# ffp() reduces the checksum onto rank 0, so only that rank reports.
if rank == 0:
    fields = [
        f"S:{checksum*1E-5:.0f}",        # scaled checksum
        f"L:{grid:0g}",                  # grid size per axis
        f"N:{ntasks:0g}",                # number of MPI processes
        f"T1:{t1-t0:.4f}",               # data set-up time
        f"TF:{t2-t1:.4f}",               # FFT + reduction time
        f"TT:{t2-t0:.4f}",               # total time inside ffp()
        f"TO:{outer_stop-outer_start:.4f}",  # total time seen by the caller
    ]
    print(", ".join(fields))

Overwriting cc2cp_c.py


In [16]:
! mpiexec -n 16 python cc2cp_c.py

S:270-0j, L:576, N:16, T1:5.3873, TF:4.2265, TT:9.6138, TO:9.6292


#### Copy files to /scratch

In [17]:
! cp cc2cp_c.py cc2cp*.so /scratch${PWD#"/prj"}

#### Slurm batch script

In [9]:
%%writefile cc2cp.srm
#!/bin/bash
#SBATCH --job-name cc2cp       # Job name
#SBATCH --partition cpu_small  # Select partition
#SBATCH --ntasks=1             # Total tasks
#SBATCH --time=00:05:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes
#
# Batch script that runs cc2cp_c.py through srun with $SLURM_NTASKS processes.
# The directives above are defaults: the notebook overrides --ntasks (and, for
# the check run, --partition) on the sbatch command line.

# Job summary, written to the Slurm output file.
echo '========================================'
echo '- Job ID:' $SLURM_JOB_ID
# NOTE(review): this value is empty in the recorded runs, presumably because
# --ntasks-per-node is never requested; confirm.
echo '- Tasks per node:' $SLURM_NTASKS_PER_NODE
echo '- # of nodes in the job:' $SLURM_JOB_NUM_NODES
echo '- # of tasks:' $SLURM_NTASKS
# ${VAR##*/} keeps only the last path component of the submit directory.
echo '- Dir from which sbatch was invoked:' ${SLURM_SUBMIT_DIR##*/}
cd $SLURM_SUBMIT_DIR
echo -n '- List of nodes allocated to the job: '
# Prints the allocated nodes; the recorded output shows one hostname per node.
nodeset -e $SLURM_JOB_NODELIST

# Environment
echo '-- modules ----------------------------'
# A bare `cd` moves to $HOME, so $PWD below is the home directory.
cd
# Strip a leading "/prj" from the home path and prefix it with "/scratch".
dir=/scratch${PWD#"/prj"}
cd $dir
# Load conda's shell integration from env2, activate env2, then stack env3 on
# top of it (env3 holds the updated packages).
source $dir/env2/etc/profile.d/conda.sh
conda activate $dir/env2
conda activate --stack $dir/env3
# Working directory for the run; presumably where the notebook copied
# cc2cp_c.py and the compiled extension - confirm.
cd $dir/fft
              
# Executable
EXEC="python cc2cp_c.py"

# Start
echo '-- run --------------------------------'
# Echo the command for the log; ${EXEC##*/} would drop everything up to the
# last '/', which leaves the current value unchanged (it contains no '/').
echo '$ srun --mpi=pmi2 -n' $SLURM_NTASKS ${EXEC##*/}
echo '-- output -----------------------------'
# $EXEC is left unquoted on purpose so it splits into "python" and the script.
srun --mpi=pmi2 -n $SLURM_NTASKS $EXEC
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

Overwriting cc2cp.srm


## Check

In [19]:
! sbatch --partition=cpu_dev --ntasks=96 cc2cp.srm

Submitted batch job 1333389


In [20]:
! squeue -n cc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS
           1333389    cpu_dev   R  0:02     4   96


In [21]:
! squeue -n cc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS


In [22]:
! cat /scratch${PWD#"/prj"}/slurm-1333389.out

- Job ID: 1333389
- Tasks per node:
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1243 sdumont1244 sdumont1245 sdumont1246
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 96 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:96, T1:4.5796, TF:6.7388, TT:11.3184, TO:11.3185
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Run

### 1 of (1, 4, 16, 24, 48, 72, 96)

In [10]:
! sbatch --ntasks=1 cc2cp.srm
! sbatch --ntasks=1 cc2cp.srm
! sbatch --ntasks=1 cc2cp.srm

Submitted batch job 1337210
Submitted batch job 1337211
Submitted batch job 1337212


In [2]:
! cat /scratch${PWD#"/prj"}/slurm-1337210.out
! cat /scratch${PWD#"/prj"}/slurm-1337211.out
! cat /scratch${PWD#"/prj"}/slurm-1337212.out

- Job ID: 1337210
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 1 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:1, T1:108.3838, TF:19.1992, TT:127.5831, TO:127.5918
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337211
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1490
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 1 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:1, T1:107.9681, TF:23.5177, TT:131.4858, TO:131.6783
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337212
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked

### 4 of (1, 4, 16, 24, 48, 72, 96)

In [24]:
! sbatch --ntasks=4 cc2cp.srm
! sbatch --ntasks=4 cc2cp.srm
! sbatch --ntasks=4 cc2cp.srm

Submitted batch job 1333396
Submitted batch job 1333397
Submitted batch job 1333398


In [4]:
! cat /scratch${PWD#"/prj"}/slurm-1333396.out
! cat /scratch${PWD#"/prj"}/slurm-1333397.out
! cat /scratch${PWD#"/prj"}/slurm-1333398.out

- Job ID: 1333396
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 4 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:4, T1:23.9835, TF:4.8481, TT:28.8316, TO:28.8419
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333397
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 4 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:4, T1:22.9127, TF:6.8095, TT:29.7222, TO:29.7320
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333398
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: fft
- 

### 16 of (1, 4, 16, 24, 48, 72, 96)

In [25]:
! sbatch --ntasks=16 cc2cp.srm
! sbatch --ntasks=16 cc2cp.srm
! sbatch --ntasks=16 cc2cp.srm

Submitted batch job 1333399
Submitted batch job 1333400
Submitted batch job 1333401


In [5]:
! cat /scratch${PWD#"/prj"}/slurm-1333399.out
! cat /scratch${PWD#"/prj"}/slurm-1333400.out
! cat /scratch${PWD#"/prj"}/slurm-1333401.out

- Job ID: 1333399
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 16 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:16, T1:6.2307, TF:1.3147, TT:7.5454, TO:7.5685
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333400
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 16 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:16, T1:6.7684, TF:1.4145, TT:8.1829, TO:8.1853
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333401
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: fft
-

### 24 of (1, 4, 16, 24, 48, 72, 96)

In [26]:
! sbatch --ntasks=24 cc2cp.srm
! sbatch --ntasks=24 cc2cp.srm
! sbatch --ntasks=24 cc2cp.srm

Submitted batch job 1333402
Submitted batch job 1333403
Submitted batch job 1333404


In [6]:
! cat /scratch${PWD#"/prj"}/slurm-1333402.out
! cat /scratch${PWD#"/prj"}/slurm-1333403.out
! cat /scratch${PWD#"/prj"}/slurm-1333404.out

- Job ID: 1333402
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 24 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:24, T1:11.8080, TF:0.8497, TT:12.6577, TO:12.6596
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333403
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 24 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:24, T1:4.2911, TF:1.0600, TT:5.3510, TO:5.3528
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333404
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: ff

### 48 of (1, 4, 16, 24, 48, 72, 96)

In [27]:
! sbatch --ntasks=48 cc2cp.srm
! sbatch --ntasks=48 cc2cp.srm
! sbatch --ntasks=48 cc2cp.srm

Submitted batch job 1333405
Submitted batch job 1333406
Submitted batch job 1333407


In [7]:
! cat /scratch${PWD#"/prj"}/slurm-1333405.out
! cat /scratch${PWD#"/prj"}/slurm-1333406.out
! cat /scratch${PWD#"/prj"}/slurm-1333407.out

- Job ID: 1333405
- Tasks per node:
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1286 sdumont1491
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 48 python cc2cp_c.py
-- output -----------------------------
S:270+0j, L:576, N:48, T1:1.8594, TF:9.3098, TT:11.1692, TO:11.1981
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333406
- Tasks per node:
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1286 sdumont1491
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 48 python cc2cp_c.py
-- output -----------------------------
S:270+0j, L:576, N:48, T1:1.8336, TF:7.4756, TT:9.3092, TO:9.3101
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333407
- Tasks per node:
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which

### 72 of (1, 4, 16, 24, 48, 72, 96)

In [28]:
! sbatch --ntasks=72 cc2cp.srm
! sbatch --ntasks=72 cc2cp.srm
! sbatch --ntasks=72 cc2cp.srm

Submitted batch job 1333408
Submitted batch job 1333409
Submitted batch job 1333410


In [8]:
! cat /scratch${PWD#"/prj"}/slurm-1333408.out
! cat /scratch${PWD#"/prj"}/slurm-1333409.out
! cat /scratch${PWD#"/prj"}/slurm-1333410.out

- Job ID: 1333408
- Tasks per node:
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1278 sdumont1286 sdumont1491
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 72 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:72, T1:1.4073, TF:9.8087, TT:11.2160, TO:11.2176
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333409
- Tasks per node:
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275 sdumont1276 sdumont1278
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 72 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:72, T1:1.4625, TF:9.6574, TT:11.1199, TO:11.1226
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333410
- Tasks per node:
- # of nodes in the job: 3
- # of 

### 96 of (1, 4, 16, 24, 48, 72, 96)

In [29]:
! sbatch --ntasks=96 cc2cp.srm
! sbatch --ntasks=96 cc2cp.srm
! sbatch --ntasks=96 cc2cp.srm

Submitted batch job 1333411
Submitted batch job 1333412
Submitted batch job 1333413


In [2]:
! cat /scratch${PWD#"/prj"}/slurm-1333411.out
! cat /scratch${PWD#"/prj"}/slurm-1333412.out
! cat /scratch${PWD#"/prj"}/slurm-1333413.out

- Job ID: 1333411
- Tasks per node:
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275 sdumont1276 sdumont1278 sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 96 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:96, T1:0.9572, TF:7.6307, TT:8.5880, TO:8.5895
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333412
- Tasks per node:
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1275 sdumont1276 sdumont1278 sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun --mpi=pmi2 -n 96 python cc2cp_c.py
-- output -----------------------------
S:270-0j, L:576, N:96, T1:1.2002, TF:7.2525, TT:8.4527, TO:8.4538
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1333413
- Tasks per node:
- # of nodes i

In [30]:
! squeue -u $(whoami) -h -t pending,running -r | wc -l

33


In [31]:
! squeue -n cc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS
           1333411  cpu_small  PD  0:00     4   96
           1333412  cpu_small  PD  0:00     4   96
           1333413  cpu_small  PD  0:00     4   96
           1333410  cpu_small  PD  0:00     3   72
           1333409  cpu_small  PD  0:00     3   72
           1333408  cpu_small  PD  0:00     3   72
           1333407  cpu_small  PD  0:00     2   48
           1333406  cpu_small  PD  0:00     2   48
           1333405  cpu_small  PD  0:00     2   48
           1333404  cpu_small  PD  0:00     1   24
           1333403  cpu_small  PD  0:00     1   24
           1333402  cpu_small  PD  0:00     1   24
           1333401  cpu_small  PD  0:00     1   16
           1333400  cpu_small  PD  0:00     1   16
           1333399  cpu_small  PD  0:00     1   16
           1333398  cpu_small  PD  0:00     1    4
           1333397  cpu_small  PD  0:00     1    4
           1333396  cpu_small  PD  0:00     1    4
           1333395  cpu_small  

In [1]:
! squeue -n cc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS
