# Stencil F2Py Serial

- F2PY is a part of NumPy (numpy.f2py) and can also be used as a Python module.
- Works by creating an extension module that can be imported in Python using the import keyword.
- The module contains automatically generated wrapper functions that can be called from Python.
- These wrappers act as the interface between Python and the compiled Fortran routines.
- By wrapping a compiled code for Python, we can get the best of both worlds.
- A good rule of thumb is to use it when performing many computations inside (nested) loops.

Para a execução deste Notebook, os ambientes já devem estar configurados:
- source /scratch/app/modulos/intel-psxe-2019.sh
- conda activate --stack ./envs

O programa F90 pode ser compilado usando fortranmagic:

    %load_ext fortranmagic
    %%fortran -v --fcompiler=intelem --opt='-Ofast'     , ou
    %%fortran -v --fcompiler=intelem --opt='-fcheck=all'
    %%fortran -vvv --fcompiler=intelem --opt '-Ofast'

ou gravando um arquivo em disco para usar o bash:

    %%writefile stencil.f90

In [38]:
%%writefile stencil_f2py_seq.f90
!> Serial five-point heat-diffusion stencil on an n x n grid (f2py entry point).
!!
!! The grid is stored with a one-cell halo on every side, so the work arrays
!! are (n+2) x (n+2) and the interior occupies indices 2 .. n+1. The halo is
!! initialised to zero and is never written by the stencil itself. Every
!! iteration relaxes each interior cell towards the mean of its four
!! neighbours and then adds `energy` to each of the three fixed source cells.
!!
!! Arguments:
!!   n      (in)  number of interior cells per side
!!   energy (in)  amount added to every source cell after each iteration
!!   niters (in)  number of iterations; exactly this many are performed
!!                (stepping the loop by two with two updates per pass would
!!                run niters+1 iterations whenever niters is odd)
!!   heat   (out) sum of all interior cells after the last iteration
!!   t      (out) processor time spent in this routine, from cpu_time
subroutine st(n, energy, niters, heat, t)
    implicit none
    integer, intent(in) :: n, energy, niters
    ! NOTE(review): `double precision` is kept for the dummy arguments so the
    ! f2py-generated signature stays as it is; switching to a named kind
    ! parameter may need an extra f2py kind map -- TODO confirm.
    double precision, intent(out) :: heat, t
    integer, parameter :: nsources = 3
    integer :: iters, i, j, k, x, y, ext, first, last
    integer, dimension(nsources, 2) :: sources
    double precision, allocatable :: aold(:,:), anew(:,:), spare(:,:)
    ! Deliberately not initialised at declaration: that would give them the
    ! SAVE attribute. Both are assigned by cpu_time before being read.
    double precision :: t1, t2

    call cpu_time(t1)
    heat = 0.0d0

    ! Without interior cells there is nothing to relax or to sum, and for a
    ! negative n the source positions would fall outside the arrays.
    if (n >= 1) then
        ext = n + 2
        first = 2
        last = n + 1

        allocate(aold(ext, ext))
        allocate(anew(ext, ext))
        aold = 0.0d0
        anew = 0.0d0

        ! Source positions are given relative to the interior; the +1 applied
        ! below shifts them past the halo.
        sources(1,:) = (/ n/2,   n/2   /)
        sources(2,:) = (/ n/3,   n/3   /)
        sources(3,:) = (/ n*4/5, n*8/9 /)   ! 8/9 as in Balaji's version

        do iters = 1, niters
            ! j outermost and i innermost: the first index is contiguous.
            do j = first, last
                do i = first, last
                    anew(i,j) = aold(i,j)/2.0d0 + (aold(i-1,j) + aold(i+1,j) +  &
                                aold(i,j-1) + aold(i,j+1)) / 4.0d0 / 2.0d0
                enddo
            enddo
            do k = 1, nsources
                x = sources(k,1) + 1
                y = sources(k,2) + 1
                anew(x,y) = anew(x,y) + energy
            enddo
            ! Exchange the two buffers without copying any data, so that
            ! aold always holds the most recent field.
            call move_alloc(aold, spare)
            call move_alloc(anew, aold)
            call move_alloc(spare, anew)
        enddo

        ! Explicit loops keep a fixed summation order, so the total does not
        ! depend on how a compiler evaluates an array reduction.
        do j = first, last
            do i = first, last
                heat = heat + aold(i,j)
            end do
        end do

        deallocate(aold)
        deallocate(anew)
    end if

    call cpu_time(t2)
    t = t2 - t1
end subroutine st

Overwriting stencil_f2py_seq.f90


Verificando os compiladores disponíveis

    --fcompiler=gnu95    GNU Fortran 95 compiler (4.8.5)
    --fcompiler=intelem  Intel Fortran Compiler for 64-bit apps (19.0.3.199)

In [3]:
# List the Fortran compilers that f2py can find on this machine.
! f2py -c --help-fcompiler

Gnu95FCompiler instance properties:
  archiver        = ['/usr/bin/gfortran', '-cr']
  compile_switch  = '-c'
  compiler_f77    = ['/usr/bin/gfortran', '-Wall', '-g', '-ffixed-form', '-
                    fno-second-underscore', '-fPIC', '-O3', '-funroll-loops']
  compiler_f90    = ['/usr/bin/gfortran', '-Wall', '-g', '-fno-second-
                    underscore', '-fPIC', '-O3', '-funroll-loops']
  compiler_fix    = ['/usr/bin/gfortran', '-Wall', '-g', '-ffixed-form', '-
                    fno-second-underscore', '-Wall', '-g', '-fno-second-
                    underscore', '-fPIC', '-O3', '-funroll-loops']
  libraries       = ['gfortran']
  library_dirs    = ['/usr/lib/gcc/x86_64-redhat-linux/4.8.5',
                    '/usr/lib/gcc/x86_64-redhat-linux/4.8.5']
  linker_exe      = ['/usr/bin/gfortran', '-Wall', '-Wall']
  linker_so       = ['/usr/bin/gfortran', '-Wall', '-g', '-Wall', '-g', '-
                    shared']
  object_switch   = '-o '
  ranlib          = ['/usr/bin/gfo

Vamos usar o compilador Intel

In [54]:
%%bash
# Remove any previously built extension module. -f keeps the very first
# build, when no .so exists yet, from reporting an error.
rm -f stencil_f2py_seq.*.so

# Does not compile:
# source /scratch/app/modulos/intel-psxe-2019.sh
# f2py -c stencil_f2py_seq.f90 -m stencil_f2py_seq --fcompiler=intelem

# (the ".so" takes a while to show up in the directory)
f2py -c stencil_f2py_seq.f90 -m stencil_f2py_seq

running build
running config_cc
unifing config_cc, config, build_clib, build_ext, build commands --compiler options
running config_fc
unifing config_fc, config, build_clib, build_ext, build commands --fcompiler options
running build_src
build_src
building extension "stencil_f2py_seq" sources
f2py options: []
f2py:> /tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/stencil_f2py_seqmodule.c
creating /tmp/tmp5rqi4bv6/src.linux-x86_64-3.7
Reading fortran codes...
	Reading file 'stencil_f2py_seq.f90' (format:free)
Post-processing...
	Block: stencil_f2py_seq
			Block: st
Post-processing (stage 2)...
Building modules...
	Building module "stencil_f2py_seq"...
		Constructing wrapper function "st"...
		  heat,t = st(n,energy,niters)
	Wrote C/API module "stencil_f2py_seq" to file "/tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/stencil_f2py_seqmodule.c"
  adding '/tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/fortranobject.c' to sources.
  adding '/tmp/tmp5rqi4bv6/src.linux-x86_64-3.7' to include_dirs.
copying /scratch/app/ana

In file included from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1832:0,
                 from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/fortranobject.h:13,
                 from /tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/fortranobject.c:2:
  ^
In file included from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1832:0,
                 from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /scratch/app/anaconda3/2018.12/lib/python3.7/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /tmp/tmp5rqi4bv6/src.linux-x86_64-3.7/fortranobject.h:13,
 

In [55]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2.
# NOTE(review): autoreload is documented as unable to reload C extension
# modules -- confirm whether a kernel restart is needed after rebuilding the .so.
%reload_ext autoreload
%autoreload 2
# Import the f2py-built extension and print its auto-generated docstring,
# which lists the wrapped functions with their Python call signatures.
import stencil_f2py_seq
print(stencil_f2py_seq.__doc__)

This module 'stencil_f2py_seq' is auto-generated with f2py (version:2).
Functions:
  heat,t = st(n,energy,niters)
.


Código principal que chama o módulo. Será gravado em disco:

In [56]:
%%writefile stencil_seq.py
# Driver script: runs the f2py-wrapped serial stencil once and prints the
# accumulated heat, the time reported by the Fortran routine, and the wall
# time of the whole script.
from time import time
wall_start = time()   # taken before the remaining imports, so they are timed too
import stencil_f2py_seq
import numpy as np

# Run parameters (the value in brackets is the reference setting).
n = 4800         # nxn grid; 4800,1,500→1500; 100,1,10→30; [4800]
energy = 1       # energy to be injected per iteration; [1]
niters = 500     # number of iterations; [500]

result = stencil_f2py_seq.st(n, energy, niters)
heat, kernel_time = result
wall_time = time() - wall_start
print("Heat = %0.4f | Tempo = %0.4f | TempoPyt = %0.4f" %(heat, kernel_time, wall_time))

Overwriting stencil_seq.py


Rodando uma vez para testar o funcionamento:

In [57]:
%%writefile calctempo.sh
#!/bin/sh
# Runs the stencil driver script under `time`, so that the real/user/sys
# figures for the whole Python process are reported after the script's output.
time python stencil_seq.py

Overwriting calctempo.sh


In [58]:
%%bash
sh calctempo.sh

Heat = 1500.0000 | Tempo = 18.7115 | TempoPyt = 18.9060



real	0m18.967s
user	0m18.399s
sys	0m0.528s


## Rodando em um nó de execução

Copia o executável para /scratch

In [59]:
%%bash
# Copy the built extension module, the driver script and the timing script
# from the project area to the matching directory under /scratch.
b='/stnc/F2Py'
s='/prj/ampemi/eduardo.miranda2'$b
d='/scratch/ampemi/eduardo.miranda2'$b
# -f: do not report an error when no earlier build is present in the target.
rm -f "$d"/stencil_f2py_seq.*.so
cp "$s"/stencil_f2py_seq.*.so "$s/stencil_seq.py" "$s/calctempo.sh" "$d"

Verificando o funcionamento em /scratch

In [60]:
%%bash
# Run the timing script from the /scratch copy. Stop if the directory is
# missing, so the script is never run from the wrong location.
cd /scratch/ampemi/eduardo.miranda2/stnc/F2Py || exit 1
sh calctempo.sh

Heat = 1500.0000 | Tempo = 18.5834 | TempoPyt = 18.7791



real	0m18.838s
user	0m18.300s
sys	0m0.500s


Arquivo de lote de submissão

In [61]:
%%writefile stencil_seq_f2py.srm
#!/bin/bash
# Slurm batch script: runs calctempo.sh (the serial f2py stencil) as a single task.
#
# Queue limits (1.0 UA):
#   cpu_dev  : 20 min.,  1-4  nodes, at most 1/1   jobs running/queued
#   cpu_small: 72 hours, 1-20 nodes, at most 16/96 jobs running/queued
#SBATCH -p cpu_small           #Queue (partition) to use
#SBATCH --ntasks=1             #Total number of tasks
# #SBATCH --nodes=1              #Number of nodes
# #SBATCH --ntasks-per-node=1    #Tasks per node ($SLURM_NTASKS_PER_NODE)
# #SBATCH --exclusive            #Exclusive use of the nodes
#SBATCH -J stf2seq             #Job name, 8 characters
#SBATCH --time=00:02:00        #Maximum run time

# Report the allocation. The tasks-per-node value prints empty in the recorded
# runs, where the --ntasks-per-node directive above is disabled.
echo '========================================'
echo '- Job ID:' $SLURM_JOB_ID
echo '- Tarefas por no:' $SLURM_NTASKS_PER_NODE
echo '- Qtd. de nos:' $SLURM_JOB_NUM_NODES
echo '- Tot. de tarefas:' $SLURM_NTASKS
echo '- Nos alocados:' $SLURM_JOB_NODELIST
echo '- diretorio onde sbatch foi chamado ($SLURM_SUBMIT_DIR):'
echo $SLURM_SUBMIT_DIR
cd "$SLURM_SUBMIT_DIR"

# Enter the working directory; stop if it is missing, so the program is never
# launched from the wrong location.
cd /scratch/ampemi/eduardo.miranda2/stnc/F2Py || exit 1

# Command to run. It is expanded unquoted below on purpose, so that it splits
# into the program name and its argument.
EXEC='sh calctempo.sh'

# Launch the run
echo '-- srun -------------------------------'
echo '$ srun -n ' $SLURM_NTASKS $EXEC
srun -n $SLURM_NTASKS $EXEC
echo '-- FIM --------------------------------'

Overwriting stencil_seq_f2py.srm


In [62]:
%%bash
sbatch stencil_seq_f2py.srm
sbatch stencil_seq_f2py.srm
sbatch stencil_seq_f2py.srm
squeue -n stf2seq

Submitted batch job 772671
Submitted batch job 772672
Submitted batch job 772673
             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            772671 cpu_small  stf2seq eduardo. PD       0:00      1 (Priority)
            772672 cpu_small  stf2seq eduardo. PD       0:00      1 (Priority)
            772673 cpu_small  stf2seq eduardo. PD       0:00      1 (Priority)


In [68]:
%%bash
# List the jobs named stf2seq that are still known to the queue.
squeue -n stf2seq

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)


In [69]:
%%bash
# Print the Slurm output file of each of the three jobs, in submission order.
d='/scratch/ampemi/eduardo.miranda2/stnc/F2Py'
for job in 772671 772672 772673; do
    cat "$d/slurm-$job.out"
done

- Job ID: 772671
- Tarefas por no:
- Qtd. de nos: 1
- Tot. de tarefas: 1
- Nos alocados: sdumont1092
- diretorio onde sbatch foi chamado ($SLURM_SUBMIT_DIR):
/prj/ampemi/eduardo.miranda2/stnc/F2Py
-- srun -------------------------------
$ srun -n  1 sh calctempo.sh
Heat = 1500.0000 | Tempo = 18.8829 | TempoPyt = 26.2095

real	0m27.363s
user	0m18.978s
sys	0m0.295s
-- FIM --------------------------------
- Job ID: 772672
- Tarefas por no:
- Qtd. de nos: 1
- Tot. de tarefas: 1
- Nos alocados: sdumont1092
- diretorio onde sbatch foi chamado ($SLURM_SUBMIT_DIR):
/prj/ampemi/eduardo.miranda2/stnc/F2Py
-- srun -------------------------------
$ srun -n  1 sh calctempo.sh
Heat = 1500.0000 | Tempo = 18.9576 | TempoPyt = 19.6272

real	0m19.747s
user	0m19.027s
sys	0m0.272s
-- FIM --------------------------------
- Job ID: 772673
- Tarefas por no:
- Qtd. de nos: 1
- Tot. de tarefas: 1
- Nos alocados: sdumont1092
- diretorio onde sbatch foi chamado ($SLURM_SUBMIT_DIR):
/prj/ampemi/eduardo.miranda2/s