In [1]:
# Print the C source of the reference kernel (syntax-highlighted), then build
# it with clang 4.0 at -O2 for the host CPU (-march=native) as a
# position-independent shared object, add.so, in the current directory.
!pygmentize add.c
!clang-4.0 add.c -O2 -march=native -fPIC -shared -o add.so

[36m#[39;49;00m[36minclude[39;49;00m [37m<stdint.h>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<stdlib.h>[39;49;00m[36m[39;49;00m

[36mvoid[39;49;00m [32madd[39;49;00m([36muint32_t[39;49;00m* res, [36muint32_t[39;49;00m [34mconst[39;49;00m* a, [36muint32_t[39;49;00m [34mconst[39;49;00m* b, [36msize_t[39;49;00m n) {
  [34mfor[39;49;00m ([36msize_t[39;49;00m i= [34m0[39;49;00m; i < n; ++i) {
    res[i] = a[i] + b[i];
  }
}


In [2]:
import os

import pydffi

# Prototype handed to DragonFFI; it mirrors the signature of add() in add.c.
_ADD_DECLARATION = '''
#include <stdint.h>
#include <stdlib.h>
void add(uint32_t* res, uint32_t const* a, uint32_t const* b, size_t n);
'''

# Load the freshly built shared object (looked up in the current working
# directory) before asking DragonFFI to compile the declaration.
pydffi.dlopen(os.path.abspath("add.so"))

FFI = pydffi.FFI()
CU = FFI.cdef(_ADD_DECLARATION)

# Python-callable handle on the C function, used by the benchmark cells.
add_c = CU.funcs.add

In [4]:
import numpy as np

# Number of uint32 elements in each input vector.
N = 200000

# Fixed seed so that a fresh "Restart & Run All" benchmarks the same data;
# a local RandomState keeps the global NumPy RNG untouched.
SEED = 0
_rng = np.random.RandomState(SEED)

# Operands drawn from the full uint32 range (the 2**32 upper bound is exclusive).
a = _rng.randint(2**32, size=N, dtype=np.uint32)
b = _rng.randint(2**32, size=N, dtype=np.uint32)

In [4]:
# Output buffer for the C kernel. np.empty is the documented way to allocate
# an uninitialised array; the np.ndarray constructor is a low-level API.
res = np.empty(N, dtype=np.uint32)

# DragonFFI array types: uint32_t[N] for the output and its const-qualified
# counterpart for the two read-only inputs.
arTy = FFI.arrayType(FFI.UInt32Ty, N)
carTy = pydffi.const(arTy)

# Wrap the NumPy buffers as C arrays and take pointers to them, built once so
# the timed call below does not include the wrapping cost.
# NOTE(review): view_as is assumed to alias the NumPy memory (no copy); the
# check below fails loudly if that assumption is wrong.
cres = pydffi.ptr(pydffi.view_as(arTy, res))
ca = pydffi.ptr(pydffi.view_as(carTy, a))
cb = pydffi.ptr(pydffi.view_as(carTy, b))

# Validate the kernel once before timing it: the C result must match NumPy's
# element-wise uint32 addition.
add_c(cres, ca, cb, N)
np.testing.assert_array_equal(res, a + b)
%timeit add_c(cres, ca, cb, N)

50.5 µs ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [1]:
%reload_ext pythran.magic

In [2]:
%%pythran -O2 -march=native
#pythran export add_pythran(uint32[],uint32[])
# Element-wise sum of two arrays, written as a plain array expression so that
# the loop is generated by Pythran rather than written by hand. The export
# directive above declares the compiled entry point as taking two
# one-dimensional uint32 arrays.
def add_pythran(a,b):
    return a+b

/usr/bin/ld: cannot find -lcblas
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -lcblas
collect2: error: ld returned 1 exit status


In file included from /tmp/tmpm_pynkz9.cpp:2:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/core.hpp:20:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/types/combined.hpp:4:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/include/types/combined.hpp:4:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/include/types/traits.hpp:4:
In file included from /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/complex:45:
In file included from /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/sstream:38:
In file included from /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/istream:38:
In file included from /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/ios:40:
In file included from /usr/lib/gcc/x86_64-linux-gnu/8/../../.

In [5]:
%timeit add_pythran(a,b)

117 µs ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
# Back to slides

In [8]:
# Recompile add.c with the loop vectorizer's optimization remarks enabled:
# -Rpass reports loops that were vectorized, -Rpass-missed reports loops that
# were not, and -Rpass-analysis explains the vectorizer's decisions.
# The object file itself is discarded (-c -o /dev/null).
!clang-4.0 -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize add.c -O2 -march=native -c -o /dev/null

[1madd.c:5:3: [0m[0;1;34mremark: [0m[1mvectorized loop
      (vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize][0m
  for (size_t i= 0; i < n; ++i) {
[0;1;32m  ^
[0m

In [9]:
# Run only Pythran's translation step (-E): emit C++ for add.py without
# compiling it. The following cell displays the resulting add.cpp.
# NOTE(review): add.py is not created by any cell visible here — presumably it
# holds the same add_pythran function as the %%pythran cell; confirm.
!pythran -E add.py 

In [10]:
!pygmentize add.cpp

[36m#[39;49;00m[36mdefine BOOST_SIMD_NO_STRICT_ALIASING 1[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/core.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/python/core.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/types/bool.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/types/int.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mifdef _OPENMP[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<omp.h>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mendif[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/include/types/uint32.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/include/types/ndarray.hpp>[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36minclude[39;49;00m [37m<pythonic/types/ndarray.hpp>[39;49;00m[36m[39

In [11]:
# Compile the Pythran-generated add.cpp with clang 4.0, asking only for
# remarks about loops that WERE vectorized (-Rpass=loop-vectorize). The object
# file is discarded (-c -o /dev/null).
# NOTE(review): the -I flags are absolute paths into one developer's
# virtualenv and Python 3.6 install; adjust them before re-running elsewhere.
!clang++-4.0  add.cpp -Rpass=loop-vectorize -DNDEBUG -O2  -march=native  -fPIC -DENABLE_PYTHON_MODULE -D__PYTHRAN__=3 -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/numpy/core/include -I/usr/local/include -I/usr/include -I/home/aguinet/.virtualenvs/fixcompilerbug/include -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/numpy/core/include -I/usr/local/include -I/usr/include -I/home/aguinet/.virtualenvs/fixcompilerbug/include -I/usr/include/python3.6m -I/home/aguinet/.virtualenvs/fixcompilerbug/include/python3.6m  -std=c++11 -c -o /dev/null

      [-Wunknown-attributes][0m
__attribute__ ((externally_visible))
[0;1;32m                ^


In [12]:
# Same compilation of add.cpp, now also requesting -Rpass-missed and
# -Rpass-analysis so that clang reports loops it did NOT vectorize and the
# reason why. The object file is discarded (-c -o /dev/null).
# NOTE(review): the -I flags are absolute paths into one developer's
# virtualenv and Python 3.6 install; adjust them before re-running elsewhere.
!clang++-4.0  add.cpp -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize -DNDEBUG -O2  -march=native  -fPIC -DENABLE_PYTHON_MODULE -D__PYTHRAN__=3 -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/numpy/core/include -I/usr/local/include -I/usr/include -I/home/aguinet/.virtualenvs/fixcompilerbug/include -I/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/numpy/core/include -I/usr/local/include -I/usr/include -I/home/aguinet/.virtualenvs/fixcompilerbug/include -I/usr/include/python3.6m -I/home/aguinet/.virtualenvs/fixcompilerbug/include/python3.6m  -std=c++11 -c -o /dev/null

      [-Wunknown-attributes][0m
__attribute__ ((externally_visible))
[0;1;32m                ^
[0mIn file included from add.cpp:10:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/include/types/ndarray.hpp:35:
[1m/home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/include/types/numpy_expr.hpp:24:20: [0m[0;1;34mremark: [0m[1m
      loop not vectorized: value that could not be identified as reduction is
      used outside the loop [-Rpass-analysis=loop-vectorize][0m
      return Op{}(*std::get<I>(iters)...);
[0;1;32m                   ^
[0mIn file included from add.cpp:2:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/core.hpp:20:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug/lib/python3.6/site-packages/pythran/pythonic/types/combined.hpp:4:
In file included from /home/aguinet/.virtualenvs/fixcompilerbug



In [None]:
# Back to slides

In [13]:
%%pythran -O2 -DUSE_BOOST_SIMD 
#pythran export add_pythran_simd(uint32[],uint32[])
# Element-wise sum of two arrays, using the same expression as add_pythran;
# the difference is in the cell's compile flags (-DUSE_BOOST_SIMD). The export
# directive above declares the compiled entry point as taking two
# one-dimensional uint32 arrays.
def add_pythran_simd(a,b):
    return a+b

In [14]:
%timeit add_pythran_simd(a,b)

42.9 µs ± 272 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
