# Speeding up The Z function

So the main time consumed to solve the electrostatic function is actually the evaluation of the function itself !

We will try to improve it's evaluation time


In [1]:
from scipy.special import wofz
import numpy as np
np.random.seed()

import matplotlib as plt
%load_ext line_profiler

In [4]:
N_cases = 10000
w = np.random.uniform(-10, 10, N_cases) + 1.j * np.random.uniform(-10, 10, N_cases)


In [5]:
%%timeit
z_w = wofz(w)

2.27 ms ± 23.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Unvectorizing it

Ok, so Alexis think that we can speed up the function using a cache.
Unfortunatly, the cache do not work with numpy array

So wee will check if un-vectorizing the call can allow us to gain from the cache

In [6]:
def unvect_wofz(ws):
    z = np.zeros(len(ws), dtype="complex128")
    for i,w in enumerate(ws):
        z[i] = wofz(w)
    return z

In [7]:
%%timeit
z_w = unvect_wofz(w)

14.9 ms ± 264 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
a = %lprun -r -f unvect_wofz z_w = unvect_wofz(w)
a.print_stats()

Timer unit: 1e-06 s

Total time: 0.036942 s
File: <ipython-input-6-28e2f432af96>
Function: unvect_wofz at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def unvect_wofz(ws):
     2         1        138.0    138.0      0.4      z = np.zeros(len(ws), dtype="complex128")
     3     10001       7488.0      0.7     20.3      for i,w in enumerate(ws):
     4     10000      29314.0      2.9     79.4          z[i] = wofz(w)
     5         1          2.0      2.0      0.0      return z



Well, even if we can gain a factor 10 with the cache, we loose a factor 10 with the unfectorasition


# With Cython

In [9]:
%load_ext Cython

In [14]:
import Cython
import line_profiler
directive_defaults =  Cython.Compiler.Options.get_directive_defaults()

directive_defaults['linetrace'] = True
directive_defaults['binding'] = True

z = np.ndarray(N_cases, dtype=np.complex128)


In [24]:
%%cython -f -c=-O3 

import numpy as np
cimport numpy as np

from scipy.special.cython_special cimport wofz as cwofz

DTYPE_c = np.complex128
ctypedef np.complex128_t DTYPE_c_t
cy_dict = {}

# @cython.boundscheck(False) # turn off bounds-checking for entire function
# @cython.wraparound(False)  # turn off negative index wrapping for entire function
def cythonwrapper_wofz(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    cython_wofz(w.shape[0], &w[0], &output[0] )

cdef cython_wofz(int n, DTYPE_c_t *w, DTYPE_c_t *output):
    cdef int i
    for i in range(n):
        output[i] = cwofz(w[i]) 
    
    
def cythonwrapper_wofz_cached(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    cython_wofz_cached(w.shape[0], &w[0], &output[0] )

cdef cython_wofz_cached(int n, DTYPE_c_t *w, DTYPE_c_t *output):
    cdef int i
    for i in range(n):
        if w[i] in cy_dict:
            output[i] = cy_dict[w[i]]
        else:
            output[i] = cwofz(w[i]) 
            cy_dict.update({w[i]:output[i]})

In [25]:
%timeit cythonwrapper_wofz(w, z)

2.35 ms ± 70.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
%timeit cythonwrapper_wofz_cached(w, z)

2.04 ms ± 96.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


So the cython for loop takes some mor time than the numpy call, but it is *only* 30% slower

In [48]:
%%cython -f --compile-args=-DCYTHON_TRACE=1 -c=-O3 

import numpy as np
cimport numpy as np

from scipy.special.cython_special cimport wofz as cwofz

DTYPE_c = np.complex128
ctypedef np.complex128_t DTYPE_c_t
cy_dict = {}

# @cython.boundscheck(False) # turn off bounds-checking for entire function
# @cython.wraparound(False)  # turn off negative index wrapping for entire function
def cython_wofz(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    cdef int i
    for i in range(w.shape[0]):
        output[i] = cwofz(w[i])     
    
    
def cython_wofz_cached(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    cdef int i
    for i in range(w.shape[0]):
        if w[i] in cy_dict:
            output[i] = cy_dict[w[i]]
        else:
            output[i] = cwofz(w[i]) 
            cy_dict.update({w[i]:output[i]})
    

In [49]:
a = %lprun -r -f cython_wofz cython_wofz(w, z)
a.print_stats()

Timer unit: 1e-06 s

Total time: 0.014901 s
File: /home/tavant/.cache/ipython/cython/_cython_magic_9eba7c72e7999fc91405b0f7a9d8a7ba.pyx
Function: cython_wofz at line 13

Line #      Hits         Time  Per Hit   % Time  Line Contents
    13                                           def cython_wofz(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    14         1         17.0     17.0      0.1      assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    15                                               cdef int i
    16         1          3.0      3.0      0.0      for i in range(w.shape[0]):
    17     10000      14881.0      1.5     99.9          output[i] = cwofz(w[i])     



In [50]:
a = %lprun -r -f cython_wofz_cached cython_wofz_cached(w, z)
a.print_stats()
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
a = %lprun -r -f cython_wofz_cached cython_wofz_cached(w, z)
a.print_stats()

Timer unit: 1e-06 s

Total time: 0.024243 s
File: /home/tavant/.cache/ipython/cython/_cython_magic_9eba7c72e7999fc91405b0f7a9d8a7ba.pyx
Function: cython_wofz_cached at line 20

Line #      Hits         Time  Per Hit   % Time  Line Contents
    20                                           def cython_wofz_cached(np.ndarray[DTYPE_c_t, ndim=1] w  not None, np.ndarray[DTYPE_c_t, ndim=1] output not None):
    21         1         19.0     19.0      0.1      assert w.dtype == DTYPE_c and output.dtype == DTYPE_c
    22                                               cdef int i
    23         1          2.0      2.0      0.0      for i in range(w.shape[0]):
    24     10000       6482.0      0.6     26.7          if w[i] in cy_dict:
    25                                                       output[i] = cy_dict[w[i]]
    26                                                   else:
    27     10000       8510.0      0.9     35.1              output[i] = cwofz(w[i]) 
    28     10000       9230.0   