In [1]:
import pandas as pd
import numpy as np

In [3]:
# Benchmark frame with 1000 rows: two standard-normal float columns ('a', 'b'),
# an integer column 'N' drawn from [100, 1000), and a constant string column 'x'.
df = pd.DataFrame(dict(
    a=np.random.randn(1000),
    b=np.random.randn(1000),
    N=np.random.randint(100, 1000, size=1000),
    x='x',
))

In [10]:
def f(x):
    """Return the integrand ``x * (x - 1)`` evaluated at ``x``."""
    shifted = x - 1
    return x * shifted
def integrate_f(a, b, N):
    """Approximate the integral of ``f`` from ``a`` to ``b`` with a left Riemann sum.

    The span ``b - a`` is cut into ``N`` slices of equal width. ``f`` is sampled
    at the left edge of every slice, the samples are accumulated, and the total
    is scaled by the slice width.
    """
    width = (b - a) / N
    total = 0
    # Explicit accumulation loop (rather than sum()) keeps the float addition
    # order exactly as written.
    for k in range(N):
        left_edge = a + k * width
        total += f(left_edge)
    return total * width

In [11]:
%timeit df.apply(lambda x:integrate_f(x['a'],x['b'],x['N']),axis=1)

178 ms ± 3.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
# Profile the same row-wise apply of the pure-Python integrate_f;
# `-l 5` restricts how much of the profile report is printed.
%prun -l 5 df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)

 

In [12]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [22]:
%%cython
cdef double f_plain(x) except? -2:
    # Integrand x * (x - 1). `x` is untyped, so the arithmetic is still done
    # on Python objects; `except? -2` lets an error raised here reach the caller.
    return x * (x - 1)


cpdef double integrate_f_plain(a, b, N) except? -2:
    """Left Riemann sum of ``f_plain`` from ``a`` to ``b`` using ``N`` slices.

    This is the pure-Python algorithm compiled by Cython without typing the
    arguments or locals.

    Declared ``cpdef`` rather than ``cdef``: a ``cdef`` function is C-only and
    is not exported to the notebook namespace, so the ``%timeit`` / ``%prun``
    cells that call ``integrate_f_plain`` from Python would fail with a
    NameError on a fresh kernel.

    ``except? -2`` makes errors raised in the body (for example a division by
    zero when ``N == 0``) propagate to the caller. Without an exception clause,
    Cython versions before 3.0 print and ignore exceptions raised inside
    functions that return a C value.
    """
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_plain(a + i * dx)
    return s * dx

In [23]:
%timeit df.apply(lambda x:integrate_f_plain(x['a'],x['b'],x['N']),axis=1)

100 ms ± 3.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [24]:
# Profile the row-wise apply of the untyped Cython integrate_f_plain;
# `-l 4` restricts how much of the profile report is printed.
%prun -l 4 df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1)

 

In [25]:
%%cython
cimport numpy as np
import numpy as np
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) evaluated on a C double.
    return x * (x - 1)


cpdef double integrate_f_typed(double a, double b, int N) except? -2:
    """Left Riemann sum of ``f_typed`` from ``a`` to ``b`` using ``N`` slices.

    All arguments and locals are C-typed, so the loop runs without Python
    object arithmetic.

    ``except? -2`` makes errors raised in the body (for example the
    ZeroDivisionError Cython raises for ``N == 0`` under its default division
    checking) propagate to the caller. Without an exception clause, Cython
    versions before 3.0 print and ignore such errors and return a meaningless
    value.
    """
    cdef int i
    cdef double s = 0
    cdef double dx = (b - a) / N
    for i in range(N):
        s += f_typed(a + i * dx)
    return s * dx
cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b, np.ndarray col_N):
    """Apply ``integrate_f_typed`` element-wise over three equal-length arrays.

    Parameters
    ----------
    col_a, col_b : np.ndarray of float64
        Lower and upper integration bounds, one pair per element.
    col_N : np.ndarray of the platform default integer dtype
        Number of slices for each element.

    Returns
    -------
    np.ndarray of double
        ``res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i])``.

    Raises
    ------
    AssertionError
        If a dtype or a length does not match the expectations above.
    """
    # np.float and np.int were plain aliases of the builtins float and int and
    # were removed in NumPy 1.24; np.float64 and np.int_ are the dtypes those
    # comparisons resolved to.
    assert (col_a.dtype == np.float64 and col_b.dtype == np.float64 and col_N.dtype == np.int_)
    cdef Py_ssize_t i, n = len(col_N)
    assert (len(col_a) == len(col_b) == n)
    cdef np.ndarray[double] res = np.empty(n)
    # All three lengths were just checked to equal n, so iterate over n directly.
    for i in range(n):
        res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i])
    return res

In [26]:
# Time the typed Cython loop, fed the columns' underlying arrays (.values)
# instead of going through DataFrame.apply.
%timeit apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)

1.08 ms ± 7.93 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [27]:
# Profile the typed Cython loop on the same inputs;
# `-l 4` restricts how much of the profile report is printed.
%prun -l 4 apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)

 

In [29]:
# Small 5x2 frame of standard-normal draws (columns 'a' and 'b'), plus an
# array with one random value per row. Note that this rebinds `df`.
df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b'])
newcol = np.random.randn(df.shape[0])
# Time DataFrame.eval adding column 'b' to the local array; the `@` prefix
# refers to the Python variable `newcol` rather than a column.
%timeit df.eval('b + @newcol')

770 µs ± 14.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [30]:
# Time plain column assignment. This statement mutates df (it adds or
# overwrites the 'newcol' column) on every timing iteration.
%timeit df['newcol']=newcol

59.6 µs ± 888 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
