In [1]:
import numpy as np
# Seeded generator so every fresh run reproduces the same samples.
rng = np.random.default_rng(seed=42)
# Two arrays of one million random floats each, drawn in order: x first, then y.
x, y = (rng.random(1_000_000) for _ in range(2))
# Time the vectorized element-wise sum of the two arrays.
%timeit x + y

2.73 ms ± 106 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Time the same element-wise sum when each pair is added inside a Python
# generator; count=len(x) tells np.fromiter the output length up front.
%timeit np.fromiter((xi + yi for xi, yi in zip(x, y)), dtype=x.dtype, count=len(x))

283 ms ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Element-wise boolean mask: True where x > 0.5 and y < 0.5.
mask = (x > 0.5) & (y < 0.5)

In [9]:
# The same mask built step by step: each comparison is stored in its own
# named intermediate array before the two are combined with &.
tmp1 = (x > 0.5)
tmp2 = (y < 0.5)
mask = tmp1 & tmp2

In [10]:
import numexpr
# Evaluate the compound comparison from its string form with NumExpr.
# Only `mask_numexpr` is assigned, so the name `numexpr` keeps referring to
# the module and this cell (or any later numexpr call) can be run again.
mask_numexpr = numexpr.evaluate('(x > 0.5) & (y < 0.5)')
# Confirm the NumExpr result matches the NumPy mask element for element.
np.all(mask == mask_numexpr)

np.True_

In [12]:
import pandas as pd
# Four separately drawn frames of random floats, all of shape (nrows, ncols).
nrows, ncols = 100_000, 100
df1, df2, df3, df4 = [
    pd.DataFrame(rng.random(size=(nrows, ncols))) for _ in range(4)
]

In [14]:
# Time the sum of the four DataFrames using ordinary pandas operators.
%timeit df1 + df2 + df3 + df4

76.2 ms ± 2.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
# Time the same sum with the expression handed to pd.eval as a string.
%timeit pd.eval('df1 + df2 + df3 + df4')

31.6 ms ± 462 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
# Check that the operator-based sum and the pd.eval sum agree
# (within np.allclose's default tolerances).
np.allclose(df1 + df2 + df3 + df4,
           pd.eval('df1 + df2 + df3 + df4'))

True

In [17]:
# Rebind df1..df4 (and add df5) to small 100x3 frames of random integers
# drawn with low=0, high=1000.
df1, df2, df3, df4, df5 = [
    pd.DataFrame(rng.integers(low=0, high=1000, size=(100, 3)))
    for _ in range(5)
]

In [22]:
# Arithmetic operators: compute the same expression directly with pandas
# and from its string form with pd.eval.
result1 = -df1 * df2 / (df3 + df4) -df5
result2 = pd.eval('-df1 * df2 / (df3 + df4) -df5')
# Compare the two results; the second argument must be `result2`
# (a misspelled name here raises NameError instead of comparing anything).
np.allclose(result1, result2)

True

In [23]:
# Comparison operators: the pd.eval string uses a chained comparison, while
# the plain-pandas version spells it out as three comparisons joined by &.
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('df1 < df2 <= df3 != df4')
np.allclose(result1, result2)

True

In [24]:
# Bitwise operators: the same &/| expression evaluated directly and from
# its string form with pd.eval, then compared.
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 <df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 <df4)')
np.allclose(result1, result2)

True

In [26]:
# Same expression written with the keywords `and`/`or` inside the eval
# string. The comparison uses `result1`, which this cell does not define;
# it must already exist in the kernel (the bitwise-operator cell assigns it).
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3 <df4)')
np.allclose(result1, result3)

True

In [28]:
# Attribute access (.T, .iloc) and indexing ([0], [1]) inside the eval
# string, compared against the same expression evaluated directly.
result1 = df2.T[0] + df3.iloc[1]
result2 = pd.eval('df2.T[0] + df3.iloc[1]')
np.allclose(result1, result2)

True

In [30]:
# A 1000-row frame of random floats with labelled columns A, B and C.
df = pd.DataFrame(data=rng.random(size=(1000, 3)), columns=['A', 'B', 'C'])
# Display the first five rows.
df.head(n=5)

Unnamed: 0,A,B,C
0,0.219787,0.701708,0.713602
1,0.647909,0.806905,0.632457
2,0.586263,0.865538,0.055499
3,0.513328,0.077981,0.138013
4,0.43132,0.634561,0.151981


In [31]:
# Column arithmetic: once with explicit column lookups, once with pd.eval
# where the string reaches the columns as attributes of `df`.
result1 = (df['A'] + df['B']) / (df['C'] - 1)
result2 = pd.eval("(df.A + df.B) / (df.C - 1)")
np.allclose(result1, result2)

True

In [32]:
# With DataFrame.eval the string names the columns directly (A, B, C).
# The comparison uses `result1`, which this cell does not define; it must
# already exist in the kernel.
result3 = df.eval('(A + B) / (C - 1)')
np.allclose(result1, result3)

True

In [33]:
# Display the first rows of df.
df.head()

Unnamed: 0,A,B,C
0,0.219787,0.701708,0.713602
1,0.647909,0.806905,0.632457
2,0.586263,0.865538,0.055499
3,0.513328,0.077981,0.138013
4,0.43132,0.634561,0.151981


In [34]:
# Assignment inside the eval string: with inplace=True the result is stored
# on df itself as column D (visible in the head() output below).
df.eval('D = (A + B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.219787,0.701708,0.713602,1.291329
1,0.647909,0.806905,0.632457,2.300258
2,0.586263,0.865538,0.055499,26.158985
3,0.513328,0.077981,0.138013,4.284439
4,0.43132,0.634561,0.151981,7.013267


In [35]:
# Assigning to a name that is already a column: D is overwritten in place
# with the new expression's values.
df.eval('D = (A -B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.219787,0.701708,0.713602,-0.675336
1,0.647909,0.806905,0.632457,-0.251394
2,0.586263,0.865538,0.055499,-5.032051
3,0.513328,0.077981,0.138013,3.154381
4,0.43132,0.634561,0.151981,-1.337283


In [36]:
# Mean taken along axis 1, i.e. across the columns of each row.
column_mean = df.mean(axis=1)
result1 = df.A + column_mean
# Inside the eval string, '@' marks a Python variable name rather than a column.
result2 = df.eval('A + @column_mean')
np.allclose(result1, result2)

True

In [37]:
# Row filtering: keep rows where both A and B are below 0.5, computed
# directly and from the same expression passed to pd.eval as a string.
result1 = df[(df.A < 0.5) & (df.B < 0.5)]
result2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')
np.allclose(result1, result2)

True

In [38]:
# The same filter expressed with DataFrame.query. The comparison uses
# `result1`, which this cell does not define; it must already exist in the
# kernel.
result2 = df.query('A < 0.5 and B < 0.5')
np.allclose(result1, result2)

True

In [39]:
# Filter against a value computed in Python: the query string refers to the
# variable Cmean through the '@' prefix.
Cmean = df['C'].mean()
result1 = df[(df.A < Cmean) & (df.B < Cmean)]
result2 = df.query('A < @Cmean and B < @Cmean')
np.allclose(result1, result2)

True

In [40]:
# Compound filter written as a single expression.
# Note: this rebinds `x`, which an earlier cell bound to a NumPy array.
x = df[(df.A < 0.5) & (df.B < 0.5)]

In [41]:
# The same filter spelled out step by step: each comparison and their
# combination is stored under its own name before indexing df.
# Note: tmp1/tmp2 and `x` are names that earlier cells also assigned.
tmp1 = df.A < 0.5
tmp2 = df.B < 0.5
tmp3 = tmp1 & tmp2
x = df[tmp3]

In [42]:
# Number of bytes occupied by the array returned by df.values.
df.values.nbytes

32000