In [1]:
import numpy as np
rng = np.random.RandomState(42)
x = rng.rand(1000000)
y = rng.rand(1000000)
%timeit x + y

3.62 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [2]:
%timeit np.fromiter((xi + yi for xi, yi in zip(x, y)), dtype=x.dtype, count=len(x))

190 ms ± 1.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
mask = (x > 0.5) & (y < 0.5)

In [5]:
tmp1 = (x > 0.5)
tmp2 = (y < 0.5)
mask = tmp1 & tmp2

In [6]:
import numexpr
mask_numexpr = numexpr.evaluate('(x > 0.5) & (y < 0.5)')
np.allclose(mask, mask_numexpr)

True

In [11]:
import pandas as pd
nrows, ncols = 100000, 100
rng = np.random.RandomState(42)
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols)) 
                      for i in range(4))

In [12]:
%timeit df1 + df2 + df3 + df4

94.5 ms ± 2.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
%timeit pd.eval('df1 + df2 + df3 + df4')

43.2 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
np.allclose(df1 + df2 + df3 + df4, 
            pd.eval('df1 + df2 + df3 + df4'))

True

In [16]:
df1, df2, df3, df4, df5 = (pd.DataFrame(rng.randint(0, 1000, (100, 3))) for i in range(5))

In [21]:
result1 = -df1 * df2 / (df3 + df4) - df5'
result2 = pd.eval('-df1 * df2 / (df3 + df4) - df5')
                  np.allclose(result1, result2)

SyntaxError: EOL while scanning string literal (<ipython-input-21-54f98eba713f>, line 1)

In [22]:
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 =pd.eval('df1 < df2 <= df3 !=df4')
np.allclose(result1, result2)

True

In [24]:
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')
np.allclose(result1, result2)

True

In [29]:
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3 < df4)')
np.allclose(result1, result3)

True

In [31]:
result1 = df2.T[0] + df3.iloc[1]
result2 = pd.eval('df2.T[0] + df3.iloc[1]')
np.allclose(result1, result2)

True

In [32]:
df = pd.DataFrame(rng.rand(1000, 3), columns=['A', 'B', 'C'])
df.head()

Unnamed: 0,A,B,C
0,0.375506,0.406939,0.069938
1,0.069087,0.235615,0.154374
2,0.677945,0.433839,0.652324
3,0.264038,0.808055,0.347197
4,0.589161,0.252418,0.557789


In [36]:
result1 = (df['A'] + df['B']) / (df['C'] - 1)
result2 = pd.eval('(df.A + df.B) / (df.C - 1)')
np.allclose(result1, result2)

True

In [37]:
result3 = df.eval('(A + B) / (C - 1)')
np.allclose(result1, result3)

True

In [38]:
df.head()

Unnamed: 0,A,B,C
0,0.375506,0.406939,0.069938
1,0.069087,0.235615,0.154374
2,0.677945,0.433839,0.652324
3,0.264038,0.808055,0.347197
4,0.589161,0.252418,0.557789


In [39]:
df.eval('D = (A + B) / C', inplace=True)
df.head

<bound method NDFrame.head of             A         B         C          D
0    0.375506  0.406939  0.069938  11.187620
1    0.069087  0.235615  0.154374   1.973796
2    0.677945  0.433839  0.652324   1.704344
3    0.264038  0.808055  0.347197   3.087857
4    0.589161  0.252418  0.557789   1.508776
5    0.573154  0.330581  0.144122   6.270634
6    0.210102  0.518596  0.569152   1.280322
7    0.406639  0.128631  0.160742   3.329994
8    0.020236  0.354904  0.067919   5.523342
9    0.784161  0.937668  0.490879   3.507645
10   0.932656  0.137548  0.248736   4.302567
11   0.084158  0.546133  0.235618   2.675060
12   0.362698  0.950754  0.274655   4.782188
13   0.058095  0.771863  0.915946   0.906121
14   0.643400  0.234982  0.655226   1.340579
15   0.967865  0.277742  0.987892   1.260874
16   0.110796  0.100477  0.561988   0.375939
17   0.707902  0.781147  0.479950   3.102509
18   0.502528  0.031194  0.878520   0.607524
19   0.921240  0.987053  0.609404   3.131411
20   0.576883  0.729118  

In [40]:
df.eval('D = (A - B) / C', inplace=True)
df.head

<bound method NDFrame.head of             A         B         C         D
0    0.375506  0.406939  0.069938 -0.449425
1    0.069087  0.235615  0.154374 -1.078728
2    0.677945  0.433839  0.652324  0.374209
3    0.264038  0.808055  0.347197 -1.566886
4    0.589161  0.252418  0.557789  0.603708
5    0.573154  0.330581  0.144122  1.683117
6    0.210102  0.518596  0.569152 -0.542023
7    0.406639  0.128631  0.160742  1.729526
8    0.020236  0.354904  0.067919 -4.927445
9    0.784161  0.937668  0.490879 -0.312719
10   0.932656  0.137548  0.248736  3.196594
11   0.084158  0.546133  0.235618 -1.960695
12   0.362698  0.950754  0.274655 -2.141074
13   0.058095  0.771863  0.915946 -0.779269
14   0.643400  0.234982  0.655226  0.623324
15   0.967865  0.277742  0.987892  0.698581
16   0.110796  0.100477  0.561988  0.018362
17   0.707902  0.781147  0.479950 -0.152609
18   0.502528  0.031194  0.878520  0.536510
19   0.921240  0.987053  0.609404 -0.107996
20   0.576883  0.729118  0.431805 -0.352555
21

In [41]:
column_mean = df.mean(1)
result1 = df['A'] + column_mean
result2 = df.eval('A + @column_mean')
np.allclose(result1, result2)

True

In [42]:
result1 = df[(df.A < 0.5) & (df.B < 0.5)]
result2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')
np.allclose(result1, result2)

True

In [43]:
reult2 = df.query('A < 0.5 and B < 0.5')
np.allclose(result1, result2)

True

In [44]:
Cmean =df['C'].mean()
result1 = df[(df.A < Cmean) & (df.B < Cmean)]
result2 = df.query('A < @Cmean and B < @Cmean')
np.allclose(result1, result2)

True