In [1]:
import pandas as pd
import numpy as np
import timeit, sys
import re, random, itertools, string
import seaborn as sns
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Number of bytes in one mebibyte; used to convert byte counts to MB.
MB = 1 << 20
# Problem sizes iterated by the benchmark cells: 10**2 through 10**6.
datasize = [10 ** exponent for exponent in range(2, 7)]
# Benchmark label -> list of speedup ratios (one entry per size in `datasize`).
speedup_all = dict()

np.einsum vs. np.tensordot

In [3]:
speedup = []
for d in datasize:
    p = np.random.rand(d,1)
    w = np.random.rand(d)
    r1 = np.einsum('ij,ik,i->jk',p,p,w)
    r2 = np.tensordot(p[...,None]*p[:,None], w, axes=((0),(0)))
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.einsum('ij,ik,i->jk',p,p,w)
    t2 = %timeit -o np.tensordot(p[...,None]*p[:,None], w, axes=((0),(0)))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum vs. np.tensordot'] = speedup

1.6 µs ± 5.49 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
13.1 µs ± 940 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.87 µs ± 95.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15 µs ± 689 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.9 µs ± 188 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
23.1 µs ± 743 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
96.9 µs ± 2.17 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
68.5 µs ± 2.08 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.07 ms ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.69 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.einsum vs np.sum

In [4]:
speedup = []
for d in datasize:
    a = np.arange(d*1*1).reshape(d,1,1)
    r1 = np.einsum('ijk->',a)
    r2 = np.sum(a)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.einsum('ijk->',a)
    t2 = %timeit -o np.sum(a)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum vs np.sum'] = speedup

1.63 µs ± 255 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.28 µs ± 109 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.55 µs ± 34.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.69 µs ± 33.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.2 µs ± 62.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.92 µs ± 227 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
19 µs ± 361 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
36 µs ± 2.36 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
164 µs ± 4.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
310 µs ± 6.08 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


np.concatenate vs. np.stack

In [5]:
speedup = []
for d in datasize:
    a = np.ones((d,1), dtype=np.float64)
    b = np.ones((d,1), dtype=np.float64)
    r1 = np.concatenate([a,b], axis=1)
    r2 = np.hstack([a,b])
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.concatenate([a,b], axis=1)
    t2 = %timeit -o np.hstack([a,b])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.concatenate vs. np.stack'] = speedup

1.13 µs ± 37.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.5 µs ± 76.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.85 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.27 µs ± 48.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
10.3 µs ± 447 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.9 µs ± 75.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
640 µs ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
638 µs ± 21.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
7.04 ms ± 276 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7 ms ± 189 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.DataFrame.at vs pd.DataFrame.loc

In [6]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r = random.randint(0, 10)
    r1 = df.at[r, 'a']
    r2 = df.loc[r, 'a']
    assert r1 == r2
    t1 = %timeit -o df.at[r, 'a']
    t2 = %timeit -o df.loc[r, 'a']
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.at vs pd.DataFrame.loc'] = speedup

3.03 µs ± 45.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.62 µs ± 24.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.06 µs ± 110 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.7 µs ± 98.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.01 µs ± 51 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.66 µs ± 58.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.03 µs ± 58.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.01 µs ± 148 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.9 µs ± 366 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13.8 µs ± 628 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


pd.DataFrame.iterrows vs pd.DataFrame.itertuples

In [7]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r = random.randint(0, 10)
    r1 = [row.a for row in df.itertuples()]
    r2 = [row['a'] for _,row in df.iterrows()]
    assert r1 == r2
    t1 = %timeit -o [row.a for row in df.itertuples()]
    t2 = %timeit -o [row['a'] for _,row in df.iterrows()]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.iterrows vs pd.DataFrame.itertuples'] = speedup

168 µs ± 5.25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.79 ms ± 80.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
445 µs ± 3.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
17.6 ms ± 638 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.34 ms ± 57.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
175 ms ± 1.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
32.3 ms ± 346 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.75 s ± 43.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
356 ms ± 5.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
19.2 s ± 1.89 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.einsum vs np.ndarray.sum

In [9]:
speedup = []
for d in datasize:
    a = np.random.rand(d)
    print("size: {} MB".format(sys.getsizeof(a)/MB))
    r1 = np.einsum('...i,...i ->...', a, a)
    r2 = (a*a).sum(axis=-1)
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.einsum('...i,...i ->...', a, a)
    t2 = %timeit -o (a*a).sum(axis=-1)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum vs np.ndarray.sum'] = speedup

size: 0.0008544921875 MB
1.52 µs ± 34.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.32 µs ± 169 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
size: 0.007720947265625 MB
2.2 µs ± 25 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.11 µs ± 48.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
size: 0.076385498046875 MB
6.49 µs ± 59.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
9.01 µs ± 303 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
size: 0.763031005859375 MB
49.8 µs ± 494 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
70.6 µs ± 593 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
size: 7.629486083984375 MB
480 µs ± 11.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.52 ms ± 142 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.where vs pd.DataFrame.loc

In [11]:
# Benchmark: replacing positive values of column 'a' with 1 via np.where
# (which builds a new array) vs. a boolean-mask .loc assignment (which writes
# into the frame). The stored ratio is (mean .loc timing) / (mean np.where timing).
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    # r1 must be computed before the .loc assignment below, because that
    # assignment overwrites df['a'] in place.
    r1 = np.where(df.a > 0, 1, df.a)
    df.loc[df.a > 0, 'a'] = 1    
    # After the assignment, df['a'] itself holds the second result.
    assert np.allclose(r1, df['a']) == True
    # Both timed statements run on the already-modified frame.
    t1 = %timeit -o x = np.where(df.a > 0, 1, df.a)
    t2 = %timeit -o df.loc[df.a > 0, 'a'] = 1
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.where vs pd.DataFrame.loc'] = speedup

91.5 µs ± 1.95 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.26 ms ± 27.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
97.3 µs ± 912 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.26 ms ± 37.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
105 µs ± 799 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.28 ms ± 16.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
188 µs ± 1.72 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.71 ms ± 18 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.49 ms ± 59.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.45 ms ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.dot vs np.einsum

In [12]:
speedup = []
for d in datasize:
    #a= np.random.rand(d)
    #r1 = np.dot(a[:,None],a[None,:])
    #r2 = np.einsum('i,j', a, a)
    (M, K) = (int(d/10), 10)
    C = np.random.rand(K, K)
    X = np.random.rand(M, K)
    r1 = np.einsum('ik,km->im',X,C)
    r2 = np.dot(X,C)
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.einsum('ik,km->im',X,C)
    t2 = %timeit -o np.dot(X,C)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.dot vs np.einsum'] = speedup

2.31 µs ± 118 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
820 ns ± 16.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
7.9 µs ± 77.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.76 µs ± 19.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
63 µs ± 1.55 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
10.6 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
618 µs ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
62.4 µs ± 903 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.74 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.59 ms ± 65 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.DataFrame.groupby,pd.core.groupby.GroupBy.size,pd.Series.unstack vs pd.crosstab

In [13]:
speedup = []
for d in datasize:
    df = pd.DataFrame(columns=['id', 'category'])
    df['id'] = np.random.randint(3, size=d)
    df['category'] = np.random.choice(['a','b','c'], d)
    r1 = df.groupby(['id','category']).size().unstack(fill_value=0) 
    r2 = pd.crosstab(df['id'], df['category'])
    assert r1.equals(r2)
    t1 = %timeit -o df.groupby(['id','category']).size().unstack(fill_value=0)
    t2 = %timeit -o pd.crosstab(df['id'], df['category'])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.groupby,pd.core.groupby.GroupBy.size,pd.Series.unstack vs pd.crosstab'] = speedup

1e+03 µs ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.89 ms ± 66.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.04 ms ± 29.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.94 ms ± 95.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.41 ms ± 32 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
5.79 ms ± 414 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
6.31 ms ± 257 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
12 ms ± 496 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
72.6 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
114 ms ± 4.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.array vs np.fromiter

In [14]:
speedup = []
for d in datasize:
    a = [x for x in range(d)]
    r1 = np.fromiter(a, dtype=int)
    r2 = np.array(a)
    assert np.allclose(r1, r2)
    t1 = %timeit -o np.fromiter(a, dtype=int)
    t2 = %timeit -o np.array(a)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.array vs np.fromiter'] = speedup

2.67 µs ± 79 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.66 µs ± 33.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15.8 µs ± 259 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
31.3 µs ± 356 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
143 µs ± 4.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
304 µs ± 1.78 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.38 ms ± 18.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3 ms ± 21.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
16.3 ms ± 284 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
32.4 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.einsum vs np.ndarray.dot

In [15]:
speedup = []
for d in datasize:
    A = np.ones((d,1,1))
    v = np.array([2])
    r1 = np.einsum('i, jki', v, A)
    r2 = A.dot(v)
    assert np.allclose(r1, r2)
    t1 = %timeit -o np.einsum('i, jki', v, A)
    t2 = %timeit -o A.dot(v)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum vs np.ndarray.dot'] = speedup

1.71 µs ± 43.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.57 µs ± 29.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.05 µs ± 30.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
28.2 µs ± 783 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
14.3 µs ± 83.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
274 µs ± 1.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
118 µs ± 2.07 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.78 ms ± 78.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.88 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
29.7 ms ± 570 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.ndarray.astype vs np.where

In [16]:
speedup = []
for d in datasize:
    np.random.seed(2)
    A = np.random.rand(1,d)
    r1 = (A > 0.5).astype(int)
    r2 = np.where(A > 0.5, 1, 0)
    assert np.allclose(r1, r2)
    t1 = %timeit -o (A > 0.5).astype(int)
    t2 = %timeit -o np.where(A > 0.5, 1, 0)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.ndarray.astype vs np.where'] = speedup

1.11 µs ± 3.19 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.96 µs ± 49.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.97 µs ± 90.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.24 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.46 µs ± 227 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11 µs ± 402 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
70.9 µs ± 2.73 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
82.4 µs ± 633 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.79 ms ± 31.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.91 ms ± 23 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


pd.Series.apply vs np.ndarray.tolist,pd.DataFrame

In [17]:
speedup = []
for d in datasize:
    df = pd.DataFrame(data=[[list(x)] for x in np.random.rand(d,2)])
    r1 = pd.DataFrame(df[0].values.tolist())
    r2 = df[0].apply(pd.Series)
    assert r1.equals(r2)
    t1 = %timeit -o pd.DataFrame(df[0].values.tolist())
    t2 = %timeit -o df[0].apply(pd.Series)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.apply vs np.ndarray.tolist,pd.DataFrame'] = speedup

144 µs ± 736 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.21 ms ± 78.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
256 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
81.1 ms ± 1.52 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.35 ms ± 27.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
830 ms ± 4.36 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
16.1 ms ± 156 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.41 s ± 29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
The slowest run took 30.44 times longer than the fastest. This could mean that an intermediate result is being cached.
1.01 s ± 1.99 s per loop (mean ± std. dev. of 7 runs, 10 loops each)
1min 26s ± 438 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.where vs pd.DataFrame.apply

In [18]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r1 = np.where(df.a > 0, 1, -1)
    r2 = df.apply(lambda row: 1 if row['a'] > 0 else -1, axis = 1)   
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.where(df.a > 0, 1, -1)
    t2 = %timeit -o df.apply(lambda row: 1 if row['a'] > 0 else -1, axis = 1)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.where vs pd.DataFrame.apply'] = speedup

81.7 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.12 ms ± 3.78 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
83.5 µs ± 606 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.21 ms ± 21.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
96 µs ± 578 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
80.7 ms ± 974 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
384 µs ± 2.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
800 ms ± 9.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.25 ms ± 15.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.99 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


pd.DataFrame.ix vs pd.DataFrame.loc

In [19]:
# Benchmark: selecting column 'a' with DataFrame.loc vs. the DataFrame.ix
# indexer. The stored ratio is (mean .ix timing) / (mean .loc timing).
# NOTE(review): `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0,
# so this cell presumably only runs on an older pandas — confirm the version
# this notebook is pinned to.
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r1 = df.loc[:, 'a']
    r2 = df.ix[:, 'a']
    # Both indexers must return the same Series before timing them.
    assert r1.equals(r2)
    t1 = %timeit -o df.loc[:, 'a']
    t2 = %timeit -o df.ix[:, 'a']
    # Average the timings recorded by each %timeit -o result.
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.ix vs pd.DataFrame.loc'] = speedup

18.3 µs ± 381 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
38.5 µs ± 503 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
18.3 µs ± 81.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
39.4 µs ± 1.18 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
18.1 µs ± 83 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
38.8 µs ± 1.07 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
18.3 µs ± 432 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
38 µs ± 468 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
18.2 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
38 µs ± 258 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


pd.DataFrame.loc vs pd.DataFrame.reindex

In [20]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    l = np.random.randint(d, size=50)
    r1 = df.reindex(l)
    r2 = df.loc[l]
    assert r1.equals(r2)
    t1 = %timeit -o df.reindex(l)
    t2 = %timeit -o df.loc[l]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.loc vs pd.DataFrame.reindex'] = speedup

128 µs ± 2.08 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
284 µs ± 3.09 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
127 µs ± 558 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
371 µs ± 7.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
128 µs ± 1.31 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
201 µs ± 4.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
127 µs ± 1.23 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
197 µs ± 1.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
128 µs ± 1.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
199 µs ± 4.38 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


pd.Series.apply vs pd.Series.map

In [21]:
speedup = []
for d in datasize:
    df = pd.DataFrame(data=[[list(x)] for x in np.random.rand(d,2)], columns=['a'])
    r1 = df['a'].map(lambda x: x[0])
    r2 = df['a'].apply(lambda x: x[0])
    assert r1.equals(r2)
    t1 = %timeit -o df['a'].map(lambda x: x[0])
    t2 = %timeit -o df['a'].apply(lambda x: x[0])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.apply vs pd.Series.map'] = speedup

77.7 µs ± 676 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
89.7 µs ± 1.08 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
234 µs ± 7.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
242 µs ± 2.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.86 ms ± 12.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.89 ms ± 19.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
19.5 ms ± 398 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
19.3 ms ± 423 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
201 ms ± 1.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
201 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


pd.DataFrame.iloc vs pd.DataFrame.loc

In [22]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    l = np.random.randint(d, size=50)
    r1 = df.iloc[l]
    r2 = df.loc[l]
    assert r1.equals(r2)
    t1 = %timeit -o df.iloc[l]
    t2 = %timeit -o df.loc[l]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.iloc vs pd.DataFrame.loc'] = speedup

119 µs ± 1.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
285 µs ± 5.03 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
116 µs ± 1.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
369 µs ± 4.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
118 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.13 ms ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
117 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
198 µs ± 2.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
120 µs ± 1.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
199 µs ± 1.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


pd.DataFrame.iat vs pd.DataFrame.iloc

In [23]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    r = random.randint(0, 10)
    r1 = df.iat[r, 0]
    r2 = df.iloc[r, 0]
    assert r1 == r2
    t1 = %timeit -o df.iat[r, 0]
    t2 = %timeit -o df.iloc[r, 0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.iat vs pd.DataFrame.iloc'] = speedup

3.81 µs ± 121 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.09 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.79 µs ± 130 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.01 µs ± 133 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.91 µs ± 161 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.9 µs ± 134 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.94 µs ± 238 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.97 µs ± 13.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.79 µs ± 40.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.98 µs ± 203 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


np.count_nonzero vs np.ndarray.sum

In [24]:
speedup = []
for d in datasize:
    a = np.random.random_sample(d)
    r1 = np.count_nonzero(a > 0.5)
    r2 = (a > 0.5).sum()
    assert np.allclose(r1,r2)
    t1 = %timeit -o np.count_nonzero(a > 0.5)
    t2 = %timeit -o (a > 0.5).sum()
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.count_nonzero vs np.ndarray.sum'] = speedup

956 ns ± 32 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.62 µs ± 126 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.5 µs ± 24.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.65 µs ± 20.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.29 µs ± 73.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
16.2 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
54.2 µs ± 501 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
115 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
780 µs ± 5.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.34 ms ± 16.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


pd.Series.apply vs pd.Series.astype

In [25]:
speedup = []
for d in datasize:
    data = list(map(str, np.random.randint(d, size=d)))
    df = pd.DataFrame({'a': data})
    r1 = df['a'].astype(np.int64)
    r2 = df['a'].apply(lambda x: int(x))
    assert r1.equals(r2)
    t1 = %timeit -o df['a'].astype(np.int64)
    t2 = %timeit -o df['a'].apply(lambda x: int(x))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.apply vs pd.Series.astype'] = speedup

43.1 µs ± 502 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
125 µs ± 2.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
118 µs ± 2.31 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
454 µs ± 19.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
855 µs ± 7.92 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.87 ms ± 123 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.9 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
39.3 ms ± 575 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
95.9 ms ± 1.82 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
400 ms ± 7.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


pd.Series.apply vs pd.to_datetime

In [26]:
from dateutil import parser
speedup = []
for d in datasize:
    dates = pd.date_range('2015', freq='min', periods=d)
    dates = [date.strftime('%d %b %Y %H:%M:%S') for date in dates]
    ser = pd.Series(dates)
    r1 = pd.to_datetime(dates)
    r2 = ser.apply(lambda x: parser.parse(x))
    assert r1.to_series(index=range(d)).equals(r2)
    t1 = %timeit -o pd.to_datetime(dates)
    t2 = %timeit -o ser.apply(lambda x: parser.parse(x))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.apply vs pd.to_datetime'] = speedup

8.68 ms ± 62 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
6 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
86.9 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
57.5 ms ± 298 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
852 ms ± 9.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
577 ms ± 11.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
8.69 s ± 80.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
5.8 s ± 63.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
The slowest run took 19.97 times longer than the fastest. This could mean that an intermediate result is being cached.
5min 20s ± 9min 32s per loop (mean ± std. dev. of 7 runs, 1 loop each)
59.4 s ± 41.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.linalg.norm vs np.ndarray.sum,np.sqrt

In [27]:
speedup = []
for d in datasize:
    a = np.random.randn(d,1)
    r1 = np.sqrt(((a[1:2] - a) ** 2).sum(1))
    r2 = np.linalg.norm(a[1:2] - a, axis=1)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.sqrt(((a[1:2] - a) ** 2).sum(1))
    t2 = %timeit -o np.linalg.norm(a[1:2] - a, axis=1)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.linalg.norm vs np.ndarray.sum,np.sqrt'] = speedup

4.89 µs ± 30 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.82 µs ± 78 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
10.8 µs ± 80.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
12 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
33.2 µs ± 601 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
35.8 µs ± 694 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
247 µs ± 4.21 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
249 µs ± 4.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.66 ms ± 198 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.89 ms ± 162 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.empty vs np.full

In [28]:
speedup = []
for d in datasize:
    a = np.random.randn(d,1)
    r1 = np.empty(d); r1[:] = 0
    r2 = np.full(d, 0)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o r1 = np.empty(d); r1[:] = 0
    t2 = %timeit -o np.full(d, 0)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.empty vs np.full'] = speedup

926 ns ± 16.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.34 µs ± 10.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.24 µs ± 19.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.65 µs ± 19.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
4.2 µs ± 50 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.66 µs ± 43.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
27.7 µs ± 403 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
15.9 µs ± 89.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.68 ms ± 64.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.33 ms ± 13.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


np.count_nonzero vs np.sum

In [29]:
speedup = []
for d in datasize:
    a = np.random.randn(d)
    r1 = np.count_nonzero(a>0)
    r2 = np.sum(a>0)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.count_nonzero(a>0)
    t2 = %timeit -o np.sum(a>0)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.count_nonzero vs np.sum'] = speedup

1.12 µs ± 12.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.34 µs ± 66 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.74 µs ± 5.41 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
4.71 µs ± 171 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.58 µs ± 35 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
14.4 µs ± 266 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
48 µs ± 697 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
107 µs ± 1.21 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
537 µs ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.1 ms ± 8.34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


pd.Series.map vs pd.Series.replace

In [30]:
speedup = []
for d in datasize:
    df = pd.DataFrame({'A': np.random.randint(1,100,d)})
    mapvalue = {i: i+1 for i in range(100)}
    r1 = df['A'].map(mapvalue)   
    r2 = df['A'].replace(mapvalue)
    assert r1.equals(r2)
    t1 = %timeit -o df['A'].map(mapvalue)
    t2 = %timeit -o df['A'].replace(mapvalue)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.map vs pd.Series.replace'] = speedup

273 µs ± 2.21 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.17 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
284 µs ± 3.51 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.63 ms ± 31.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
353 µs ± 3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.58 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
984 µs ± 16.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
12.8 ms ± 70.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
13.6 ms ± 119 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
130 ms ± 3.86 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


pd.DataFrame.loc vs pd.DataFrame.query

In [31]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r1 = df.loc[df['a'] > 0]
    r2 = df.query("a>0")
    assert r1.equals(r2)
    t1 = %timeit -o df.loc[df['a'] > 0]
    t2 = %timeit -o df.query("a>0")
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.loc vs pd.DataFrame.query'] = speedup

252 µs ± 6.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
896 µs ± 11.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
260 µs ± 3.51 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
913 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
332 µs ± 5.71 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.16 ms ± 9.95 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
966 µs ± 5.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.91 ms ± 25.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
11.3 ms ± 79.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
16.1 ms ± 75.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.argmax vs np.where

In [32]:
speedup = []
for d in datasize:
    a = np.random.randn(d)
    r1 = np.argmax(a>0)
    r2 = np.where(a>0)[0][0]
    assert r1 == r2
    t1 = %timeit -o np.argmax(a>0)
    t2 = %timeit -o np.where(a>0)[0][0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.argmax vs np.where'] = speedup

1.94 µs ± 109 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.47 µs ± 18.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.12 µs ± 35.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.38 µs ± 208 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.31 µs ± 104 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
38.3 µs ± 468 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
24.4 µs ± 477 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
368 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
287 µs ± 28.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.88 ms ± 58.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.DataFrame.groupby,pd.core.groupby.GroupBy.size,pd.Series.unstack vs pd.DataFrame.groupby,pd.core.groupby.SeriesGroupBy.value_counts,pd.Series.unstack

In [33]:
speedup = []
for d in datasize:
    df = pd.DataFrame(columns=['id', 'category'])
    df['id'] = np.random.randint(3, size=d)
    df['category'] = np.random.choice(['a','b','c'], d)
    r1 = df.groupby(['id','category']).size().unstack(fill_value=0) 
    r2 = df.groupby('id').category.value_counts().unstack(fill_value=0)
    assert r1.equals(r2)
    t1 = %timeit -o df.groupby(['id','category']).size().unstack(fill_value=0)
    t2 = %timeit -o df.groupby('id').category.value_counts().unstack(fill_value=0)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.groupby,pd.core.groupby.GroupBy.size,pd.Series.unstack vs pd.DataFrame.groupby,pd.core.groupby.SeriesGroupBy.value_counts,pd.Series.unstack'] = speedup

979 µs ± 3.97 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
964 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.02 ms ± 3.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.07 ms ± 27.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.43 ms ± 20 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.09 ms ± 31.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.13 ms ± 52.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
15 ms ± 161 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
75.1 ms ± 1.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
207 ms ± 4.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.nonzero vs np.where

In [34]:
speedup = []
for d in datasize:
    a = np.random.randn(d)
    r1 = np.where(a > 0.5)
    r2 = np.nonzero(a > 0.5)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.where(a > 0.5)
    t2 = %timeit -o np.nonzero(a > 0.5)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.nonzero vs np.where'] = speedup

1.08 µs ± 19.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.28 µs ± 7.43 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.72 µs ± 99.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.95 µs ± 19.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
30.7 µs ± 376 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
31.2 µs ± 312 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
299 µs ± 2.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
300 µs ± 2.11 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.81 ms ± 89.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.76 ms ± 41.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.ndarray.dot vs np.tensordot

In [35]:
speedup = []
for d in datasize:
    a = np.random.randint(100, size=(5, int(d/5)))
    b = np.random.randint(100, size=(5, int(d/5)))
    r1 = b.dot(a.T)
    r2 = np.tensordot(b,a,axes=((1,1)))
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o  b.dot(a.T)
    t2 = %timeit -o np.tensordot(b,a,axes=((1,1)))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.ndarray.dot vs np.tensordot'] = speedup

986 ns ± 9.22 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
11.8 µs ± 277 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.85 µs ± 41.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13.7 µs ± 159 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
20 µs ± 347 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
31.1 µs ± 254 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
193 µs ± 1.49 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
203 µs ± 3.69 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.93 ms ± 7.64 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.96 ms ± 76.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.DataFrame.combine_first vs pd.DataFrame.fillna

In [36]:
speedup = []
for d in datasize:
    indices = np.random.randint(d, size=int(d/2))
    df1 = pd.DataFrame(np.random.randn(d, 1))
    df2 = df1.copy()
    df2.iloc[indices, 0] =np.nan
    r1 = df1.fillna(df2)
    r2 = df1.combine_first(df2)
    assert r1.equals(r2)
    t1 = %timeit -o df1.fillna(df2)
    t2 = %timeit -o df1.combine_first(df2)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.combine_first vs pd.DataFrame.fillna'] = speedup

710 µs ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
682 µs ± 4.25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
706 µs ± 5.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
705 µs ± 10.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
737 µs ± 4.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
809 µs ± 4.46 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
995 µs ± 18.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.97 ms ± 35.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.17 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
15.5 ms ± 95.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.append vs np.hstack

In [37]:
speedup = []
for d in datasize:
    A = np.arange(d)
    r1 = np.append(A,A)
    r2 = np.hstack((A,A))
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.append(A,A)
    t2 = %timeit -o np.hstack((A,A))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.append vs np.hstack'] = speedup

2.31 µs ± 44.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.4 µs ± 59.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.39 µs ± 75.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.68 µs ± 24.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.4 µs ± 30.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.39 µs ± 104 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
34.1 µs ± 616 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
34.8 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.58 ms ± 19.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.61 ms ± 11.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


np.c_ vs np.hstack

In [38]:
speedup = []
for d in datasize:
    A = np.random.rand(d,1)
    B = np.random.rand(d,1)
    r1 = np.hstack((A,B))
    r2 = np.c_[A, B]
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.hstack((A,B))
    t2 = %timeit -o np.c_[A, B]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.c_ vs np.hstack'] = speedup

2.69 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
12.6 µs ± 439 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.53 µs ± 74.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13.1 µs ± 258 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15.1 µs ± 164 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
25.7 µs ± 689 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
559 µs ± 3.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
568 µs ± 5.09 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.57 ms ± 73.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.2 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.full vs np.zeros

In [39]:
speedup = []
for d in datasize:
    r1 = np.zeros(d)
    r2 = np.full(d, 0)
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.zeros(d)
    t2 = %timeit -o np.full(d, 0)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.full vs np.zeros'] = speedup

548 ns ± 17.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.41 µs ± 37.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
866 ns ± 23.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.67 µs ± 13.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.36 µs ± 101 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.69 µs ± 48.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
25.1 µs ± 426 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
13 µs ± 94.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
92.5 µs ± 783 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.3 ms ± 12.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


np.concatenate vs np.vstack

In [40]:
speedup = []
for d in datasize:
    a = np.random.normal(size=(1,d))
    r1 = np.concatenate((a,a),axis=0)
    r2 = np.vstack((a,a))
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.concatenate((a,a),axis=0)
    t2 = %timeit -o np.vstack((a,a))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.concatenate vs np.vstack'] = speedup

1.08 µs ± 11.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.42 µs ± 47.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.36 µs ± 17.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.81 µs ± 73.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.03 µs ± 42.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.69 µs ± 26.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
551 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
554 µs ± 2.49 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.11 ms ± 95.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.11 ms ± 90.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.atleast_2d vs np.ndarray.reshape

In [41]:
speedup = []
for d in datasize:
    x = np.random.randint(10, size=d).reshape(5,int(d/5))
    y = np.arange(5)
    r1 = x * y.reshape(-1, 1)
    r2 = x * np.atleast_2d(y).T
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o x * y.reshape(-1, 1)
    t2 = %timeit -o x * np.atleast_2d(y).T
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.atleast_2d vs np.ndarray.reshape'] = speedup

1.25 µs ± 35.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.09 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.12 µs ± 45.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.93 µs ± 112 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.81 µs ± 54.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
10 µs ± 150 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
59.7 µs ± 387 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
60.9 µs ± 503 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.52 ms ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.53 ms ± 21.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


np.arange vs np.array

In [42]:
speedup = []
for d in datasize:
    r1 = np.arange(d)
    r2 = np.array(range(d))
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.arange(d)
    t2 = %timeit -o np.array(range(d))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.arange vs np.array'] = speedup

521 ns ± 20.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
7.73 µs ± 214 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
909 ns ± 14.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
64.1 µs ± 1.14 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.87 µs ± 145 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
648 µs ± 5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
43.8 µs ± 296 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
6.53 ms ± 160 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.38 ms ± 6.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
80.2 ms ± 1.69 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.einsum vs np.ndarray.dot,np.einsum

In [43]:
speedup = []
for d in datasize:
    X = np.random.randn(d, 1)
    M = np.random.rand(1, 1)
    r1 = np.einsum('ij,ij->i',X.dot(M),X)
    r2 = np.einsum('ij,jk,ik->i', X, M, X)
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.einsum('ij,ij->i',X.dot(M),X)
    t2 = %timeit -o np.einsum('ij,jk,ik->i', X, M, X)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum vs np.ndarray.dot,np.einsum'] = speedup

1.98 µs ± 71.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.66 µs ± 22.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.34 µs ± 104 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.77 µs ± 100 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
23.3 µs ± 623 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
11.8 µs ± 95.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
170 µs ± 1.01 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
86.9 µs ± 2.37 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.65 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.35 ms ± 27.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.Series.iat vs pd.Series.iloc

In [44]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r = random.randint(0, 10)
    r1 = df['a'].iat[r]
    r2 = df['a'].iloc[r]
    assert r1 == r2
    t1 = %timeit -o df['a'].iat[r]
    t2 = %timeit -o df['a'].iloc[r]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.iat vs pd.Series.iloc'] = speedup

4.26 µs ± 111 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.93 µs ± 91.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.12 µs ± 122 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.02 µs ± 279 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.11 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.83 µs ± 80.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.11 µs ± 221 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.78 µs ± 173 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.1 µs ± 93.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.91 µs ± 292 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


np.cumprod vs pd.DataFrame.cumprod

In [45]:
speedup = []
for d in datasize:
    arr = np.random.randn(d, 1)
    df = pd.DataFrame(arr)
    r1 = np.cumprod(1 + arr, axis=0) - 1
    r2 = ((1 + df).cumprod(axis=0) - 1)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.cumprod(1 + arr, axis=0) - 1
    t2 = %timeit -o ((1 + df).cumprod(axis=0) - 1)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.cumprod vs pd.DataFrame.cumprod'] = speedup

3.6 µs ± 92.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
314 µs ± 4.58 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.61 µs ± 59.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
321 µs ± 3.03 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
38.8 µs ± 484 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
371 µs ± 2.56 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
296 µs ± 1.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.18 ms ± 10.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
9.47 ms ± 23.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
12.6 ms ± 257 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.array vs np.concatenate

In [46]:
speedup = []
for d in datasize:
    X = np.random.randn(d)
    r1 = np.concatenate((X[None], X[None]), 0)
    r2 = np.array([X, X])
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.concatenate((X[None], X[None]), 0)
    t2 = %timeit -o np.array([X, X])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.array vs np.concatenate'] = speedup

1.23 µs ± 24.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.22 µs ± 17.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.49 µs ± 50.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.46 µs ± 16.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
7.24 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.25 µs ± 47.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
552 µs ± 2.04 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
560 µs ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.13 ms ± 59 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.2 ms ± 59.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.DataFrame.pivot_table vs pd.crosstab

In [47]:
speedup = []
for d in datasize:
    df = pd.DataFrame(columns=['id', 'category'])
    df['id'] = np.random.randint(3, size=d)
    df['category'] = np.random.choice(['a','b','c'], d)
    r1 = df.pivot_table(index='id', columns='category', aggfunc=len, fill_value=0)
    r2 = pd.crosstab(df['id'], df['category'])
    assert r1.equals(r2)
    t1 = %timeit -o df.pivot_table(index='id', columns='category', aggfunc=len, fill_value=0)
    t2 = %timeit -o pd.crosstab(df['id'], df['category'])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.pivot_table vs pd.crosstab'] = speedup

2.37 ms ± 60.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.68 ms ± 54.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.54 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.83 ms ± 60.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.09 ms ± 40 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.27 ms ± 74.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.59 ms ± 299 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.2 ms ± 156 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
126 ms ± 2.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
113 ms ± 1.57 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


pd.Series.map vs pd.Series.str

In [48]:
speedup = []
for d in datasize:
    data = list(map(str, np.random.randint(d, size=d)))
    df = pd.DataFrame({'a': data})
    r1 = df['a'].map(lambda x: x[0])
    r2 = df["a"].str[0]
    assert r1.equals(r2)
    t1 = %timeit -o df['a'].map(lambda x: x[0])
    t2 = %timeit -o df["a"].str[0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.map vs pd.Series.str'] = speedup

93.2 µs ± 2.82 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
104 µs ± 1.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
203 µs ± 2.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
422 µs ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.3 ms ± 46.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.39 ms ± 156 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
11.6 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
31.8 ms ± 666 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
125 ms ± 1.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
339 ms ± 9.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.where vs pd.Series.map

In [50]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r1 = np.where(df.a > 0, 1, -1)
    r2 = df.a.map(lambda x: 1 if x > 0 else -1)   
    assert np.allclose(r1, r2) == True
    t1 = %timeit -o np.where(df.a > 0, 1, -1)
    t2 = %timeit -o df.a.map(lambda x: 1 if x > 0 else -1)   
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.where vs pd.Series.map'] = speedup

84.2 µs ± 758 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
130 µs ± 3.16 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
84.1 µs ± 1.11 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
366 µs ± 8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
95.5 µs ± 1.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.72 ms ± 55.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
183 µs ± 3.15 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
26.6 ms ± 473 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.07 ms ± 18.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
276 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


pd.DataFrame.groupby,pd.core.groupby.DataFrameGroupBy.filter vs
pd.DataFrame.groupby,pd.Series.transform

In [51]:
speedup = []
for d in datasize:
    df = pd.DataFrame(dict(A = np.random.randint(0,1000,size=d),B=np.random.randn(d)))
    r1 = df.groupby('A').filter(lambda x: len(x) > 1000)
    r2 = df[df.groupby(['A'])['A'].transform('size') > 1000]
    assert r1.equals(r2)
    t1 = %timeit -o df.groupby('A').filter(lambda x: len(x) > 1000)
    t2 = %timeit -o df[df.groupby(['A'])['A'].transform('size') > 1000]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.groupby,pd.core.groupby.DataFrameGroupBy.filter vs pd.DataFrame.groupby,pd.Series.transform'] = speedup

6.77 ms ± 124 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
813 µs ± 5.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
41.6 ms ± 340 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
893 µs ± 18.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
65.1 ms ± 594 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.08 ms ± 5.31 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
68.3 ms ± 475 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.78 ms ± 78.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
210 ms ± 10 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
50.1 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.einsum,np.sqrt vs np.ndarray.sum,np.sqrt

In [52]:
speedup = []
for d in datasize:
    a = np.arange(d).reshape((-1,5))
    r1 = np.sqrt(np.einsum('ij,ij->i',a,a))
    r2 = np.sqrt((a*a).sum(axis=1))
    np.testing.assert_equal(r1,r2)
    t1 = %timeit -o np.sqrt(np.einsum('ij,ij->i',a,a))
    t2 = %timeit -o np.sqrt((a*a).sum(axis=1))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum,np.sqrt vs np.ndarray.sum,np.sqrt'] = speedup

2.38 µs ± 26.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.76 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.25 µs ± 94.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.89 µs ± 120 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
20.4 µs ± 452 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
35.6 µs ± 440 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
471 µs ± 12.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
607 µs ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
5.59 ms ± 224 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.76 ms ± 124 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.read_csv vs pd.read_hdf

In [53]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.arange(d).reshape((-1,5)))
    df.to_hdf('temp.h5', key='df', mode='w')
    df.to_csv('temp.csv', index=False)
    r1 = pd.read_csv('temp.csv')
    r2 = pd.read_hdf('temp.h5', 'df')
    t1 = %timeit -o pd.read_csv('temp.csv')
    t2 = %timeit -o pd.read_hdf('temp.h5', 'df')   
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.read_csv vs pd.read_hdf'] = speedup

974 µs ± 3.13 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.51 ms ± 61.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.2 ms ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.55 ms ± 26.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.78 ms ± 91.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.59 ms ± 25.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
33.7 ms ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.59 ms ± 38.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
402 ms ± 29.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
4.77 ms ± 174 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.column_stack vs np.transpose

In [54]:
speedup = []
for d in datasize:
    a = np.random.randint(d, size=d)
    r1 = np.transpose([a,a])
    r2 = np.column_stack((a,a))
    assert np.allclose(r1,r2)
    t1 = %timeit -o np.transpose([a,a])
    t2 = %timeit -o np.column_stack((a,a))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.column_stack vs np.transpose'] = speedup

3.06 µs ± 79.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.46 µs ± 159 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.36 µs ± 42.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.05 µs ± 28.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.19 µs ± 43.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
10.7 µs ± 197 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
34.8 µs ± 754 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
81.5 µs ± 458 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.59 ms ± 21.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.95 ms ± 27.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.sort,pd.DataFrame,pd.DataFrame.duplicated vs pd.DataFrame.apply,pd.DataFrame.duplicated

In [55]:
speedup = []
for d in datasize:
    df = pd.DataFrame({'A': np.random.randint(10,size=d).astype(str),
                       'B': np.random.randint(10,size=d).astype(str)})
    r1 = df[~pd.DataFrame(np.sort(df.values, axis=1), index=df.index, columns=df.columns).duplicated()]
    r2 = df[~df[['A', 'B']].apply(frozenset, axis=1).duplicated()]
    assert r1.equals(r2)
    t1 = %timeit -o df[~pd.DataFrame(np.sort(df.values, axis=1), index=df.index, columns=df.columns).duplicated()]
    t2 = %timeit -o df[~df[['A', 'B']].apply(frozenset, axis=1).duplicated()]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.sort,pd.DataFrame,pd.DataFrame.duplicated vs pd.DataFrame.apply,pd.DataFrame.duplicated'] = speedup

601 µs ± 17.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.83 ms ± 37.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
737 µs ± 12.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
10.5 ms ± 346 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.06 ms ± 149 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
94.6 ms ± 785 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
14.7 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
963 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
155 ms ± 1.61 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
9.61 s ± 50.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.empty vs np.zeros

In [56]:
speedup = []
for d in datasize:
    r1 = np.empty(d); r1[:] = 0
    r2 = np.zeros(d)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o r1 = np.empty(d); r1[:] = 0
    t2 = %timeit -o np.zeros(d)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.empty vs np.zeros'] = speedup

900 ns ± 13 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
535 ns ± 5.35 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.26 µs ± 17.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
860 ns ± 11.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.45 µs ± 108 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.08 µs ± 23.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
22.8 µs ± 271 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
25 µs ± 372 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.59 ms ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
94.6 µs ± 1.37 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


pd.Series vs pd.Series.apply

In [57]:
speedup = []
for d in datasize:
    s = pd.Series(np.random.randint(1,100,d))
    r1 = pd.Series([[a] for a in s])
    r2 = s.apply(lambda x: [x])
    assert r1.equals(r2)
    t1 = %timeit -o pd.Series([[a] for a in s])
    t2 = %timeit -o s.apply(lambda x: [x])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series vs pd.Series.apply'] = speedup

64.7 µs ± 270 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
119 µs ± 854 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
137 µs ± 1.21 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
251 µs ± 5.73 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
838 µs ± 16.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.48 ms ± 17.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.63 ms ± 119 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
13.7 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
129 ms ± 487 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
157 ms ± 2.33 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.column_stack vs np.vstack

In [58]:
speedup = []
for d in datasize:
    a = np.random.randint(d, size=d)
    r1 = np.vstack((a, a)).T
    r2 = np.column_stack((a,a))
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.vstack((a, a)).T
    t2 = %timeit -o np.column_stack((a,a))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.column_stack vs np.vstack'] = speedup

2.85 µs ± 81.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.36 µs ± 98.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.1 µs ± 58.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
The slowest run took 5001.88 times longer than the fastest. This could mean that an intermediate result is being cached.
2.16 ms ± 5.29 ms per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.18 µs ± 301 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.2 µs ± 566 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
34.1 µs ± 648 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
80.7 µs ± 323 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.52 ms ± 12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.99 ms ± 30.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


np.empty,np.ndarray.fill vs np.ones

In [59]:
speedup = []
for d in datasize:
    r1 = np.empty(d); r1[:] = 1
    r2 = np.ones(d)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.empty(d); r1[:] = 1
    t2 = %timeit -o np.ones(d)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.empty,np.ndarray.fill vs np.ones'] = speedup

895 ns ± 10.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.14 µs ± 5.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.21 µs ± 9.73 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.5 µs ± 13.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.4 µs ± 19.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.8 µs ± 92.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
23.2 µs ± 445 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
23.1 µs ± 289 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
250 µs ± 3.14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.63 ms ± 72.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


pd.Series.astype vs pd.Series.map

In [60]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1), columns=['a'])
    r1 = df["a"].astype(str)
    r2 = df["a"].map(str)
    t1 = %timeit -o df["a"].astype(str)
    t2 = %timeit -o df["a"].map(str)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.astype vs pd.Series.map'] = speedup

103 µs ± 574 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
177 µs ± 1.09 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
703 µs ± 6.04 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
866 µs ± 6.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.65 ms ± 42.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.72 ms ± 46.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
67.5 ms ± 375 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
76.4 ms ± 265 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
684 ms ± 8.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
788 ms ± 2.61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.array vs np.hstack

In [61]:
speedup = []
for d in datasize:
    a = np.random.randint(d, size=d)
    r1 = np.array(a*2)
    r2 = np.hstack(a*2)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.array(a*2)
    t2 = %timeit -o np.hstack(a*2)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.array vs np.hstack'] = speedup

997 ns ± 17 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
193 µs ± 1.9 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.28 µs ± 24.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.91 ms ± 32.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
7.86 µs ± 44.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
18.9 ms ± 210 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
75 µs ± 945 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
204 ms ± 11.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.43 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.06 s ± 51.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.repeat vs np.tile

In [62]:
speedup = []
for d in datasize:
    arr = np.arange(d)
    r1 = np.repeat(arr[None,:], 2, axis=0)
    r2 = np.tile(arr, (2, 1, 1))
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o np.repeat(arr[None,:], 2, axis=0)
    t2 = %timeit -o  np.tile(arr, (2, 1, 1))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.repeat vs np.tile'] = speedup

951 ns ± 18.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.99 µs ± 47.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
1.18 µs ± 11.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.41 µs ± 36.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.13 µs ± 28.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.75 µs ± 192 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
32.9 µs ± 656 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
35.6 µs ± 842 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.57 ms ± 37.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.67 ms ± 34.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [63]:
import pickle

# Persist every speedup series collected in speedup_all.
with open('speedup-dell.pkl', 'wb') as out_file:
    pickle.dump(speedup_all, out_file)

In [3]:
import pickle

# Read the saved results back in. pickle.load runs arbitrary code from the
# file, so only use it on files this notebook wrote itself.
with open('speedup-dell.pkl', 'rb') as in_file:
    speedup_res = pickle.load(in_file)

In [65]:
# Round-trip check: the dictionary reloaded from disk equals the in-memory one.
speedup_all == speedup_res

True

Updated API pairs

np.ndarray.sum vs np.sum

In [19]:
speedup = []
for d in datasize:
    arr = np.arange(d)
    r1 = arr.sum()
    r2 = np.sum(arr)
    assert np.allclose(r1,r2) == True
    t1 = %timeit -o  arr.sum()
    t2 = %timeit -o  np.sum(arr)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.ndarray.sum vs np.sum'] = speedup

1.38 µs ± 4.48 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.1 µs ± 31.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
1.96 µs ± 58.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.71 µs ± 87.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.03 µs ± 201 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.69 µs ± 144 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
34.4 µs ± 1.01 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
36.3 µs ± 1.82 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
380 µs ± 43.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
326 µs ± 9.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


pd.Series.map vs pd.DataFrame.replace

In [18]:
speedup = []
for d in datasize:
    df = pd.DataFrame(dict(gender=[f"mostly_{g}" for g in ['male', 'female'] * d]))
    r1 = df.gender.map({'mostly_male': 'male', 'mostly_female': 'female'})
    r2 = df.replace({'gender': {'mostly_': ''}}, regex=True)
    assert r1.equals(r2['gender']) == True
    t1 = %timeit -o df.gender.map({'mostly_male': 'male', 'mostly_female': 'female'})
    t2 = %timeit -o df.replace({'gender': {'mostly_': ''}}, regex=True)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.map vs pd.DataFrame.replace'] = speedup

420 µs ± 12.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
745 µs ± 13.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
532 µs ± 12.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.84 ms ± 54.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.52 ms ± 73.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
27.1 ms ± 3.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
12.6 ms ± 239 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
246 ms ± 5.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
121 ms ± 5.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.58 s ± 60.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


pd.DataFrame.apply vs pd.Series.apply

In [17]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.arange(0, d, 1,), columns=["a"])
    r1 = df.apply(lambda x: x * 2)
    r2 = df.a.apply(lambda x: x * 2)
    pd.testing.assert_series_equal(r1['a'], r2, check_dtype=False)
    t1 = %timeit -o df.apply(lambda x: x * 2)
    t2 = %timeit -o df.a.apply(lambda x: x * 2)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.apply vs pd.Series.apply'] = speedup

1.08 ms ± 30 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
228 µs ± 20.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.14 ms ± 27.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
425 µs ± 19.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.11 ms ± 20.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.55 ms ± 84.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.85 ms ± 62.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
27.2 ms ± 814 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
6.56 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
270 ms ± 8.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.where vs pd.DataFrame.where

In [16]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randint(100, size=(d,3)))
    r1 = np.where(df.values <= 50, df.values, np.nan)
    r2 = df.where(df <= 50)
    assert pd.DataFrame(r1).equals(r2) == True
    t1 = %timeit -o np.where(df.values <= 50, df.values, np.nan)
    t2 = %timeit -o df.where(df <= 50)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.where vs pd.DataFrame.where'] = speedup

15.6 µs ± 90.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.09 ms ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
22.7 µs ± 266 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.14 ms ± 20.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
68.1 µs ± 390 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.37 ms ± 40.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.16 ms ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
7.32 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
13.8 ms ± 864 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
69.6 ms ± 1.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


np.frompyfunc vs np.array

In [15]:

speedup = []
for d in datasize:
    l = np.arange(0, d, 1)
    r1 = np.frompyfunc(oct, 1, 1)(l)
    r2 = np.array([oct(x) for x in l])
    assert np.array_equal(r1,r2) == True
    t1 = %timeit -o np.frompyfunc(oct, 1, 1)(l)
    t2 = %timeit -o np.array([oct(x) for x in l])
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.frompyfunc vs np.array'] = speedup

8.48 µs ± 195 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
24.2 µs ± 1.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
79.2 µs ± 1.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
238 µs ± 12.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
812 µs ± 31.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.29 ms ± 39.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10 ms ± 497 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
27.9 ms ± 1.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
106 ms ± 4.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
306 ms ± 6.79 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


np.frompyfunc vs np.vectorize

In [14]:
speedup = []
for d in datasize:
    l = np.arange(0, d, 1)
    r1 = np.frompyfunc(oct, 1, 1)(l)
    r2 = np.vectorize(oct,otypes=[object])(l)
    assert np.array_equal(r1,r2) == True
    t1 = %timeit -o np.frompyfunc(oct, 1, 1)(l)
    t2 = %timeit -o np.vectorize(oct,otypes=[object])(l)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.frompyfunc vs np.vectorize'] = speedup

8.64 µs ± 102 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15.2 µs ± 170 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
85 µs ± 3.37 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
91.7 µs ± 2.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
862 µs ± 7.23 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
928 µs ± 24.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
9.69 ms ± 82.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
11 ms ± 346 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
114 ms ± 5.95 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
121 ms ± 4.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


pd.DataFrame.iat vs pd.DataFrame.loc

In [13]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    r = random.randint(0, d)
    r1 = df.iat[r, 0]
    r2 = df.loc[r, 0]
    assert r1 == r2
    t1 = %timeit -o df.iat[r, 0]
    t2 = %timeit -o df.loc[r, 0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.iat vs pd.DataFrame.loc'] = speedup

4.44 µs ± 81.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.08 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.53 µs ± 124 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.12 µs ± 38.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.35 µs ± 34.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.08 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.76 µs ± 173 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.11 µs ± 91.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.54 µs ± 154 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
14.7 µs ± 316 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


pd.DataFrame.at vs pd.DataFrame.iloc

In [12]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    r = random.randint(0, d)
    r1 = df.at[r, 0]
    r2 = df.iloc[r, 0]
    assert r1 == r2
    t1 = %timeit -o df.at[r, 0]
    t2 = %timeit -o df.iloc[r, 0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.at vs pd.DataFrame.iloc'] = speedup

3.26 µs ± 61.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.98 µs ± 251 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.17 µs ± 125 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.62 µs ± 94.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.1 µs ± 23.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.69 µs ± 171 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.45 µs ± 50.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.61 µs ± 219 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
12.3 µs ± 195 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.72 µs ± 59.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


pd.DataFrame.at vs pd.DataFrame.iat

In [11]:
speedup = []
for d in datasize:
    df = pd.DataFrame(np.random.randn(d, 1))
    r = random.randint(0, d-1)
    r1 = df.at[r, 0]
    r2 = df.iat[r, 0]
    assert r1 == r2
    t1 = %timeit -o df.at[r, 0]
    t2 = %timeit -o df.iat[r, 0]
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.DataFrame.at vs pd.DataFrame.iat'] = speedup

3.25 µs ± 47.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.33 µs ± 14.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.16 µs ± 46 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.35 µs ± 61.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.21 µs ± 79.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.61 µs ± 75.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.21 µs ± 86.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.47 µs ± 143 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
13.3 µs ± 280 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.61 µs ± 164 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


np.einsum,np.einsum vs np.einsum

In [5]:
speedup = []
for d in datasize:
    a, b= np.random.rand(int(d/5),5), np.random.rand(5,int(d/5))
    r1 = np.einsum('j,jk->k', np.einsum('ij->j', a), b)
    r2 = np.einsum('ij,jk->k',a,b)
    assert np.allclose(r1,r2)
    t1 = %timeit -o np.einsum('j,jk->k', np.einsum('ij->j', a), b)
    t2 = %timeit -o np.einsum('ij,jk->k',a,b)
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['np.einsum,np.einsum vs np.einsum'] = speedup

2.34 µs ± 91.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.28 µs ± 67.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.73 µs ± 100 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
42.4 µs ± 70.3 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
14 µs ± 607 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.93 ms ± 163 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
118 µs ± 459 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
626 ms ± 21.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.06 ms ± 122 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1min 19s ± 2.65 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


pd.Series.isin vs pd.DataFrame.query

In [7]:
speedup = []
item_ids = [1, 2, 3, 4]
for d in datasize:
    df = pd.DataFrame({'item_id':np.random.choice(item_ids+ [5], p=(.125,.125,.125,.125,.5),size=d)})
    r1 = df[df['item_id'].isin(item_ids)]
    r2 = df.query('item_id in {}'.format(item_ids))
    assert r1.equals(r2)
    t1 = %timeit -o df[df['item_id'].isin(item_ids)]
    t2 = %timeit -o df.query('item_id in {}'.format(item_ids))
    time1 = np.array(t1.timings).mean()
    time2 = np.array(t2.timings).mean()
    speedup.append(time2/time1)
speedup_all['pd.Series.isin vs pd.DataFrame.query'] = speedup

431 µs ± 4.06 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.26 ms ± 31.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
433 µs ± 1.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.26 ms ± 14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
602 µs ± 7.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.99 ms ± 59.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.08 ms ± 6.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.05 ms ± 83.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
25.7 ms ± 2.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
33.6 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# Display the speedup ratios collected so far, keyed by API pair.
speedup_all

{'np.einsum,np.einsum vs np.einsum': [0.9776514595024661,
  11.37164686452012,
  352.0672437538201,
  5302.173366774716,
  38483.85922196496],
 'pd.Series.isin vs pd.DataFrame.query': [2.9313006306337397,
  2.8980851664467093,
  3.2989869534804486,
  1.9431657615698124,
  1.3086919115791582]}

In [21]:
import pickle

# Load the previously saved results. pickle.load runs arbitrary code from the
# file, so only use it on files this notebook wrote itself.
with open('speedup-dell.pkl', 'rb') as in_file:
    speedup_old = pickle.load(in_file)

In [24]:
# Merge the stored results with the ones measured in this session. When a key
# appears in both, the mapping unpacked last wins, so speedup_all goes last to
# let the re-measured pairs replace their stored values.
speedups = {**speedup_old, **speedup_all}

In [26]:
# Number of distinct API pairs in the merged dictionary.
len(speedups)

68

In [28]:
# Write the merged results to the 'updated' directory.
with open('updated/speedup-new.pkl', 'wb') as out_file:
    pickle.dump(speedups, out_file)