# CuPy vs NumPy

CuPy paper: http://learningsys.org/nips17/assets/papers/paper_16.pdf

In [1]:
import cupy as cp
import numpy as np

In [2]:
# Build ONE seeded matrix on the host and copy it to the GPU, so the CuPy and
# NumPy timings below run on identical values and the notebook is repeatable.
np.random.seed(0)
m_cpu = np.random.random((10**3, 10**3))  # host (NumPy) array
m = cp.asarray(m_cpu)  # device (CuPy) copy of the same values

## matmul

In [3]:
%%timeit
cp.matmul(m, m.T)

12.9 ms ± 42.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
%%timeit
# CPU baseline for the matmul comparison: m_cpu times its transpose,
# computed with NumPy. The expression is the thing being measured, so it is
# kept exactly as written.
np.matmul(m_cpu, m_cpu.T)

8.38 ms ± 4.85 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## m + m

In [5]:
%%timeit
m + m

700 µs ± 1.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [6]:
%%timeit
# CPU baseline for the element-wise addition comparison (NumPy array).
m_cpu + m_cpu

775 µs ± 1.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## m ** 2

In [7]:
%%timeit
m ** 2

1.42 ms ± 5.18 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
%%timeit
# CPU baseline for the element-wise squaring comparison (NumPy array).
# Kept as the `**` operator: alternative spellings may take a different code
# path and would change what is being benchmarked.
m_cpu ** 2

777 µs ± 4.64 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## svd

In [9]:
%%timeit
cp.linalg.svd(m)

2.02 s ± 20 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
# CPU baseline for the SVD comparison: NumPy SVD of m_cpu with default
# arguments.
np.linalg.svd(m_cpu)

493 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## eigh

In [11]:
%%timeit
cp.linalg.eigh(m)

506 ms ± 370 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
%%timeit
# CPU baseline for the eigh comparison.
# NOTE(review): m_cpu is a plain random matrix, not symmetric. eigh is meant
# for symmetric/Hermitian input and presumably reads only one triangle, so
# this times the routine but the eigenpairs are not those of m_cpu itself.
# Confirm that is intended.
np.linalg.eigh(m_cpu)

322 ms ± 74.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
