## Python, numpy, scipy, numba, tensorflow, pytorch, Cython, C++, CUDA

In [78]:
import numpy as np
import numba
import tensorflow as tf
import torch
import scipy

In [69]:
n = 1000
x = np.random.randn(n,n)
y = np.random.randn(n,n)
b = np.random.rand(n,1)

In [93]:
%timeit scipy.dot(x,y)
%timeit x.dot(y)

26.4 ms ± 725 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
25.9 ms ± 262 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [87]:
%timeit cache1 = np.linalg.inv(x.T.dot(x)).dot(x.T).dot(b)
%timeit cache1 = scipy.linalg.inv(x.T.dot(x)).dot(x.T).dot(b)

237 ms ± 22.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
93.3 ms ± 2.77 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [92]:
%timeit cache2 = np.linalg.solve(x.T.dot(x), x.T.dot(b))
%timeit cache2 = scipy.linalg.solve(x.T.dot(x), x.T.dot(b))

165 ms ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
59.9 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [83]:
def best():
    q,r = np.linalg.qr(x)
    np.linalg.solve(r,q.T.dot(b))
def best2():
    q,r = scipy.linalg.qr(x)
    scipy.linalg.solve(r,scipy.dot(q.T, b))

In [95]:
%timeit best()
%timeit best2()

462 ms ± 74.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
166 ms ± 11.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [74]:
cache1 = np.linalg.inv(x.T.dot(x)).dot(x.T).dot(b)
cache2 = np.linalg.solve(x.T.dot(x), x.T.dot(b))
q,r = np.linalg.qr(x)
cache3 = np.linalg.solve(r,q.T.dot(b))

In [77]:
print(cache1.mean())
print((cache1 - cache2).mean())
print((cache1 - cache3).mean())

-0.03960969222645883
-1.5492104994685452e-14
3.724526316702115e-13


In [102]:
def speed():
    %time np.linalg.inv(x)
    %time np.linalg.eig(x)
    %time np.dot(x,y.T)
    %time np.square(x)
speed()

Wall time: 256 ms
Wall time: 1.87 s
Wall time: 26 ms
Wall time: 4 ms


In [103]:
def speed():
    %time scipy.linalg.inv(x)
    %time scipy.linalg.eig(x)
    %time scipy.dot(x,y.T)
    %time scipy.square(x)
speed()

Wall time: 61 ms
Wall time: 1.2 s
Wall time: 28 ms
Wall time: 5 ms


In [104]:
@numba.autojit
def speed():
    %time np.linalg.inv(x)
    %time np.linalg.eig(x)
    %time np.dot(x,y.T)
    %time np.square(x)
speed()

Wall time: 240 ms
Wall time: 1.73 s
Wall time: 25 ms
Wall time: 3.96 ms


In [105]:
A = tf.random_normal([n, n])
with tf.Session() as sess:
    %time sess.run(tf.matrix_inverse(A))
    %time eigenvalues, eigenvectors = sess.run(tf.self_adjoint_eig(A))
    %time sess.run(tf.matmul( A, tf.transpose(A) ))
    %time sess.run(tf.multiply(A, A))

Wall time: 72 ms
Wall time: 1.39 s
Wall time: 724 ms
Wall time: 15 ms
