In [2]:
# Given a two-dimensional array, how do we extract its unique rows?

import numpy as np

Z = np.random.randint(0, 2, (6, 3))
# np.unique with axis=0 treats each row as one item and returns the distinct
# rows in lexicographic order. This replaces the manual "view every row as a
# single void scalar" trick and avoids binding `_`, which IPython reserves for
# the previous cell's output.
uZ = np.unique(Z, axis=0)
print(uZ)

[[0 0 1]
 [0 1 1]
 [1 0 1]
 [1 1 1]]


In [3]:
Z = np.random.randint(0, 5, size=(10, 3))
# A row is "constant" when each entry equals its right-hand neighbour;
# E flags those rows, and U keeps every row that is NOT constant.
E = np.all(Z[:, :-1] == Z[:, 1:], axis=1)
U = Z[np.logical_not(E)]
print(Z)
print(U)

[[4 0 2]
 [0 0 0]
 [3 3 1]
 [3 0 1]
 [2 2 4]
 [3 4 2]
 [3 2 3]
 [1 1 4]
 [0 4 3]
 [2 0 4]]
[[4 0 2]
 [3 3 1]
 [3 0 1]
 [2 2 4]
 [3 4 2]
 [3 2 3]
 [1 1 4]
 [0 4 3]
 [2 0 4]]


In [4]:
A = np.random.randint(0, 5, size=(8, 3))
B = np.random.randint(0, 5, size=(2, 2))
# Compare every entry of A with every entry of B via broadcasting:
# C[r, c, i, j] is True when A[r, c] == B[i, j], so C has shape (8, 3, 2, 2).
C = np.equal(A[:, :, None, None], B)
# Keep the rows of A whose total number of matches against B reaches
# B.shape[1].
# NOTE(review): if the goal is "rows of A that contain at least one element
# from each row of B", a raw match count does not guarantee that (all matches
# may come from a single row of B) - confirm the intended criterion.
rows = np.flatnonzero(C.sum(axis=(1, 2, 3)) >= B.shape[1])
print(rows)

[0 1 3 4 6 7]


In [8]:
# Given an arbitrary number of vectors, build their cartesian product (every combination of one item from each vector)

def cartesian(arrays):
    """Return the cartesian product of several 1-D sequences.

    Parameters
    ----------
    arrays : sequence of array-like
        The input vectors; each one is converted with ``np.asarray``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(arrays[0]) * len(arrays[1]) * ..., len(arrays))``.
        Each row is one combination, with the last input varying fastest.
        The dtype is the common dtype of the inputs, so float (or other
        non-integer) values are preserved instead of being truncated.
    """
    arrays = [np.asarray(a) for a in arrays]
    shape = tuple(len(x) for x in arrays)

    # ix[k, n] is the position inside arrays[n] used by the k-th combination.
    ix = np.indices(shape, dtype=int)
    ix = ix.reshape(len(arrays), -1).T

    # Gather into a separate array of the inputs' common dtype. Writing the
    # values back into the integer index array would silently cast them to int.
    out = np.empty(ix.shape, dtype=np.result_type(*arrays))
    for n, arr in enumerate(arrays):
        out[:, n] = arr[ix[:, n]]
    return out

print(cartesian(([1, 2, 3], [4, 5], [6, 7])))

[[1 4 6]
 [1 4 7]
 [1 5 6]
 [1 5 7]
 [2 4 6]
 [2 4 7]
 [2 5 6]
 [2 5 7]
 [3 4 6]
 [3 4 7]
 [3 5 6]
 [3 5 7]]


In [9]:
# How to find the most frequent value in an array?

Z = np.random.randint(0, 10, size=50)
# bincount tallies how often each non-negative integer occurs; the position of
# the largest tally is the most frequent value.
print(np.argmax(np.bincount(Z)))

3


In [10]:
# compute the rank of a matrix

Z = np.random.uniform(0, 1, size=(10, 10))
# Singular Value Decomposition: the rank is the number of singular values that
# are larger than a small absolute tolerance (1e-10 here).
U, S, V = np.linalg.svd(Z)
rank = (S > 1e-10).sum()

rank

10

In [11]:
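# Consider a one-dimensional array Z; build a two-dimensional array whose first row is (Z[0], Z[1], Z[2]) and whose every subsequent row is shifted by one element (the last row is (Z[-3], Z[-2], Z[-1]))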

from numpy.lib import stride_tricks

def rolling(a, window):
    """Return a strided view of all length-``window`` sliding windows of ``a``.

    Parameters
    ----------
    a : numpy.ndarray
        Input array; intended to be one-dimensional.
    window : int
        Number of consecutive elements per row of the result.

    Returns
    -------
    numpy.ndarray
        View of shape ``(a.size - window + 1, window)`` whose row ``k`` is
        ``a[k:k + window]``. No data is copied: the rows overlap in memory
        and share storage with ``a``.
    """
    # Step between consecutive elements. For a 1-D array take the array's real
    # stride so that non-contiguous views (e.g. x[::2] or x[::-1]) are walked
    # correctly; assuming `itemsize` would read the wrong memory for them.
    # Other ranks keep the previous itemsize-based behaviour.
    step = a.strides[0] if a.ndim == 1 else a.itemsize
    shape = (a.size - window + 1, window)
    return stride_tricks.as_strided(a, shape=shape, strides=(step, step))

Z = rolling(np.arange(10), 3)
print(Z)

[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]
 [5 6 7]
 [6 7 8]
 [7 8 9]]


In [12]:
#Extract all the contiguous 3x3 blocks from a random 10x10 matrix

Z = np.random.randint(0, 5, (10, 10))
n = 3  # side length of each square block
# Number of valid top-left corners along each axis. Derived from n so that the
# block size is defined in one place only.
i = 1 + (Z.shape[0] - n)
j = 1 + (Z.shape[1] - n)
# Repeating Z's strides gives a 4-D view where C[r, c] is Z[r:r+n, c:c+n];
# the blocks overlap and share memory with Z (no copy is made).
C = stride_tricks.as_strided(Z, shape=(i, j, n, n), strides=Z.strides + Z.strides)
print(C)

[[[[2 3 0]
   [4 3 4]
   [4 3 2]]

  [[3 0 2]
   [3 4 4]
   [3 2 0]]

  [[0 2 1]
   [4 4 2]
   [2 0 2]]

  [[2 1 1]
   [4 2 4]
   [0 2 4]]

  [[1 1 2]
   [2 4 1]
   [2 4 0]]

  [[1 2 4]
   [4 1 4]
   [4 0 1]]

  [[2 4 1]
   [1 4 4]
   [0 1 3]]

  [[4 1 2]
   [4 4 3]
   [1 3 3]]]


 [[[4 3 4]
   [4 3 2]
   [3 3 3]]

  [[3 4 4]
   [3 2 0]
   [3 3 0]]

  [[4 4 2]
   [2 0 2]
   [3 0 2]]

  [[4 2 4]
   [0 2 4]
   [0 2 2]]

  [[2 4 1]
   [2 4 0]
   [2 2 1]]

  [[4 1 4]
   [4 0 1]
   [2 1 1]]

  [[1 4 4]
   [0 1 3]
   [1 1 0]]

  [[4 4 3]
   [1 3 3]
   [1 0 1]]]


 [[[4 3 2]
   [3 3 3]
   [3 3 1]]

  [[3 2 0]
   [3 3 0]
   [3 1 1]]

  [[2 0 2]
   [3 0 2]
   [1 1 1]]

  [[0 2 4]
   [0 2 2]
   [1 1 2]]

  [[2 4 0]
   [2 2 1]
   [1 2 1]]

  [[4 0 1]
   [2 1 1]
   [2 1 1]]

  [[0 1 3]
   [1 1 0]
   [1 1 1]]

  [[1 3 3]
   [1 0 1]
   [1 1 0]]]


 [[[3 3 3]
   [3 3 1]
   [2 4 0]]

  [[3 3 0]
   [3 1 1]
   [4 0 3]]

  [[3 0 2]
   [1 1 1]
   [0 3 0]]

  [[0 2 2]
   [1 1 2]
   [3 0 1]]

  [[2 2 1]
   

In [13]:
# How to compute 3-period and 5-period moving averages over an array?

def moving_average(a, n=3):
    """Return the moving average of ``a`` over windows of ``n`` elements.

    The running (cumulative) sum is taken as float; subtracting the running
    sum from ``n`` positions earlier leaves the sum of each window, which is
    then divided by ``n``. The result has ``len(a) - n + 1`` entries.
    """
    running = np.cumsum(a, dtype=float)
    # From position n onwards, turn the running totals into window totals.
    window_tail = running[n:] - running[:-n]
    running[n:] = window_tail
    # Position n - 1 already holds the total of the first full window.
    return running[n - 1:] / n

Z = np.arange(20)
print(moving_average(Z, n=3))

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.]


In [14]:
def moving_average(a, n=5):
    """Moving average of ``a`` with a window of ``n`` elements (default 5).

    NOTE: this re-declares ``moving_average`` from the earlier cell; the only
    difference is the default window length. The window is always passed
    explicitly in the call below.
    """
    acc = np.cumsum(a, dtype=float)
    # Difference of cumulative sums n apart gives the sum over each window.
    acc[n:] = np.subtract(acc[n:], acc[:-n])
    return np.true_divide(acc[n - 1:], n)

Z = np.arange(20)
print(moving_average(Z, n=5))

[ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.]


In [15]:
# Consider an array of dimension (5,5,3); how to multiply it by an array with dimensions (5,5)?

A = np.ones((5, 5, 3))
B = np.full((5, 5), 2.0)
# Give B a trailing axis of length 1 so it broadcasts across A's last axis.
print(A * B[..., np.newaxis])

[[[2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]]

 [[2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]]

 [[2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]]

 [[2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]]

 [[2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]
  [2. 2. 2.]]]


In [16]:
#How to get the diagonal of a dot product?

import numpy as np
a = np.arange(15).reshape((5, 3))
b = np.arange(9).reshape((3, 3))

# Straightforward version: form the full 5x5 product a.b.a^T, then read off
# its diagonal.
np.diag(a @ b @ a.T)

array([  60,  672, 1932, 3840, 6396])

In [17]:
# Only the diagonal is computed: entry i sums (a.b)[i, j] * a.T[j, i] over j.
np.einsum('ij,ji->i', a @ b, a.T)

array([  60,  672, 1932, 3840, 6396])

In [18]:
# Same diagonal, written as a row-wise dot product of (a.b) with a itself,
# so no transpose is needed.
np.einsum('ij,ij->i', a @ b, a)

array([  60,  672, 1932, 3840, 6396])

In [19]:
# Get the whole thing in a single einsum call: contract a ('ij'), b ('jk') and
# a.T ('ki') while keeping only index i, i.e. the diagonal of a.b.a^T.

np.einsum('ij,jk,ki->i', a, b, a.T)

array([  60,  672, 1932, 3840, 6396])

In [20]:
# Same single-call contraction, indexing the second `a` as 'ik' directly so
# that no transposed operand has to be passed.
np.einsum('ij,jk,ik->i', a, b, a)

array([  60,  672, 1932, 3840, 6396])

In [21]:
# Benchmark the approaches above on larger random inputs

# Problem size for the timings below: a is (n, p), b is (p, p).
n = 10_000
p = 200
a = np.random.random_sample((n, p))
b = np.random.random_sample((p, p))

In [22]:
# Benchmark: the whole computation in one three-operand einsum call.
%timeit np.einsum('ij,jk,ki->i', a, b, a.T)

23.7 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
# Benchmark: compute a.b with np.dot first, then reduce row by row against a with einsum.
%timeit np.einsum('ij,ij->i', np.dot(a, b), a)

19 ms ± 2.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [24]:
# Benchmark: naive version - with a of shape (n, p) this builds the full (n, n)
# product a.b.a^T only to read its diagonal.
%timeit np.diag(np.dot(np.dot(a, b), a.T))

852 ms ± 121 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
# Benchmark: a.b, then an element-wise multiply with a and a sum over the last axis.
%timeit (a.dot(b) * a).sum(-1)

29.3 ms ± 5.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
