# Numpy
Collection of ***most useful*** numpy functions

Credits:  
- Alex Chabot-Leclerc, [Introduction to Numerical Computing with NumPy](https://www.youtube.com/watch?v=ZB7BZMhfPgk)
- Juan Nunez-Iglesias, [Advanced NumPy](https://www.youtube.com/watch?v=cYugp9IN1-Q), [Github](https://github.com/jni/scipy-japan-2019/blob/master/Advanced%20NumPy%20Patterns%20Solved.ipynb)

In [1]:
import numpy as np
# Show the installed numpy version; flag listings and reprs in the recorded
# outputs can differ between versions.
print(f"numpy: {np.__version__}")

numpy: 1.19.4


<img style="float:center;" src="images/0.jpg" width="450"/> 

In [2]:
# Build a 2-D numpy array from a list of lists: each inner list becomes one row.

expression_data = [
    [100, 200, 50, 400],   # gene0
    [50, 0, 0, 100],       # gene1
    [350, 100, 50, 200],   # gene2
]
gene0, gene1, gene2 = expression_data

a = np.array(expression_data)
print(a)

[[100 200  50 400]
 [ 50   0   0 100]
 [350 100  50 200]]


## Inside a numpy ndarray
<img style="float:center;" src="images/3.jpg" width="450"/>

In [3]:
def print_info(a):
    """Print the layout metadata of a numpy array.

    Writes one line each for ``a.size``, ``a.ndim``, ``a.shape``,
    ``a.dtype`` and ``a.strides``, followed by a ``flags:`` header and the
    array's ``flags`` object.

    Parameters
    ----------
    a : object exposing the ndarray attributes listed above

    Returns
    -------
    None
    """
    labelled_attributes = (
        ('number of elements:', 'size'),
        ('number of dimensions:', 'ndim'),
        ('shape:', 'shape'),
        ('data type:', 'dtype'),
        ('strides:', 'strides'),
    )
    # Attributes are looked up one at a time, right before they are printed.
    for label, attribute in labelled_attributes:
        print(label, getattr(a, attribute))
    print('flags:')
    print(a.flags)

In [4]:
# Show the array, its layout metadata, and the buffer object behind it.
print(a)
print_info(a)
print('{} points to the start of the array a in memory.'.format(a.data))

[[100 200  50 400]
 [ 50   0   0 100]
 [350 100  50 200]]
number of elements: 12
number of dimensions: 2
shape: (3, 4)
data type: int64
strides: (32, 8)
flags:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

<memory at 0x7f1415667f30> points to the start of the array a in memory.


In [5]:
# Reinterpret the flattened buffer of 'a' as unsigned 8-bit integers,
# i.e. look at the same memory one byte at a time.
abytes = a.ravel().view(np.uint8)
print_info(abytes)
print('Notice that the stride is now 1-byte (8-bits)')

number of elements: 96
number of dimensions: 1
shape: (96,)
data type: uint8
strides: (1,)
flags:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

Notice that the stride is now 1-byte (8-bits)


## Transpose
<img style="float:center;" src="images/5.jpg" width="450"/>


In [6]:
# The transpose is a view on the same buffer with the strides swapped.
print(a.transpose())
print_info(a.transpose())
print('Notice that only the strides are changed, not the data')

[[100  50 350]
 [200   0 100]
 [ 50   0  50]
 [400 100 200]]
number of elements: 12
number of dimensions: 2
shape: (4, 3)
data type: int64
strides: (8, 32)
flags:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

Notice that only the strides are changed, not the data


## Slicing

In [7]:
# Take every second row of the transposed view and inspect its layout.
print(a.T[::2])
print_info(a.T[::2])
# Explanation shown to the reader (typos "Noice" / "in neither" fixed).
print('Notice that this array is neither C contiguous nor F \
contiguous as none of the elements are one after the other in the \
memory.')

[[100  50 350]
 [ 50   0  50]]
number of elements: 6
number of dimensions: 2
shape: (2, 3)
data type: int64
strides: (16, 32)
flags:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

Noice that this array in neither C contiguous nor F contiguous as none of the elements are one after the other in the memory.


In [8]:
# 10 x 3 array of uniform random samples from [0, 1)
rand = np.random.random(size=(10, 3))
print(rand)

[[0.882843   0.09848295 0.40051645]
 [0.24743364 0.06116268 0.42434857]
 [0.63231331 0.77365365 0.94606827]
 [0.59838049 0.04032509 0.47796537]
 [0.70488121 0.89455829 0.44926176]
 [0.05708898 0.42519441 0.65380518]
 [0.44326825 0.17200054 0.55931795]
 [0.06258617 0.78469937 0.45935938]
 [0.80573481 0.78530965 0.31194624]
 [0.78086415 0.72311852 0.46756143]]


In [9]:
# For every row, the column index of the entry closest to 0.75
print(np.abs(rand - 0.75).argmin(axis=1))

[0 2 1 0 0 2 2 1 1 1]


## Broadcasting
<img style="float:center;" src="images/1.jpg" width="450"/>

In [10]:
# The scalar 5 is combined with every element of a; a itself is not modified.
a + 5  # simplest "broadcasting": scalar - array operations

array([[105, 205,  55, 405],
       [ 55,   5,   5, 105],
       [355, 105,  55, 205]])

In [11]:
# b has shape (4,), which matches the last axis of a (shape (3, 4)),
# so the same 4-element row is added to each of the 3 rows of a.
b = np.array([1, 2, 3, 4])
a + b  # broadcasting: coerce arrays to same shape by
       # repeating as needed

array([[101, 202,  53, 404],
       [ 51,   2,   3, 104],
       [351, 102,  53, 204]])

In [12]:
# b has shape (3, 1): one value per row of a, repeated across a's 4 columns.
b = np.array([[1], [2], [3]])
a + b  # broadcasting: shape compatibility

array([[101, 201,  51, 401],
       [ 52,   2,   2, 102],
       [353, 103,  53, 203]])

In [13]:
# (3, 1) + (1, 3) -> (3, 3): each operand is repeated along its length-1 axis.
b + b.T # broadcasting: shape compatibility

array([[2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

## 3D Broadcasting

In [14]:
# producing an array containing the sum of every possible
# combination of an element in x with every element in y
x = np.random.random(size=(3, 5))
y = np.random.randint(10, size=4)
# y is reshaped from (4,) to (4, 1, 1), so x (3, 5) + y gives z of shape (4, 3, 5):
# z[k] is x with y[k] added to every element.
z = x + y[:, np.newaxis, np.newaxis] # adding new dimensions to y
# Alternatives that put the y axis last instead, giving shape (3, 5, 4):
#z = x[..., np.newaxis] + y          # adding new dimension to x
#z = x[:,:, np.newaxis] + y          # adding new dimension to x
#z = x[:,:, np.newaxis] + y[np.newaxis, np.newaxis, :] # adding new dimension to x, y
print_info(z)
print(x)
print(y)
print(z)

number of elements: 60
number of dimensions: 3
shape: (4, 3, 5)
data type: float64
strides: (120, 40, 8)
flags:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

[[0.3617088  0.10276689 0.91906322 0.09638772 0.37063978]
 [0.07021473 0.07910641 0.33154519 0.54005447 0.0661195 ]
 [0.91157382 0.88205116 0.46035239 0.73403075 0.21860604]]
[7 4 9 1]
[[[7.3617088  7.10276689 7.91906322 7.09638772 7.37063978]
  [7.07021473 7.07910641 7.33154519 7.54005447 7.0661195 ]
  [7.91157382 7.88205116 7.46035239 7.73403075 7.21860604]]

 [[4.3617088  4.10276689 4.91906322 4.09638772 4.37063978]
  [4.07021473 4.07910641 4.33154519 4.54005447 4.0661195 ]
  [4.91157382 4.88205116 4.46035239 4.73403075 4.21860604]]

 [[9.3617088  9.10276689 9.91906322 9.09638772 9.37063978]
  [9.07021473 9.07910641 9.33154519 9.54005447 9.0661195 ]
  [9.91157382 9.88205116 9.46035239 9.73403075 9.21860604]]

 [[1.3617088  1.10

In [15]:
# Use np.broadcast_arrays to get the same-shape arrays that numpy
# adds together. Then use print_info on the output. Notice
# anything weird? Check the strides of xb, yb: the broadcast axes
# have stride 0, i.e. the same memory is read repeatedly.
xb, yb = np.broadcast_arrays(x, y[:, np.newaxis, np.newaxis])
print_info(xb)
print_info(yb)

# The message used to say the "size" of xb, yb is unchanged, but print_info
# reports more elements for xb, yb than x, y hold. What stays the same is
# the underlying memory.
print('This is what numpy does in the background. \
The shape of xb, yb is different from x, y, respectively. \
However, no data is copied: the broadcast axes have a stride of 0, \
so xb, yb reuse the memory of x, y. This makes broadcasting \
memory efficient. \nWARNING - if you make a copy of xb, yb then \
the full array size will be copied. See below - \n')

# Copying materialises the full broadcast shape in newly allocated memory.
xCopy = xb.copy()
print_info(xCopy)

number of elements: 60
number of dimensions: 3
shape: (4, 3, 5)
data type: float64
strides: (0, 40, 8)
flags:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True  (with WARN_ON_WRITE=True)
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

number of elements: 60
number of dimensions: 3
shape: (4, 3, 5)
data type: int64
strides: (8, 0, 0)
flags:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True  (with WARN_ON_WRITE=True)
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

This is what numpy does in the backgound. The shape of xb, yb is different from x,y, respectively. However, their size is still the same. This makes broadcasting memory efficient. 

number of elements: 60
number of dimensions: 3
shape: (4, 3, 5)
data type: float64
strides: (120, 40, 8)
flags:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFC

## arange( )
<img style="float:center;" src="images/4.jpg" width="450"/>

In [16]:
# arange (one "r") yields 0..15; reshape lays those values out as 4 rows x 4 columns.
a = np.arange(16).reshape((4, 4))
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [17]:
# Rows 1-2 and columns 1-2 of a. Basic slicing gives a view: b is a separate
# array object that reads and writes the same memory as a.
b = a[1:3, 1:3]
print(b)
print('b is a new array object (slice of a) but its NOT a new data')

[[ 5  6]
 [ 9 10]]
b is a new array object (slice of a) but its NOT a new data


In [18]:
# The in-place add writes through the view b into the memory it shares with a,
# so the middle 2x2 block of a changes too.
b += 1000  # this modifies a as well
print(b)
print(a)

print('Be careful with such mistakes')

[[1005 1006]
 [1009 1010]]
[[   0    1    2    3]
 [   4 1005 1006    7]
 [   8 1009 1010   11]
 [  12   13   14   15]]
Be careful with such mistakes


## reshape( ), flatten( ) & ravel( )
<img style="float:center;" src="images/6.jpg" width="450"/>
<img style="float:center;" src="images/7.jpg" width="450"/>

In [19]:
# Adding 0 to the slice evaluates to a brand-new array, so the in-place
# update of b below leaves a untouched.
a = np.arange(16).reshape((4, 4))
b = a[2:4, 2:4] + 0 # trick (new data = slice of a + 0)
b += 1000
print(b, a, sep='\n')

[[1010 1011]
 [1014 1015]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [20]:
# An explicit copy of the slice owns its data, so the in-place update
# of b cannot reach a.
a = np.arange(16).reshape((4, 4))
b = a[2:4, 2:4].copy() # best and safest
#b = np.array(a[2:4, 2:4]) # best and safest
b += 1000
print(b, a, sep='\n')

[[1010 1011]
 [1014 1015]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


## min( ), max( ), argmin( ), argmax( ), where( )

In [21]:
# 5 x 5 array of uniform random samples from [0, 1)
a = np.random.random_sample((5, 5))
print(a)

[[0.62904088 0.64832424 0.25888202 0.36591531 0.03034993]
 [0.63888479 0.2105138  0.8183149  0.08598749 0.40614235]
 [0.19711333 0.23033457 0.61095945 0.74834207 0.42771275]
 [0.2104402  0.68964124 0.01960483 0.48614087 0.71547411]
 [0.40441977 0.03592338 0.80879568 0.5321812  0.14803992]]


In [22]:
# Row-wise (axis=1) extremes: the values, then the column index of each value.
print(np.min(a, axis=1))
print(np.argmin(a, axis=1))
print(np.max(a, axis=1))
print(np.argmax(a, axis=1))

[0.03034993 0.08598749 0.19711333 0.01960483 0.03592338]
[4 3 0 2 1]
[0.64832424 0.8183149  0.74834207 0.71547411 0.80879568]
[1 2 3 4 2]


In [23]:
# Boolean mask of the entries above 0.5, and the coordinates where it is True.
# np.where(mask) gives one index array per axis; stacking them yields a
# 2 x N array (row indices on top, column indices below).
mask = a > 0.5
location = np.where(mask)
print(mask, np.array(location), sep='\n')

# The same coordinates in a single expression
print(np.array(np.where(a > 0.5)))

[[ True  True False False False]
 [ True False  True False False]
 [False False  True  True False]
 [False  True False False  True]
 [False False  True  True False]]
[[0 0 1 1 2 2 3 3 4 4]
 [0 1 0 2 2 3 1 4 2 3]]
[[0 0 1 1 2 2 3 3 4 4]
 [0 1 0 2 2 3 1 4 2 3]]
