## Numpy Tutorial

### import numpy

In [4]:
import numpy as np

### check python and numpy version

In [8]:
import platform
print('Python version: ' + platform.python_version())
print('Numpy version: ' + np.__version__)

Python version: 3.12.0
Numpy version: 1.26.4


# 0. Numpy Data Types

### A list of Numpy Data Types

In [9]:
import pandas as pd
dtypes = pd.DataFrame(
    {
        'Type': ['int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64', 'float16', 'float32', 'float64', 'float128', 'complex64', 'complex128', 'object', 'string_', 'unicode_'],
        'Type Code': ['i1', 'u1', 'i2', 'u2', 'i4', 'u4', 'i8', 'u8', 'f2', 'f4 or f', 'f8 or d', 'f16 or g', 'c8', 'c16',  'O', 'S', 'U']
    }
)

dtypes

Unnamed: 0,Type,Type Code
0,int8,i1
1,uint8,u1
2,int16,i2
3,uint16,u2
4,int32,i4
5,uint32,u4
6,int64,i8
7,uint64,u8
8,float16,f2
9,float32,f4 or f


In [7]:
# create an array with a specified data type
arr = np.array([1,2,3], dtype='f4')
print(arr)
print(arr.dtype)

arr = np.array([1+2j, 3-4j], dtype=np.complex64)
print(arr)
print(arr.dtype)


[1. 2. 3.]
float32
[1.+2.j 3.-4.j]
complex64


### string data type

In [6]:
# set the max length of the string using S + some number, such as 'S3'
# any string longer than the max length will be truncated
s = np.array(['abdc', 'defg'], dtype='S3')
print(s)
print(s.dtype)

[b'abd' b'def']
|S3


In [9]:
# numpy string and unicode data types are fixed-length
# string_ and unicode_ will treat the longest string in the array as the default length when creating an array
arr = np.array(['a', 'ab', 'abc'], dtype=np.string_)
print(arr.dtype)

arr = np.array(['a', 'ab', 'abc'], dtype=np.unicode_)
print(arr.dtype)

|S3
<U3


In [10]:
# what does "|" and "<" mean above?
# they are the byte order indicators, which is beyond the scope of this tutorial
# you can check it out here: https://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.byteorder.html
# and here: https://en.wikipedia.org/wiki/Endianness

# 1. Create Arrays

### create an array from a Python array

In [7]:
arr = np.array(range(14))
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13]


In [12]:
arr = np.array([1,2,3,4,5])
print(arr)

[1 2 3 4 5]


### create an array in a specified data type

In [14]:
arr = np.array([[1,2,3], [4,5,6]], dtype='i2')
print(arr)

[[1 2 3]
 [4 5 6]]


### create an aray of evenly spaced values within a specified interval

In [15]:
# np.arange(start, stop, step)
arr = np.arange(0, 20, 2)  
print(arr)

[ 0  2  4  6  8 10 12 14 16 18]


### create an array of evenly spaced numbers in a specified interval

In [16]:
# np.linspace(start, stop, num_of_elements, endpoint=True, retstep=False) 
arr = np.linspace(0, 10, 20)
print(arr)

[ 0.          0.52631579  1.05263158  1.57894737  2.10526316  2.63157895
  3.15789474  3.68421053  4.21052632  4.73684211  5.26315789  5.78947368
  6.31578947  6.84210526  7.36842105  7.89473684  8.42105263  8.94736842
  9.47368421 10.        ]


In [17]:
# exclude endpoint and return setp size
arr, step = np.linspace(0, 10, 20, endpoint=False, retstep=True)
print(arr)
print(step)

[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5 6.  6.5 7.  7.5 8.  8.5
 9.  9.5]
0.5


### create an array of random values in a given shape

In [18]:
arr = np.random.rand(3, 3)
print(arr)

[[0.22666514 0.91657451 0.0295647 ]
 [0.50620249 0.10354957 0.06787082]
 [0.5937202  0.53630206 0.13230955]]


### create an array of zeros in a given shape 

In [19]:
zeros = np.zeros((2,3), dtype='i4')
print(zeros)

[[0 0 0]
 [0 0 0]]


### create an array of zeros with the same shape and data type as a given array

In [20]:
zeros = np.zeros_like(arr)
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### create an array of ones in a given shape 

In [21]:
ones = np.ones((2,3))
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]]


### create an array of ones with the same shape and data type as a given array

In [22]:
ones = np.ones_like(arr)
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


### create an array of arbitrary values in a given shape 

In [23]:
empty = np.empty((2,3))
print(empty)

[[1. 1. 1.]
 [1. 1. 1.]]


### create an array of arbitrary values with the same shape and data type as a given array

In [24]:
empty = np.empty_like(arr)
print(empty)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


### create an array of constant values in a given shape  

In [25]:
p = np.full((2,3), 5)
print(p)

[[5 5 5]
 [5 5 5]]


### create an array of constant values with the same shape and data type as a given array

In [26]:
p = np.full_like(arr, 5)
print(p)

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]


### create an array by repetition

In [27]:
# repeat each element of an array by a specified number of times
# np.repeat(iterable, reps, axis=None)
arr = [0, 1, 2]
print(np.repeat(arr, 3))    # or np.repeat(range(3), 3)

[0 0 0 1 1 1 2 2 2]


In [28]:
# repeat along a specified axis with specified number of repetitions
arr = [[1,2], [3,4]]
print(np.repeat(arr, [1,2], axis=0))

[[1 2]
 [3 4]
 [3 4]]


In [29]:
# repeat an array by a specified number of times
arr = [0, 1, 2]
print(np.tile(arr, 3))

[0 1 2 0 1 2 0 1 2]


In [30]:
# repeat along specified axes
print(np.tile(arr, (2,2)))

[[0 1 2 0 1 2]
 [0 1 2 0 1 2]]


### create an identity matrix with a given diagonal size

In [31]:
identity_matrix = np.eye(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [32]:
identity_matrix = np.identity(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


### create an identity matrix with a diagonal offset

In [34]:
identity_matrix = np.eye(5, k=1)    # positive number shifts the diagonal upward
print(identity_matrix)

[[0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]]


In [35]:
identity_matrix = np.eye(5, k=-2)   # negative number shifts the diagonal downward
print(identity_matrix)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]


### extract the diagonal array / create a diagonal array

In [36]:
arr = np.random.rand(5,5)
print(arr)

[[0.14855836 0.57407843 0.48281499 0.66635677 0.24130316]
 [0.34955254 0.18619746 0.81725646 0.00317054 0.25341224]
 [0.57742798 0.6050741  0.70360017 0.95080231 0.27988104]
 [0.94188332 0.82201767 0.94569825 0.74316084 0.29508367]
 [0.48667667 0.90520946 0.80429278 0.38307499 0.18923289]]


In [37]:
# extract the diagonal
print(np.diag(arr))

[0.14855836 0.18619746 0.70360017 0.74316084 0.18923289]


In [38]:
# create a matrix with a specified diagonal array
arr = np.diag([1,2,3,4,5])
print(arr)

[[1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]
 [0 0 0 0 5]]


# 2. Inspect Arrays

In [39]:
arr = np.array([[1,2,3], [4,5,6]], dtype=np.int64)

### inspect general information of an array

In [40]:
print(np.info(arr))

class:  ndarray
shape:  (2, 3)
strides:  (24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x19ea235c170
byteorder:  little
byteswap:  False
type: int64
None


### inspect the data type of an array

In [41]:
print(arr.dtype)

int64


### inspect the dimension of an array

In [42]:
print(arr.shape)

(2, 3)


### inspect length of an array

In [43]:
print(len(arr))

2


### inspect the number of dimensions of an array

In [44]:
print(arr.ndim)

2


### inspect the number of elements in an array

In [45]:
print(arr.size)

6


### inspect the number of bytes of each element in an array

In [46]:
print(arr.itemsize)

8


### inspect the memory size of an array (in byte)

In [47]:
# arr.nbytes = arr.size * arr.itemsize
print(arr.nbytes)

48


# 3. Sampling Methods

### set seed

In [48]:
np.random.seed(123)

### set random state which is independent from the global seed

In [49]:
rs = np.random.RandomState(321)
rs.rand(10)

array([0.88594794, 0.07791236, 0.97964616, 0.24767146, 0.75288472,
       0.52667564, 0.90755375, 0.8840703 , 0.08926896, 0.5173446 ])

### generate a random sample from interval [0, 1) in a given shape

In [50]:
# generate a random scalar
print(np.random.rand())      

0.6964691855978616


In [51]:
# generate a 1-D array
print(np.random.rand(3))           

[0.28613933 0.22685145 0.55131477]


In [52]:
# generate a 2-D array
print(np.random.rand(3,3))          

[[0.71946897 0.42310646 0.9807642 ]
 [0.68482974 0.4809319  0.39211752]
 [0.34317802 0.72904971 0.43857224]]


### generate a sample from the standard normal distribution (mean = 0, var = 1)

In [53]:
print(np.random.randn(3,3))

[[-0.14337247 -0.6191909  -0.76943347]
 [ 0.57674602  0.12652592 -1.30148897]
 [ 2.20742744  0.52274247  0.46564476]]


### generate an array of random integers in a given interval [low, high)

In [54]:
# np.ranodm.randint(low, high, size, dtype)
print(np.random.randint(1, 10, 3, 'i8'))

[5 7 2]


### generate an array of random floating-point numbers in the interval [0.0, 1.0)

In [50]:
# the following methods are the same as np.random.rand()
print(np.random.random_sample(10))
print(np.random.random(10))
print(np.random.ranf(10))
print(np.random.sample(10))

[0.65472131 0.37380143 0.23451288 0.98799529 0.76599595 0.77700444
 0.02798196 0.17390652 0.15408224 0.07708648]
[0.8898657  0.7503787  0.69340324 0.51176338 0.46426806 0.56843069
 0.30254945 0.49730879 0.68326291 0.91669867]
[0.10892895 0.49549179 0.23283593 0.43686066 0.75154299 0.48089213
 0.79772841 0.28270293 0.43341824 0.00975735]
[0.34079598 0.68927201 0.86936929 0.26780382 0.45674792 0.26828131
 0.8370528  0.27051466 0.53006201 0.17537266]


In [51]:
# np.random.choice(iterable_or_int, size, replace=True, p=weights)
print(np.random.choice(range(3), 10, replace=True, p=[0.1, 0.8, 0.1]))

[1 1 1 1 1 1 1 2 2 1]


In [52]:
print(np.random.choice(3, 10))

[1 0 1 2 2 0 1 1 1 0]


In [53]:
print(np.random.choice([1,2,3], 10))

[2 2 1 3 2 3 1 2 1 3]


### shuffle an array in place

In [55]:
arr = np.array(range(10))
print(arr)

[0 1 2 3 4 5 6 7 8 9]


In [56]:
np.random.shuffle(arr)
print(arr)

[9 4 7 0 1 2 3 8 6 5]


### generate a permutation of an array

In [57]:
# similar to np.random.shuffle(), but it returns a copy rather than making changes in place
arr = np.array(range(10))
print('The initial array: ', arr)
print('A permutation of the array: ', np.random.permutation(arr))

The initial array:  [0 1 2 3 4 5 6 7 8 9]
A permutation of the array:  [7 8 1 4 9 6 2 3 0 5]


# 4. Math Functions

In [58]:
arr = np.random.rand(5,5)

### element-wise addition, subtraction, multiplication and division

In [59]:
print(arr + 10)
print(arr - 10)
print(arr * 10)
print(arr / 10)

[[10.31226122 10.42635131 10.89338916 10.94416002 10.50183668]
 [10.62395295 10.1156184  10.31728548 10.41482621 10.86630916]
 [10.25045537 10.48303426 10.98555979 10.51948512 10.61289453]
 [10.12062867 10.8263408  10.60306013 10.54506801 10.34276383]
 [10.30412079 10.41702221 10.68130077 10.87545684 10.51042234]]
[[-9.68773878 -9.57364869 -9.10661084 -9.05583998 -9.49816332]
 [-9.37604705 -9.8843816  -9.68271452 -9.58517379 -9.13369084]
 [-9.74954463 -9.51696574 -9.01444021 -9.48051488 -9.38710547]
 [-9.87937133 -9.1736592  -9.39693987 -9.45493199 -9.65723617]
 [-9.69587921 -9.58297779 -9.31869923 -9.12454316 -9.48957766]]
[[3.12261223 4.26351307 8.93389163 9.44160018 5.01836676]
 [6.23952952 1.15618395 3.17285482 4.14826212 8.66309158]
 [2.50455365 4.83034264 9.85559786 5.19485119 6.12894526]
 [1.20628666 8.26340801 6.03060128 5.45068006 3.42763834]
 [3.04120789 4.17022211 6.81300766 8.75456842 5.10422337]]
[[0.03122612 0.04263513 0.08933892 0.094416   0.05018367]
 [0.0623953  0.0115

In [61]:
arr1 = np.array([1,2,3])
# the above operations can be performed using numpy built-in functions
# which can save memory as the output can be stored in the original array rather than assigning new memoryarr = np.array([1,2,3])
np.add(arr1, [8,9,10], out=arr1)
print(arr1)

np.subtract(arr1, [8,9,10], out=arr1)
print(arr1)

np.multiply(arr1, [1,2,3], out=arr1)
print(arr1)

[ 9 11 13]
[1 2 3]
[1 4 9]


### element-wise exponentiation

In [62]:
print(np.exp(arr))

[[1.36651161 1.53165876 2.4433967  2.57065317 1.65175222]
 [1.86629084 1.12256741 1.3733946  1.51410758 2.37811738]
 [1.28461025 1.62098545 2.6793113  1.68116183 1.84576629]
 [1.12820589 2.28494236 1.82770326 1.72472567 1.408836  ]
 [1.35543277 1.51743622 1.97644696 2.39997145 1.66599466]]


### element-wise logorithm

In [63]:
# natural log
print(np.log(arr))      

[[-1.16391519 -0.85249161 -0.112733   -0.05745962 -0.68948056]
 [-0.47168031 -2.15746021 -1.14795334 -0.87989561 -0.14351344]
 [-1.38447456 -0.72766769 -0.01454549 -0.65491711 -0.48956242]
 [-2.11503833 -0.190748   -0.50573837 -0.60684471 -1.0707136 ]
 [-1.19033032 -0.87461579 -0.38375142 -0.13300942 -0.67251678]]


In [64]:
# base 2
print(np.log2(arr))     

[[-1.67917467 -1.22988542 -0.16263934 -0.0828967  -0.99471018]
 [-0.68049085 -3.11255714 -1.65614659 -1.26942104 -0.20704613]
 [-1.99737458 -1.04980256 -0.0209847  -0.94484567 -0.70628928]
 [-3.05135531 -0.27519119 -0.72962624 -0.87549185 -1.5447132 ]
 [-1.71728366 -1.26180387 -0.55363627 -0.19189204 -0.97023663]]


In [65]:
# base 10
print(np.log10(arr))    

[[-0.50548194 -0.3702324  -0.04895932 -0.02495439 -0.2994376 ]
 [-0.20484816 -0.93697306 -0.4985498  -0.38213381 -0.06232709]
 [-0.60126966 -0.31602206 -0.00631703 -0.28442689 -0.21261426]
 [-0.91854947 -0.0828408  -0.21963938 -0.26354931 -0.46500501]
 [-0.51695389 -0.37984081 -0.16666112 -0.05776526 -0.29207033]]


### element-wise square root

In [66]:
print(np.sqrt(arr))

[[0.55880338 0.65295582 0.94519266 0.97167897 0.70840432]
 [0.78990693 0.34002705 0.563281   0.64407004 0.9307573 ]
 [0.50045516 0.69500666 0.99275364 0.72075316 0.7828758 ]
 [0.34731638 0.90903289 0.77656946 0.73828721 0.58546036]
 [0.55147148 0.64577257 0.82540945 0.93565851 0.71443848]]


### element-wise sine and cosine

In [67]:
print(np.sin(arr))

[[0.30721129 0.4135515  0.77920046 0.81000463 0.48103656]
 [0.58424783 0.11536098 0.31198869 0.40303089 0.76194378]
 [0.24784514 0.46446842 0.83358144 0.49643325 0.57523755]
 [0.12033633 0.73545693 0.56716546 0.51847624 0.33609143]
 [0.29945443 0.40503966 0.62980393 0.76783626 0.4885458 ]]


In [68]:
print(np.cos(arr))

[[0.95164133 0.91048073 0.62677479 0.58642347 0.87670053]
 [0.8115753  0.99332364 0.95008582 0.91518638 0.64764317]
 [0.96879966 0.88558968 0.55239657 0.8680749  0.81798641]
 [0.99273318 0.67757147 0.82360387 0.85509203 0.94182936]
 [0.9541106  0.91429911 0.77675415 0.64064615 0.87253825]]


### sum along a specified axis

In [69]:
# sum along the row
print(np.sum(arr, axis=0))    

[1.611419   2.26836698 3.48059532 3.2989962  2.83422653]


In [70]:
# sum along the column
print(np.sum(arr, axis=1))    

[3.07799839 2.3379922  2.85142906 2.43786144 2.78832295]


### compute the min and max along a specified axis

In [71]:
# calculate min along the row
print(np.min(arr, axis=0))

[0.12062867 0.1156184  0.31728548 0.41482621 0.34276383]


In [72]:
# calculate max along the column
print(np.max(arr, axis=1))    

[0.94416002 0.86630916 0.98555979 0.8263408  0.87545684]


In [73]:
# if axis not specified, calculate the max/min value of all elements
print(np.max(arr))
print(np.min(arr))

0.985559785610705
0.11561839507929572


### compute the indices of the min and max along a specified axis

In [74]:
# along the row
print(np.argmin(arr, axis=0))
print(np.argmax(arr, axis=0))

[3 1 1 1 3]
[1 3 2 0 1]


In [75]:
# along the column
print(np.argmin(arr, axis=1))
print(np.argmax(arr, axis=1))

[0 1 0 0 0]
[3 4 2 1 3]


In [76]:
# if axis not specified, return the index of the flattened array
print(np.argmin(arr))
print(np.argmax(arr))

6
12


### compute element-wise min and max of two arrays

In [77]:
arr1 = np.array([1, 3, 5, 7, 9])
arr2 = np.array([0, 4, 3, 8, 7])
print(np.maximum(arr1, arr2))
print(np.minimum(arr1, arr2))

[1 4 5 8 9]
[0 3 3 7 7]


### split fractional and integral parts of a floating-point array

In [78]:
arr1 = np.random.rand(10) * 10
re, intg = np.modf(arr1)
print('fractional: ', re)
print('integral: ', intg)

fractional:  [0.69313783 0.85936553 0.24903502 0.74689051 0.42342438 0.83194988
 0.63682841 0.43666375 0.94222961 0.72456957]
integral:  [6. 5. 6. 6. 8. 0. 7. 2. 1. 5.]


### compute the mean

In [79]:
# compute the overall mean
print(np.mean(arr))

0.5397441610491508


In [80]:
# compute the mean along the row
print(np.mean(arr, axis=0))   

[0.3222838  0.4536734  0.69611906 0.65979924 0.56684531]


In [81]:
# compute the mean along the column
print(np.mean(arr, axis=1)) 

[0.61559968 0.46759844 0.57028581 0.48757229 0.55766459]


### compute the median

In [82]:
# compute the overall median
print(np.median(arr))

0.5104223374780111


In [83]:
# compute the median along the row
print(np.median(arr, axis=0)) 

[0.30412079 0.42635131 0.68130077 0.54506801 0.51042234]


In [84]:
# compute the median along the column
print(np.median(arr, axis=1))

[0.50183668 0.41482621 0.51948512 0.54506801 0.51042234]


### compute the percentile

In [85]:
arr1 = np.random.rand(100)
# compute 5, 65, and 95 percentiles of the array
print(np.percentile(arr1, [5, 65, 95]))

[0.08462724 0.6650086  0.92757696]


### compute the standard deviation & variance

In [86]:
# compute the overall standard deviation
print(np.std(arr))

0.24573174363500688


In [87]:
# compute the standard deviation along the row
print(np.std(arr, axis=0))

[0.16568569 0.22632457 0.23455739 0.20987251 0.17287065]


In [88]:
# compute the standard deviation along the column
print(np.std(arr, axis=1))

[0.25529986 0.25794799 0.23947987 0.23959316 0.20128308]


In [89]:
# compute the overall variance
print(np.var(arr))

0.06038408982990074


In [90]:
# compute the variance along the row
print(np.var(arr, axis=0))

[0.02745175 0.05122281 0.05501717 0.04404647 0.02988426]


In [91]:
# compute the variance along the column
print(np.var(arr, axis=1))

[0.06517802 0.06653716 0.05735061 0.05740488 0.04051488]


### compute the covariance & correlation

In [92]:
arr = np.random.rand(5,8)

In [93]:
print(np.cov(arr))

[[ 0.15399086 -0.06206547 -0.02490679 -0.00510489 -0.02522687]
 [-0.06206547  0.0627736   0.03628049 -0.05191315 -0.00205034]
 [-0.02490679  0.03628049  0.08787979 -0.01258509  0.01922105]
 [-0.00510489 -0.05191315 -0.01258509  0.08899999  0.01573883]
 [-0.02522687 -0.00205034  0.01922105  0.01573883  0.10611966]]


In [94]:
print(np.corrcoef(arr[:,0], arr[:,1]))

[[ 1.         -0.18964295]
 [-0.18964295  1.        ]]


### compute cumulative sum & product

In [95]:
# calculate the cumulative sums along the row
print(np.cumsum(arr, axis=0))    

[[0.75164399 0.07914896 0.85938908 0.82150411 0.90987166 0.1286312
  0.08178009 0.13841557]
 [1.1510227  0.50345582 1.42160745 0.94374766 1.11127116 0.94027555
  0.54976766 0.94635378]
 [1.15844908 1.05504855 2.3535396  1.52592312 1.31736689 1.65803311
  0.92875351 1.61473773]
 [1.1877688  1.69094891 2.38573754 2.27070378 1.79027989 1.77978746
  1.47138944 1.68151217]
 [1.84113367 2.68703523 3.15513487 2.84447789 1.89291515 2.47962154
  2.1325573  1.7306093 ]]


In [96]:
# calculate the cumulative sums along the column
print(np.cumsum(arr, axis=1))    

[[0.75164399 0.83079295 1.69018203 2.51168614 3.4215578  3.550189
  3.63196908 3.77038466]
 [0.39937871 0.82368557 1.38590395 1.5081475  1.709547   2.52119135
  2.98917892 3.79711713]
 [0.00742638 0.5590191  1.49095125 2.07312671 2.27922244 2.99698
  3.37596585 4.0443498 ]
 [0.02931972 0.66522008 0.69741802 1.44219867 1.91511167 2.03686603
  2.57950196 2.6462764 ]
 [0.65336487 1.6494512  2.41884854 2.99262265 3.09525791 3.79509198
  4.45625985 4.50535698]]


In [97]:
# calculate the cumulative product along the row
print(np.cumprod(arr, axis=0))   

[[7.51643989e-01 7.91489607e-02 8.59389076e-01 8.21504113e-01
  9.09871660e-01 1.28631198e-01 8.17800871e-02 1.38415573e-01]
 [3.00190607e-01 3.35834471e-02 4.83164333e-01 1.00423579e-01
  1.83247699e-01 1.04402784e-01 3.82720646e-02 1.11831230e-01]
 [2.22932908e-03 1.85243851e-02 4.50276375e-01 5.84641431e-02
  3.77665677e-02 7.49358881e-02 1.45045709e-02 7.47461990e-02]
 [6.53633109e-05 1.17796632e-02 1.44979694e-02 4.35429628e-02
  1.78603009e-02 9.12377075e-03 7.87070126e-03 4.99113582e-03]
 [4.27060912e-05 1.17335614e-02 1.11546991e-02 2.49838249e-02
  1.83309662e-03 6.38512566e-03 5.20385477e-03 2.45050447e-04]]


In [98]:
# calculate the cumulative product along the column
print(np.cumprod(arr, axis=1))  

[[7.51643989e-01 5.94918406e-02 5.11266379e-02 4.20007433e-02
  3.82152860e-02 4.91567800e-03 4.02004575e-04 5.56436935e-05]
 [3.99378710e-01 1.69459127e-01 9.52730356e-02 1.16465141e-02
  2.34560212e-03 1.90379471e-03 8.90952266e-04 7.19834379e-04]
 [7.42637854e-03 4.09633639e-03 3.81750757e-03 2.22245922e-03
  4.58039350e-04 3.28761207e-04 1.24595845e-04 8.32778630e-05]
 [2.93197229e-02 1.86444223e-02 6.00311897e-04 4.47100688e-04
  2.11439729e-04 2.57437079e-05 1.39694608e-05 9.32802964e-07]
 [6.53364871e-01 6.50807815e-01 5.00729800e-01 2.87305797e-01
  2.94877050e-02 2.06365007e-02 1.36441912e-02 6.69890636e-04]]


### element-wise comparison

In [99]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([5,4,3,2,1])

In [100]:
# return an array of bools
print(arr1 == arr2)    
print(arr1 < 3)

[False False  True False False]
[ True  True False False False]


# 5. Slicing & Indexing

In [101]:
arr = np.array(range(100)).reshape((10,10))

### select an element by row and column indices

In [102]:
print(arr[5][5])
# or more concisely
print(arr[5,5])

55
55


### indexing with slicing

In [103]:
print(arr[1:3, 4:6])

[[14 15]
 [24 25]]


In [104]:
# ellipsis slicing: auto-complete the dimensions
arr = np.array(range(16)).reshape(2,2,2,2)
# equivalent to arr[0,:,:,:]
print(arr[0, ...])    

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


### assign a scalar to a slice by broadcasting

In [105]:
arr[1:3,:] = 100    # or simply arr[1:3]
arr[:,8:] = 100
print(arr)

[[[[  0   1]
   [  2   3]]

  [[  4   5]
   [  6   7]]]


 [[[100 100]
   [100 100]]

  [[100 100]
   [100 100]]]]


### boolean indexing

In [106]:
arr1 = np.arange(25).reshape((5,5))
bools = np.array([True, True, False, True, False])
print(arr1[bools])

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [15 16 17 18 19]]


In [107]:
# negate the condition
print(arr1[~bools])    

[[10 11 12 13 14]
 [20 21 22 23 24]]


In [108]:
arr2 = np.array([1,2,3,4,5])
# multiple conditions
print(arr1[(arr2<2) | (arr2>4)])    

[[ 0  1  2  3  4]
 [20 21 22 23 24]]


### fancy indexing

In [109]:
arr = np.random.rand(10,10)

In [110]:
# select arr[3,3], arr[1,2], arr[2,1]
print(arr[[3,1,2], [3,2,1]])       

[0.37518553 0.23659981 0.09552964]


In [111]:
# select rows 3,1,2 and columns 6,4,8 
print(arr[[3,1,2]][:, [6,4,8]])    

[[0.96300447 0.09703816 0.79892273]
 [0.46365272 0.91839747 0.31366895]
 [0.30194684 0.89497829 0.53950482]]


### dimension inference

In [112]:
# dimension inference using any negative number (usually -1)
arr = np.array(range(16)).reshape((4,-1))
print(arr.shape)

(4, 4)


### find elements/indices by conditions

In [113]:
arr = np.arange(16).reshape(4,4)

In [114]:
# find the elements greater than 5 and return a flattened array
print(arr[arr>5])    # or arr[np.where(arr>5)]

[ 6  7  8  9 10 11 12 13 14 15]


In [115]:
# return values based on conditions 
# np.where(condition, true_return, false_return)
print(np.where(arr>5, -1, 10))

[[10 10 10 10]
 [10 10 -1 -1]
 [-1 -1 -1 -1]
 [-1 -1 -1 -1]]


In [116]:
# find the indices of the elements on conditions
print(np.argwhere(arr>5))

[[1 2]
 [1 3]
 [2 0]
 [2 1]
 [2 2]
 [2 3]
 [3 0]
 [3 1]
 [3 2]
 [3 3]]


# 6. Sort an Array

In [117]:
arr = np.random.rand(5,5)

### sort an array along a specified axis

In [118]:
# sort along the row and return a copy
print(np.sort(arr, axis=0))   

[[0.00353221 0.37239648 0.29824539 0.02661112 0.05209113]
 [0.40677889 0.48088904 0.60752907 0.19836569 0.33554387]
 [0.57214677 0.58749375 0.85715306 0.41862686 0.45308892]
 [0.680903   0.6609518  0.927455   0.55603475 0.50056142]
 [0.93235066 0.90422599 0.94825237 0.81195331 0.92014923]]


In [119]:
# sort along the row in place
arr.sort(axis=0)
print(arr)

[[0.00353221 0.37239648 0.29824539 0.02661112 0.05209113]
 [0.40677889 0.48088904 0.60752907 0.19836569 0.33554387]
 [0.57214677 0.58749375 0.85715306 0.41862686 0.45308892]
 [0.680903   0.6609518  0.927455   0.55603475 0.50056142]
 [0.93235066 0.90422599 0.94825237 0.81195331 0.92014923]]


In [120]:
# sort along the column and return a copy
print(np.sort(arr, axis=1))    

[[0.00353221 0.02661112 0.05209113 0.29824539 0.37239648]
 [0.19836569 0.33554387 0.40677889 0.48088904 0.60752907]
 [0.41862686 0.45308892 0.57214677 0.58749375 0.85715306]
 [0.50056142 0.55603475 0.6609518  0.680903   0.927455  ]
 [0.81195331 0.90422599 0.92014923 0.93235066 0.94825237]]


In [121]:
# sort along the column in place
arr.sort(axis=1)    
print(arr)

[[0.00353221 0.02661112 0.05209113 0.29824539 0.37239648]
 [0.19836569 0.33554387 0.40677889 0.48088904 0.60752907]
 [0.41862686 0.45308892 0.57214677 0.58749375 0.85715306]
 [0.50056142 0.55603475 0.6609518  0.680903   0.927455  ]
 [0.81195331 0.90422599 0.92014923 0.93235066 0.94825237]]


### compute the indices that would sort an array along a specified axis

In [122]:
arr = np.random.rand(5,5)

In [123]:
# along the row
print(np.argsort(arr, axis=0))

[[2 4 4 3 3]
 [4 2 2 0 0]
 [0 0 1 4 1]
 [3 1 3 1 4]
 [1 3 0 2 2]]


In [124]:
# along the column
print(np.argsort(arr, axis=1))

[[4 0 3 1 2]
 [4 2 3 1 0]
 [0 2 1 4 3]
 [4 3 2 0 1]
 [2 1 0 4 3]]


In [125]:
# if axis=None, return the indices of a flattened array
print(np.argsort(arr, axis=None))

[22 19 21 10  4 18  9 20 12 11  7  0 24  3  1 17 23  8 15  2 14 13  6  5
 16]


In [126]:
arr = np.random.rand(3,4)

# 7. Manipulate an Array

### transpose an array

In [127]:
# the following methods return a copy
print(arr.T)
# or 
print(np.transpose(arr))
# or
print(arr.transpose())

[[0.56457034 0.27802359 0.54298878]
 [0.19133572 0.74176042 0.6939847 ]
 [0.67690586 0.5597379  0.91213212]
 [0.21550545 0.33483641 0.58071321]]
[[0.56457034 0.27802359 0.54298878]
 [0.19133572 0.74176042 0.6939847 ]
 [0.67690586 0.5597379  0.91213212]
 [0.21550545 0.33483641 0.58071321]]
[[0.56457034 0.27802359 0.54298878]
 [0.19133572 0.74176042 0.6939847 ]
 [0.67690586 0.5597379  0.91213212]
 [0.21550545 0.33483641 0.58071321]]


### swap axes

In [128]:
arr1 = np.arange(16).reshape((2,2,4))
print(arr1.swapaxes(1,2))

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]


### change the shape of an array

In [129]:
# change the shape of an array and return a copy
arr.reshape((2,6))

array([[0.56457034, 0.19133572, 0.67690586, 0.21550545, 0.27802359,
        0.74176042],
       [0.5597379 , 0.33483641, 0.54298878, 0.6939847 , 0.91213212,
        0.58071321]])

In [130]:
# change the shape of an array in place
arr.resize((2,6))

### flatten an array

In [131]:
# return a copy
arr.flatten()    

array([0.56457034, 0.19133572, 0.67690586, 0.21550545, 0.27802359,
       0.74176042, 0.5597379 , 0.33483641, 0.54298878, 0.6939847 ,
       0.91213212, 0.58071321])

In [132]:
# return a view
# change any element in the view will change the initial array
arr.ravel()      

array([0.56457034, 0.19133572, 0.67690586, 0.21550545, 0.27802359,
       0.74176042, 0.5597379 , 0.33483641, 0.54298878, 0.6939847 ,
       0.91213212, 0.58071321])

### append elements to an array

In [133]:
arr = np.array([1,2,3])

In [134]:
# append a scalar and return a copy
arr1 = np.append(arr, 4)    
print(arr1)

[1 2 3 4]


In [135]:
# append an array and return a copy
arr2 = np.append(arr, [4,5,6])    
print(arr2)

[1 2 3 4 5 6]


### insert elements into an array

In [136]:
# np.insert(array, position, element)

# insert a scalar at a certain position
arr3 = np.insert(arr, 0, 100)    
print(arr3)

[100   1   2   3]


In [137]:
# insert multiple values at a certain position
arr3 = np.insert(arr, 0, [1,2,3])    
print(arr3)

[1 2 3 1 2 3]


### delete elements from an array

In [138]:
# remove the element at position 0
arr4 = np.delete(arr, 0)    
print(arr4)

[2 3]


In [139]:
# remove the element at multiple positions
arr4 = np.delete(arr, [0,2])    
print(arr4)

[2]


### copy an array

In [140]:
arr = np.array([1,2,3])

In [141]:
# the following methods are all deep copy
arr1 = np.copy(arr)
# or 
arr1 = arr.copy()
# or 
arr1 = np.array(arr, copy=True)

# 8. Combine & Split an Array

In [142]:
arr1 = np.array([[1,2,3,4], [1,2,3,4]])
arr2 = np.array([[5,6,7,8], [5,6,7,8]])

### ```np.concatenate((a, b), axis=0)```

In [143]:
# concat along the row
cat = np.concatenate((arr1, arr2), axis=0)        
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [144]:
# concat along the column
cat = np.concatenate((arr1, arr2), axis=1)    
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### ```np.vstack((a, b))``` 
### ```np.r_[a, b]```

In [146]:
# stack arrays vertically
cat = np.vstack((arr1, arr2))
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [147]:
# stack arrays vertically
cat = np.r_[arr1, arr2]
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


### ```np.hstack((a, b))```
### ```np.c_[a, b]```

In [148]:
# stack arrays horizontally
cat = np.hstack((arr1, arr2))
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


In [149]:
# stack arrays horizontally
cat = np.c_[arr1, arr2]
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### split an array 

In [150]:
arr = np.random.rand(6,6)

In [151]:
# split the array vertically into n evenly spaced chunks
arr1 = np.vsplit(arr, 2)
print(arr1)

[array([[0.23268638, 0.74669763, 0.77776902, 0.20040131, 0.82057422,
        0.46493485],
       [0.77976666, 0.23747822, 0.33258027, 0.95369712, 0.65781507,
        0.77287783],
       [0.68837434, 0.20430412, 0.47068875, 0.80896387, 0.67503513,
        0.00602789]]), array([[0.08740774, 0.34679472, 0.94436554, 0.49119048, 0.27017627,
        0.36042372],
       [0.21065263, 0.42120006, 0.21803544, 0.84575251, 0.4562706 ,
        0.27980202],
       [0.93289165, 0.31435135, 0.90971466, 0.04341809, 0.70711506,
        0.48388904]])]


In [152]:
# split the array horizontally into n evenly spaced chunks
arr2 = np.hsplit(arr, 2)
print(arr2)

[array([[0.23268638, 0.74669763, 0.77776902],
       [0.77976666, 0.23747822, 0.33258027],
       [0.68837434, 0.20430412, 0.47068875],
       [0.08740774, 0.34679472, 0.94436554],
       [0.21065263, 0.42120006, 0.21803544],
       [0.93289165, 0.31435135, 0.90971466]]), array([[0.20040131, 0.82057422, 0.46493485],
       [0.95369712, 0.65781507, 0.77287783],
       [0.80896387, 0.67503513, 0.00602789],
       [0.49119048, 0.27017627, 0.36042372],
       [0.84575251, 0.4562706 , 0.27980202],
       [0.04341809, 0.70711506, 0.48388904]])]


# 9. Set Operations

### select the unique elements from an array

In [153]:
arr = np.array([1,1,2,2,3,3,4,5,6])
print(np.unique(arr))

[1 2 3 4 5 6]


In [154]:
# return the number of times each unique item appears
arr = np.array([1,1,2,2,3,3,4,5,6])
uniques, counts = np.unique(arr, return_counts=True)
print(uniques)
print(counts)

[1 2 3 4 5 6]
[2 2 2 1 1 1]


### compute the intersection & union of two arrays

In [155]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([3,4,5,6,7])

In [156]:
# intersection
print(np.intersect1d(arr1, arr2))

[3 4 5]


In [157]:
# union
print(np.union1d(arr1, arr2))

[1 2 3 4 5 6 7]


### compute whether each element of an array is contained in another

In [158]:
print(np.in1d(arr1, arr2))

[False False  True  True  True]


In [159]:
# preserve the shape of the array in the output, if the array is of higher dimensions
print(np.isin(arr1, arr2))

[False False  True  True  True]


### compute the elements in an array that are not in another

In [160]:
print(np.setdiff1d(arr1, arr2))

[1 2]


### compute the elements in either of two arrays, but not both

In [161]:
print(np.setxor1d(arr1, arr2))

[1 2 6 7]


# 10. Linear Algebra

In [162]:
arr1 = np.random.rand(5,5)
arr2 = np.random.rand(5,5)

### matrix multiplication

In [163]:
print(arr1.dot(arr2))
# or
print(np.dot(arr1, arr2))
# or
print(arr1 @ arr2)

[[0.95563447 1.12358808 0.39812114 0.76352183 0.24789558]
 [0.7870859  0.59992387 0.89199725 1.43019637 0.64185657]
 [1.5650989  1.46472327 1.16089527 1.36778617 1.07269596]
 [1.48337878 0.98109158 1.37254694 2.57103787 1.04851006]
 [0.72157094 0.70360163 0.60168396 1.07169143 0.53125141]]
[[0.95563447 1.12358808 0.39812114 0.76352183 0.24789558]
 [0.7870859  0.59992387 0.89199725 1.43019637 0.64185657]
 [1.5650989  1.46472327 1.16089527 1.36778617 1.07269596]
 [1.48337878 0.98109158 1.37254694 2.57103787 1.04851006]
 [0.72157094 0.70360163 0.60168396 1.07169143 0.53125141]]
[[0.95563447 1.12358808 0.39812114 0.76352183 0.24789558]
 [0.7870859  0.59992387 0.89199725 1.43019637 0.64185657]
 [1.5650989  1.46472327 1.16089527 1.36778617 1.07269596]
 [1.48337878 0.98109158 1.37254694 2.57103787 1.04851006]
 [0.72157094 0.70360163 0.60168396 1.07169143 0.53125141]]


### QR factorization 

In [164]:
arr = np.random.rand(5,5)

q, r = np.linalg.qr(arr)
print(q)
print(r)

[[-0.51152095  0.26183335 -0.61377712  0.49384524  0.22177495]
 [-0.61934426 -0.13298329  0.1131099  -0.08900503 -0.76027126]
 [-0.11938131  0.44334006 -0.34496177 -0.8161568   0.06393084]
 [-0.20004374  0.71618201  0.63980678  0.15683997  0.11451786]
 [-0.54816932 -0.45198635  0.28658761 -0.2397588   0.59632373]]
[[-1.38681649 -1.27593386 -0.46921834 -1.24039837 -0.81211368]
 [ 0.          1.03838942  0.18572823  0.47710247 -0.02421662]
 [ 0.          0.         -0.29112427  0.2132969  -0.20382503]
 [ 0.          0.          0.         -0.51308621 -0.19873211]
 [ 0.          0.          0.          0.          0.48169437]]


### singular value decomposition (SVD)

In [165]:
arr = np.random.rand(5,5)

u, s, v = np.linalg.svd(arr)
print(u)
print(s)
print(v)

[[-0.28738603  0.18884895  0.52189219  0.46409226  0.62768802]
 [-0.59307931  0.29580316  0.34180992  0.02831901 -0.66567367]
 [-0.54613093  0.37239425 -0.67361412 -0.13834522  0.30028011]
 [-0.40515297 -0.53704184  0.26856959 -0.64899517  0.23262153]
 [-0.32133826 -0.67063041 -0.29139133  0.58607361 -0.13640159]]
[2.76804372 0.99593868 0.67778284 0.41904875 0.09944154]
[[-0.36352385 -0.24625272 -0.44398196 -0.53961113 -0.56472104]
 [ 0.14239492 -0.95460371  0.15519047  0.21064098  0.00131715]
 [ 0.81986818  0.01676708 -0.5683484  -0.05932049 -0.03156274]
 [ 0.26738586  0.16324498  0.40836426  0.26329328 -0.81594801]
 [ 0.32232756 -0.03407777  0.53760084 -0.76916054  0.11967058]]


### compute eigen values

In [166]:
arr = np.random.rand(5,5)
print(np.linalg.eigvals(arr))

[ 2.67318061+0.j         -0.03150227+0.36872205j -0.03150227-0.36872205j
  0.16932733+0.j         -0.14192836+0.j        ]


### eigen value decomposition

In [167]:
arr = np.random.rand(5,5)

w, v = np.linalg.eig(arr)
print(w)    # eigen values
print(v)    # eigen vectors

[ 2.39687791+0.j         -0.60804738+0.j         -0.23582719+0.j
  0.34235005+0.41542629j  0.34235005-0.41542629j]
[[-0.34493867+0.j         -0.7113452 +0.j         -0.49930042+0.j
   0.03325291-0.16694063j  0.03325291+0.16694063j]
 [-0.49293348+0.j          0.39259471+0.j         -0.24899102+0.j
  -0.33175959-0.34518441j -0.33175959+0.34518441j]
 [-0.46891144+0.j          0.41811839+0.j         -0.02502382+0.j
  -0.10072209+0.53958879j -0.10072209-0.53958879j]
 [-0.49232749+0.j         -0.36047549+0.j          0.72979328+0.j
  -0.17068828-0.06955917j -0.17068828+0.06955917j]
 [-0.41924885+0.j          0.18732814+0.j          0.39430717+0.j
   0.6376005 +0.j          0.6376005 -0.j        ]]


### compute the trace & determinant

In [168]:
# notice this is not a function in linalg!!!
print(np.trace(arr))    

2.2377034359358414


In [169]:
print(np.linalg.det(arr))

0.09959773246193206


### calculate the inverse/psedo-inverse of a matrix

In [170]:
arr = np.random.rand(5,5)

In [171]:
# compute the inverse of a matrix
print(np.linalg.inv(arr))

[[ 0.7854227  -1.33001921  3.91989046 -1.49324004 -0.32672108]
 [ 0.61160082  0.29299114  0.30558052 -1.77008522  1.26256135]
 [-2.49604286  3.40004969 -4.94463533  5.23818436 -2.29257921]
 [-2.07554327  0.23302721 -2.26015671  2.08086526  0.54683302]
 [ 2.94534623 -2.32856283  2.23379027 -3.22755012  1.67880782]]


In [172]:
# compute the psudo-inverse of a matrix
print(np.linalg.pinv(arr))

[[ 0.7854227  -1.33001921  3.91989046 -1.49324004 -0.32672108]
 [ 0.61160082  0.29299114  0.30558052 -1.77008522  1.26256135]
 [-2.49604286  3.40004969 -4.94463533  5.23818436 -2.29257921]
 [-2.07554327  0.23302721 -2.26015671  2.08086526  0.54683302]
 [ 2.94534623 -2.32856283  2.23379027 -3.22755012  1.67880782]]
