In [None]:
# Views are different numpy array objects
# using the same source data, but each with its own (independent) shape

# Assignment (B = A) makes both variables refer to the same numpy array object

# Shallow copy makes a view object whose shape can differ from the original
# while using the same source data

# Deep copy makes a whole new object with its own shape
# and its own new copy of the source data

In [2]:
import numpy as np

# By default np.array infers the data type from the given values,
# but the dtype argument lets you choose the data type explicitly
A = np.array([1, 2, 3], dtype=np.float64)

print(A)
print(len(A))
# every element is stored with the requested dtype
for element in A:
    print(type(element))

[1. 2. 3.]
3
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>


In [3]:
B = np.array([-1, -2, -3])
print(B)

# arithmetic on arrays is vectorized: A and B are added element by element
C = np.add(A, B)
print(C)

[-1 -2 -3]
[0. 0. 0.]


In [4]:
# a tuple is accepted as well as a list; the float values end up as float64
D = np.array((0.1, 0.2, 0.3))
print(D, D.dtype, type(D[2]), sep='\n')

[0.1 0.2 0.3]
float64
<class 'numpy.float64'>


In [5]:
# astype converts the elements to the requested data type
E = A.astype(np.int32)
print(E)
print(E.dtype)
# the individual elements are numpy scalars of the new type
for position in (0, 2):
    print(type(E[position]))
# the container itself is still an ndarray
print(type(E))

[1 2 3]
int32
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.ndarray'>


In [6]:
rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
A = np.array(rows)
# ndim: the number of dimensions (axes) of the ndarray
print(A.ndim)

2


In [8]:
print(A.shape) # (number of rows, number of columns)
print(type(A.shape)) # the shape is a plain Python tuple

(4, 3)
<class 'tuple'>


In [10]:
print(A.size) # the total number of elements
# it equals the product of the entries of the shape

12


In [13]:
print(A.dtype) # the data type of the elements
print(A.itemsize) # the size of one element in bytes

int32
4


In [36]:
# .data gives access to the buffer holding the elements.
# NOTE(review): the printed address does not identify the element buffer;
# in the recorded output two unrelated arrays show the same address, so it
# appears to be the address of the temporary memoryview object instead.
print(A.data) # the buffer holding the elements
print(np.array([1,2,3,4]).data)
print(np.array([[1,2,3], [4,5,6]]).data)

<memory at 0x000001EFBACB1150>
<memory at 0x000001EFBB773940>
<memory at 0x000001EFBACB1150>


In [47]:
print(A)
# without an axis argument, max/min/sum/mean reduce over all elements
print('MAX:', A.max()) # over all elements in the ndarray
print('MIN:', A.min())
# size counts every element, len() only the length of the first axis
print('SUM:', A.sum(), ' SIZE:', A.size, '!=', 'LEN:', len(A)) # ndarray.size != len(ndarray)
print('MEAN:', A.mean())

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
MAX: 12
MIN: 1
SUM: 78  SIZE: 12 != LEN: 4
MEAN: 6.5


In [49]:
print(A.sum(axis=0)) # sum along axis 0 (the first dimension): one total per column
print(A.sum(axis=1)) # sum along axis 1 (the second dimension): one total per row

[22 26 30]
[ 6 15 24 33]


In [57]:
# three layers of shape (2, 3), filled with 0, 1 and 2 respectively
t = np.array([[[layer] * 3] * 2 for layer in range(3)])
print(t)
print('\nSUM in axis 0 direction:')
# summing over axis 0 adds the three layers together element by element
print(t.sum(axis=0))

[[[0 0 0]
  [0 0 0]]

 [[1 1 1]
  [1 1 1]]

 [[2 2 2]
  [2 2 2]]]

SUM in axis 0 direction:
[[3 3 3]
 [3 3 3]]


In [60]:
print(t.shape) # (length of axis 0, length of axis 1, length of axis 2)

(3, 2, 3)


In [66]:
# 1-dimensional array: its shape is a tuple with a single entry
A = np.array([0, 1, 2, 3])
print(A, A.shape, sep='\n')

[0 1 2 3]
(4,)


In [65]:
# 2-dimensional array written row by row: 3 rows of 4 columns
B = np.array([
    [0, 1, 2, 3],
    [4, 5, 6, 7],
    [8, 9, 10, 11],
])
print(B, B.shape, sep='\n')

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
(3, 4)


In [69]:
# arange(36) yields 0..35; reshape arranges them into the shape (3, 3, 4)
C = np.arange(36).reshape((3, 3, 4))
print(C)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]

 [[24 25 26 27]
  [28 29 30 31]
  [32 33 34 35]]]


In [78]:
# arange -> reshape: 10000 consecutive integers laid out as a 100 x 100 matrix
# (an array this large is printed in summarized form with "...")
side = 100
D = np.arange(side * side).reshape(side, side)
print(D)

[[   0    1    2 ...   97   98   99]
 [ 100  101  102 ...  197  198  199]
 [ 200  201  202 ...  297  298  299]
 ...
 [9700 9701 9702 ... 9797 9798 9799]
 [9800 9801 9802 ... 9897 9898 9899]
 [9900 9901 9902 ... 9997 9998 9999]]


In [79]:
import sys
# threshold: number of elements above which an array is printed summarized with "..."
# alternative: np.set_printoptions(threshold=sys.maxsize) # no summarization
np.set_printoptions(threshold=1000) # 1000 is NumPy's documented default threshold

In [82]:
import timeit

# Python list vs NumPy array speed test
# timeit runs each statement 1,000,000 times (its default `number`)
# and returns the total time in seconds.
# In the recorded run NumPy was roughly 5 times faster.

# square every element of an actual Python list (not a range object)
# with a list comprehension
print(timeit.timeit('[i**2 for i in A]', setup='A=list(range(10))'))

# square every element of a NumPy array with one vectorized operation
print(timeit.timeit('B**2', setup='import numpy as np;B=np.arange(10)'))

2.3583386999962386
0.4329268999863416


In [86]:
# Vectorization also makes numpy arrays faster
# than Python lists,
# because a list has to access its elements one by one in a Python for loop,
# while a numpy array processes all its elements without an explicit Python loop
# Vectorization is element-wise processing of numpy arrays

In [3]:
# For 1-dimensional arrays numpy makes no distinction between
# row vectors and column vectors, so a 1-dimensional array can be
# dotted with itself, as if it were a (1,2) array dotted with a (2,1) array
A = np.array([1, 2])
A.dot(A)

5

In [4]:
# Build an explicit row vector and an explicit column vector from 1-dim data.
# reshape is used instead of assigning to `.shape`: the NumPy documentation
# discourages setting `arr.shape` in place and recommends reshape.
A = np.array([1,2,3]).reshape(1,3) # row vector, shape (1, 3)
print(A)
print(A.shape)

B = np.array([1,2,3]).reshape(3,1) # column vector, shape (3, 1)
print(B)
print(B.shape)

# (1,3) dot (3,1) -> a (1,1) matrix holding the inner product
print(np.dot(A,B))

[[1 2 3]]
(1, 3)
[[1]
 [2]
 [3]]
(3, 1)
[[14]]


In [95]:
# zeros / ones: arrays of the given shape filled with 0 or 1
# when no dtype is given, the data type is 'float64'
shape = (2, 3)
A = np.zeros(shape)
print(A, A.dtype, sep='\n')
B = np.ones(shape)
print(B, B.dtype, sep='\n')


[[0. 0. 0.]
 [0. 0. 0.]]
float64
[[1. 1. 1.]
 [1. 1. 1.]]
float64


In [99]:
# np.empty is fast because it does not initialize the values;
# the array is not guaranteed to hold zeros, it may contain garbage values
C = np.empty(shape=(3, 3))
print(C)

[[2.12199579e-314 2.05833592e-312 2.10077583e-312]
 [2.14321575e-312 4.18033171e-312 4.22277162e-312]
 [4.26521154e-312 6.30232750e-312 6.34476742e-312]]


In [103]:
# NOTE(review): no seed is set, so the printed values differ on every run
D = np.random.random((2,3)) # floats in the half-open interval [0, 1): 0 included, 1 excluded
print(D)

E = np.random.randint(1,7, (2,3)) # integers from 1 to 6 (the upper bound 7 is excluded)
print(E)

[[0.44801202 0.97374301 0.29840625]
 [0.36103205 0.09991142 0.69925598]]
[[3 2 1]
 [5 6 5]]


In [108]:
# arange
# arange(start, stop, step)
# arange(start, stop)
# arange(stop)
# stop is exclusive: values come from the half-open interval [start, stop)

# NOTE(review): with a non-integer step the number of elements can be affected
# by floating point rounding; the NumPy docs suggest np.linspace for that case
A = np.arange(0.00, 0.1, 0.01) # start, stop (exclusive), step
print(A)

# default start: 0
# default step : 1
B = np.arange(10)
print(B)

[0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09]
[0 1 2 3 4 5 6 7 8 9]


In [113]:
# linspace(start, stop, num) returns `num` evenly spaced samples
# with start <= sample <= stop (both end points are included);
# when num is omitted, 50 samples are generated

A = np.linspace(0, 9, num=10)
print(A)
A = np.linspace(0, 10, num=10)
print(A)

B = np.linspace(1, 50) # 50 samples by default
print(B)

[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[ 0.          1.11111111  2.22222222  3.33333333  4.44444444  5.55555556
  6.66666667  7.77777778  8.88888889 10.        ]
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36.
 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50.]


In [118]:
# reshape
A = np.arange(16)
B = A.reshape(4,4) # here B is a view of A (see B.base below)

print(A)
print(B)
print(A.base) # None: A owns its data
print(B.base) # the array whose data B uses, i.e. A

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
None
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


In [119]:
# base
print(B.base is A) # True: B is a view onto A's data
# NOTE: `.data` returns a new memoryview object on every access and the printed
# address is that of the temporary memoryview, not of the element buffer,
# so equal or different addresses below do not reliably show data sharing.
print(A.data)
print(B.data)
print(B.base.data)
# reliable check that A and B use the same memory (prints nothing on success)
assert np.shares_memory(A, B)

True
<memory at 0x000001EFBB992440>
<memory at 0x000001EFBACB2CF0>
<memory at 0x000001EFBB992440>


In [120]:
# data share
# B is a view of A, so writing into B is visible through A as well
B[0] = -1 # set the whole first row of B to -1
print(A)
print(B)

[-1 -1 -1 -1  4  5  6  7  8  9 10 11 12 13 14 15]
[[-1 -1 -1 -1]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [121]:
C=B.reshape(2,8).copy() # copy() gives C its own data
print(C)
print(C.base) # None: C has no base, it is an independent array (not a view)

C[0] = 0 # set the whole first row of C to 0
print(C)
print(B) # unchanged, because C does not share data with B

[[-1 -1 -1 -1  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]]
None
[[ 0  0  0  0  0  0  0  0]
 [ 8  9 10 11 12 13 14 15]]
[[-1 -1 -1 -1]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [124]:
# -1 in a shape asks numpy to work out that dimension from the array size
# -1 can be used only once -> (-1, -1, 8) is an error!
D = np.arange(8).reshape((4, -1)) # -> 4 x 2
print(D)

[[0 1]
 [2 3]
 [4 5]
 [6 7]]


In [132]:
# the ravel method flattens an array into a 1-dim array
# here it returns a view (NumPy makes a copy only when a view is not possible)

A = np.array([[0,1],[2,3]])
B = A.ravel() # a view of A in this case (B.base is A, see below)

print(A)
print(A.data)

print(B)
print(B.data)

print(B.base) # the array whose data B uses
print(B.base.data)
print(B.base is A) # True confirms that B is a view of A

[[0 1]
 [2 3]]
<memory at 0x000001EFBACB2DC0>
[0 1 2 3]
<memory at 0x000001EFBB992800>
[[0 1]
 [2 3]]
<memory at 0x000001EFBACB2DC0>
True


In [140]:
# the resize method changes the shape of the array itself, in place
# it doesn't make a view of the array
A = np.arange(10)
print(A)
print(A.data)

A.resize(2,5)
print(A)
# NOTE(review): a different address here does not by itself show that the data
# moved; `.data` appears to create a new memoryview object on every access and
# the printed address is the memoryview's own address
print(A.data) # ?

[0 1 2 3 4 5 6 7 8 9]
<memory at 0x000001EFBB992800>
[[0 1 2 3 4]
 [5 6 7 8 9]]
<memory at 0x000001EFBACB2B50>


In [142]:
# np.newaxis inserts an axis of length 1,
# which increases the dimension of an array by one

A = np.array([1, 2, 3, 4])
print(A, A.shape, sep='\n')

# new axis in second position -> column, shape (4, 1)
B = A[:, np.newaxis]
print(B, B.shape, sep='\n')

# new axis in first position -> row, shape (1, 4)
C = A[np.newaxis, :]
print(C, C.shape, sep='\n')

[1 2 3 4]
(4,)
[[1]
 [2]
 [3]
 [4]]
(4, 1)
[[1 2 3 4]]
(1, 4)


In [146]:
# vstack stacks the arrays vertically (more rows),
# hstack stacks them horizontally (more columns)

A = np.array([[0, 0], [1, 1]])
B = np.array([[2, 2], [3, 3]])
pair = (A, B)

C = np.vstack(pair)
print(C)

D = np.hstack(pair)
print(D)

[[0 0]
 [1 1]
 [2 2]
 [3 3]]
[[0 0 2 2]
 [1 1 3 3]]


In [147]:
# column_stack
# each 1-dim array becomes one column of the resulting 2-dim array
A = np.array([0, 1, 2])
B = np.array([3, 4, 5])
C = np.array([6, 7, 8])

columns = (A, B, C)
D = np.column_stack(columns)
print(D)

[[0 3 6]
 [1 4 7]
 [2 5 8]]


In [148]:
# concatenate
# joins arrays along the selected direction (axis)

A = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
B = np.array([[3, 3, 3], [4, 4, 4], [5, 5, 5]])

# axis 0: the rows of B are appended below the rows of A
C = np.concatenate([A, B], axis=0)
print(C)

# axis 1: the columns of B are appended to the right of the columns of A
D = np.concatenate([A, B], axis=1)
print(D)

[[0 0 0]
 [1 1 1]
 [2 2 2]
 [3 3 3]
 [4 4 4]
 [5 5 5]]
[[0 0 0 3 3 3]
 [1 1 1 4 4 4]
 [2 2 2 5 5 5]]


In [161]:
# hsplit (splits column-wise, i.e. along the second axis)
# Also read https://www.w3resource.com/numpy/manipulation/hsplit.php

A = np.arange(1,10,1).reshape(3,3)
a = np.hsplit(A, 3) # split it into 3 equal pieces
print(A)
print(a[0]) # the first piece: the first column

b = np.hsplit(A, (1,3)) # split at columns 1 and 3: A[:, :1], A[:, 1:3], A[:, 3:]
print(b[1]) # the middle piece: A[:, 1:3]

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1]
 [4]
 [7]]
[[2 3]
 [5 6]
 [8 9]]


In [162]:
# vsplit (splits row-wise, i.e. along the first axis)
A = np.arange(36).reshape(6,6)
a = np.vsplit(A,3) # split it into 3 equal pieces of 2 rows each
print(A)
print(a[0]) # the first piece: the first two rows

b = np.vsplit(A, (2,5)) # A[:2,:], A[2:5,:], A[5:,:]
print(b[2]) # the last piece: A[5:,:]

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 31 32 33 34 35]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
[[30 31 32 33 34 35]]


In [163]:
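# hsplit, vsplit
# when you think of the two splits above, the names can be misleading:
# hsplit yields pieces lying side by side (left/right), vsplit yields pieces
# stacked on top of each other (top/bottom), so imagine the cut running in the
# direction opposite (perpendicular) to what you first think from the word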

In [170]:
# indexing
A = np.arange(30).reshape(6,5)
print(A)

print(A[4,3]) # a single element: row 4, column 3

print(A[3:5,2:4]) # slicing: rows 3..4 and columns 2..3

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]
 [25 26 27 28 29]]
23
[[17 18]
 [22 23]]


In [173]:
# Iterating over a 2-dim array yields its row vectors one at a time
for row_vector in A:
    print(row_vector)


[0 1 2 3 4]
[5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]
[20 21 22 23 24]
[25 26 27 28 29]


In [171]:
# Iterating over the transpose (A.T) yields the column vectors of A one at a time
for column_vector in A.T:
    print(column_vector)

[ 0  5 10 15 20 25]
[ 1  6 11 16 21 26]
[ 2  7 12 17 22 27]
[ 3  8 13 18 23 28]
[ 4  9 14 19 24 29]


In [187]:
# flat
# How to visit every element of the array
# A.flat is a 1-dim iterator (numpy.flatiter) over the elements of A
# like this:
A = np.array([[0,1,2],[3,4,5],[6,7,8]])
print(A)

print(A.flat) # prints the iterator object itself, not the elements
for el in A.flat:
    print(el)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
<numpy.flatiter object at 0x000001EFB9D21570>
0
1
2
3
4
5
6
7
8


In [180]:
# Slicing
# A slice is the result of slicing, and it is a view:
# it shares the base data, so changes to the data are visible through both

A = np.array([
    [0,1,2,3],
    [4,5,6,7],
    [8,9,10,11]
])

print(A)
print(A[1,2]) # A[1,2] == A[1][2] 
print(A[1][2])

print(A)
A[:2,:2]=0 # assigning to a slice writes into A itself: the top-left 2x2 block becomes 0
print(A)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
6
6
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  0  2  3]
 [ 0  0  6  7]
 [ 8  9 10 11]]


In [185]:
# ... (Ellipsis) stands for all the axes that are not written out
A = np.array([
    [0,1,2,3],
    [4,5,6,7],
    [8,9,10,11]
])

print(A)
print(A[...,3]) # column 3 of every row
print(A[2,...]) # every column of row 2

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 3  7 11]
[ 8  9 10 11]


In [192]:
# Substitution (assignment)
# B = A makes no copy: A and B are two names for the same array object

A = np.arange(10)
B = A

print(A==B) # element-wise comparison
print(A is B) # identity check: the very same object

A[0] = 10
B[-1] = -1
print(A)
print(B)

B.shape = 2,5 # changing the shape through B also changes A
print(A)

def func(a):
    """Print the id of the object received as argument."""
    print(id(a))

# the id seen inside the function equals the id of A: the array is not copied
print(id(A))
func(A)

[ True  True  True  True  True  True  True  True  True  True]
True
[10  1  2  3  4  5  6  7  8 -1]
[10  1  2  3  4  5  6  7  8 -1]
[[10  1  2  3  4]
 [ 5  6  7  8 -1]]
2129125122160
2129125122160


In [207]:
# Shallow Copy
# the data is shared, so a data change is visible through both arrays
# but the view is a different object, so a shape change does not affect the other one

A = np.arange(10)

B = A.view() # new array object looking at the same data

print(A)
print(B)

A[-1] = -1 # data change: visible through B as well
print(A)
print(B)

A.shape = 2,5 # shape change: B keeps its own shape
print(A)
print(B)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[ 0  1  2  3  4  5  6  7  8 -1]
[ 0  1  2  3  4  5  6  7  8 -1]
[[ 0  1  2  3  4]
 [ 5  6  7  8 -1]]
[ 0  1  2  3  4  5  6  7  8 -1]


In [209]:
# A slice behaves like a view (slicing returns a view)
# Slicing is therefore a kind of shallow copy:
# the data is shared, but not the shape (it is a different object)

C = A[0:,1:3] # all rows, columns 1..2
C[0,0] = 100 # written through the slice into the shared data
print(A)
print(B)
print(C)

[[  0 100   2   3   4]
 [  5   6   7   8  -1]]
[  0 100   2   3   4   5   6   7   8  -1]
[[100   2]
 [  6   7]]


In [212]:
# Deep Copy
# copy method
# the data is not shared
# the shape is not shared
# it is a new array object

A = np.arange(10).reshape(2,5)

B = A.copy() # independent copy of the data

print(A)
print(B)

print(B.base is A) # False: B is not a view of A

B[1,4] = -1 # data change in B only: A keeps its values
print(A)
print(B)

B.shape = 10, # shape change in B only: A keeps its shape
print(A)
print(B)

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 1 2 3 4]
 [5 6 7 8 9]]
False
[[0 1 2 3 4]
 [5 6 7 8 9]]
[[ 0  1  2  3  4]
 [ 5  6  7  8 -1]]
[[0 1 2 3 4]
 [5 6 7 8 9]]
[ 0  1  2  3  4  5  6  7  8 -1]


In [219]:
# Arithmetic between arrays of the same shape is applied element by element

A = np.arange(2, 19, 2).reshape(3, 3)
B = np.array([2] * 9).reshape(3, 3)

print(A)
print(B)

# sum, difference, product and (true) quotient, in that order
for result in (A + B, A - B, A * B, A / B):
    print(result)

[[ 2  4  6]
 [ 8 10 12]
 [14 16 18]]
[[2 2 2]
 [2 2 2]
 [2 2 2]]
[[ 4  6  8]
 [10 12 14]
 [16 18 20]]
[[ 0  2  4]
 [ 6  8 10]
 [12 14 16]]
[[ 4  8 12]
 [16 20 24]
 [28 32 36]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


In [220]:
# matrix multiplication
# (a x b) dot (b x c)
# np.dot, the ndarray.dot method and the @ operator (python 3.5 +) give the same result
for product in (np.dot(A, B), A.dot(B), A @ B):
    print(product)

[[24 24 24]
 [60 60 60]
 [96 96 96]]
[[24 24 24]
 [60 60 60]
 [96 96 96]]
[[24 24 24]
 [60 60 60]
 [96 96 96]]


In [224]:
# Operations between arrays of different shapes
# Broadcasting
# 1. Make the ndims of the two arrays equal by adding dimensions of size 1
# to the left of the one with fewer dims: (2,3,1)(3) -> (2,3,1)(1,1,3)
# 2. If the sizes in every dimension are equal or one of them is 1,
# the arrays can be broadcast
# 3. For example, (3, 4, 3)(3), (2,4,1)(4,4), (1,2,3)(1,3)
# 4. A scalar is always broadcast
# After 1. ~ 4., the operation works like
# an operation between arrays of the same shape

A = np.arange(10,101,10).reshape(2,5)
B = np.array([1,2,3,4,5])
print(A.shape)
print(B.shape)
print(A + B) # (2,5)(5) -> (2,5)(1,5) Broadcast!

C = 5 # Scalar is broadcast!
print(A+C)

(2, 5)
(5,)
[[ 11  22  33  44  55]
 [ 61  72  83  94 105]]
[[ 15  25  35  45  55]
 [ 65  75  85  95 105]]
