# Numpy - Numerical Python

## 1. ndarray (N-dimensional Array)

In [1]:
import numpy as np

In [2]:
# Array 생성
test_array = np.array([1, 4, 5, 8], float)
test_array

array([1., 4., 5., 8.])

In [3]:
test_array[3]

8.0

In [4]:
test_array[2:]

array([5., 8.])

In [5]:
test_array.dtype, test_array.shape

(dtype('float64'), (4,))

In [6]:
test_array = np.array([1,2,"5",4.2])
test_array

array(['1', '2', '5', '4.2'], dtype='<U11')

In [7]:
test_array = np.array([1,2,"5",4.2], float)
test_array

array([1. , 2. , 5. , 4.2])

## 2. Array Shape

In [8]:
# Vector (1차원)
test_array = np.array([1, 4, 5, 8], float)
test_array.shape

(4,)

In [9]:
# Matrix (2차원, 행렬)
matrix = [[1,2,5,8], [2,3,4,9], [4,5,6,7]]
np.array(matrix, int).shape

(3, 4)

In [10]:
np.array(matrix, int)

array([[1, 2, 5, 8],
       [2, 3, 4, 9],
       [4, 5, 6, 7]])

In [11]:
matrix

[[1, 2, 5, 8], [2, 3, 4, 9], [4, 5, 6, 7]]

In [12]:
# Tensor (3차원 이상)
tensor = [[[1,2,5,8], [2,3,4,9], [4,5,6,7]],
          [[1,2,5,8], [2,3,4,9], [4,5,6,7]],
          [[1,2,5,8], [2,3,4,9], [4,5,6,7]],
          [[1,2,5,8], [2,3,4,9], [4,5,6,7]]]
np.array(tensor, int).shape

(4, 3, 4)

In [13]:
np.array(tensor, int)

array([[[1, 2, 5, 8],
        [2, 3, 4, 9],
        [4, 5, 6, 7]],

       [[1, 2, 5, 8],
        [2, 3, 4, 9],
        [4, 5, 6, 7]],

       [[1, 2, 5, 8],
        [2, 3, 4, 9],
        [4, 5, 6, 7]],

       [[1, 2, 5, 8],
        [2, 3, 4, 9],
        [4, 5, 6, 7]]])

In [14]:
np.array(tensor, int).ndim

3

In [15]:
np.array(tensor, int).size

48

In [16]:
np.array([[1, 2, 3], [4.5, '5', '6']], dtype=np.float32)

array([[1. , 2. , 3. ],
       [4.5, 5. , 6. ]], dtype=float32)

In [17]:
np.array([[1, 2, 3], [4.5, '5', '6']], dtype=np.float32).nbytes

24

### reshape

In [18]:
test_matrix = [[1,2,3,4], [5,6,7,8]]
np.array(test_matrix).shape

(2, 4)

In [20]:
np.array(test_matrix).reshape(8)

array([1, 2, 3, 4, 5, 6, 7, 8])

In [21]:
np.array(test_matrix).reshape(8).shape

(8,)

In [22]:
np.array(test_matrix).reshape(4, 2)

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [23]:
np.array(test_matrix).reshape(2, 2, 2)

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [24]:
np.array(test_matrix).reshape(-1, 2) # -1은 크기를 기반으로 row 갯수 선정

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [25]:
np.array(test_matrix).reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8]])

In [26]:
# flatten
test_matrix = [[[1,2,3,4], [5,6,7,8]], [[2,3,4,5], [6,7,8,9]]]
np.array(test_matrix).flatten()

array([1, 2, 3, 4, 5, 6, 7, 8, 2, 3, 4, 5, 6, 7, 8, 9])

## 3. Indexing and Slicing

In [27]:
a = np.array([[1,2,3], [4,5,6]], int)
print(a)

[[1 2 3]
 [4 5 6]]


In [28]:
a[0][0]

1

In [29]:
a[0, 0]

1

In [30]:
a[1, 2]

6

In [31]:
a = np.array([[1,2,3,4,5], [6,7,8,9,10]], int)
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [32]:
a[:, 2:]

array([[ 3,  4,  5],
       [ 8,  9, 10]])

In [33]:
a[1, 1:3]

array([7, 8])

In [34]:
a[1:3]

array([[ 6,  7,  8,  9, 10]])

In [35]:
a[:, ::2]

array([[ 1,  3,  5],
       [ 6,  8, 10]])

## 4. Creation

In [36]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [37]:
np.arange(0, 5, 0.5)

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

In [38]:
list(range(0, 5, 0.5))

TypeError: 'float' object cannot be interpreted as an integer

In [39]:
np.arange(0, 5, 0.5).tolist()

[0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]

In [40]:
np.arange(30).reshape(5, 6)

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

In [41]:
np.arange(30).reshape(-1, 2, 3)

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23]],

       [[24, 25, 26],
        [27, 28, 29]]])

In [42]:
np.zeros(shape=(10,), dtype=np.int8)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

In [43]:
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [45]:
np.ones((2, 4), np.int8)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int8)

In [50]:
np.empty((3, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [51]:
test_matrix = np.arange(30).reshape(5,6)
np.ones_like(test_matrix)

array([[1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1]])

In [52]:
np.zeros_like(test_matrix)

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [53]:
# 단위행렬: Identity
np.identity(n=3, dtype=np.int8)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=int8)

In [54]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [55]:
# eye (대각선이 1인 행렬)
np.eye(N=3, M=5, dtype=np.int8)

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0]], dtype=int8)

In [56]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [57]:
np.eye(3, 5, k=2)    # k : diagonal offset (0 = main diagonal; k=2 shifts the ones two columns to the right)

array([[0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [58]:
# diag (대각 행렬의 값을 추출)
matrix = np.arange(9).reshape(3,3)
np.diag(matrix)

array([0, 4, 8])

In [59]:
np.diag(matrix, k=1)    # k : diagonal offset (k=1 selects the diagonal just above the main one)

array([1, 5])

## Random sampling

In [60]:
np.random.seed(1000)    # 시드로 난수 생성 초기값 지정

In [61]:
np.random.uniform(0, 1, 10).reshape(2,5)

array([[0.65358959, 0.11500694, 0.95028286, 0.4821914 , 0.87247454],
       [0.21233268, 0.04070962, 0.39719446, 0.2331322 , 0.84174072]])

In [62]:
np.random.normal(0, 1, 10).reshape(2,5)

array([[ 0.66728131, -0.80611561, -1.19606983, -0.40596016, -0.18237734],
       [ 0.10319289, -0.13842199,  0.70569237,  1.27179528, -0.98674733]])

In [63]:
np.random.binomial(20, 0.5, 10)

array([ 9,  6, 12, 11, 12, 10, 13, 13,  5,  7])

### 기타 분포
- np.random.poisson(lam, size)		# 포아송 분포
- np.random.standard_t(df, size)		# t-분포
- np.random.f(dfnum, dfden, size)		# F-분포

## 5. Operation function

In [65]:
# sum
# np.float was only an alias for the builtin float and was removed in NumPy 1.24,
# so pass float directly to get a float64 accumulator.
test_array = np.arange(1, 11)
test_array.sum(dtype=float)

55.0

In [67]:
test_array = np.arange(1,13).reshape(3,4)
test_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [68]:
test_array.sum()

78

In [69]:
test_array.sum(axis=0)

array([15, 18, 21, 24])

In [70]:
test_array.sum(axis=1)

array([10, 26, 42])

In [71]:
test_array = np.arange(1,25).reshape(2,3,4)
test_array

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [72]:
test_array.sum(axis=0)

array([[14, 16, 18, 20],
       [22, 24, 26, 28],
       [30, 32, 34, 36]])

In [73]:
test_array.sum(axis=1)

array([[15, 18, 21, 24],
       [51, 54, 57, 60]])

In [74]:
test_array.sum(axis=2)

array([[10, 26, 42],
       [58, 74, 90]])

In [75]:
# 평균, 표준편차
test_array = np.arange(1,13).reshape(3,4)
test_array.mean()

6.5

In [76]:
test_array.mean(axis=1)

array([ 2.5,  6.5, 10.5])

In [77]:
test_array.std()

3.452052529534663

In [78]:
test_array.std(axis=1)

array([1.11803399, 1.11803399, 1.11803399])

In [79]:
# Mathematical Function
np.exp(test_array)

array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01],
       [1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03],
       [8.10308393e+03, 2.20264658e+04, 5.98741417e+04, 1.62754791e+05]])

In [80]:
np.sqrt(test_array)

array([[1.        , 1.41421356, 1.73205081, 2.        ],
       [2.23606798, 2.44948974, 2.64575131, 2.82842712],
       [3.        , 3.16227766, 3.31662479, 3.46410162]])

In [81]:
# Concatenate
a = np.array([[1, 2], [3, 4]])
a

array([[1, 2],
       [3, 4]])

In [82]:
b = np.array([[5, 6]])
b

array([[5, 6]])

In [84]:
np.vstack((a, b))

array([[1, 2],
       [3, 4],
       [5, 6]])

In [85]:
c = np.array([7, 8])
c

array([7, 8])

In [86]:
np.vstack((a, c))

array([[1, 2],
       [3, 4],
       [7, 8]])

In [87]:
np.concatenate((a,b), axis=0)    # vstack

array([[1, 2],
       [3, 4],
       [5, 6]])

In [88]:
a = np.array([[1, 2], [3, 4]])
a

array([[1, 2],
       [3, 4]])

In [90]:
b = np.array([[5], [6]])
b

array([[5],
       [6]])

In [91]:
np.hstack((a, b))

array([[1, 2, 5],
       [3, 4, 6]])

In [93]:
c = np.array([[7, 8]])
c.T                       # T - 전치행렬(Transpose)

array([[7],
       [8]])

In [95]:
np.hstack((a, c.T))

array([[1, 2, 7],
       [3, 4, 8]])

In [96]:
np.concatenate((a, c.T), axis=1)    # hstack

array([[1, 2, 7],
       [3, 4, 8]])

## 6. Array Operation

In [99]:
test_a = np.array([[1,2,3], [4,5,6]], float)
test_b = np.arange(1, 7).reshape(2, 3)

In [101]:
# element-wise plus
test_a + test_b

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [102]:
test_a - test_b

array([[0., 0., 0.],
       [0., 0., 0.]])

In [103]:
test_a * test_b

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [104]:
test_a / test_b

array([[1., 1., 1.],
       [1., 1., 1.]])

In [106]:
# Dot product (행렬의 곱)
# (l, m) x (m, n) -> (l, n)
test_a = np.arange(1,7).reshape(2,3)
test_a

array([[1, 2, 3],
       [4, 5, 6]])

In [107]:
test_b = np.arange(7,13).reshape(3,2)
test_b

array([[ 7,  8],
       [ 9, 10],
       [11, 12]])

In [108]:
test_a.dot(test_b)

array([[ 58,  64],
       [139, 154]])

In [109]:
# Transpose
test_a.dot(test_a.T)

array([[14, 32],
       [32, 77]])

In [110]:
test_a.transpose()

array([[1, 4],
       [2, 5],
       [3, 6]])

In [111]:
# Broadcasting
test_a

array([[1, 2, 3],
       [4, 5, 6]])

In [112]:
scalar = 3
test_a + scalar

array([[4, 5, 6],
       [7, 8, 9]])

In [113]:
test_a // scalar

array([[0, 0, 1],
       [1, 1, 2]], dtype=int32)

In [114]:
test_a % scalar

array([[1, 2, 0],
       [1, 2, 0]], dtype=int32)

In [115]:
test_a ** scalar

array([[  1,   8,  27],
       [ 64, 125, 216]], dtype=int32)

In [116]:
# Matrix와 Vector간의 연산도 가능
test_matrix = np.arange(1,13).reshape(4,3)
test_matrix

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [117]:
test_vector = np.arange(10,40,10)
test_vector

array([10, 20, 30])

In [118]:
test_matrix + test_vector

array([[11, 22, 33],
       [14, 25, 36],
       [17, 28, 39],
       [20, 31, 42]])

In [119]:
a = np.arange(1, 5).reshape(4, 1)
a

array([[1],
       [2],
       [3],
       [4]])

In [120]:
b = np.arange(10,40,10)
b

array([10, 20, 30])

In [121]:
a + b

array([[11, 21, 31],
       [12, 22, 32],
       [13, 23, 33],
       [14, 24, 34]])

## 7. Comparison

In [122]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [123]:
a > 5

array([False, False, False, False, False, False,  True,  True,  True,
        True])

In [124]:
np.any(a>8)    # OR 조건

True

In [125]:
np.any(a<0)

False

In [126]:
np.all(a>8)    # AND 조건

False

In [127]:
np.all(a<10)

True

In [128]:
test_a = np.array([1, 3, 0], float)
test_b = np.array([5, 2, 1], float)

In [129]:
test_a > test_b

array([False,  True, False])

In [130]:
test_a == test_b

array([False, False, False])

In [131]:
(test_a > test_b).any()

True

In [132]:
# Logical Operation
a = np.array([1, 3, 0], float)
b = np.logical_and(a>0, a<3)

In [133]:
b

array([ True, False, False])

In [134]:
c = np.logical_not(b)
c

array([False,  True,  True])

In [135]:
np.logical_or(b, c)

array([ True,  True,  True])

In [136]:
# where(condition, True, False)
np.where(a>0, 3, 2)

array([3, 3, 2])

In [138]:
# 3항 연산자
# result = condition ? when True : when False; (Java)
# result = when True if condition else when False
x = 3
y = True if x > 0 else False
y

True

In [139]:
y = '조건이 만족했을 경우의 값' if x > 0 else '조건이 틀린 경우의 값'
y

'조건이 만족했을 경우의 값'

In [140]:
if x > 0:
    y = '조건이 만족했을 경우의 값'  
else:
    y = '조건이 틀린 경우의 값'

In [141]:
a = np.arange(10, 20)
np.where(a>15)

(array([6, 7, 8, 9], dtype=int64),)

In [142]:
# np.nan / np.inf are the canonical lowercase constants; the np.NaN / np.Inf
# aliases were removed in NumPy 2.0.
a = np.array([1, np.nan, np.inf], float)
a

array([ 1., nan, inf])

In [143]:
np.isnan(a)

array([False,  True, False])

In [144]:
np.isfinite(a)

array([ True, False, False])

In [145]:
# argmax, argmin
a = np.array([1,2,4,5,8,78,23,3])
np.argmax(a)

5

In [146]:
np.argmin(a)

0

In [147]:
a = np.array([[1,2,4,7],[9,88,6,45],[8,78,23,3]])
a

array([[ 1,  2,  4,  7],
       [ 9, 88,  6, 45],
       [ 8, 78, 23,  3]])

In [148]:
np.argmax(a, axis=1)

array([3, 1, 1], dtype=int64)

In [149]:
np.argmax(a, axis=0)

array([1, 1, 2, 1], dtype=int64)

In [150]:
np.argmin(a, axis=1)

array([0, 2, 3], dtype=int64)

## 8. Boolean and Fancy Index

In [151]:
test_array = np.array([1,4,0,2,3,8,9,7], float)
test_array > 3

array([False,  True, False, False, False,  True,  True,  True])

In [152]:
test_array[test_array > 3]

array([4., 8., 9., 7.])

In [153]:
condition = test_array < 3
test_array[condition]

array([1., 0., 2.])

In [154]:
# Fancy index
a = np.array([2, 4, 6, 8], float)
b = np.array([0, 0, 1, 3, 2, 1], int)

In [155]:
a[b]

array([2., 2., 4., 8., 6., 4.])

In [156]:
a.take(b)

array([2., 2., 4., 8., 6., 4.])