# Numpy : Numerical Python

In [49]:
# de facto standard
import numpy as np

### 1. ndarray : N-dimensional Array

In [6]:
# 1 dimension : vector
# Build a 1-D array from a Python list; dtype=float stores the values as float64.
a = np.array([1, 4, 5, 8], dtype=float)
print(a)

[1. 4. 5. 8.]


In [7]:
# Element at index 3, the element dtype, and the shape tuple of the vector.
a[3], a.dtype, a.shape

(8.0, dtype('float64'), (4,))

### 2. Array shape

In [8]:
# 2 dimension : matrix
# Each inner list becomes one row of the 2-D array.
m = [
    [1, 4],
    [5, 8],
]
b = np.array(m, dtype=float)
b

array([[1., 4.],
       [5., 8.]])

### 3. Tensor

In [9]:
# 3 dimension : tensor
# Two stacked 2 x 2 matrices give an array of shape (2, 2, 2).
t = [
    [[1, 4], [5, 8]],
    [[11, 14], [15, 18]],
]
c = np.array(t, dtype=float)
c

array([[[ 1.,  4.],
        [ 5.,  8.]],

       [[11., 14.],
        [15., 18.]]])

In [10]:
# shape | length along each axis, shown for the 1-D, 2-D and 3-D arrays
a.shape, b.shape, c.shape

((4,), (2, 2), (2, 2, 2))

In [11]:
a.ndim, b.ndim, c.ndim  # ndim | number of dimensions (axes) of the array

(1, 2, 3)

In [12]:
a.size, b.size, c.size  # size | total number of elements in the array (product of the shape)

(4, 4, 8)

In [13]:
# Rebuild the tensor with an explicit 32-bit float dtype (shown in the repr as dtype=float32).
c = np.array(t, dtype = np.float32)
c

array([[[ 1.,  4.],
        [ 5.,  8.]],

       [[11., 14.],
        [15., 18.]]], dtype=float32)

- reshape : size는 동일

In [14]:
# reshape | same 8 elements viewed as a 2 x 4 array; only the shape changes
c.reshape(2, 4).shape

(2, 4)

In [15]:
c.reshape(2, 4) # 2 rows of 4 elements each

array([[ 1.,  4.,  5.,  8.],
       [11., 14., 15., 18.]], dtype=float32)

In [16]:
# reshape to a single axis of length 8 (1-D)
c.reshape(8,)

array([ 1.,  4.,  5.,  8., 11., 14., 15., 18.], dtype=float32)

In [17]:
c.reshape(-1, 4)    # rows of 4; -1 lets NumPy infer the number of rows from the total size

array([[ 1.,  4.,  5.,  8.],
       [11., 14., 15., 18.]], dtype=float32)

- flatten - 1차원으로 변환

In [18]:
# flatten() | returns the elements as a 1-D array
c.flatten()

array([ 1.,  4.,  5.,  8., 11., 14., 15., 18.], dtype=float32)

### 4. Indexing and Slicing

In [19]:
a = np.arange(6)    # equivalent to np.array(range(6))
a

array([0, 1, 2, 3, 4, 5])

In [20]:
# 0..5 arranged as 2 rows x 3 columns
a = np.arange(6).reshape(2, 3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [21]:
# Both forms select row 0, column 1; a[0, 1] does it in a single indexing step.
a[0][1], a[0, 1]

(1, 1)

In [22]:
# all rows, first two columns
a[:, :2]

array([[0, 1],
       [3, 4]])

In [23]:
# 0..15 in rows of 4; -1 makes NumPy infer the row count (16 / 4 = 4)
b = np.arange(16).reshape(-1, 4)
b

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [24]:
# rows 1-2 and columns 1-2 (slice stop index is exclusive)
b[1:3, 1:3]

array([[ 5,  6],
       [ 9, 10]])

In [25]:
# rows 0-2 and columns 0-1
b[0:3, 0:2]

array([[0, 1],
       [4, 5],
       [8, 9]])

In [26]:
# Split the matrix by column: everything except the last column goes to X,
# the last column alone goes to Y (which is therefore 1-D).
X = b[:, :-1]
Y = b[:, -1]
X.shape, Y.shape

((4, 3), (4,))

### 5. Creation

- arange

In [27]:
# arange(start, stop) | stop is exclusive, so this yields 1..9
np.arange(1, 10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
# arange also accepts a non-integer step
np.arange(0, 1.1, 0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [29]:
np.arange(0, 1.1, 0.1).tolist() # tolist() | convert the array to a plain Python list

[0.0,
 0.1,
 0.2,
 0.30000000000000004,
 0.4,
 0.5,
 0.6000000000000001,
 0.7000000000000001,
 0.8,
 0.9,
 1.0]

In [30]:
# Exact == on floats is unreliable: element 3 is 0.30000000000000004, so this is False.
# The element must be indexed first; comparing the whole list to 0.3 is always False.
np.arange(0, 1.1, 0.1).tolist()[3] == 0.3

False

In [31]:
# Compare floats within a tolerance instead of using ==.
# abs() is required: without it any negative difference would pass the check trivially.
abs(np.arange(0, 1.1, 0.1).tolist()[3] - 0.3) < 1e-8

True

- ones, zeros, empty

In [32]:
a = np.zeros((5,), dtype = np.int8) # zeros(shape) | array of the given shape with every element set to 0
a

array([0, 0, 0, 0, 0], dtype=int8)

In [33]:
b = np.ones((2, 4), dtype = np.int8)    # ones(shape) | array of the given shape with every element set to 1
b

array([[1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int8)

In [34]:
np.empty((2, 5))    # empty | allocates the array without initializing it, so the displayed values are arbitrary

array([[0.1, 0.2, 0.3, 0.4, 0.5],
       [0.6, 0.7, 0.8, 0.9, 1. ]])

In [35]:
a = np.arange(24).reshape(4, -1)
np.ones_like(a) # same shape and dtype as a, filled with 1

array([[1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1]])

In [36]:
# same shape and dtype as a, filled with 0
np.zeros_like(a)

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

- identity, eye

In [37]:
np.identity(3)  # identity(n) | n x n identity matrix: 1 on the main diagonal, 0 elsewhere

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [38]:
np.eye(3, 5)    # eye(rows, cols) | rows x cols matrix with 1 on the diagonal (need not be square)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])

In [39]:
# k shifts the diagonal: negative k moves it below the main diagonal, positive k above it
np.eye(N = 3, M = 5, k = -1), np.eye(3, 5, 2)

(array([[0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.]]),
 array([[0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]))

- Random sampling

In [40]:
# Seed the global RNG so the samples below are reproducible.
np.random.seed(2021)
np.random.uniform(0, 1, 10).reshape(2, 5)   # uniform(low, high, size) | uniform distribution: every value in [low, high) is equally likely

array([[0.60597828, 0.73336936, 0.13894716, 0.31267308, 0.99724328],
       [0.12816238, 0.17899311, 0.75292543, 0.66216051, 0.78431013]])

In [41]:
np.random.normal(0, 1, 10).reshape(2, 5)    # normal(mean, std, size) | samples drawn from a normal distribution

array([[ 0.10641374,  0.42215483,  0.12420684, -0.83795346,  0.4090157 ],
       [ 0.10275122, -1.90772239,  1.1002243 , -1.40232506, -0.22508127]])

In [42]:
np.random.binomial(100, 1/6, 10)    # binomial(number of trials, success probability, number of samples) | binomial distribution

array([17, 13, 17, 16, 17, 20, 19, 11, 18, 18])

In [43]:
np.random.poisson() # poisson() | Poisson distribution; with no arguments a single sample is drawn using the default rate

1

In [None]:
# standard_t(df) | Student's t-distribution.
# df (degrees of freedom) is a required argument; calling it with none raises TypeError.
np.random.standard_t(df=10)

In [None]:
# f(dfnum, dfden) | F-distribution.
# Both degrees-of-freedom arguments are required; calling it with none raises TypeError.
np.random.f(dfnum=5, dfden=10)

### 6. Operation

In [68]:
# 1..10 as float32; sum() with no axis adds every element
a = np.arange(1, 11, dtype = np.float32)
a.sum()

55.0

In [70]:
# 1..12 arranged as 3 rows x 4 columns
b = np.arange(1, 13).reshape(3, 4)
b

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [71]:
b.sum(axis = 1) # axis = 1 | sum across the columns of each row -> one total per row

array([10, 26, 42])

In [72]:
b.sum(axis = 0) # axis = 0 | sum down the rows of each column -> one total per column

array([15, 18, 21, 24])

In [73]:
# mean() | arithmetic mean: over all elements, per row (axis = 1), per column (axis = 0)
print(b.mean())
print(b.mean(axis = 1))
print(b.mean(axis = 0))

6.5
[ 2.5  6.5 10.5]
[5. 6. 7. 8.]


In [75]:
print(b.std())  # std() | standard deviation over all elements
print(b.std(axis = 1))  # per row
print(b.std(axis = 0))  # per column

3.452052529534663
[1.11803399 1.11803399 1.11803399]
[3.26598632 3.26598632 3.26598632 3.26598632]


In [22]:
# Grid from -1 to 1 in steps of 0.01; the stop value 1.01 is used so that 1.0 is included.
x = np.arange(-1, 1.01, 0.01)
x.size

201

In [25]:
# exp() | element-wise e**x
y = np.exp(x)
y

array([0.36787944, 0.37157669, 0.3753111 , 0.37908304, 0.38289289,
       0.38674102, 0.39062784, 0.39455371, 0.39851904, 0.40252422,
       0.40656966, 0.41065575, 0.41478291, 0.41895155, 0.42316208,
       0.42741493, 0.43171052, 0.43604929, 0.44043165, 0.44485807,
       0.44932896, 0.4538448 , 0.45840601, 0.46301307, 0.46766643,
       0.47236655, 0.47711392, 0.48190899, 0.48675226, 0.4916442 ,
       0.4965853 , 0.50157607, 0.50661699, 0.51170858, 0.51685133,
       0.52204578, 0.52729242, 0.5325918 , 0.53794444, 0.54335087,
       0.54881164, 0.55432728, 0.55989837, 0.56552544, 0.57120906,
       0.57694981, 0.58274825, 0.58860497, 0.59452055, 0.60049558,
       0.60653066, 0.61262639, 0.61878339, 0.62500227, 0.63128365,
       0.63762815, 0.64403642, 0.65050909, 0.65704682, 0.66365025,
       0.67032005, 0.67705687, 0.68386141, 0.69073433, 0.69767633,
       0.70468809, 0.71177032, 0.71892373, 0.72614904, 0.73344696,
       0.74081822, 0.74826357, 0.75578374, 0.76337949, 0.77105

In [24]:
# Logistic sigmoid 1 / (1 + e**(-x)), evaluated element-wise on x
sigmoid = 1/(1 + np.exp(-x))
sigmoid

array([0.26894142, 0.27091208, 0.27289178, 0.2748805 , 0.27687819,
       0.27888482, 0.28090034, 0.28292471, 0.28495789, 0.28699984,
       0.2890505 , 0.29110983, 0.29317778, 0.2952543 , 0.29733935,
       0.29943286, 0.30153478, 0.30364507, 0.30576366, 0.3078905 ,
       0.31002552, 0.31216867, 0.31431989, 0.31647911, 0.31864627,
       0.3208213 , 0.32300414, 0.32519473, 0.32739298, 0.32959884,
       0.33181223, 0.33403307, 0.3362613 , 0.33849684, 0.34073961,
       0.34298954, 0.34524654, 0.34751054, 0.34978145, 0.3520592 ,
       0.35434369, 0.35663485, 0.35893259, 0.36123682, 0.36354746,
       0.36586441, 0.36818758, 0.37051689, 0.37285223, 0.37519353,
       0.37754067, 0.37989357, 0.38225213, 0.38461624, 0.38698582,
       0.38936077, 0.39174097, 0.39412633, 0.39651675, 0.39891212,
       0.40131234, 0.4037173 , 0.4061269 , 0.40854102, 0.41095957,
       0.41338242, 0.41580948, 0.41824062, 0.42067575, 0.42311474,
       0.42555748, 0.42800387, 0.43045378, 0.4329071 , 0.43536

- concatenate

In [26]:
# vstack() | vertical stack: places b underneath a, so (2, 2) and (1, 2) give (3, 2)
a = np.arange(1, 5).reshape((2, 2))
b = np.array([5, 6]).reshape(1, 2)
np.vstack([a, b])

array([[1, 2],
       [3, 4],
       [5, 6]])

In [27]:
np.concatenate((a, b), axis = 0)    # concatenate((a, b), axis = 0 or 1) | join arrays along the given axis

array([[1, 2],
       [3, 4],
       [5, 6]])

In [28]:
b.T # T | transpose: rows and columns are swapped

array([[5],
       [6]])

In [29]:
# axis = 1 joins side by side; b is transposed first so its row count matches a
np.concatenate((a, b.T), axis = 1)

array([[1, 2, 5],
       [3, 4, 6]])

In [30]:
np.hstack((a, b.T)) # hstack() | horizontal stack: joins arrays side by side (column-wise)

array([[1, 2, 5],
       [3, 4, 6]])

### 7. Array operation

In [34]:
# Arithmetic operators on arrays work element by element.
a + a

array([[2, 4],
       [6, 8]])

In [31]:
# element-wise subtraction
a - a

array([[0, 0],
       [0, 0]])

In [32]:
# element-wise multiplication (not a matrix product)
a * a

array([[ 1,  4],
       [ 9, 16]])

In [33]:
# element-wise true division; the result is a float array even for integer inputs
a / a

array([[1., 1.],
       [1., 1.]])

- Dot product

In [36]:
a = np.arange(1, 7).reshape(2, 3)
a.dot(a.T)  # dot() | matrix product: (2, 3) times (3, 2) gives (2, 2)

array([[14, 32],
       [32, 77]])

In [37]:
# function form of the same matrix product
np.dot(a, a.T)

array([[14, 32],
       [32, 77]])

- Broadcasting

In [40]:
# Broadcasting: the scalar is applied to every element.
a + 10

array([[11, 12, 13],
       [14, 15, 16]])

In [41]:
# every element multiplied by the scalar
a * 2

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [42]:
# every element squared
a ** 2

array([[ 1,  4,  9],
       [16, 25, 36]])

In [43]:
# The length-3 vector is broadcast across each row of the 2 x 3 array.
a + np.array([10, 20, 30])

array([[11, 22, 33],
       [14, 25, 36]])

### 8. Comparison

In [44]:
a = np.arange(5)
np.all(a), np.any(a)    # all() | True only if every element is truthy (no zeros) / any() | True if at least one element is truthy

(False, True)

In [45]:
# Comparison is element-wise and returns a boolean array.
a > 2

array([False, False, False,  True,  True])

In [46]:
# The boolean array is used as a mask: only elements where it is True are kept.
a[a>2]

array([3, 4])

- Logical operation

In [57]:
a = np.arange(5)
b = np.logical_and(a > 0, a < 3)    # logical_and() | element-wise AND: True only where both conditions hold
a, b

(array([0, 1, 2, 3, 4]), array([False,  True,  True, False, False]))

In [59]:
c = np.logical_not(b)   # logical_not() | element-wise negation: True <-> False
c

array([ True, False, False,  True,  True])

In [60]:
np.logical_or(b, c) # logical_or() | element-wise OR: True where at least one operand is True

array([ True,  True,  True,  True,  True])

In [61]:
np.logical_xor(b, c)    # logical_xor() | element-wise exclusive OR: True where exactly one operand is True (c is the negation of b, so all True here)

array([ True,  True,  True,  True,  True])

In [62]:
a = np.arange(5)
np.where(a > 2) # where(condition) | indices of the elements that satisfy the condition (returned as a tuple, one array per axis)

(array([3, 4]),)

- NaN(Not a Number), Inf(Infinite)

In [54]:
# nan | "not a number", an undefined value / inf | positive infinity
# The aliases np.NaN and np.Inf were removed in NumPy 2.0; the lowercase names work on every version.
a = np.array([1, np.nan, np.inf], dtype = float)
a

array([ 1., nan, inf])

In [55]:
np.isnan(a), np.isfinite(a) # isnan() | True where the value is NaN / isfinite() | True where the value is neither NaN nor infinite

(array([False,  True, False]), array([ True, False, False]))

- argmax, argmin

In [66]:
a = np.arange(6)
np.argmax(a), np.argmin(a)  # argmax(), argmin() | index of the largest / smallest value

(5, 0)

In [67]:
# 0..11 arranged as 3 rows x 4 columns
b = np.arange(12).reshape(3, 4)
b

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [68]:
# axis = 0 | for each column, the row index holding the largest value
np.argmax(b, axis = 0)

array([2, 2, 2, 2])

In [71]:
# axis = 1 | for each row, the column index holding the smallest value
np.argmin(b, axis = 1)

array([0, 0, 0])

### 9. Boolean and Fancy Index

In [75]:
# Seed the global RNG, then draw four reproducible samples uniformly from [1, 10).
np.random.seed(2021)
a = np.random.uniform(low=1, high=10, size=4)
a

array([6.45380451, 7.60032425, 2.25052441, 3.81405775])

In [76]:
# element-wise comparison -> boolean mask
a > 3

array([ True,  True, False,  True])

In [77]:
# Boolean indexing
a[a > 3]    # keep only the elements for which a > 3 is True

array([6.45380451, 7.60032425, 3.81405775])

In [78]:
# Fancy indexing
# Source array for the example: 1..4 doubled, i.e. 2, 4, 6, 8.
a = 2 * np.arange(1, 5)
a

array([2, 4, 6, 8])

In [80]:
b = np.array([0, 1, 3, 2, 1, 3])
a[b]    # the values of b are used as indices into a (indices may repeat)

array([2, 4, 8, 6, 4, 8])

In [82]:
# 0..7 arranged as 2 rows x 4 columns
a = np.arange(8).reshape(2, 4)
a

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [81]:
b = np.array([0, 0, 1, 1, 0])   # row indices
c = np.array([0, 2, 3, 1, 2])   # column indices
a[b, c] # a[row indices, col indices] | the index arrays are paired element by element: a[b[i], c[i]]

array([0, 2, 7, 5, 2])