# Numpy Tutorial

In [1]:
import numpy as np

In [2]:
an_array = np.array([3, 33, 333])

In [3]:
print('type: ',type(an_array))
print('shape: ', an_array.shape)

type:  <class 'numpy.ndarray'>
shape:  (3,)


In [4]:
print(an_array[0])
print(an_array[1])
print(an_array[2])

3
33
333


In [5]:
an_array[0] = 888
print(an_array)

[888  33 333]


In [6]:
another = np.array([[11,12,13],[21,22,23]])
print(another)

[[11 12 13]
 [21 22 23]]


In [7]:
print('shape: ',another.shape)
print(another[0,0],another[0,1],another[1,0])

shape:  (2, 3)
11 12 21


In [8]:
ex1 = np.zeros((2,2))
print(ex1)

[[0. 0.]
 [0. 0.]]


In [9]:
ex2 = np.full((2,2), 9.0)
print(ex2)

[[9. 9.]
 [9. 9.]]


In [10]:
ex3 = np.eye(2,2)
print(ex3)

[[1. 0.]
 [0. 1.]]


In [11]:
ex4 = np.ones((1,2))
print(ex4)

[[1. 1.]]


In [12]:
print(ex4.shape)
print()
print(ex4[0,1])

(1, 2)

1.0


In [13]:
ex5 = np.random.random((2,2))  # np.random.random(): 0 ~ 1 사이
print(ex5)

[[0.96113048 0.07710612]
 [0.31649589 0.19680481]]


In [15]:
an_array = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [16]:
a_slice = an_array[:2, 1:3]
print(a_slice)

[[12 13]
 [22 23]]


In [17]:
print(an_array[0, 1])

12


In [18]:
# 슬라이싱한 원소를 변경해도 원래 배열에 영향을 준다
a_slice[0, 0] = 1000
print(an_array)

[[  11 1000   13   14]
 [  21   22   23   24]
 [  31   32   33   34]]


In [19]:
an_array = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [20]:
row_rank1 = an_array[1, :]  # 하나의 행을 선택
print(row_rank1)
print(row_rank1.shape)  # 1차원이다

[21 22 23 24]
(4,)


In [21]:
row_rank2 = an_array[1:2, :]  # 슬라이싱
print(row_rank2)
print(row_rank2.shape)  # 2차원이다

[[21 22 23 24]]
(1, 4)


In [22]:
col_rank1 = an_array[:, 1]
col_rank2 = an_array[:, 1:2]

print(col_rank1, col_rank1.shape)  # 1차원
print()
print(col_rank2, col_rank2.shape)  # 2차원

[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


In [34]:
an_array = np.array([[11,12,13], [21,22,23], [31,32,33], [41,42,43]])
print(an_array)

[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [35]:
col_indices = np.array([0,1,2,0])
print(col_indices)

[0 1 2 0]


In [36]:
row_indices = np.arange(4)
print(row_indices)

[0 1 2 3]


In [37]:
for row, col in zip(row_indices, col_indices):
    print(row, ',' ,col)

0 , 0
1 , 1
2 , 2
3 , 0


In [38]:
print(an_array[row_indices, col_indices])  # 각 리스트를 짝지은 좌표에 속하는 원소

[11 22 33 41]


In [39]:
an_array[row_indices, col_indices] += 100000
print(an_array)

[[100011     12     13]
 [    21 100022     23]
 [    31     32 100033]
 [100041     42     43]]


In [40]:
an_array = np.array([[11,12],[21,22],[31,32]])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [41]:
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [42]:
print(an_array[filter])  # 필터링한 원소가 한 줄로 출력

[21 22 31 32]


In [43]:
an_array[(an_array % 2 == 0)]  # 짝수인 원소

array([12, 22, 32])

In [44]:
an_array[an_array % 2 == 0] += 100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


In [45]:
ex1 = np.array([11,12])
print(ex1.dtype)  # 데이터타입

int32


In [46]:
ex2 = np.array([11.0, 12.0])
print(ex2.dtype)

float64


In [47]:
ex3 = np.array([11,21], dtype=np.int64)  # dtype으로 데이터타입 직접 지정
print(ex3.dtype)

int64


In [48]:
ex4 = np.array([11.1,12.7], dtype=np.int64)  # float 소수점 부분이 사라짐
print(ex4)

[11 12]


In [49]:
print(ex4.dtype)

int64


In [50]:
ex5 = np.array([11,21], dtype=np.float64)
print(ex5.dtype)
print()
print(ex5)

float64

[11. 21.]


### 연산

In [51]:
x = np.array([[111,112],[121,122]], dtype=np.int)
y = np.array([[211.1,212.1],[221.1,222.1]], dtype=np.float64)

print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[211.1 212.1]
 [221.1 222.1]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.array([[111,112],[121,122]], dtype=np.int)


In [52]:
print(x + y)
print()
print(np.add(x, y))

[[322.1 324.1]
 [342.1 344.1]]

[[322.1 324.1]
 [342.1 344.1]]


In [53]:
print(x - y)
print()
print(np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]


In [54]:
print(x * y)
print()
print(np.multiply(x, y))

[[23432.1 23755.2]
 [26753.1 27096.2]]

[[23432.1 23755.2]
 [26753.1 27096.2]]


In [55]:
print(x / y)
print()
print(np.divide(x, y))

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]


In [56]:
print(np.sqrt(x))

[[10.53565375 10.58300524]
 [11.         11.04536102]]


In [57]:
print(np.exp(x))  # e ** x

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]


### 기본 통계

In [58]:
arr = 10 * np.random.randn(2, 5)
print(arr)

[[ 12.08077047   2.61606528 -16.08286959 -17.85395699   0.62527835]
 [ -0.99077657  -4.64610109   7.57579688   2.00459269   1.05125884]]


In [59]:
print(arr.mean())  # 평균

-1.3619941736027703


In [60]:
print(arr.mean(axis=1))  # 열 기준 평균

[-3.7229425   0.99895415]


In [61]:
print(arr.mean(axis=0))  # 행 기준 평균

[ 5.54499695 -1.01501791 -4.25353635 -7.92468215  0.83826859]


In [62]:
print(arr.sum())  # 합계

-13.619941736027704


In [63]:
print(np.median(arr, axis=1))  # 중앙값

[0.62527835 1.05125884]


### Sorting

In [1]:
import numpy as np

unsorted = np.random.randn(10)
print(unsorted)

[ 1.75441269 -0.18076018  0.23268711  0.04005952 -1.80433856  0.94580831
  0.76884918 -0.81759336  0.11215169 -1.32808002]


In [2]:
sorted_ = np.array(unsorted)
sorted_.sort()
print(sorted_)
print()
print(unsorted)

[-1.80433856 -1.32808002 -0.81759336 -0.18076018  0.04005952  0.11215169
  0.23268711  0.76884918  0.94580831  1.75441269]

[ 1.75441269 -0.18076018  0.23268711  0.04005952 -1.80433856  0.94580831
  0.76884918 -0.81759336  0.11215169 -1.32808002]


In [3]:
unsorted.sort()
print(unsorted)

[-1.80433856 -1.32808002 -0.81759336 -0.18076018  0.04005952  0.11215169
  0.23268711  0.76884918  0.94580831  1.75441269]


### Unique element

In [4]:
array = np.array([1,2,1,3,2,1,3,2])
print(np.unique(array))

[1 2 3]


In [5]:
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamb','bulb','chair'])
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamb' 'bulb' 'chair']


In [6]:
print(np.intersect1d(s1, s2))  # 교집합

['bulb' 'chair']


In [7]:
print(np.union1d(s1, s2))  # 합집합

['bulb' 'chair' 'desk' 'lamb']


In [8]:
print(np.setdiff1d(s1, s2))  # 차집합 s1 - s2

['desk']


In [9]:
print(np.in1d(s1, s2))  # 교집합 여부 - bool

[False  True  True]


### Broadcasting

In [10]:
start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [11]:
add_rows = np.array([1,0,2])
print(add_rows)

[1 0 2]


In [12]:
y = start + add_rows  # 자동으로 브로드캐스팅 됨
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [13]:
add_cols = np.array([[0,1,2,3]])
add_cols = add_cols.T

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [14]:
y = start + add_cols
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [15]:
add_scalar = np.array([1])
print(start + add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [16]:
arrA = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(arrA)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [17]:
arrB = [0,1,0,2]
print(arrB)

[0, 1, 0, 2]


In [18]:
print(arrA + arrB)

[[ 1  3  3  6]
 [ 5  7  7 10]
 [ 9 11 11 14]]


### Speed Test: ndarrays vs lists

In [19]:
from numpy import arange
from timeit import Timer

size = 1000000
timeits = 1000

In [20]:
nd_array = arange(size)
print(type(nd_array))

<class 'numpy.ndarray'>


In [21]:
timer_numpy = Timer('nd_array.sum()', 'from __main__ import nd_array')
print('Time taken by numpy ndarray: %f seconds' % 
      (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.001311 seconds


In [22]:
a_list = list(range(size))
print(type(a_list))

<class 'list'>


In [23]:
timer_list = Timer ('sum(a_list)', 'from __main__ import a_list')
print('Time taken by list: %f seconds' %
      (timer_list.timeit(timeits)/timeits))

Time taken by list: 0.049567 seconds


#### ndarrays 압승!

### Read or Write to Disk

In [24]:
x = np.array([23.23, 24.24])

In [25]:
np.save('an_array', x)

In [26]:
np.load('an_array.npy')

array([23.23, 24.24])

In [27]:
np.savetxt('array.txt', X=x, delimiter=',')

In [28]:
!cat array.txt

'cat'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.


In [29]:
np.loadtxt('array.txt', delimiter=',')

array([23.23, 24.24])

In [30]:
x2d = np.array([[1,1],[1,1]])
y2d = np.array([[2,2],[2,2]])

print(x2d.dot(y2d))
print()
print(np.dot(x2d, y2d))

[[4 4]
 [4 4]]

[[4 4]
 [4 4]]


In [31]:
a1d = np.array([9,9])
b1d = np.array([10,10])

print(a1d.dot(b1d))
print()
print(np.dot(a1d, b1d))

180

180


In [32]:
print(x2d.dot(a1d))
print()
print(np.dot(x2d, a1d))

[18 18]

[18 18]


In [33]:
x = np.random.randn(8)
x

array([-4.58545785e-01, -1.71218624e-01,  1.03700026e+00, -5.08853787e-01,
       -1.05439663e+00, -1.28558340e+00, -2.32813898e-03,  2.52141345e+00])

In [34]:
y = np.random.randn(8)
y

array([ 2.36096974, -0.27560871, -0.2163989 , -0.95573004, -1.13858348,
        0.18095776,  0.85059438, -0.6896835 ])

In [35]:
np.maximum(x, y)

array([ 2.36096974, -0.17121862,  1.03700026, -0.50885379, -1.05439663,
        0.18095776,  0.85059438,  2.52141345])

In [36]:
arr = np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [39]:
arr.reshape(4,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

### Transpose

In [40]:
ex1 = np.array([[11,12],[21,22]])
ex1.T

array([[11, 21],
       [12, 22]])

### Indexing using where()

In [42]:
x_1 = np.array([1,2,3,4,5])
y_1 = np.array([11,22,33,44,55])

filter = np.array([True, False, True, False, True])

In [44]:
output = np.where(filter, x_1, y_1)  # True이면 x_1, False이면 y_1
print(output)

[ 1 22  3 44  5]


In [45]:
mat = np.random.rand(5,5)
mat

array([[1.56227909e-01, 8.61490133e-01, 9.07347360e-01, 3.86482483e-01,
        9.19638111e-04],
       [3.92265412e-01, 9.63211743e-01, 1.88034137e-01, 9.70427752e-01,
        1.29716613e-01],
       [9.97209701e-01, 3.84240549e-01, 1.11021276e-01, 4.38714476e-01,
        1.99721520e-01],
       [1.49838517e-02, 9.04020328e-01, 5.23618662e-01, 9.70876837e-01,
        8.42420853e-01],
       [3.31605998e-01, 6.60315090e-01, 5.28995195e-01, 8.74074885e-01,
        2.37328616e-01]])

In [46]:
np.where(mat > 0.5, 1000, -1)

array([[  -1, 1000, 1000,   -1,   -1],
       [  -1, 1000,   -1, 1000,   -1],
       [1000,   -1,   -1,   -1,   -1],
       [  -1, 1000, 1000, 1000, 1000],
       [  -1, 1000, 1000, 1000,   -1]])

In [47]:
arr_bools = np.array([True, False, True, True, False])

In [49]:
arr_bools.any()

True

In [51]:
arr_bools.all()

False

### Random Number Generation

In [52]:
Y = np.random.normal(size = (1, 5))[0]
print(Y)

[ 0.13751118 -0.36051848  0.56412635  0.71688954  0.6113688 ]


In [54]:
Z = np.random.randint(low=2, high=50, size=4)
print(Z)

[13 10 11 26]


In [55]:
np.random.permutation(Z)  # 순서 섞기

array([13, 11, 26, 10])

In [56]:
np.random.uniform(size=4)  # 0 ~ 1

array([0.31310356, 0.88988607, 0.16953379, 0.04956809])

In [57]:
np.random.normal(size=4)

array([ 1.43261596, -1.38905639, -0.22990898,  1.83483447])

### Merging data sets

In [58]:
K = np.random.randint(low=2, high=50, size=(2,2))
print(K)
print()
M = np.random.randint(low=2, high=50, size=(2,2))
print(M)

[[43 24]
 [ 3 16]]

[[ 4 39]
 [36 26]]


In [59]:
np.vstack((K, M))

array([[43, 24],
       [ 3, 16],
       [ 4, 39],
       [36, 26]])

In [60]:
np.hstack((K, M))

array([[43, 24,  4, 39],
       [ 3, 16, 36, 26]])

In [61]:
np.concatenate([K,M], axis=0)

array([[43, 24],
       [ 3, 16],
       [ 4, 39],
       [36, 26]])

In [62]:
np.concatenate([K,M.T], axis=1)

array([[43, 24,  4, 36],
       [ 3, 16, 39, 26]])