NumPy Cheat Sheet
========

기초
----

In [110]:
import numpy as np

In [2]:
np.array([1, 2, 3, 4, 5])  # build a 1-D array from a Python list

array([1, 2, 3, 4, 5])

In [3]:
# Fix the element type explicitly instead of letting NumPy infer it.
np.array([1, 2, 3, 4, 5], dtype='int64')

array([1, 2, 3, 4, 5], dtype=int64)

In [4]:
np.array([range(i, i + 3) for i in [1, 2, 3]])  # each inner range becomes one row of a 2-D array

array([[1, 2, 3],
       [2, 3, 4],
       [3, 4, 5]])

In [6]:
np.array(range(1, 3))  # a range can be passed directly; its stop value (3) is excluded

array([1, 2])

배열 만들기
------

In [7]:
# Ten zeros stored as integers.
np.zeros(10, dtype='int')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [8]:
# 3x5 array filled with 1.0.
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [9]:
np.full((3, 5), 3.14)  # 3x5 array with every element set to the given value

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [10]:
np.arange(0, 20, 2)  # start, stop (excluded), step

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [11]:
np.linspace(0, 1, 5)  # 5 evenly spaced values; both endpoints are included

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [13]:
np.eye(3)  # 3x3 identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [20]:
np.empty(3)  # allocated but NOT initialized: the values shown are whatever was already in memory

array([1., 1., 1.])

랜덤한 배열 생성하기
-----------

In [12]:
np.random.random((3, 3)) # generate a matrix of random values between 0 and 1

array([[0.30024721, 0.91730164, 0.74988515],
       [0.01548515, 0.7647472 , 0.81156161],
       [0.50427389, 0.22649988, 0.12775086]])

In [19]:
np.random.normal(0, 1, (3, 3)) # generate normally distributed random numbers (mean 0, std 1)

array([[-1.49779413,  0.41115843,  0.09546645],
       [-0.52042876, -0.5644294 , -1.36739099],
       [ 0.66472334, -1.03552387,  0.52178655]])

In [15]:
np.random.randint(0, 10, (3, 3)) # generate a matrix of random integers within the given range

array([[9, 0, 0],
       [9, 4, 9],
       [2, 9, 4]])

In [21]:
np.random.seed(0)  # seed the global generator so later np.random calls repeat on a fresh run

배열 정보 파악하기
--------

In [23]:
# Three random integer arrays (values 0-9) of rank 1, 2 and 3.
x1 = np.random.randint(10, size=6)
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

In [24]:
x3.ndim # number of dimensions

3

In [26]:
x3.shape # shape of the array (length along each axis)

(3, 4, 5)

In [27]:
x3.size # size of the array (total number of elements: 3 * 4 * 5)

60

In [28]:
x2.dtype # data type of the array's elements

dtype('int32')

In [30]:
x1.itemsize # size of a single element, in bytes

4

In [31]:
x1.nbytes # total size of the array, in bytes (itemsize * number of elements)

24

배열 인덱싱, 슬라이싱
---------

In [35]:
# Fresh 3x4 array of random integers in 0-9 for the slicing examples.
x4 = np.random.randint(10, size=(3, 4))
x4

array([[1, 2, 4, 2],
       [0, 3, 2, 0],
       [7, 5, 9, 0]])

In [36]:
x4[:2, :3]  # first two rows, first three columns

array([[1, 2, 4],
       [0, 3, 2]])

In [34]:
x4[:, ::2]  # every row, every second column (columns 0 and 2)

array([[1, 4],
       [0, 2],
       [7, 9]])

In [37]:
x4_copy = x4[:2, :2].copy() # make a copy (a plain slice would be a view of x4)
x4_copy

array([[1, 2],
       [0, 3]])

배열 모양 바꾸기
---------

In [43]:
np.arange(1, 10).reshape((3, 3))  # lay the values 1..9 out as a 3x3 grid, row by row

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [44]:
x = np.array([1, 2, 3])
x[:, np.newaxis]  # add a trailing axis: shape (3,) -> (3, 1), i.e. a column

array([[1],
       [2],
       [3]])

In [45]:
x.reshape(3, 1)  # the same column, written with reshape

array([[1],
       [2],
       [3]])

In [46]:
x[np.newaxis, :]  # add a leading axis: shape (3,) -> (1, 3), i.e. a row

array([[1, 2, 3]])

In [47]:
x.reshape(1, 3)  # the same row, written with reshape

array([[1, 2, 3]])

배열 합치기
------

In [48]:
x = np.arange(1, 10)  # the integers 1..9
y = np.array([3, 4, 7])

In [50]:
np.concatenate([x, y])  # join the 1-D arrays end to end

array([1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 7])

In [51]:
# Two 3x3 arrays of random integers (low=1, high=10) to be joined below.
x2 = np.random.randint(1, 10, (3, 3))
x3 = np.random.randint(1, 10, (3, 3))

In [52]:
np.concatenate([x2, x3])  # no axis given: the rows of x3 are appended below x2

array([[3, 8, 3],
       [3, 4, 4],
       [3, 4, 5],
       [2, 3, 2],
       [5, 7, 9],
       [3, 4, 1]])

In [53]:
# Join side by side: the columns of x3 are appended to the right of x2.
np.concatenate([x2, x3], axis=1)

array([[3, 8, 3, 2, 3, 2],
       [3, 4, 4, 5, 7, 9],
       [3, 4, 5, 3, 4, 1]])

In [54]:
np.vstack([x2, x3])  # vertical stack; here it matches concatenate with the default axis

array([[3, 8, 3],
       [3, 4, 4],
       [3, 4, 5],
       [2, 3, 2],
       [5, 7, 9],
       [3, 4, 1]])

In [56]:
np.hstack([x2, x3])  # horizontal stack; here it matches concatenate with axis=1

array([[3, 8, 3, 2, 3, 2],
       [3, 4, 4, 5, 7, 9],
       [3, 4, 5, 3, 4, 1]])

배열 나누기
-------

In [57]:
x = np.arange(1, 10)  # the integers 1..9

In [59]:
x1, x2, x3 = np.split(x, [3, 6])  # cut before index 3 and before index 6 -> three pieces
print(x1, x2, x3)

[1 2 3] [4 5 6] [7 8 9]


In [60]:
y = np.arange(0, 16).reshape(4, 4)
upper, lower = np.vsplit(y, [2])  # split by rows: rows 0-1 / rows 2-3
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [61]:
left, right = np.hsplit(y, [2])  # split by columns: columns 0-1 / columns 2-3
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


In [62]:
z = np.array(y).reshape(2, 2, 4)  # copy of y rearranged into a 3-D array of shape (2, 2, 4)
np.dsplit(z, 2)  # split into 2 equal parts along the third axis

[array([[[ 0,  1],
         [ 4,  5]],
 
        [[ 8,  9],
         [12, 13]]]), array([[[ 2,  3],
         [ 6,  7]],
 
        [[10, 11],
         [14, 15]]])]

Ufuncs
-------

In [63]:
x = np.arange(5)
y = np.empty(5)  # preallocated destination buffer
# `out` makes the ufunc write its result straight into y
# instead of allocating a new array.
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


배열 집계
---

In [64]:
x = np.arange(1, 6)
np.add.reduce(x)  # fold the array with +: 1 + 2 + 3 + 4 + 5

15

In [65]:
np.multiply.accumulate(x)  # running product, keeping every intermediate result

array([  1,   2,   6,  24, 120], dtype=int32)

In [66]:
np.multiply.outer(x, x)  # product of every pair of elements (a multiplication table)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

In [67]:
x = np.random.random(100)  # 100 random floats
print(np.sum(x))
print(np.max(x))

52.288757385980134
0.9988470065678665


브로드캐스팅
--------

In [68]:
a = np.arange(3)
b = np.arange(3)
a + b  # identical shapes: plain element-wise addition

array([0, 2, 4])

In [69]:
M = np.ones((3, 3))
M + a  # a (shape (3,)) is broadcast across every row of M

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [70]:
c = a.reshape(1, 3)  # row, shape (1, 3)
d = b[:, np.newaxis]  # column, shape (3, 1)
c + d  # both operands are stretched to a common (3, 3) shape

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

비교, 마스크, 부울 로직
-------------

In [71]:
a = np.random.random((10, 3))  # 10 rows, 3 columns of random floats
a

array([[0.94437239, 0.7395508 , 0.49045881],
       [0.22741463, 0.25435648, 0.05802916],
       [0.43441663, 0.31179588, 0.69634349],
       [0.37775184, 0.17960368, 0.02467873],
       [0.06724963, 0.67939277, 0.45369684],
       [0.53657921, 0.89667129, 0.99033895],
       [0.21689698, 0.6630782 , 0.26332238],
       [0.020651  , 0.75837865, 0.32001715],
       [0.38346389, 0.58831711, 0.83104846],
       [0.62898184, 0.87265066, 0.27354203]])

In [75]:
mean = a.mean(0)  # mean along axis 0: one value per column
mean

array([0.3837778 , 0.59437955, 0.4401476 ])

In [76]:
center = a - mean  # broadcasting subtracts each column's mean from that column
center.mean(0)  # the column means are now zero up to floating-point rounding

array([2.77555756e-17, 2.22044605e-17, 5.55111512e-18])

In [83]:
# 3x4 array of random integers in 0-11 for the comparison / mask examples.
a = np.random.randint(12, size=(3, 4))
a

array([[ 4,  2,  4,  6],
       [ 3, 10,  3,  7],
       [ 8,  5,  0,  8]])

In [84]:
a > 3  # element-wise comparison yields a boolean array of the same shape

array([[ True, False,  True,  True],
       [False,  True, False,  True],
       [ True,  True, False,  True]])

In [78]:
# True counts as 1, so summing the mask along axis 1 counts,
# for each row, how many values are below 6.
np.sum(a < 6, axis=1)

array([3, 2, 2])

In [79]:
# Test the integer array `a` from this section. The previous `x` was the
# 100 floats in [0, 1) left over from the aggregation section, so comparing
# it with 8 could never be True.
np.any(a > 8)  # is at least one element greater than 8?

True

In [80]:
# Test the integer array `a` from this section. The previous `x` was the
# 100 floats in [0, 1) left over from the aggregation section, which are
# trivially all below 10.
np.all(a < 10)  # is every element below 10?

False

In [81]:
# Count the elements of the integer array `a` that lie strictly between
# 1 and 4. The previous `x` (floats in [0, 1)) could never match.
# `&` combines the two boolean masks element-wise; the parentheses are
# required because `&` binds tighter than the comparisons.
plus = np.sum((a > 1) & (a < 4))
plus

3

In [82]:
a[a < 5]  # boolean mask as index: keeps only the elements below 5, as a 1-D array

array([4, 2, 4, 3, 3, 0])

팬시 인덱싱
--------

In [85]:
# A private generator with a fixed seed, so this example always
# produces the same ten integers below 100.
rng = np.random.RandomState(42)
x = rng.randint(100, size=10)
x

array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [86]:
[x[3], x[7], x[2]]  # picking elements one at a time gives a plain Python list

[71, 86, 14]

In [88]:
ind = [3, 7, 4]
x[ind]  # index with a list of positions to fetch them all at once, as an array

array([71, 86, 60])

In [89]:
ind = np.array([[3, 7],
               [4, 5]])
x[ind]  # the result takes the shape of the index array (2x2), not of x

array([[71, 86],
       [60, 20]])

In [90]:
x = np.arange(12).reshape((3, 4))  # 3x4 grid holding 0..11
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [92]:
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
x[row, col]  # indices are paired up: x[0, 2], x[1, 1], x[2, 3]

array([ 2,  5, 11])

In [93]:
x[row[:, np.newaxis], col]  # column of row indices x row of column indices broadcasts to a 3x3 pick

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [94]:
row[:, np.newaxis]  # the row indices as a (3, 1) column

array([[0],
       [1],
       [2]])

In [95]:
x[2, [2, 0, 1]]  # fixed row 2, columns taken in the listed order

array([10,  8,  9])

In [97]:
x[1:, [2, 0, 1]]  # a row slice combined with a list of columns

array([[ 6,  4,  5],
       [10,  8,  9]])

In [98]:
# 1/0 become True/False, so the mask selects columns 0 and 2.
mask = np.array([1, 0, 1, 0], dtype=bool)
x[row[:, np.newaxis], mask]

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

배열 정렬하기
----------

In [99]:
x = np.array([2, 1, 4, 3, 6])
np.sort(x)  # returns a sorted copy of x

array([1, 2, 3, 4, 6])

In [100]:
i = np.argsort(x)  # the indices that would put x in sorted order
i

array([1, 0, 3, 2, 4], dtype=int64)

In [115]:
rng = np.random.RandomState(42)  # private generator with a fixed seed
x = rng.randint(0, 10, (4, 6))  # 4x6 grid of random integers (low=0, high=10)
x

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [116]:
# Sort along axis 0: every column is ordered independently.
np.sort(x, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [117]:
# Sort along axis 1: every row is ordered independently.
np.sort(x, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

In [118]:
y = np.array([77, 32, 13, 12, 63, 85, 34])
# The element at index 4 lands in its sorted position; smaller values come
# before it and larger ones after it, each side in no particular order.
np.partition(y, 4)

array([32, 12, 13, 34, 63, 77, 85])

In [119]:
# Partition every row around the element that belongs at index 2.
np.partition(x, 2, axis=1)

array([[3, 4, 6, 7, 6, 9],
       [2, 3, 4, 7, 6, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 9, 5]])

구조화된 배열
---------

In [120]:
# Three parallel lists: position k in each list describes the same person.
name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

In [121]:
# Structured array with four zero-filled records. Each record has a
# `name` (unicode, up to 10 characters), an `age` (4-byte integer) and a
# `weight` (8-byte float). The stray `x = np.zeros(4, dtype=int)` that
# used to sit here was never read and only overwrote the notebook-level `x`.
data = np.zeros(4, dtype={'names': ('name', 'age', 'weight'),
                          'formats': ('U10', 'i4', 'f8')})
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [122]:
# Fill each field of the structured array from the matching list.
data['name'] = name
data['age'] = age
data['weight'] = weight
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


In [123]:
data[data['age'] < 30]['name']  # names of the records whose age is below 30

array(['Alice', 'Doug'], dtype='<U10')

References
----------

http://rfriend.tistory.com/284