In [1]:
import array

import numpy as np

In [2]:
# Confirm which NumPy version the notebook is running against.
np.__version__

'2.2.2'

Python原生的List可以儲存不同data type的元素(實際上List中的每一個element都是一個Python object，對應到一個C struct，且每個元素都各自帶有自己的資訊，如refcnt、type、size、digit等)

In [3]:
# A native Python list may mix element types; map each element to its type.
L1 = [True, 3.14, 5, "Algorithm"]
list(map(type, L1))

[bool, float, int, str]

# Array

In [4]:
# Standard-library typed array: every element shares the single C type named
# by the type code ('i' = signed int), unlike a heterogeneous Python list.
# (`array` is imported in the notebook's first cell together with numpy.)
lst = list(range(10))
arr = array.array('i', lst)
arr

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

使用`numpy` `array`

In [5]:
# dtype sets the element type explicitly: the integer inputs are stored as float32.
np.array([1, 4, 5, 3, 2], dtype='float32')

array([1., 4., 5., 3., 2.], dtype=float32)

In [6]:
# A list of sequences becomes a 2-D array: each range(i, i + 3) forms one row.
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [7]:
# Array filled with zeros (length 10, integer dtype)
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [8]:
# Fill everything with 1; (3, 3) is the shape, i.e. rows & columns
np.ones((3, 3), dtype=float)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [9]:
# full fills the array with the specified value instead (12 here)
np.full((3, 4), 12)

array([[12, 12, 12, 12],
       [12, 12, 12, 12],
       [12, 12, 12, 12]])

In [10]:
# Array counterpart of range: from 0 up to (not including) 20 in steps of 3
np.arange(0, 20, 3)

array([ 0,  3,  6,  9, 12, 15, 18])

In [11]:
# linspace spreads elements evenly on a linear scale: (start, stop, number of elements)
np.linspace(0, 10, 5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [12]:
# random.random builds an array of random values, each between 0 and 1
np.random.random((2, 3))

array([[0.19854197, 0.64192022, 0.96126932],
       [0.81745392, 0.76570364, 0.59370434]])

In [13]:
# random.normal -> array drawn from a normal distribution;
# arguments are (mean, standard deviation, shape)
np.random.normal(0, 1, (3, 3))

array([[-0.07891374,  0.83387145,  0.7263727 ],
       [ 0.62007767, -0.1217671 , -0.27632892],
       [ 0.34043225,  0.06297228, -0.94573691]])

In [14]:
# random.randint -> array of random integers; arguments are (low, high, shape)
np.random.randint(0, 100, (5, 5))

array([[27,  9, 19, 19, 10],
       [30, 39, 44, 67, 47],
       [89, 44, 78,  6, 16],
       [36, 69, 24, 26, 23],
       [99, 35,  0, 20, 52]], dtype=int32)

In [15]:
# Allocate without initializing the values: the contents are whatever was already
# in memory (in the recorded output they happen to match an earlier random array).
np.empty((3, 2))

array([[0.19854197, 0.64192022],
       [0.96126932, 0.81745392],
       [0.76570364, 0.59370434]])

In [16]:
# Uninitialized again, this time with an integer dtype; the displayed values are arbitrary.
np.empty((3, 2), dtype=int)

array([[4596321248728132840, 4603957122870293319],
       [4606833563820272146, 4605538189866049387],
       [4605072064778393550, 4603522832851106797]])

## NumPy Array 屬性

In [17]:
# Seed the global random generator so the experiment (test) data is reproducible.
# ref: https://blog.csdn.net/weixin_41571493/article/details/80549833
np.random.seed(0)

rd1 = np.random.randint(10, size=6)  # 1-D array
rd2 = np.random.randint(10, size=(2, 3))  # 2-D array
rd3 = np.random.randint(10, size=(3, 3, 3))  # 3-D array

In [18]:
# Inspect the basic attributes of the 3-D array rd3.
print(f'rd3的維度(dimensions): {rd3.ndim}')  # ndim: number of dimensions
print(f'rd3的size: {rd3.shape}')  # shape: length along each dimension (labelled "size" in the output)
print(f'rd3的Length(elements): {rd3.size}')  # size: total number of elements
print(f'rd3的data type: {rd3.dtype}')  # dtype: element data type

rd3的維度(dimensions): 3
rd3的size: (3, 3, 3)
rd3的Length(elements): 27
rd3的data type: int32


- `itemsize`: 每個element的大小(單位: bytes)
- `nbytes`: 整個array的總大小(單位: bytes)，等於`itemsize` × element數量

In [19]:
print(f'rd3的item size: {rd3.itemsize}')  # bytes per element
print(f'rd3的bytes: {rd3.nbytes}')  # itemsize * number of elements: 4 * 27 = 108

rd3的item size: 4
rd3的bytes: 108


## Array Indexing

In [20]:
rd1  # the 1-D array used in the indexing examples below

array([5, 0, 3, 3, 7, 9], dtype=int32)

In [21]:
rd1[0]  # first element (indices start at 0)

np.int32(5)

In [22]:
rd1[3]  # fourth element

np.int32(3)

In [23]:
rd1[-1]  # negative indices count from the end: last element

np.int32(9)

In [24]:
rd2  # the 2-D array used in the next examples

array([[3, 5, 2],
       [4, 7, 6]], dtype=int32)

In [25]:
rd2[0, 2]  # for multi-dimensional arrays, index directly with (row, column)

np.int32(2)

In [26]:
rd2[0, 0] = 11  # array values can be modified in place, but pay attention to the dtype
rd2

array([[11,  5,  2],
       [ 4,  7,  6]], dtype=int32)

### One-Dimensional Subarray

In [27]:
# Base array 0..9 for the slicing examples.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
arr[:5]  # first five elements

array([0, 1, 2, 3, 4])

In [29]:
arr[::2]  # every second element, starting at index 0

array([0, 2, 4, 6, 8])

In [30]:
arr[1::2]  # every second element, starting at index 1

array([1, 3, 5, 7, 9])

In [31]:
arr[5::-2]  # start from index 5 and walk backwards with step=-2

array([5, 3, 1])

### Multi-Dimensional Subarray

In [32]:
# Random integers from 1 up to (not including) 10, arranged as 3 rows x 4 columns.
arr = np.random.randint(low=1, high=10, size=(3, 4))

In [33]:
arr  # display the random (3, 4) array

array([[2, 1, 5, 8],
       [4, 3, 8, 3],
       [1, 1, 5, 6]], dtype=int32)

In [34]:
arr[:2, :3]  # take a (2, 3) block: think in terms of (rows, columns)

array([[2, 1, 5],
       [4, 3, 8]], dtype=int32)

In [35]:
arr[::-1, ::-1]  # reverse the order along both axes (rows and columns)

array([[6, 5, 1, 1],
       [3, 8, 3, 4],
       [8, 5, 1, 2]], dtype=int32)

In [36]:
arr[0]  # a single index selects the whole first row

array([2, 1, 5, 8], dtype=int32)

※ 以上slicing操作回傳的都是指向原array同一份資料的view，並非複本；修改subarray也會改到原本的array

## Create the copies of arrays

In [37]:
# .copy() creates an independent copy of the sliced sub-array.
arr_sub_copy = arr[:2, :3].copy()
print(arr_sub_copy)

[[2 1 5]
 [4 3 8]]


In [38]:
# Modify the copy only.
arr_sub_copy[0, 0] = 100
print(arr_sub_copy)

[[100   1   5]
 [  4   3   8]]


In [39]:
arr  # the original array still shows its old values after the copy was modified

array([[2, 1, 5, 8],
       [4, 3, 8, 3],
       [1, 1, 5, 6]], dtype=int32)

## 調整Array shape (`reshape`)

In [40]:
# The number of elements must match before and after reshaping (9 == 3 * 3).
arr = np.arange(1, 10).reshape(3, 3)
print(arr)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [41]:
arr = np.array([1, 2, 3])  # 1-D array, shape (3,)

In [42]:
arr.reshape((1, 3))  # the same 3 elements arranged as 1 row x 3 columns

array([[1, 2, 3]])

In [43]:
arr.shape  # still (3,): the reshape call above did not change arr itself

(3,)

`newaxis`: 在指定位置插入一個長度為1的新axis，用來轉變array shape (如: (3,) -> (1, 3) 或 (3,) -> (3, 1))

In [44]:
arr[np.newaxis, :]  # insert a new leading axis -> one row, same result as reshape((1, 3))

array([[1, 2, 3]])

In [45]:
arr.shape  # arr itself is still 1-D

(3,)

In [46]:
arr.reshape((3, 1))  # 3 rows x 1 column

array([[1],
       [2],
       [3]])

In [47]:
arr[:, np.newaxis]  # insert a new trailing axis -> one column, same result as reshape((3, 1))

array([[1],
       [2],
       [3]])

## Array Concatenation & Splitting

`concatenate`: 第一個參數放的是一個List[array]，可以同時放入多個objects一起concat

In [48]:
# Two 1-D arrays joined end to end.
a, b = np.array([1, 3, 5]), np.array([2, 4, 6])
np.concatenate((a, b))  # mnemonic: somewhat similar to pandas.concat

array([1, 3, 5, 2, 4, 6])

In [49]:
c = [2, 3, 5]  # a plain Python list, not an ndarray
np.concatenate([a, b, c])  # more than two inputs can be joined in one call

array([1, 3, 5, 2, 4, 6, 2, 3, 5])

In [50]:
# 2 x 3 array used by the concatenation examples that follow.
arr = np.array([[1, 2, 3], [4, 5, 6]])

In [51]:
# Example: concatenate the array with itself
np.concatenate([arr, arr])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [52]:
# axis=0 (the default) can be read as concatenating along rows;
# axis=1 can be specified to concatenate along columns instead
np.concatenate([arr, arr], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

### 不同維度(dimension)的array合併

- `np.vstack`: 垂直(vertical) stack
- `np.hstack`: horizontal stack

In [53]:
# Stack a 1-D row on top of a 2 x 3 array.
a = np.array([1, 2, 3])
base = np.array([[2, 4, 6], [1, 3, 5]])
np.vstack((a, base))

array([[1, 2, 3],
       [2, 4, 6],
       [1, 3, 5]])

In [54]:
# Append a 2 x 1 column to the right of base.
b = np.array([[111], [121]])
np.hstack((base, b))

array([[  2,   4,   6, 111],
       [  1,   3,   5, 121]])

### Splitting

In [55]:
arr = [2, 4, 6, 8, 10, 12, 15]
# The second argument lists the indices at which to cut: before index 3 and before index 5.
a, b, c = np.split(arr, indices_or_sections=[3, 5])
print(a, b, c)

[2 4 6] [ 8 10] [12 15]


In [56]:
# 3 x 3 array holding 0..8 for the vsplit / hsplit examples.
arr = np.arange(9).reshape(3, 3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [57]:
# Vertical split at row index 1: the row before it goes to upper, the rest to lower.
upper, lower = np.vsplit(arr, [1])
print(upper)
print(lower)

[[0 1 2]]
[[3 4 5]
 [6 7 8]]


※ 以3 * 3的array為例，因為參數給定`[1]`，所以會從index=1的位置進行切割(index 0為一份，其餘為另一份)

In [58]:
# Horizontal split at column index 1: the column before it goes to left, the rest to right.
left, right = np.hsplit(arr, [1])
print(left)
print(right)

[[0]
 [3]
 [6]]
[[1 2]
 [4 5]
 [7 8]]
