In [2]:
#
# NumPyのndarray(多次元配列オブジェクト N-dimensional array)
#
import numpy as np

In [2]:
# Draw a 2x3 array of samples from the standard normal distribution.
# No seed is set in the visible cells, so the values differ on every run.
data = np.random.randn(2,3)
data

array([[-0.52478642,  0.42851989,  0.5590878 ],
       [-0.28563274, -0.49560206,  0.83596911]])

In [3]:
# Broadcasting: the scalar is applied to every element,
# so each value in the array is multiplied by 10.
data * 10

array([[-5.24786417,  4.28519895,  5.59087797],
       [-2.85632738, -4.95602064,  8.35969115]])

In [4]:
# Creating an ndarray: start from a plain Python list mixing ints and a float.
data1 = [6, 7.5, 8, 0, 1]

In [5]:
# np.array converts the list into an ndarray; the mixed int/float values are
# stored with a single float type (note the trailing dots in the output).
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [6]:
# Two-dimensional ndarray: a list of equal-length lists becomes a 2x4 array.
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [7]:
# Shape of the array as a tuple: (number of rows, number of columns).
arr2.shape

(2, 4)

In [8]:
# One-dimensional array of ten zeros (float values, as the output shows).
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
# np.empty allocates a 2x3x2 array without initializing it: the contents are
# whatever happened to be in memory, so the zeros shown here are not guaranteed.
np.empty((2,3,2))

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [10]:
# Specifying the element type explicitly with the dtype argument.
# Note: this rebinds arr1, replacing the array built from data1 earlier.
arr1 = np.array([1,2,3], dtype=np.float64)

In [11]:
# Inspect the element type of arr1.
arr1.dtype

dtype('float64')

In [12]:
# The same values stored as 32-bit integers. Note: this rebinds arr2.
arr2 = np.array([1,2,3], dtype=np.int32)

In [13]:
# Inspect the element type of arr2.
arr2.dtype

dtype('int32')

In [14]:
# Type conversion: astype returns a new array of the requested type.
# Float -> int drops the fractional part (truncation toward zero, not rounding),
# e.g. 3.7 -> 3 and -2.6 -> -2 in the output.
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.7, -4.4])
arr.astype(int)

array([ 3, -1, -2,  0, 12, -4])

In [22]:
# Arithmetic on ndarrays: a 2x3 integer array used by the following cells.
arr = np.array([[1,2,3],[4,5,6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [16]:
# Element-wise multiplication: each element is multiplied by the element at the
# same position (this is not a matrix product).
arr * arr

array([[ 1,  4,  9],
       [16, 25, 36]])

In [17]:
# Indexing: a single index on a 2-D array selects a whole row.
arr[1]

array([4, 5, 6])

In [18]:
# Two comma-separated indices select a single element: row 0, column 2.
arr[0,2]

3

In [23]:
# Swapping rows: indexing with the list [1,0] returns the rows in that order.
arr[[1,0]]

array([[4, 5, 6],
       [1, 2, 3]])

In [51]:
# Updating an ndarray.
# A sub-array obtained by indexing is a reference into arr, so old_val is
# copied to keep the original row values.
old_val = arr[0].copy()
# Assigning a scalar to a row fills the whole row with that value.
arr[0] = 10
arr

array([[10, 10, 10],
       [ 4,  5,  6]])

In [54]:
# Restore row 0 from the saved copy.
arr[0] = old_val
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [19]:
# Indexing with a slice: elements at positions 1 through 5
# (the stop index 6 is excluded).
arr = np.array([1,2,3,4,5,6,7,8,9])
arr[1:6]

array([2, 3, 4, 5, 6])

In [8]:
# Slicing a 2-D array along the first axis: the first two rows of a 3x3 array.
arr2 = np.array([[1,2,3],
                 [4,5,6],
                 [7,8,9]])
arr2[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [61]:
# One slice per axis: the first two rows, columns from index 1 onward.
arr2[:2, 1:]

array([[2, 3],
       [5, 6]])

In [65]:
# Assigning to a slice writes into arr2 itself: the selected 2x2 region becomes 0.
arr2[:2, 1:] = 0
arr2

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

In [4]:
# Boolean indexing.
# names and data are related: each row of data corresponds to the entry of
# names at the same position (7 names, 7 rows).
names = np.array(['鈴木','田中','山田','鈴木','山田','田中','田中'])
data = np.random.randn(7,4)
names

array(['鈴木', '田中', '山田', '鈴木', '山田', '田中', '田中'], dtype='<U2')

In [6]:
# Show the full data array.
data

array([[-0.09741311,  0.43062916, -0.50665675,  0.08395739],
       [ 1.8905471 , -0.29938084,  0.34516355,  0.082637  ],
       [-0.0977479 ,  0.01968081, -1.50069928, -0.75537655],
       [ 1.27215614,  0.05561886,  1.19052697, -0.23144401],
       [ 0.83228853, -1.83480158,  0.20748924,  0.06462946],
       [-0.85742146,  0.22699821, -1.29228892,  1.37537326],
       [-0.31525589,  0.33690937,  0.18040733,  0.29391578]])

In [8]:
# Element-wise comparison yields a boolean array that is True wherever the
# entry of names equals '山田'.
names == '山田'

array([False, False,  True, False,  True, False, False])

In [11]:
# Select the rows of data whose corresponding name is '山田'.
data[names == '山田']

array([[-0.0977479 ,  0.01968081, -1.50069928, -0.75537655],
       [ 0.83228853, -1.83480158,  0.20748924,  0.06462946]])

In [13]:
# Select the rows of data whose corresponding name is not '山田'.
data[names != '山田']

array([[-0.09741311,  0.43062916, -0.50665675,  0.08395739],
       [ 1.8905471 , -0.29938084,  0.34516355,  0.082637  ],
       [ 1.27215614,  0.05561886,  1.19052697, -0.23144401],
       [-0.85742146,  0.22699821, -1.29228892,  1.37537326],
       [-0.31525589,  0.33690937,  0.18040733,  0.29391578]])

In [16]:
# Build the selection condition by combining two comparisons with |
# (element-wise OR). Each comparison needs its own parentheses because |
# binds more tightly than ==.
mask = (names == '鈴木') | (names == '山田')
mask

array([ True, False,  True,  True,  True, False, False])

In [18]:
# Extract the rows of data selected by mask.
data[mask]

array([[-0.09741311,  0.43062916, -0.50665675,  0.08395739],
       [-0.0977479 ,  0.01968081, -1.50069928, -0.75537655],
       [ 1.27215614,  0.05561886,  1.19052697, -0.23144401],
       [ 0.83228853, -1.83480158,  0.20748924,  0.06462946]])

In [20]:
# Modify the data matching a condition: every row whose name is not '田中'
# is set to 0. This changes data in place.
data[names != '田中'] = 0
data

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.8905471 , -0.29938084,  0.34516355,  0.082637  ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.85742146,  0.22699821, -1.29228892,  1.37537326],
       [-0.31525589,  0.33690937,  0.18040733,  0.29391578]])

In [30]:
# Fancy indexing: indexing with integer arrays.
# First build an 8x4 float array in which every element of row k holds k.
arr = np.empty((8,4))
for row_no, row in enumerate(arr):
    # Iterating a 2-D array yields one row at a time as a view into arr,
    # so writing into the row fills arr itself.
    row[...] = row_no
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [28]:
# Extract rows using a list of indices; rows come back in the order given.
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [33]:
# 8x4 array holding the integers 0..31, filled row by row.
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [36]:
# With two index lists, the first gives row indices and the second column indices.
# Perhaps unexpectedly, they are paired element by element, so the result is the
# 1-D array of elements (1,0), (5,3), (7,1), (2,2) rather than a rectangular block.
arr[[1,5,7,2],[0,3,1,2]]

array([ 4, 23, 29, 10])

In [38]:
# Reordering rows and columns: take rows 1, 5, 7, 2 and, within them,
# columns 0, 3, 1, 2 as a rectangular block.
# np.ix_ turns the two index lists into an open mesh, so the selection is one
# indexing step: no intermediate copy as with arr[[...]][:, [...]], and the
# same expression also works as an assignment target.
arr[np.ix_([1,5,7,2], [0,3,1,2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [40]:
# Transpose: start from a 3x5 array.
arr = np.arange(15).reshape((3,5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [42]:
# The T attribute gives the transpose: the 3x5 array is shown as 5x3.
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [48]:
# Permuting the axes of a higher-dimensional array: a 2x2x4 example.
arr = np.arange(16).reshape((2,2,4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [50]:
# transpose takes a permutation of the axes: here axes 0 and 1 trade places
# and axis 2 stays where it is.
arr.transpose((1,0,2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [52]:
# swapaxes exchanges a pair of axes: shape (2,2,4) -> (2,4,2).
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [55]:
# Universal functions (ufuncs): functions applied element by element.
# Input for the examples: the integers 0..9.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [57]:
# Element-wise square root.
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [59]:
# Element-wise exponential (e raised to each element).
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [61]:
# Two arrays of 8 standard-normal samples, used as operands in the next cells.
x = np.random.randn(8)
y = np.random.randn(8)

In [62]:
# Element-wise maximum demo: show the first operand, x.
x

array([ 1.20008315,  0.93341213, -0.27909559, -0.66108248, -2.07170391,
       -1.00566875, -1.48028726, -2.37671008])

In [63]:
# Show the second operand, y.
y

array([-1.22854792, -0.06474989,  1.88581843, -1.09382932,  0.53514983,
        0.80231562, -1.03166834, -0.95953094])

In [65]:
# Element-wise maximum: at each position, the larger of the x and y values.
np.maximum(x,y)

array([ 1.20008315,  0.93341213,  1.88581843, -0.66108248,  0.53514983,
        0.80231562, -1.03166834, -0.95953094])

In [69]:
# np.modf splits each value into its fractional part and its integral part
# (it is not division with remainder). As the outputs show, both parts keep
# the sign of the input.
remainder, whole_part = np.modf(x)
remainder

array([ 0.20008315,  0.93341213, -0.27909559, -0.66108248, -0.07170391,
       -0.00566875, -0.48028726, -0.37671008])

In [71]:
# Integral parts returned by np.modf (still stored as floats).
whole_part

array([ 1.,  0., -0., -0., -2., -1., -1., -2.])

In [73]:
# Conditional (ternary) expression: x if condition else y
# Inputs for the examples: two value arrays and a boolean condition array.
xarr = np.array([1.1,1.2,1.3,1.4,1.5])
yarr = np.array([2.1,2.2,2.3,2.4,2.5])
cond = np.array([True,False,True,True,False])

In [78]:
# Conditional selection written as a list comprehension over zip:
# take the xarr element where cond is True, otherwise the yarr element.
# The result is a plain Python list, not an ndarray.
result = [
    x_val if flag else y_val
    for x_val, y_val, flag in zip(xarr, yarr, cond)
]
result

[1.1, 2.2, 1.3, 1.4, 2.5]

In [80]:
# Vectorized form of the conditional expression: np.where takes the xarr
# element where cond is True and the yarr element otherwise, returning an ndarray.
result = np.where(cond, xarr, yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

In [18]:
# Mathematical and statistical methods: a 5x4 array of standard-normal samples.
arr = np.random.randn(5,4)
arr

array([[ 0.47854705, -0.08085851,  0.0499182 ,  0.80455194],
       [ 1.0030332 ,  1.36255142, -0.2005299 ,  0.19417205],
       [-1.27394356, -0.61055785, -0.25429614,  1.84989717],
       [ 0.43791186, -1.5841222 , -0.65239978,  0.98525485],
       [-1.03958689, -0.0355612 , -0.39702248,  1.26575422]])

In [10]:
# Mean (arithmetic average) of all elements. This is not the median,
# which would be np.median(arr).
# NOTE(review): the recorded outputs of the mean/sum cells do not match the
# array displayed in the cell above (the execution counts show they were run
# against an earlier random draw); re-run the notebook top to bottom.
arr.mean()

-0.051883417309332346

In [11]:
# Sum of all elements.
arr.sum()

-1.037668346186647

In [12]:
# Mean (not median) along axis 1: one value per row of the 5x4 array.
arr.mean(axis=1)

array([ 0.50315222, -0.3243937 ,  0.16499639, -0.44276481, -0.16040718])

In [13]:
# Sum along axis 0: one value per column of the 5x4 array.
arr.sum(axis=0)

array([-0.4347143 ,  0.2878148 , -2.08331838,  1.19254954])

In [19]:
# Sort along axis 1, i.e. within each row. The sort method reorders arr in
# place, so arr is displayed on a separate line afterwards.
arr.sort(axis=1)
arr

array([[-0.08085851,  0.0499182 ,  0.47854705,  0.80455194],
       [-0.2005299 ,  0.19417205,  1.0030332 ,  1.36255142],
       [-1.27394356, -0.61055785, -0.25429614,  1.84989717],
       [-1.5841222 , -0.65239978,  0.43791186,  0.98525485],
       [-1.03958689, -0.39702248, -0.0355612 ,  1.26575422]])

In [3]:
# Matrix product: x is 2x3 and y is 3x2, so their product is 2x2.
# Note: this rebinds x and y, replacing the random arrays used earlier.
x = np.array([[1,2,3],[4,5,6]])
y = np.array([[6,23],[-1,7],[8,9]])
x

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
# Show the right-hand operand, y (3x2).
y

array([[ 6, 23],
       [-1,  7],
       [ 8,  9]])

In [8]:
# Matrix product of x and y using the dot method.
x.dot(y)

array([[ 28,  64],
       [ 67, 181]])

In [5]:
# The @ infix operator (Python 3.5 and later) computes the same matrix product.
x@y

array([[ 28,  64],
       [ 67, 181]])