# numpy

* 数値計算や配列計算をサポートする拡張モジュール

## ndarray(numpy配列)

* np.asarray関数でリストなどから生成する

In [6]:
import numpy as np

# Start from an ordinary Python list.
num_list = [1, 2, 3, 4, 5]
print(num_list)

# np.asarray builds an ndarray from the list; show its type, then its contents.
nd_ary = np.asarray(num_list)
print(type(nd_ary), nd_ary, sep="\n")

# An ndarray can be iterated element by element, just like a list.
for element in nd_ary:
    print(element)

[1, 2, 3, 4, 5]
<class 'numpy.ndarray'>
[1 2 3 4 5]
1
2
3
4
5


In [9]:
# A multi-dimensional ndarray can be looped over as well
import numpy as np

num_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
nd_ary = np.asarray(num_list)
print(nd_ary)

# Iterating over a 2-D array yields one row per step.
for row in nd_ary:
    print(row)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 2 3]
[4 5 6]
[7 8 9]


## ndarrayをリストに変換

* tolistメソッドを利用
    * リスト ↔ numpy配列 の変換はよく使う

In [12]:
import numpy as np

num_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
nd_ary = np.asarray(num_list)

# tolist() hands the array contents back as nested Python lists.
to_list = nd_ary.tolist()

# Show the ndarray first and the list second to compare the two notations.
print(nd_ary, to_list, sep="\n")

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


## 型を指定して配列を取得

* uint(符号なし整数)やcomplexなども指定できる

In [25]:
import numpy as np

num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# Build the same data once as 64-bit integers and once as 64-bit floats.
as_int64 = np.asarray(num_list, dtype=np.int64)
as_float64 = np.asarray(num_list, dtype=np.float64)

print(as_int64)
print(as_float64)
# Converting the float array to a list gives Python floats.
print(as_float64.tolist())

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]


## 型を変更

* astypeメソッドを利用する
* 破壊的メソッドではなく変更したものが返される

In [31]:
import numpy as np

num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
nd_ary = np.asarray(num_list)

# astype returns a converted array instead of modifying nd_ary, so printing
# nd_ary before and after the conversion shows that its type is unchanged.
for shown in (nd_ary, nd_ary.astype(np.float64), nd_ary):
    print(shown)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


## 配列初期化

* empty
    * 初期化しない
        * 値は都度異なる
        * 初期化しないので生成は高速
        * 初期化済みの場合は何もしない
* zeros
    * 0.0で初期化
* ones
    * 1.0で初期化
* arange
    * 連番で初期化
* full
    * 任意の値で初期化
* identity
    * 行数と列数が同じ正方行列の単位行列(対角成分が1、それ以外が0)で初期化
* diag
    * 対角要素の取得

In [60]:
import numpy as np

# empty: the values are not initialized, so what gets printed varies per run.
print(np.empty(shape=8))
print(np.empty(shape=(4, 3)))

# zeros / ones: filled with 0.0 / 1.0.
print(np.zeros(shape=5))
print(np.zeros(shape=(2, 3)))
print(np.ones(shape=6))
print(np.ones(shape=(3, 3)))

# arange: sequence from 2 up to (not including) 5 in steps of 0.5.
print(np.arange(2, 5, 0.5))

# full: a 2x2 array where every element is 7.
print(np.full(shape=(2, 2), fill_value=7))

# identity: 3x3 array with ones on the diagonal and zeros elsewhere.
print(np.identity(n=3))

# diag: pull the diagonal elements out of a 3x3 nested list.
num_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print(np.asarray(num_list))
print(np.diag(num_list))

[6.95332146e-310 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000]
[[6.95332149e-310 6.92298238e-310 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000]]
[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[1. 1. 1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[2.  2.5 3.  3.5 4.  4.5]
[[7 7]
 [7 7]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 5 9]


## 型を指定して初期化

* dtypeで指定

In [62]:
import numpy as np

# Use the builtin bool as the dtype: the np.bool alias was deprecated in
# NumPy 1.20 and removed in 1.24, whereas builtin bool works on every version.
# The array comes from np.empty, so the printed True/False values are arbitrary.
print(np.empty(9, dtype=bool))

# zeros / ones with an explicit 64-bit integer element type.
print(np.zeros([2, 3], dtype=np.int64))
print(np.ones([3, 3], dtype=np.int64))

[ True False False False  True False False False  True]
[[0 0 0]
 [0 0 0]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]


## ndarrayの構造確認

* ndim
    * 次元数
* size
    * 要素数
* shape
    * 次元毎の要素数
* nbytes
    * 全体のサイズ(バイト数)
* dtype
    * 型

In [73]:
import numpy as np

# A 3-D array of int64 zeros with shape (3, 2, 4).
nd_ary = np.zeros(shape=(3, 2, 4), dtype=np.int64)
print(nd_ary)

# Print, in order: number of dimensions, total element count, size of each
# dimension, total size in bytes, and the element type.
for attribute in ("ndim", "size", "shape", "nbytes", "dtype"):
    print(getattr(nd_ary, attribute))

[[[0 0 0 0]
  [0 0 0 0]]

 [[0 0 0 0]
  [0 0 0 0]]

 [[0 0 0 0]
  [0 0 0 0]]]
3
24
(3, 2, 4)
192
int64


## 行列の取り出し

In [80]:
import numpy as np

num_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
nd_ary = np.asarray(num_list)
print(nd_ary)

# Indices are zero-based, so index 1 selects the second row / column.
second_row = nd_ary[1, :]
second_column = nd_ary[:, 1]
center_element = nd_ary[1, 1]

# Second row, second column, then the single element at row 2 / column 2.
for extracted in (second_row, second_column, center_element):
    print(extracted)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[4 5 6]
[2 5 8]
5


## numpy演算

* add
    * 加算
    * ndarray同士の演算が可能
* subtract
    * 減算
    * ndarray同士の演算が可能
* multiply
    * 乗算
    * ndarray同士の演算が可能
* divide
    * 除算
    * ndarray同士の演算が可能
* mod
    * 剰余
    * ndarray同士の演算が可能
* power
    * 累乗
    * ndarray同士の演算が可能
* 平方根
    * sqrt
* サイン
    * sin
* コサイン
    * cos
* タンジェント
    * tan

In [20]:
import numpy as np

nd_ary_1 = np.asarray([1, 3, 5])
nd_ary_2 = np.asarray([7, 6, 9])

# The functions also work on single elements: add the first element of each array.
print(np.add(nd_ary_1[0], nd_ary_2[0]))

# Element-wise operations between the two arrays, one printed result per entry.
binary_demos = (
    (np.add, nd_ary_1, nd_ary_2),
    (np.subtract, nd_ary_2, nd_ary_1),
    (np.multiply, nd_ary_1, nd_ary_2),
    (np.divide, nd_ary_2, nd_ary_1),
    (np.mod, nd_ary_2, nd_ary_1),
    (np.power, nd_ary_1, nd_ary_2),
)
for operation, left, right in binary_demos:
    print(operation(left, right))

# Square root and trigonometric functions (arguments built from np.pi).
print(np.sqrt(2))
print(np.sin(np.pi / 2))
print(np.cos(np.pi))
print(np.tan(np.pi / 4))

8
[ 8  9 14]
[6 3 4]
[ 7 18 45]
[7.  2.  1.8]
[0 0 4]
[      1     729 1953125]
1.4142135623730951
1.0
-1.0
0.9999999999999999


## 統計関数

* sum
    * 合計値
* mean
    * 平均値
* amax
    * 最大値
* amin
    * 最小値
* ptp
    * 範囲 (最大値 - 最小値)
* median
    * 中央値
* std
    * 標準偏差

In [25]:
import numpy as np

nd_ary = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Apply each statistic to the same array and print the results in order:
# sum, mean, maximum, minimum, range (max - min), median, standard deviation.
statistics = (np.sum, np.mean, np.amax, np.amin, np.ptp, np.median, np.std)
for statistic in statistics:
    print(statistic(nd_ary))


45
5.0
9
1
8
5.0
2.581988897471611


## ndarrayのファイル入出力

* 保存形式
    * バイナリ形式
        * np.save / np.load
        * 軽い
        * 拡張子は「.npy」
    * テキスト形式
        * np.savetxt / np.loadtxt
        * 扱えるのは2次元まで

In [1]:
import numpy as np

num_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
nd_ary = np.asarray(num_list)
print(nd_ary)

# Binary format: write the array to a .npy file and read it back.
np.save(file='files/nd_ary_bin.npy', arr=nd_ary)
nd_ary_bin = np.load(file='files/nd_ary_bin.npy')
print(nd_ary_bin)

# Text format with default settings: the reloaded values come back as floats.
np.savetxt(fname='files/nd_ary.txt', X=nd_ary)
nd_ary_txt = np.loadtxt(fname='files/nd_ary.txt')
print(nd_ary_txt)

# Text format as integers: write with '%d' and read back with dtype=int.
np.savetxt(fname='files/nd_ary_int.txt', X=nd_ary, fmt='%d')
nd_ary_txt_int = np.loadtxt(fname='files/nd_ary_int.txt', dtype=int)
print(nd_ary_txt_int)

# Save in CSV form: integer format with a comma between values.
np.savetxt(fname='files/nd_ary_int_csv.txt', X=nd_ary, fmt='%d', delimiter=",")

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [48]:
%%bash
# Dump each saved file to compare the formats.
# The .npy file is binary, so most of its output is not readable text.
cat files/nd_ary_bin.npy
# Text file written by np.savetxt without a format argument.
cat files/nd_ary.txt
# Text file written with fmt='%d'.
cat files/nd_ary_int.txt
# Text file written with fmt='%d' and a comma delimiter.
cat files/nd_ary_int_csv.txt

�NUMPY v {'descr': '<i8', 'fortran_order': False, 'shape': (3, 3), }                                                          
                                                        	       1.000000000000000000e+00 2.000000000000000000e+00 3.000000000000000000e+00
4.000000000000000000e+00 5.000000000000000000e+00 6.000000000000000000e+00
7.000000000000000000e+00 8.000000000000000000e+00 9.000000000000000000e+00
1 2 3
4 5 6
7 8 9
1,2,3
4,5,6
7,8,9


## CSVの読み込みと解析例

In [2]:
## Create the CSV
import csv

header = ['日付', '店舗1', '店舗2', '店舗3']
# NOTE(review): the dates contain a doubled slash ('2018//6/1'). They are kept
# unchanged here — confirm whether '2018/6/1' was intended.
data = [
    ['2018//6/1', 42520, 18373, 43522],
    ['2018//6/2', 21660, 13211, 67534],
    ['2018//6/3', 65261, 13822, 55988],
    ['2018//6/4', 75428, 24358, 45327],
    ['2018//6/5', 32987, 19654, 58750],
]

# newline='' turns off the text layer's newline translation, so every row ends
# with exactly the "\n" given as lineterminator regardless of platform.
with open('csv/numpy_売上.csv', mode='w', encoding='utf-8', newline='') as fp:
    csv_writer = csv.writer(fp, lineterminator="\n")
    csv_writer.writerow(header)
    csv_writer.writerows(data)

In [3]:
%%bash
# Show the sales CSV file.
cat csv/numpy_売上.csv

日付,店舗1,店舗2,店舗3
2018//6/1,42520,18373,43522
2018//6/2,21660,13211,67534
2018//6/3,65261,13822,55988
2018//6/4,75428,24358,45327
2018//6/5,32987,19654,58750


In [41]:
import csv
import numpy as np

# newline='' is how the csv module documentation says files should be opened,
# leaving newline handling to the csv reader itself.
with open('csv/numpy_売上.csv', mode="r", encoding='utf-8', newline='') as fp:
    csv_reader = csv.reader(fp)
    csv_list = list(csv_reader)

# The first row is the header; dropping its first column leaves the store names.
csv_head = csv_list[0]
store_list = csv_head[1:]

# The remaining rows each hold a date followed by one sales figure per store.
sale_list = csv_list[1:]

# csv.reader yields every cell as a string, so this is an array of strings.
sale_ndary = np.asanyarray(sale_list)

# One list per statistic; each store appends one value to every list.
store_statis_dict = {"店舗":[], "合計":[], "平均":[], "最大":[], "最小":[], "範囲":[], }

# Column 0 is the date, so the store columns are numbered from 1.
for col, store in enumerate(store_list, start=1):
    # Convert this store's column to integers once and reuse it for every statistic.
    store_sales = sale_ndary[:, col].astype(np.int64)
    store_statis_dict["店舗"].append(store)
    store_statis_dict["合計"].append(np.sum(store_sales))
    # The mean is truncated to an integer before it is stored.
    store_statis_dict["平均"].append(int(np.mean(store_sales)))
    store_statis_dict["最大"].append(np.amax(store_sales))
    store_statis_dict["最小"].append(np.amin(store_sales))
    store_statis_dict["範囲"].append(np.ptp(store_sales))

# Each dictionary entry becomes one CSV row: the statistic's label followed by
# that statistic's value for every store.
csv_statis_list = [[label, *values] for label, values in store_statis_dict.items()]

# newline='' keeps the "\n" lineterminator from being translated on output.
with open('csv/numpy_売上統計.csv', mode="w", encoding='utf-8', newline='') as fp:
    csv_writer = csv.writer(fp, lineterminator="\n")
    csv_writer.writerows(csv_statis_list)

In [42]:
%%bash
# Show the generated statistics CSV file.
cat csv/numpy_売上統計.csv

店舗,店舗1,店舗2,店舗3
最大,75428,24358,67534
平均,47571,17883,54224
合計,237856,89418,271121
最小,21660,13211,43522
範囲,53768,11147,24012
