In [None]:
# chapter 4-1 IPythonの実行時間計測

In [None]:
n=100000

In [None]:
%timeit sum([1. / i**2 for i in range(1, n)])

In [None]:
%%timeit s = 0.
for i in range(1, n):
    s += 1. / i**2

In [None]:
import numpy as np

In [None]:
%timeit np.sum(1. / np.arange(1., n) ** 2)

In [None]:
# chapter 4-2 cProfileとIPythonによるコードプロファイル

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

In [None]:
def step(*shape):
    """Return an array of the given shape filled with random +1 / -1 values."""
    # Boolean mask: True where the uniform sample falls below 0.5.
    coin = np.random.random_sample(shape) < .5
    # Map True -> +1 and False -> -1.
    return 2 * coin - 1

In [None]:
%%prun -s cumulative -q -l 10 -T prun0
# Profile the simulation below; the -T option names the output file 'prun0'.
n = 10000
iterations = 50
# Running sum along axis 0 of the random +1/-1 steps produced by step().
x = np.cumsum(step(iterations, n), axis=0)
bins = np.arange(-30, 30, 1)
# One histogram per row of x (counts only, element [0]), stacked into a 2-D array.
y = np.vstack([np.histogram(x[i,:], bins)[0] for i in range(iterations)])

In [None]:
# Print the report stored in 'prun0'; the context manager closes the file
# handle as soon as its text has been read.
with open('prun0', 'r') as report:
    print(report.read())

In [None]:
%%prun -s cumulative -q -l 10 -T prun0
# Same profiled code as the iterations = 50 cell, with ten times as many iterations.
n = 10000
iterations = 500
# Running sum along axis 0 of the random +1/-1 steps produced by step().
x = np.cumsum(step(iterations, n), axis=0)
bins = np.arange(-30, 30, 1)
# One histogram per row of x (counts only, element [0]), stacked into a 2-D array.
y = np.vstack([np.histogram(x[i,:], bins)[0] for i in range(iterations)])

In [None]:
# Print the report stored in 'prun0'; the context manager closes the file
# handle as soon as its text has been read.
with open('prun0', 'r') as report:
    print(report.read())

In [None]:
# 4-3 line_profilerを使った行単位のコードプロファイル

In [None]:
# line_profilerのインストール
# !pip install line_profiler

In [None]:
import numpy as np

In [None]:
%load_ext line_profiler

In [None]:
%%writefile simulation.py
import numpy as np
def step(*shape):
    """Return an array of the given shape filled with random +1 / -1 values."""
    # Boolean mask: True where the uniform sample falls below 0.5.
    below_half = np.random.random_sample(shape) < .5
    # Map True -> +1 and False -> -1.
    return 2 * below_half - 1
def simulate(iterations, n = 10000):
    """Draw random +1/-1 steps, accumulate them, and histogram each row.

    Args:
        iterations: size of the first axis of the step array; one histogram
            is computed per index along this axis.
        n: size of the second axis of the step array (default 10000).

    Returns:
        A 2-D array in which row i holds the histogram counts of x[i, :].
    """
    s = step(iterations, n)
    # Running sum of the steps along axis 0.
    x = np.cumsum(s, axis=0)
    # Bin edges -30, -29, ..., 29.
    bins = np.arange(-30, 30, 1)
    # np.histogram returns (counts, edges); keep only the counts for each row.
    y = np.vstack([np.histogram(x[i,:], bins)[0] for i in range(iterations)])
    return y

In [None]:
import simulation

In [None]:
%lprun -T lprof0 -f simulation.simulate simulation.simulate(50)

In [None]:
# Print the report stored in 'lprof0'; the context manager closes the file
# handle as soon as its text has been read.
with open('lprof0', 'r') as report:
    print(report.read())

In [None]:
%lprun -T lprof0 -f simulation.simulate simulation.simulate(500)

In [None]:
# Print the report stored in 'lprof0'; the context manager closes the file
# handle as soon as its text has been read.
with open('lprof0', 'r') as report:
    print(report.read())

In [None]:
# chapter 4-4 memory_profilerを使ったメモリ使用状況のプロファイル

In [None]:
# memory_profilerのインストール
# !pip install memory_profiler

In [None]:
import simulation

In [None]:
%load_ext memory_profiler

In [None]:
%mprun -T mprof0 -f simulation.simulate simulation.simulate(50)

In [None]:
%mprun -T mprof0 -f simulation.simulate simulation.simulate(500)

In [None]:
# chapter 4-5 不必要な配列コピーを削除するためのNumPy内部構造解説

In [None]:
import numpy as np

In [None]:
a = np.zeros(10)

In [None]:
def id(x):
    """Return the address of the array's memory block.

    NOTE: this deliberately keeps the name `id`, which shadows the builtin
    of the same name for the rest of the notebook.
    """
    # The 'data' entry of the array interface is a (pointer, read-only flag) pair.
    address = x.__array_interface__['data'][0]
    return address

In [None]:
id(a), id(a[1:])
# A different offset gives a different memory address

In [None]:
# 2つの配列が同じバッファを共有しているのかを確かめるためのより汎用で厳密な方法

def get_data_base(arr):
    """Follow the `.base` chain of `arr` and return the last ndarray reached.

    The returned object is the array itself when `arr.base` is not an ndarray.
    """
    owner = arr
    parent = owner.base
    # Keep climbing while the parent is still a NumPy array.
    while isinstance(parent, np.ndarray):
        owner = parent
        parent = owner.base

    return owner

def arrays_share_data(x, y):
    """Return True when x and y resolve to the very same base array object."""
    base_of_x = get_data_base(x)
    base_of_y = get_data_base(y)
    return base_of_x is base_of_y

In [None]:
print(arrays_share_data(a,a.copy()),
     arrays_share_data(a,a[1:]))

In [None]:
a = np.zeros(10); aid = id(a); aid

In [None]:
b = a.copy(); id(b) == aid
# The address differs (b is a copy)

In [None]:
a *= 2; id(a) == aid
# Same address: the contents of the original array are modified in place

In [None]:
a = a*2; id(a) == aid
# Different address: the original array is implicitly copied and a new array is created

In [None]:
%%timeit a = np.zeros(10000000)
a *= 2
# In-place modification: fast

In [None]:
%%timeit a = np.zeros(10000000)
a = a*2
# Copy: slow

In [None]:
a = np.zeros((10, 10)); aid = id(a)

In [None]:
b = a.reshape((1, -1)); id(b) == aid
# Reshaping the 2-D array does not trigger a copy, but ...

In [None]:
c = a.T.reshape((1, -1)); id(c) == aid
# ... a copy is made when a transpose is involved

In [None]:
d = a.flatten(); id(d) == aid
# flatten always returns a copy of the array, whereas ...

In [None]:
e = a.ravel(); id(e) == aid
# ... ravel copies only when it has to

In [None]:
%timeit a.flatten()

In [None]:
%timeit a.ravel()

In [None]:
# ブロードキャストルール(2つの配列の形状が全く同じか、いずれかの次元長が1である場合に両者が互換であるとする)
# により配列の形状が異なっていても直接計算できる。
# reshapeやtileを使って形状を合わせる必要は必ずしもない。

In [None]:
n = 1000

In [None]:
a = np.arange(n)
ac = a[:, np.newaxis] # column vector
ar = a[np.newaxis, :] # row vector

In [None]:
% timeit np.tile(ac, (1, n)) * np.tile(ar, (n, 1))
# タイリングを使って計算、遅い

In [None]:
%timeit ac * ar
# Computed directly via broadcasting: fast

In [None]:
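# 以下の2つの例でパフォーマンスに差が出る理由は？
# 本には答えが書かれていないが、NumPy配列は既定で行優先(C順)でメモリに格納されるため、
# a[0,:]は連続したメモリを読むのに対し、a[:,0]は1行分(5000要素×8バイト)ずつ離れた位置を読むことになり、
# キャッシュ効率が悪くなるためだと考えられる。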

In [None]:
a = np.random.rand(5000, 5000)

In [None]:
%timeit a[0,:].sum()
# Fast

In [None]:
%timeit a[:,0].sum()
# Slow

In [None]:
# chapter 4-6 NumPyのストライドトリック

In [None]:
x = np.zeros(10); x.strides

In [None]:
y =np.zeros((10, 10)); y.strides

In [None]:
n = 1000; a = np.arange(n)

In [None]:
# Stride 0 along axis 0 makes every row alias the same data. The axis-1 stride
# is taken from `a` itself rather than hard-coded as 8 bytes, so the view stays
# correct whatever integer dtype np.arange produced.
b = np.lib.stride_tricks.as_strided(a, (n, n), (0, a.strides[0]))

In [None]:
b

In [None]:
b.size, b.shape, b.nbytes

In [None]:
b.dtype

In [None]:
%timeit b * b.T

In [None]:
timeit np.tile(a, (n, 1)) * np.tile(a[:, np.newaxis], (1, n))

In [None]:
# ストライドを使った移動平均の効率的計算アルゴリズム

In [None]:
import numpy as np
from numpy.lib.stride_tricks import as_strided
import matplotlib.pyplot as plt

In [None]:
n = 5; k = 2

In [None]:
a = np.linspace(1, n, n)
# Store the buffer address returned by the notebook's id() helper, not the
# array itself, so that later `id(...) == aid` checks compare two addresses.
aid = id(a)

In [None]:
# NOTE(review): assuming 8-byte elements, shape (k, n) with strides (8, 8) makes
# element [k-1, n-1] address memory beyond the n elements of `a`; presumably kept
# as a deliberate demonstration of an out-of-bounds strided view - confirm.
as_strided(a, (k, n), (8, 8))

In [None]:
# Limiting the column count to n-k+1 keeps the largest element index at
# (k-1) + (n-k) = n-1.
as_strided(a, (k, n-k+1), (8, 8))

In [None]:
def shift1(x, k):
    """Stack k shifted windows of the 1-D array x using slicing and vstack.

    Row i of the result is x[i : len(x) - k + i + 1], so the result has shape
    (k, len(x) - k + 1).

    Args:
        x: 1-D array to take the windows from.
        k: number of shifted rows to build.

    Returns:
        A new 2-D array built by np.vstack from the slices.
    """
    # Use the length of x itself instead of a global `n`, so the function
    # gives the right windows for any input array.
    n = len(x)
    return np.vstack([x[i:n - k + i + 1] for i in range(k)])

In [None]:
def shift2(x, k):
    """Build k shifted windows of the 1-D array x as a strided view.

    Element [i, j] of the result refers to x[i + j]; the result has shape
    (k, len(x) - k + 1).

    Args:
        x: 1-D NumPy array to take the windows from.
        k: number of shifted rows.

    Returns:
        The 2-D array produced by as_strided over the data of x.
    """
    # Use the length and the actual stride of x rather than a global `n` and
    # the item size, so sliced (non-contiguous) inputs are handled correctly.
    n = x.shape[0]
    step_bytes = x.strides[0]
    return as_strided(x, (k, n - k + 1), (step_bytes, step_bytes))

In [None]:
b = shift1(a, k); b, id(b) == aid

In [None]:
# Use the stride-based shift2 here; the vstack-based shift1 is already shown
# in the previous cell.
c = shift2(a, k); c, id(c) == aid

In [None]:
n, k = 100, 10
t = np.linspace(0., 1., n)
x = t + .1 * np.random.randn(n)

In [None]:
y = shift2(x, k)
x_avg = y.mean(axis=0)

In [None]:
%matplotlib inline
f = plt.figure()
plt.plot(x[:-k+1], '-k');
plt.plot(x_avg, '-r')

In [None]:
%timeit shift1(x, k)

In [None]:
%%timeit y = shift1(x, k)
z = y.mean(axis=0)

In [None]:
%timeit shift2(x, k)

In [None]:
%%timeit y = shift2(x, k)
z = y.mean(axis=0)

In [None]:
# chapter 4-8 NumPy配列要素の効率的な選択方法

In [1]:
import numpy as np
from numpy.lib.stride_tricks import as_strided
import matplotlib.pyplot as plt
%matplotlib inline

def id(x):
    """Return the address of the array's memory block.

    NOTE: this keeps the name `id`, which shadows the builtin of the same name.
    """
    # The 'data' entry of the array interface is a (pointer, read-only flag) pair.
    data_entry = x.__array_interface__['data']
    return data_entry[0]

In [2]:
n, d = 100000, 100

In [3]:
a = np.random.random_sample((n, d)); aid = id(a)

In [4]:
b1 = a[::10]
b2 = a[np.arange(0, n, 10)]

In [5]:
np.array_equal(b1, b2)

True

In [6]:
id(b1) == aid, id(b2) == aid

(True, False)

In [7]:
%timeit a[::10]

The slowest run took 27.68 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 260 ns per loop


In [8]:
%timeit a[np.arange(0, n, 10)]

100 loops, best of 3: 1.97 ms per loop


In [None]:
# 最近のNumPyではファンシーインデックスが改善されているため以下の手法による性能差は縮小している

In [9]:
i = np.arange(0, n, 10)

In [10]:
b1 = a[i]
b2 = np.take(a, i, axis=0)

In [11]:
np.array_equal(b1, b2)

True

In [12]:
%timeit a[i]

100 loops, best of 3: 2.19 ms per loop


In [13]:
%timeit np.take(a, i, axis=0)

1000 loops, best of 3: 1.86 ms per loop


In [14]:
i = np.random.random_sample(n) < .5

In [16]:
b1 = a[i]
b2 = np.compress(i, a, axis=0)

In [17]:
np.array_equal(b1, b2)

True

In [18]:
%timeit a[i]

100 loops, best of 3: 15.8 ms per loop


In [19]:
%timeit np.compress(i, a, axis=0)

100 loops, best of 3: 15.2 ms per loop


In [20]:
# chapter 4-9 メモリマップを使った巨大NumPy配列処理