In [1]:
import numpy as np
import itertools

In [2]:
def diffsize(x, y):
    """Return the Euclidean (L2) size of ``x - y``.

    The squared differences are summed over every element, so the result
    is a single scalar regardless of the shape of the inputs.
    """
    delta = x - y
    sum_of_squares = np.sum(delta ** 2)
    return np.sqrt(sum_of_squares)

# Row-wise variant: the gufunc-style signature makes the last axis the
# "core" vector, so an (m, n) pair of inputs yields m distances.
vdiffsize = np.vectorize(
    diffsize,
    signature="(n),(n)->()",
)

In [3]:
# Two independent sets of 10 random 3-component vectors (x is drawn first, then y).
x, y = np.random.rand(10, 3), np.random.rand(10, 3)

In [4]:
# Reference result: walk the paired rows of x and y with a plain Python loop.
for x_row, y_row in zip(x, y):
    print(diffsize(x_row, y_row))

0.9706696101126147
0.2574869178816365
0.5157508422082189
0.4449132348469197
0.8722723066177326
0.9251086909863078
0.4515689963355143
1.0482154904964494
0.6786769007264327
0.2592164011678992


In [5]:
# Same row-by-row distances as the explicit loop, computed with one vectorized call.
vdiffsize(x,y)

array([0.97066961, 0.25748692, 0.51575084, 0.44491323, 0.87227231,
       0.92510869, 0.451569  , 1.04821549, 0.6786769 , 0.2592164 ])

# 行列化

`m[i,j] = f(x[i,:], y[j,:])` となるような行列を作る

## vectorize

In [6]:
def f(x, y):
    """Toy pair function: first entry of ``x`` plus a tenth of the first entry of ``y``.

    With small integer-valued inputs the result reads as ``i.j``, which makes
    it easy to see which (x, y) pair produced a given matrix entry.
    """
    x_head = x[0]
    y_head = y[0]
    return x_head + y_head / 10

# Toy problem sizes: 3 x-vectors and 4 y-vectors, each with 2 components.
dim, numx, numy = 2, 3, 4

# Wrap f so it maps over leading axes, treating the last axis as the vector.
vf = np.vectorize(f, signature="(n),(n)->()")

# Only column 0 is non-zero and holds the row index, so f(xs[i], ys[j]) == i + j/10.
xs = np.zeros((numx, dim))
ys = np.zeros((numy, dim))
xs[:, 0] = np.arange(numx)
ys[:, 0] = np.arange(numy)
print(xs, ys, sep="\n")

[[0. 0.]
 [1. 0.]
 [2. 0.]]
[[0. 0.]
 [1. 0.]
 [2. 0.]
 [3. 0.]]


In [7]:
# Step 1.
# Replicate both inputs, via broadcasting, until each has numx * numy vectors.
# Each buffer is shaped so that its last two axes match the source array
# (xs is (numx, dim), ys is (numy, dim)); plain assignment then broadcasts.
xs2 = np.zeros((numy, numx, dim))
ys2 = np.zeros((numx, numy, dim))
xs2[...] = xs
ys2[...] = ys
# Step 2.
# Transpose xs2 (swap its first two axes) so xs2 and ys2 share the same shape.
xs2 = np.transpose(xs2, (1, 0, 2))
vf(xs2, ys2)

array([[0. , 0.1, 0.2, 0.3],
       [1. , 1.1, 1.2, 1.3],
       [2. , 2.1, 2.2, 2.3]])

## broadcastのみ
vectorizeを使うと場合によっては激遅いので、ブロードキャストで頑張ってみる

In [8]:
# Broadcast xs and ys onto the full pair grid, exactly as in the vectorize demo.
xs3 = np.zeros((numy, numx, dim))
ys3 = np.zeros((numx, numy, dim))
xs3[...] = xs
ys3[...] = ys
# Flatten both grids to (numx * numy, dim): row i * numy + j holds xs[i] / ys[j].
xs3 = np.reshape(np.transpose(xs3, (1, 0, 2)), (-1, dim))
ys3 = np.reshape(ys3, (-1, dim))

In [9]:
# Call f once per flattened (x, y) row pair, then fold back into (numx, numy).
mat = np.array(list(map(f, xs3, ys3))).reshape(numx, numy)
mat

array([[0. , 0.1, 0.2, 0.3],
       [1. , 1.1, 1.2, 1.3],
       [2. , 2.1, 2.2, 2.3]])

## apply_along_axis化する

In [10]:
def argconcat(f, d):
    """Adapt a two-argument function to take one concatenated sequence.

    The returned callable splits its argument at index ``d`` and forwards
    the two pieces: ``g(v) == f(v[:d], v[d:])``.
    """
    def ret(x):
        head, tail = x[:d], x[d:]
        return f(head, tail)

    return ret
# f2 takes one concatenated row: the first 2 entries go to f as x, the rest as y.
f2 = argconcat(f, 2)

# Glue each x row to its paired y row, giving one wide row per matrix entry.
xys = np.concatenate((xs3, ys3), axis=1)
row_values = np.apply_along_axis(f2, 1, xys)
row_values.reshape(numx, numy)

array([[0. , 0.1, 0.2, 0.3],
       [1. , 1.1, 1.2, 1.3],
       [2. , 2.1, 2.2, 2.3]])

## ベンチマークしてみる

In [11]:
def cross_prod0(xs, ys, f):
    """Baseline: fill ``m[i, j] = f(xs[i], ys[j])`` with explicit Python loops.

    Returns a float array of shape ``(len(xs), len(ys))``.
    """
    n_rows, n_cols = len(xs), len(ys)
    out = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            out[i, j] = f(xs[i], ys[j])
    return out

In [12]:
def cross_prod1(xs, ys, vf):
    """Build ``m[i, j] = vf(xs[i], ys[j])`` by handing ``vf`` two aligned grids.

    Parameters
    ----------
    xs : ndarray, shape (nx, dim)
    ys : ndarray, shape (ny, dim)
    vf : callable
        Called once as ``vf(x_grid, y_grid)`` where both grids have shape
        ``(nx, ny, dim)``, ``x_grid[i, j] == xs[i]`` and ``y_grid[i, j] == ys[j]``.
        Typically ``np.vectorize(f, signature="(n),(n)->()")``.

    Returns
    -------
    Whatever ``vf`` returns for the two grids; an ``(nx, ny)`` array when
    ``vf`` reduces the last axis to a scalar.
    """
    dim = xs.shape[1]
    nx = xs.shape[0]
    ny = ys.shape[0]
    # Use read-only broadcast views instead of zero-filled buffers: nothing is
    # copied, and the inputs keep their dtype (a float64 buffer would silently
    # drop the imaginary part of complex inputs, for example).
    x_grid = np.broadcast_to(xs, (ny, nx, dim)).transpose((1, 0, 2))
    y_grid = np.broadcast_to(ys, (nx, ny, dim))
    return vf(x_grid, y_grid)

In [13]:
def cross_prod2(xs, ys, f):
    """Build ``m[i, j] = f(xs[i], ys[j])`` from a flat list of row pairs.

    Parameters
    ----------
    xs : ndarray, shape (nx, dim)
    ys : ndarray, shape (ny, dim)
    f : callable
        ``f(x, y)`` taking two length-``dim`` vectors and returning a scalar.

    Returns
    -------
    ndarray, shape (nx, ny)
    """
    dim = xs.shape[1]
    nx = xs.shape[0]
    ny = ys.shape[0]
    # Broadcast views replace the zero-filled float64 buffers, so the inputs
    # keep their dtype and no fill pass is needed. The reshape materialises
    # one row per (i, j) pair in row-major order: row i * ny + j is
    # xs[i] on the x side and ys[j] on the y side.
    x_rows = np.broadcast_to(xs, (ny, nx, dim)).transpose((1, 0, 2)).reshape(-1, dim)
    y_rows = np.broadcast_to(ys, (nx, ny, dim)).reshape(-1, dim)
    values = [f(x, y) for x, y in zip(x_rows, y_rows)]
    return np.array(values).reshape(nx, ny)

In [14]:
def cross_prod3(xs, ys, f):
    """Build the pairwise matrix with ``np.apply_along_axis`` over joined rows.

    Parameters
    ----------
    xs : ndarray, shape (nx, dim)
    ys : ndarray, shape (ny, dim)
    f : callable
        Takes ONE vector of length ``2 * dim`` (the x row followed by the
        paired y row) and returns a scalar.

    Returns
    -------
    ndarray, shape (nx, ny), with entry ``[i, j]`` equal to
    ``f(concatenate(xs[i], ys[j]))``.
    """
    dim = xs.shape[1]
    nx = xs.shape[0]
    ny = ys.shape[0]
    # Broadcast views replace the zero-filled float64 buffers, so the inputs
    # keep their dtype and no fill pass is needed. After the reshape,
    # row i * ny + j holds xs[i] on the x side and ys[j] on the y side.
    x_rows = np.broadcast_to(xs, (ny, nx, dim)).transpose((1, 0, 2)).reshape(-1, dim)
    y_rows = np.broadcast_to(ys, (nx, ny, dim)).reshape(-1, dim)
    xys = np.concatenate([x_rows, y_rows], axis=1)
    return np.apply_along_axis(f, 1, xys).reshape(nx, ny)

In [15]:
# Benchmark inputs: 100 x-vectors and 100 y-vectors with 2 components each,
# drawn from a standard normal distribution (xs is drawn first, then ys).
dim = 2
nx = ny = 100
xs, ys = np.random.randn(nx, dim), np.random.randn(ny, dim)

In [16]:
%%timeit
cross_prod0(xs, ys, f)

11.1 ms ± 494 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [17]:
%%timeit
cross_prod1(xs, ys, vf)

23.9 ms ± 89.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
%%timeit
cross_prod2(xs, ys, f)

10.4 ms ± 344 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
%%timeit
cross_prod3(xs, ys, f2)

33.5 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


クロージャーは遅いことが多い気がするので、グローバルの関数を定義してみる

In [20]:
def f3(x):
    """Module-level (non-closure) adapter: split ``x`` at index 2 and call ``f``."""
    head, tail = x[:2], x[2:]
    return f(head, tail)

In [21]:
%%timeit
cross_prod3(xs, ys, f3)

31.2 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


あんま変わらん・・

## 結論
cross_prod2が一番速いが、シンプルなcross_prod0でもほとんど変わらない

### 結果が一致することを確認

In [22]:
# Compute every variant once. cross_prod0 is given the plain pair function f,
# the same argument the benchmark cell passes it.
results = [
    cross_prod0(xs, ys, f),
    cross_prod1(xs, ys, vf),
    cross_prod2(xs, ys, f),
    cross_prod3(xs, ys, f2),
    cross_prod3(xs, ys, f3),
]
for result in results:
    print(result)

# The printed 100x100 arrays are truncated, so eyeballing them proves little;
# check every variant against the loop baseline explicitly.
for result in results[1:]:
    np.testing.assert_allclose(result, results[0])

[[ 0.06376783 -0.1578777  -0.1335834  ... -0.02680454 -0.18792929
  -0.14877715]
 [ 1.31213266  1.09048713  1.11478143 ...  1.22156029  1.06043554
   1.09958768]
 [ 0.31445713  0.09281159  0.1171059  ...  0.22388475  0.06276
   0.10191214]
 ...
 [-0.46286066 -0.68450619 -0.66021189 ... -0.55343303 -0.71455778
  -0.67540564]
 [-0.80424885 -1.02589439 -1.00160008 ... -0.89482123 -1.05594598
  -1.01679384]
 [ 0.08231029 -0.13933525 -0.11504094 ... -0.00826209 -0.16938684
  -0.1302347 ]]
[[ 0.06376783 -0.1578777  -0.1335834  ... -0.02680454 -0.18792929
  -0.14877715]
 [ 1.31213266  1.09048713  1.11478143 ...  1.22156029  1.06043554
   1.09958768]
 [ 0.31445713  0.09281159  0.1171059  ...  0.22388475  0.06276
   0.10191214]
 ...
 [-0.46286066 -0.68450619 -0.66021189 ... -0.55343303 -0.71455778
  -0.67540564]
 [-0.80424885 -1.02589439 -1.00160008 ... -0.89482123 -1.05594598
  -1.01679384]
 [ 0.08231029 -0.13933525 -0.11504094 ... -0.00826209 -0.16938684
  -0.1302347 ]]
[[ 0.06376783 -0.15787