## 分布の距離
- KL divergence
- L1 norm
- L2 norm
- JS divergence
- Wasserstein distance


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

In [None]:
def gauss(x, mu, sigma):
    """Evaluate the normal density with mean ``mu`` and std ``sigma`` at ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    normalizer = 1 / np.sqrt(2 * np.pi * sigma * sigma)
    exponent = -(x - mu) ** 2 / (2 * sigma ** 2)
    return normalizer * np.exp(exponent)
# Sample two normal densities on a common grid and draw them as filled curves.
x = np.linspace(-5, 5, 101)
y1 = gauss(x, 0, 1)
y2 = gauss(x, 1, 2)

# Each entry: (sampled density, line format string, fill colour).
for density, fmt, fill in ((y1, "b", "blue"), (y2, "r", "red")):
    plt.plot(x, density, fmt)
    plt.fill_between(x, density, color=fill, alpha=0.2)

# Clamp the horizontal axis to the sampled range.
plt.xlim(x[0], x[-1])

In [None]:
def kld(p, q):
    """Return the Kullback-Leibler divergence (relative entropy) sum(p * log(p / q)).

    Entries where ``p`` is exactly zero contribute nothing, following the
    usual convention 0 * log(0) = 0; evaluating them directly would give
    NaN and poison the whole sum.

    Parameters
    ----------
    p, q : array_like
        Discrete distributions of broadcast-compatible shape.

    Returns
    -------
    numpy.float64
        The divergence of ``q`` from ``p``. It is ``inf`` when ``q`` is zero
        at a position where ``p`` is positive.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    p, q = np.broadcast_arrays(p, q)
    # Only positions with non-zero p contribute to the sum.
    support = p != 0
    p_support = p[support]
    return np.sum(p_support * np.log(p_support / q[support]))
 
def jsd(p, q):
    """Return the Jensen-Shannon divergence between ``p`` and ``q``.

    This is the mean of the two KL divergences taken from ``p`` and from
    ``q`` to their midpoint distribution ``(p + q) / 2``.
    """
    midpoint = (p + q) / 2.0
    kl_from_p = kld(p, midpoint)
    kl_from_q = kld(q, midpoint)
    return (kl_from_p + kl_from_q) / 2

def l1(p, q):
    """Return the L1 distance: the sum of absolute element-wise differences."""
    difference = p - q
    return np.abs(difference).sum()

def l2(p, q):
    """Return the squared L2 distance: the sum of squared differences.

    No square root is taken, so this is the square of the Euclidean norm
    of ``p - q``.
    """
    difference = p - q
    return np.square(difference).sum()


In [None]:
# Compare how each distance measure reacts as a unit-variance Gaussian is
# shifted away from the reference N(0, 1).
mu = np.linspace(-3, 3, 30)

# The reference density does not depend on the shift, so evaluate it once.
y1 = gauss(x, 0, 1)

klds, jsds, l1s, l2s = [], [], [], []
for m in mu:
    y2 = gauss(x, m, 1)  # shifted density with mean m
    klds.append(kld(y1, y2))
    jsds.append(jsd(y1, y2))
    l1s.append(l1(y1, y2))
    l2s.append(l2(y1, y2))

plt.plot(mu, klds, label="KL")
plt.plot(mu, jsds, label="JS")
plt.plot(mu, l1s, label="L1")
plt.plot(mu, l2s, label="L2")
plt.legend()
plt.xlabel("mu")
plt.ylabel("dist")
plt.xlim(mu[0], mu[-1])

## Wasserstein distance
- POT (Python Optimal Transport) というライブラリを使う: [GitHub](https://github.com/rflamary/POT)
- 輸送コスト最小化問題を解いた場合の最小の輸送コスト

```
pip install Cython
pip install POT
```

In [None]:
import ot
# Imported under a distinct alias so that the notebook's own
# gauss(x, mu, sigma) is not shadowed and earlier cells can be re-run.
from ot.datasets import get_1D_gauss as ot_gauss

n = 100  # number of histogram bins
# Bin positions on the same interval used for the other distance measures.
x = np.linspace(-5, 5, n, dtype=np.float64)
dx = x[1] - x[0]  # spacing between neighbouring bins

# NOTE(review): the POT generator is understood to build its histogram on
# np.arange(n), i.e. mean and std are expressed in bin-index units. Values
# given in x-units are therefore converted: a position v maps to bin
# (v - x[0]) / dx and a width sigma maps to sigma / dx. Arguments are passed
# positionally because the keyword name of the std parameter appears to
# differ between POT releases -- confirm against the installed version.
sigma_bins = 1 / dx

# Reference histogram: Gaussian with mean 0 and std 1 (in x-units).
a = ot_gauss(n, (0 - x[0]) / dx, sigma_bins)

# Ground-cost matrix between bin positions (ot.dist defaults to the squared
# Euclidean metric), rescaled so that the largest cost is 1.
M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
M /= M.max()

wds = []
for m in mu:
    # Histogram of a unit-std Gaussian whose mean is shifted to m.
    b = ot_gauss(n, (m - x[0]) / dx, sigma_bins)
    ws = ot.emd2(b, a, M)  # minimal total transport cost between b and a
    wds.append(ws)
plt.plot(mu, wds, label="wasserstein")
plt.legend()
plt.xlim(mu[0], mu[-1])