In [10]:
import sys
sys.path.append('../Data')
sys.path.append('../Util')
import Load_Hypergraph as lh
import Hypergraph as hg
import Random

In [11]:
import numpy as np
import numpy.linalg as LA

## Wasserstein for Gaussian

In [12]:
import Wasserstein_Gaussian as wg

### Test 1: Basic test

In [13]:
# Test 1: draw two random 2-D Gaussians, then exercise the closed-form
# Wasserstein distance and barycenter on them.
# NOTE(review): the meaning of rand_Gaussian's positional arguments is defined in
# the project's Random module — confirm there before changing them.
mu_1, Sigma_1 = Random.rand_Gaussian(2, 2., 1., 3.)
mu_2, Sigma_2 = Random.rand_Gaussian(2, 2., 1., 3.)

print('mu_1 = {}\nSigma_1 =\n{}\n'.format(mu_1, Sigma_1))
print('mu_2 = {}\nSigma_2 =\n{}\n'.format(mu_2, Sigma_2))

# Evaluate the distance in both argument orders so symmetry can be checked.
dist_12 = wg.Wasserstein_dist_Gaussian(mu_1, Sigma_1, mu_2, Sigma_2)
dist_21 = wg.Wasserstein_dist_Gaussian(mu_2, Sigma_2, mu_1, Sigma_1)

print('distance from 1 to 2 = {}'.format(dist_12))
print('distance from 2 to 1 = {}\n'.format(dist_21))

# A distance must be symmetric; assert it instead of relying on eyeballing the
# two printed values (they may differ in the last floating-point digit).
np.testing.assert_allclose(
    dist_12, dist_21, rtol=1e-6,
    err_msg='Wasserstein distance is not symmetric in its arguments')

mus = [mu_1, mu_2]
Sigmas = [Sigma_1, Sigma_2]

mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)
print('mu of barycenter = {}'.format(mu_c))
print('Sigma of barycenter =\n{}'.format(Sigma_c))

# The barycenter mean is expected to be the plain average of the input means.
# NOTE(review): this assumes the barycenter uses equal weights, which matches the
# recorded output of this cell — confirm against Wasserstein_Gaussian.
np.testing.assert_allclose(
    mu_c, np.mean(mus, axis=0), rtol=1e-6, atol=1e-9,
    err_msg='barycenter mean differs from the average of the input means')

mu_1 = [ 1.79006476 -0.26635168]
Sigma_1 =
[[2.1327125  0.71217752]
 [0.71217752 1.92564481]]

mu_2 = [-0.29660776 -1.69524629]
Sigma_2 =
[[ 2.68906843 -0.07528343]
 [-0.07528343  2.330292  ]]

distance from 1 to 2 = 2.5723777080488266
distance from 2 to 1 = 2.572377708048827

mu of barycenter = [ 0.7467285  -0.98079898]
Sigma of barycenter =
[[2.38295094 0.34378494]
 [0.34378494 2.10061166]]


### Test 2: For 1-dimensional Gaussians, the inverse cdf of the barycenter is the average of the inverse cdfs

In [14]:
from scipy.stats import norm

#### Test 2.1: two Gaussian distributions

In [15]:
# Test 2.1: for two 1-D Gaussians, the quantile function (inverse cdf) of the
# barycenter should equal the pointwise average of the two input quantile functions.
mu_1, Sigma_1 = Random.rand_Gaussian(1, 1.)
mu_2, Sigma_2 = Random.rand_Gaussian(1, 1.)
mus = [mu_1, mu_2]
Sigmas = [Sigma_1, Sigma_2]

mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)

# Unpack the single mean entry / single variance entry of each 1-D Gaussian into
# scalars; scipy's `scale` is a standard deviation, hence the square roots.
mu_1, mu_2, mu_c = mu_1[0], mu_2[0], mu_c[0]
std_1, std_2, std_c = np.sqrt(Sigma_1[0][0]), np.sqrt(Sigma_2[0][0]), np.sqrt(Sigma_c[0][0])

print("mu_1 = {}\tstd_1 = {}".format(mu_1, std_1))
print("mu_2 = {}\tstd_2 = {}".format(mu_2, std_2))
print('Barycenter:')
print("mu = {}\tstd = {}\n".format(mu_c, std_c))

num_step = 20
step_size = 1. / num_step
# Only interior probability levels are probed: the normal ppf is infinite at 0 and 1.
for x in np.linspace(step_size, 1. - step_size, num_step - 1):
    a_1 = norm.ppf(x, loc=mu_1, scale=std_1)
    a_2 = norm.ppf(x, loc=mu_2, scale=std_2)
    avg = (a_1 + a_2) / 2.
    target = norm.ppf(x, loc=mu_c, scale=std_c)
    print('avg = {} \t target = {}'.format(avg, target))
    # Turn the visual comparison into an actual check; the absolute tolerance
    # covers quantiles that happen to land close to zero.
    np.testing.assert_allclose(
        avg, target, rtol=1e-6, atol=1e-9,
        err_msg='averaged inverse cdf != barycenter inverse cdf at p = {}'.format(x))

mu_1 = 0.8760159865390862	std_1 = 0.6582076917583841
mu_2 = 0.8154689074584354	std_2 = 0.7473167588656556
Barycenter:
mu = 0.8457424469987608	std = 0.7027622253120198

avg = -0.3101985481902033 	 target = -0.3101985481902031
avg = -0.05488358305546548 	 target = -0.05488358305546537
avg = 0.11737621181042562 	 target = 0.11737621181042568
avg = 0.25428283602321233 	 target = 0.25428283602321244
avg = 0.371736529200814 	 target = 0.37173652920081407
avg = 0.4772135757332938 	 target = 0.4772135757332938
avg = 0.5749537785679131 	 target = 0.5749537785679131
avg = 0.6676996730226923 	 target = 0.6676996730226922
avg = 0.7574323992471832 	 target = 0.7574323992471833
avg = 0.8457424469987607 	 target = 0.8457424469987607
avg = 0.9340524947503381 	 target = 0.9340524947503381
avg = 1.0237852209748293 	 target = 1.0237852209748293
avg = 1.1165311154296085 	 target = 1.1165311154296085
avg = 1.2142713182642275 	 target = 1.2142713182642275
avg = 1.3197483647967077 	 target = 1.31974836479670

#### Test 2.2: arbitrarily many Gaussian distributions

In [16]:
# Test 2.2: same property as Test 2.1, but for `num_entries` random 1-D Gaussians.
num_entries = 5
mus = []
Sigmas = []
for i in range(num_entries):
    mu, Sigma = Random.rand_Gaussian(1, 1.)
    mus.append(mu)
    Sigmas.append(Sigma)
    print("mu_{} = {}\tstd_{} = {}".format(i, mu[0], i, np.sqrt(Sigma[0][0])))

mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)

# From here on `mus`, `mu_c` hold plain scalars (single mean entry) and `stds`,
# `std_c` hold standard deviations, which is what scipy's loc/scale expect.
# The names are rebound on purpose so the cell leaves the same variables behind.
mus = [mu[0] for mu in mus]
stds = [np.sqrt(Sigma[0][0]) for Sigma in Sigmas]
mu_c = mu_c[0]
std_c = np.sqrt(Sigma_c[0][0])

print('Barycenter:')
print("mu = {}\tstd = {}\n".format(mu_c, std_c))

num_step = 20
step_size = 1. / num_step
# Only interior probability levels are probed: the normal ppf is infinite at 0 and 1.
for x in np.linspace(step_size, 1. - step_size, num_step - 1):
    # Average the inverse cdfs of all inputs at level x (summed in list order).
    avg = sum(norm.ppf(x, loc=mu, scale=std) for mu, std in zip(mus, stds)) / num_entries
    target = norm.ppf(x, loc=mu_c, scale=std_c)
    print('avg = {} \t target = {}'.format(avg, target))
    # Turn the visual comparison into an actual check; the absolute tolerance
    # covers quantiles that happen to land close to zero.
    np.testing.assert_allclose(
        avg, target, rtol=1e-6, atol=1e-9,
        err_msg='averaged inverse cdf != barycenter inverse cdf at p = {}'.format(x))

mu_0 = -0.5582420763828546	std_0 = 0.8985804886190741
mu_1 = -0.16719188446862399	std_1 = 0.9513119352906851
mu_2 = 0.2975120496451571	std_2 = 0.8927140798215805
mu_3 = 0.9948237099372956	std_3 = 0.42831701540628725
mu_4 = 0.04571033419182946	std_4 = 0.9824625120764904
Barycenter:
mu = 0.12252242658456071	std = 0.8306772062428236

avg = -1.2438199889298638 	 target = -1.2438199889298642
avg = -0.9420332475381447 	 target = -0.9420332475381447
avg = -0.7384191658569208 	 target = -0.7384191658569209
avg = -0.5765931484344264 	 target = -0.5765931484344266
avg = -0.4377608347477403 	 target = -0.4377608347477404
avg = -0.31308512626405893 	 target = -0.31308512626405904
avg = -0.19755450195905938 	 target = -0.19755450195905944
avg = -0.08792723725799798 	 target = -0.08792723725799807
avg = 0.0181384100462773 	 target = 0.01813841004627728
avg = 0.12252242658456061 	 target = 0.1225224265845606
avg = 0.2269064431228439 	 target = 0.22690644312284391
avg = 0.3329720904271193 	 target = 0

## Wasserstein distance and barycenter with the POT package

In [17]:
import ot

In [18]:
# Experiment: entropic Wasserstein barycenter / Sinkhorn distances (POT) for one
# hyperedge of the UCI Congress hypergraph.
#
# NOTE(review): `Graph` is never used below, while `vertex_dict`, `edge_dict`,
# `conditional_prob` and `Wasserstein_edge_weights` are not defined in any visible
# cell of this notebook. The cell presumably relies on leftover kernel state and
# would not survive Restart & Run All — confirm where these names should come from.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for the
# dataset file, so the statements below presumably never ran in the saved session.
Graph = lh.load_UCI_Congress()

# Encode each vertex's 'class' entry as a binary label:
# 0 for 'democrat', 1 for anything else. Also prints every (index, party) pair.
target_labels = []
for idx, vertex_map in vertex_dict.items():
    party = vertex_map['class']
    print('{}:\t{}'.format(idx, party))
    if party == 'democrat':
        target_labels.append(0)
    else:
        target_labels.append(1)

# Collect the values of edge_dict into a list, in the dict's iteration order.
edges = []
for idx, edge in edge_dict.items():
    edges.append(edge)

num_vertices = len(target_labels)
# Matrix read from a text file; the path is relative to the current working directory.
# presumably a precomputed pairwise vertex distance matrix — TODO confirm its shape
pw_dist_matrix = np.loadtxt('pw_dist_matrix_Congression')

# NOTE(review): the result is not used anywhere else in this cell.
w_edge_weights = Wasserstein_edge_weights(num_vertices, edges, pw_dist_matrix, verbose=True)

# Inspect a single edge (index 9 is hard-coded).
edge = edges[9]
# Select the rows of conditional_prob indexed by `edge`, then transpose.
# presumably this yields one column per vertex of the edge, as POT's barycenter
# expects for its histogram matrix — TODO confirm
A = conditional_prob[edge, :].T
# Barycenter of the columns of A with regularisation 0.1;
# log=True makes POT return a second value alongside the barycenter.
barycenter, log_b = ot.bregman.barycenter(A, pw_dist_matrix, 0.1, log=True)
print(barycenter.shape)
print(log_b)
for v in edge:
    p = conditional_prob[v]
    # Report p only when it does not look like a probability vector
    # (sum off from 1 by more than 1e-4, or a negative entry).
    if (abs(p.sum() - 1.) > 1e-4) or (p.min() < 0):
        print('\tsum = {}, min = {}'.format(p.sum(), p.min()))

    # NOTE(review): the regularisation here (0.01) differs from the 0.1 used for
    # the barycenter above — confirm this is intentional.
    d = ot.sinkhorn2(p, barycenter, pw_dist_matrix, 0.01)
    print(d)

FileNotFoundError: File b'/home/Desktop/Curvatures/Data/Datasets/UCI_Congressional_Voting_Records.txt' does not exist

## Wasserstein distance with scipy.optimize.linprog and CVXPY

In [None]:
import Wasserstein_lp as wlp