In [1]:
import sys
sys.path.append('../Data')
sys.path.append('../Util')
import Load_Hypergraph as lh
import Hypergraph as hg
import Random

In [2]:
import numpy as np
import numpy.linalg as LA

## Wasserstein for Gaussian

In [3]:
import Wasserstein_Gaussian as wg

### Test 1: Basic test

In [4]:
# Test 1: draw two random 2-D Gaussians, show their parameters, then report
# the Wasserstein distance (both argument orders) and the barycenter.
gauss_1 = Random.rand_Gaussian(2, 2., 1., 3.)
gauss_2 = Random.rand_Gaussian(2, 2., 1., 3.)
(mu_1, Sigma_1), (mu_2, Sigma_2) = gauss_1, gauss_2

for template, fields in (
    ('mu_1 = {}\nSigma_1 =\n{}\n', (mu_1, Sigma_1)),
    ('mu_2 = {}\nSigma_2 =\n{}\n', (mu_2, Sigma_2)),
):
    print(template.format(*fields))

# Evaluate the distance with the arguments swapped as well, so the two
# printed values can be compared.
dist_12 = wg.Wasserstein_dist_Gaussian(*(tuple(gauss_1) + tuple(gauss_2)))
dist_21 = wg.Wasserstein_dist_Gaussian(*(tuple(gauss_2) + tuple(gauss_1)))

for template, value in (
    ('distance from 1 to 2 = {}', dist_12),
    ('distance from 2 to 1 = {}\n', dist_21),
):
    print(template.format(value))

# The barycenter routine takes parallel lists of means and covariances.
mus = [mu_1, mu_2]
Sigmas = [Sigma_1, Sigma_2]
mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)

for template, value in (
    ('mu of barycenter = {}', mu_c),
    ('Sigma of barycenter =\n{}', Sigma_c),
):
    print(template.format(value))

mu_1 = [-1.32337159  0.40303492]
Sigma_1 =
[[ 1.11192496 -0.2102142 ]
 [-0.2102142   1.76434324]]

mu_2 = [0.57590376 0.72587146]
Sigma_2 =
[[ 1.73043609 -0.0258995 ]
 [-0.0258995   2.17064608]]

distance from 1 to 2 = 1.9534348488981306
distance from 2 to 1 = 1.9534348488981308

mu of barycenter = [-0.37373391  0.56445319]
Sigma of barycenter =
[[ 1.40191444 -0.12605738]
 [-0.12605738  1.96065139]]


### Test 2: For 1-dimensional Gaussians, the inverse CDF of the barycenter is the average of the inverse CDFs

In [5]:
from scipy.stats import norm

#### Test 2.1: two Gaussian distributions

In [6]:
# Test 2.1: for two random 1-D Gaussians, the inverse CDF (quantile
# function) of the Wasserstein barycenter should equal the pointwise
# average of the two input inverse CDFs.
mu_1, Sigma_1 = Random.rand_Gaussian(1, 1.)
mu_2, Sigma_2 = Random.rand_Gaussian(1, 1.)
mus = [mu_1, mu_2]
Sigmas = [Sigma_1, Sigma_2]

mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)

# Reduce the 1-element means and 1x1 covariances to scalars. scipy's `scale`
# argument is a standard deviation, hence the square roots.
mu_1, mu_2, mu_c = mu_1[0], mu_2[0], mu_c[0]
std_1, std_2, std_c = np.sqrt(Sigma_1[0][0]), np.sqrt(Sigma_2[0][0]), np.sqrt(Sigma_c[0][0])

print("mu_1 = {}\tstd_1 = {}".format(mu_1, std_1))
print("mu_2 = {}\tstd_2 = {}".format(mu_2, std_2))
print('Barycenter:')
print("mu = {}\tstd = {}\n".format(mu_c, std_c))

# Probe the quantile functions on an evenly spaced grid strictly inside
# (0, 1); the endpoints are excluded because ppf is infinite there.
num_step = 20
step_size = 1. / num_step
avgs = []
targets = []
for x in np.linspace(step_size, 1. - step_size, num_step - 1):
    a_1 = norm.ppf(x, loc=mu_1, scale=std_1)
    a_2 = norm.ppf(x, loc=mu_2, scale=std_2)
    avg = (a_1 + a_2) / 2.
    target = norm.ppf(x, loc=mu_c, scale=std_c)
    print('avg = {} \t target = {}'.format(avg, target))
    avgs.append(avg)
    targets.append(target)

# Make the comparison an actual check instead of relying on eyeballing the
# printed table. It runs after the loop so the full table is still shown
# when the check fails.
np.testing.assert_allclose(
    avgs, targets, rtol=1e-6, atol=1e-9,
    err_msg='barycenter inverse cdf differs from the average of the inverse cdfs')

mu_1 = -0.41976402834223414	std_1 = 0.7512225205074092
mu_2 = 0.09876930246167848	std_2 = 0.6442062183351165
Barycenter:
mu = -0.16049736294027783	std = 0.697714369421263

avg = -1.3081353740590018 	 target = -1.308135374059002
avg = -1.054654305375061 	 target = -1.054654305375061
avg = -0.8836318317380796 	 target = -0.8836318317380797
avg = -0.747708591214149 	 target = -0.7477085912141491
avg = -0.631098553679442 	 target = -0.6310985536794422
avg = -0.5263791359885555 	 target = -0.5263791359885556
avg = -0.42934098918494085 	 target = -0.4293409891849409
avg = -0.33726127724937616 	 target = -0.33726127724937616
avg = -0.2481730903218925 	 target = -0.2481730903218925
avg = -0.16049736294027794 	 target = -0.16049736294027792
avg = -0.07282163555866336 	 target = -0.07282163555866335
avg = 0.01626655136882031 	 target = 0.016266551368820353
avg = 0.10834626330438518 	 target = 0.10834626330438524
avg = 0.2053844101079997 	 target = 0.20538441010799974
avg = 0.3101038277988864 	 t

#### Test 2.2: arbitrarily many Gaussian distributions

In [7]:
# Test 2.2: same inverse-CDF identity as Test 2.1, but for `num_entries`
# random 1-D Gaussians instead of two.
num_entries = 5
mus = []
Sigmas = []
for i in range(num_entries):
    mu, Sigma = Random.rand_Gaussian(1, 1.)
    mus.append(mu)
    Sigmas.append(Sigma)
    print("mu_{} = {}\tstd_{} = {}".format(i, mu[0], i, np.sqrt(Sigma[0][0])))

mu_c, Sigma_c = wg.Wasserstein_barycenter_Gaussian(mus, Sigmas)
print('Barycenter:')
print("mu = {}\tstd = {}\n".format(mu_c[0], np.sqrt(Sigma_c[0][0])))

# Reduce the 1-element means and 1x1 covariances to scalars. scipy's `scale`
# argument is a standard deviation, hence the square roots.
# Note: `mus` and `mu_c` are rebound from arrays to scalars from here on.
mus = [mu[0] for mu in mus]
stds = [np.sqrt(Sigma[0][0]) for Sigma in Sigmas]
mu_c = mu_c[0]
std_c = np.sqrt(Sigma_c[0][0])

# Probe the quantile functions on an evenly spaced grid strictly inside
# (0, 1); the endpoints are excluded because ppf is infinite there.
num_step = 20
step_size = 1. / num_step
avgs = []
targets = []
for x in np.linspace(step_size, 1. - step_size, num_step - 1):
    avg = 0.
    for mu, std in zip(mus, stds):
        avg += norm.ppf(x, loc=mu, scale=std)
    avg /= num_entries
    target = norm.ppf(x, loc=mu_c, scale=std_c)
    print('avg = {} \t target = {}'.format(avg, target))
    avgs.append(avg)
    targets.append(target)

# Make the comparison an actual check instead of relying on eyeballing the
# printed table. It runs after the loop so the full table is still shown
# when the check fails.
np.testing.assert_allclose(
    avgs, targets, rtol=1e-6, atol=1e-9,
    err_msg='barycenter inverse cdf differs from the average of the inverse cdfs')

mu_0 = -0.2616153389599216	std_0 = 0.6442054105900872
mu_1 = -0.11085478859358955	std_1 = 0.9248426149389147
mu_2 = 0.8291665592742272	std_2 = 0.4127392688170239
mu_3 = -0.4427838142654261	std_3 = 0.27275605354772026
mu_4 = 0.1163054610275942	std_4 = 0.790821019162407
Barycenter:
mu = 0.026043615696576837	std = 0.6090728734112307

avg = -0.9757921092116412 	 target = -0.9757921092116413
avg = -0.754514678754334 	 target = -0.7545146787543341
avg = -0.6052198469417469 	 target = -0.605219846941747
avg = -0.4865650473595825 	 target = -0.48656504735958256
avg = -0.3847697945417738 	 target = -0.3847697945417739
avg = -0.2933545113968322 	 target = -0.2933545113968323
avg = -0.20864462796243602 	 target = -0.20864462796243605
avg = -0.12826323238075626 	 target = -0.12826323238075626
avg = -0.05049330190916849 	 target = -0.0504933019091685
avg = 0.026043615696576726 	 target = 0.026043615696576754
avg = 0.10258053330232197 	 target = 0.102580533302322
avg = 0.18035046377390976 	 target =

## Wasserstein distance and barycenter with the POT package

In [8]:
import ot

In [9]:
# Load the UCI Congress hypergraph, build party labels and the hyperedge list,
# then use POT to compute an entropic barycenter for one hyperedge and the
# Sinkhorn cost from each of its vertices to that barycenter.
#
# NOTE(review): `vertex_dict`, `edge_dict`, `conditional_prob` and
# `Wasserstein_edge_weights` are not defined in any visible cell of this
# notebook, and `Graph` is assigned but never used below. Presumably they are
# derived from `Graph` or imported from a project module -- confirm, otherwise
# this cell raises NameError on a fresh kernel.
# NOTE(review): the saved output of this cell is a FileNotFoundError for an
# absolute dataset path, presumably raised inside `lh.load_UCI_Congress()` --
# confirm the loader resolves its data path relative to the repository.
Graph = lh.load_UCI_Congress()

# Encode each vertex's party as an integer label: 0 for 'democrat', 1 otherwise.
target_labels = []
for idx, vertex_map in vertex_dict.items():
    party = vertex_map['class']
    print('{}:\t{}'.format(idx, party))
    if party == 'democrat':
        target_labels.append(0)
    else:
        target_labels.append(1)

# Collect the values of `edge_dict` into a list, in its iteration order.
edges = []
for idx, edge in edge_dict.items():
    edges.append(edge)

num_vertices = len(target_labels)
# Precomputed matrix read from a file in the current working directory; it is
# passed to POT below as the cost matrix.
# NOTE(review): presumably square with one row/column per vertex -- confirm.
pw_dist_matrix = np.loadtxt('pw_dist_matrix_Congression')

# The result is not used again within this cell.
w_edge_weights = Wasserstein_edge_weights(num_vertices, edges, pw_dist_matrix, verbose=True)

# Inspect a single hyperedge (index 9 in `edges`).
edge = edges[9]
# Select the rows of `conditional_prob` indexed by the edge and transpose, so
# each column of A is one selected row.
# NOTE(review): assumes `conditional_prob` is a 2-D array with one probability
# row per vertex -- confirm.
A = conditional_prob[edge, :].T
# Entropic-regularized barycenter of the columns of A, regularization 0.1.
barycenter, log_b = ot.bregman.barycenter(A, pw_dist_matrix, 0.1, log=True)
print(barycenter.shape)
print(log_b)
for v in edge:
    p = conditional_prob[v]
    # Report rows that do not look like a probability vector (sum far from 1
    # or a negative entry); the row is still used below either way.
    if (abs(p.sum() - 1.) > 1e-4) or (p.min() < 0):
        print('\tsum = {}, min = {}'.format(p.sum(), p.min()))

    # NOTE(review): regularization here is 0.01, while the barycenter above
    # was computed with 0.1 -- confirm the mismatch is intentional.
    d = ot.sinkhorn2(p, barycenter, pw_dist_matrix, 0.01)
    print(d)

FileNotFoundError: File b'/home/Desktop/Curvatures/Data/Datasets/UCI_Congressional_Voting_Records.txt' does not exist

## Wasserstein distance with scipy.optimize.linprog and CVXPY

In [None]:
import Wasserstein_lp as wlp