In [1]:
import sys
import os

# Make the directory two levels above the working directory importable
# (presumably the project root that holds the `algorithms` and `utils` packages).
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import torch
import gpytorch
from tqdm.notebook import trange
import heapq
import math
import pickle
import itertools
from algorithms.cd import con_div
from algorithms.ccr import con_conv_rate
from utils.class_imbalance import get_classes, class_proportion

from algorithms.cgm import *

## Dataset

In [3]:
def sample_GMM(means, covs, num_samples):
    """
    Draw points from a Gaussian mixture whose components are equally likely.

    For every sample a component index is picked uniformly at random, then a
    single point is drawn from that component's multivariate normal.

    Args:
        means: array of shape (n, d), one mean vector per component.
        covs: array of shape (n, d, d), one covariance matrix per component.
        num_samples: number of points to draw.

    Returns:
        Tuple ``(samples, clusters)``: a ``(num_samples, d)`` float array of
        points and a ``(num_samples,)`` int32 array holding the index of the
        component each point was drawn from.

    Raises:
        AssertionError: if the shapes of ``means`` and ``covs`` disagree.
        ValueError: if a covariance matrix fails NumPy's validity check
            (``check_valid='raise'``).
    """
    assert means.shape[0] == covs.shape[0]
    assert means.shape[1] == covs.shape[1]
    assert covs.shape[1] == covs.shape[2]

    num_components, dim = means.shape[:2]
    points = np.zeros((num_samples, dim))
    labels = np.zeros(num_samples, dtype=np.int32)

    # Keep the randint -> multivariate_normal order for every draw so the
    # random stream consumed for a given seed does not change.
    for row in range(num_samples):
        component = np.random.randint(num_components)
        points[row] = np.random.multivariate_normal(
            means[component], covs[component], check_valid='raise'
        )
        labels[row] = component

    return points, labels

In [4]:
# Experiment sizes: number of Gaussian clusters, input dimension, and the
# number of points drawn per cluster for each of the train and test sets.
num_clusters, d, num_samples = 5, 2, 1000

In [5]:
# Seed NumPy's global RNG so the np.random draws in the following cells are repeatable.
np.random.seed(2)

In [6]:
# Cluster centres are drawn uniformly from [0, 1) in every coordinate.
means = np.random.uniform(size=(num_clusters, d))
# All clusters share the same isotropic covariance, identity / 200.
covs = np.tile(np.eye(d) / 200, (num_clusters, 1, 1))

In [7]:
# Pre-allocate one (num_samples, d) slab per cluster for each split;
# the slabs are filled by the sampling loop in the next cell.
train_sets = np.zeros((num_clusters, num_samples, d))
test_sets = np.zeros_like(train_sets)

In [8]:
# For each cluster draw the training block first, then the test block, both
# from that cluster's Gaussian (the order matters for the seeded RNG stream).
for i in range(num_clusters):
    for split_sets in (train_sets, test_sets):
        split_sets[i] = np.random.multivariate_normal(
            means[i], covs[i], size=num_samples, check_valid='raise'
        )

In [9]:
# plt.figure(figsize=(10, 6), dpi=300)
# for i in range(num_clusters):
#     plt.scatter(train_sets[i, :, 0], train_sets[i, :, 1], s=2, color=cm.get_cmap('Set1')(i*(1/9)), label="{0}".format(i))
#     plt.legend()

## Unequal split

In [10]:
# Number of collaborating parties; passed to the helper calls and loops in later cells.
num_parties = 5

In [11]:
# 5x5 proportion matrix passed to split_proportions together with train_sets.
# Every row and every column sums to 1.
# NOTE(review): which axis indexes parties and which indexes clusters is
# decided inside split_proportions -- confirm there.
unequal_prop = np.array([
    [0.2, 0.2, 0.2, 0.2, 0.2],
    [0.2, 0.2, 0.2, 0.2, 0.2],
    [0.6, 0.4, 0.0, 0.0, 0.0],
    [0.0, 0.2, 0.6, 0.2, 0.0],
    [0.0, 0.0, 0.0, 0.4, 0.6],
])

In [12]:
# Build the per-party datasets from the per-cluster training sets using unequal_prop.
# NOTE(review): split_proportions is not defined or explicitly imported in this
# notebook (presumably it arrives via `from algorithms.cgm import *`); its exact
# splitting rule should be confirmed there.
party_datasets = split_proportions(train_sets, unequal_prop)

In [13]:
# # Check
# plt.figure(figsize=(10, 6), dpi=300)
# plt.xlim(0, 0.8)
# plt.ylim(-0.2, 1.0)
# for i in range(num_parties):
#     if i == 0:
#         plt.scatter(party_datasets[i, :, 0], party_datasets[i, :, 1], s=2, color=cm.get_cmap('Set1')(i*(1/9)), label="{0}".format(i))

# plt.legend()

In [14]:
# RBF kernel with a separate lengthscale per input dimension (ARD), wrapped in
# a ScaleKernel that supplies the output scale.
kernel = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=d))
# One 0.1 lengthscale per dimension. Built from d so it stays in step with
# ard_num_dims instead of silently assuming d == 2.
kernel.base_kernel.lengthscale = [0.1] * d
kernel.outputscale = 1

In [15]:
# Pool every party's data along the first axis. Both names hold the same
# values but are independent arrays.
reference_dataset = np.concatenate(party_datasets)
perm_samp_dataset = reference_dataset.copy()

In [16]:
# v: values computed by get_v from the party datasets, the pooled reference
# dataset and the kernel. The saved output of the next cell shows a dict keyed
# by party-subset strings such as '{1, 2}'.
v = get_v(party_datasets, reference_dataset, kernel)

In [17]:
# Display v (the result of get_v) as the cell output.
v

{'{1}': 0.1646876484155655,
 '{2}': 0.1645859181880951,
 '{3}': 0.013847321271896362,
 '{4}': 0.09602096676826477,
 '{5}': 0.051841288805007935,
 '{1, 2}': 0.16484807431697845,
 '{1, 3}': 0.12805142998695374,
 '{1, 4}': 0.1476638913154602,
 '{1, 5}': 0.13576869666576385,
 '{2, 3}': 0.12621892988681793,
 '{2, 4}': 0.14736829698085785,
 '{2, 5}': 0.13787144422531128,
 '{3, 4}': 0.13700227439403534,
 '{3, 5}': 0.14734943211078644,
 '{4, 5}': 0.12713100016117096,
 '{1, 2, 3}': 0.14815029501914978,
 '{1, 2, 4}': 0.1571362465620041,
 '{1, 2, 5}': 0.1525375097990036,
 '{1, 3, 4}': 0.1529237926006317,
 '{1, 3, 5}': 0.1571446806192398,
 '{1, 4, 5}': 0.14774493873119354,
 '{2, 3, 4}': 0.15198928117752075,
 '{2, 3, 5}': 0.15727604925632477,
 '{2, 4, 5}': 0.1485593467950821,
 '{3, 4, 5}': 0.16491344571113586,
 '{1, 2, 3, 4}': 0.15789549052715302,
 '{1, 2, 3, 5}': 0.16065672039985657,
 '{1, 2, 4, 5}': 0.15552087128162384,
 '{1, 3, 4, 5}': 0.16494202613830566,
 '{2, 3, 4, 5}': 0.16494838893413544,
 

In [18]:
# phi: result of shapley() applied to v; the saved output is a list with one
# entry per party (presumably the parties' Shapley values -- confirm in the helper).
phi = shapley(v, num_parties)
print(phi)

[0.049346553285916646, 0.049321537961562474, 0.012247522175312043, 0.03245804657538732, 0.021592103689908982]


In [19]:
# alpha: phi passed through norm(). In the saved output the largest entry is
# exactly 1.0, consistent with dividing by max(phi) -- TODO confirm in the helper.
alpha = norm(phi)
print(alpha)

[1.0, 0.9994930684579076, 0.2481940755689506, 0.6577571160304472, 0.43756052352437147]


In [20]:
# vN: a single scalar extracted from v by get_vN (presumably the value of the
# coalition containing all parties -- confirm in the helper).
vN = get_vN(v, num_parties)
print(vN)

0.16496576368808746


In [21]:
# v_is: one value per party extracted from v by get_v_is. In the saved output
# these equal the single-party entries v['{1}'] ... v['{5}'].
v_is = get_v_is(v, num_parties)
print(v_is)

[0.1646876484155655, 0.1645859181880951, 0.013847321271896362, 0.09602096676826477, 0.051841288805007935]


## R6

In [22]:
# get_q_rho returns a pair: q, which is later called on each entry of alpha,
# and rho, which is displayed as a scalar. cond="R6" selects the condition used
# by the helper; its meaning is defined there, not in this notebook.
q, rho = get_q_rho(alpha, v_is, vN, phi, v, cond="R6")

In [23]:
# Display rho, the second value returned by get_q_rho.
rho

0.0007571734515806762

In [24]:
# Display v_is again for reference (same values as printed when it was computed).
v_is

[0.1646876484155655,
 0.1645859181880951,
 0.013847321271896362,
 0.09602096676826477,
 0.051841288805007935]

In [25]:
# Evaluate q at every party's alpha to get that party's target value; r is
# passed to reward_realization further down.
r = [q(share) for share in alpha]
print(r)

[0.16496576368808746, 0.16496570035239352, 0.1647917910824843, 0.16491344571113586, 0.16486255474930103]


In [26]:
num_candidate_points = 8000
# num_clusters independent GMM draws, each a (samples, component labels) pair.
gmm_clusters = [sample_GMM(means, covs, num_candidate_points) for _ in range(num_clusters)]
gmm = np.stack([samples for samples, _ in gmm_clusters])
clusters = np.stack([labels for _, labels in gmm_clusters])
# Every party gets the same candidate pool: the first draw, repeated per party.
cand_datasets = np.repeat(gmm[:1], num_parties, axis=0)

In [27]:
# Same greed value (3) for every party; passed to reward_realization as `greeds`.
greeds = np.full(num_parties, 3.0)

In [28]:
# Run the reward realization for all parties, using the entries of r as targets
# (the log lines in the saved output report one "-MMD^2 target" per entry of r).
# Returns three values, unpacked as rewards, deltas and mus.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt, so
# the cells after it were not executed in the saved run.
rewards, deltas, mus = reward_realization(cand_datasets, 
                                          reference_dataset, 
                                          r, 
                                          party_datasets, 
                                          kernel, 
                                          greeds=greeds,
                                          rel_tol=1e-5)

Running weighted sampling algorithm with -MMD^2 target 0.16496576368808746
Running weighted sampling algorithm with -MMD^2 target 0.16496570035239352Running weighted sampling algorithm with -MMD^2 target 0.16491344571113586

Running weighted sampling algorithm with -MMD^2 target 0.16486255474930103Running weighted sampling algorithm with -MMD^2 target 0.1647917910824843



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8000.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8000.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8000.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8000.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8000.0), HTML(value='')))






Process ForkPoolWorker-5:
Process ForkPoolWorker-4:
Process ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sebtsh/anaconda3/envs/CML/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/sebtsh/anaconda3/envs/CML/lib/

KeyboardInterrupt: 

In [None]:
# Persist the inputs and outputs of this run as one pickled tuple.
# The context manager guarantees the file is flushed and closed even if
# pickling fails; the original left the handle from open() unclosed.
with open("results/CGM-GMM-rho-unequal-greed3-stable.p", "wb") as results_file:
    pickle.dump(
        (gmm, clusters, reference_dataset, cand_datasets, party_datasets, greeds, rewards, deltas, mus),
        results_file,
    )

In [None]:
# For each entry of rewards, look up the classes of its points against the
# first candidate draw (gmm[0], clusters[0]) and compute the class proportions.
class_props = [
    class_proportion(get_classes(np.array(result), gmm[0], clusters[0]), num_clusters)
    for result in rewards
]

In [None]:
# Display the list of class proportions computed from rewards.
class_props

In [None]:
# For every party, append its reward points to its own data and print the first
# element returned by mmd_neg_biased against the reference dataset.
for i in range(num_parties):
    augmented = np.concatenate([party_datasets[i], np.array(rewards[i])], axis=0)
    print(mmd_neg_biased(augmented, reference_dataset, kernel)[0])

In [None]:
# Display r (q applied to each entry of alpha) for comparison with the values printed above.
r