In [1]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.autograd import Variable

from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense, LeakyReLU, Dropout
import tensorflow.compat.v1 as tf

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.special import comb
import itertools

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from cvxopt import matrix, solvers, spdiag
import copy

# Baselines

## Least Core

In [2]:
def checkConstraint(A, x, e, utility):
    check = np.round(np.matmul(A, x)+e, 5) >= np.round(utility, 5)
    print('Total Constraints: %s, Satisfy: %s, Accuracy: %s' % (len(check), sum(check), sum(check)/len(check)))

def ComputeLC(X_feature, y_feature, u_tot):
    t, N = np.array(X_feature).shape
    X_feature_new = np.hstack((X_feature, np.ones((len(y_feature), 1))))

    c = matrix([0]*N+[1.0], (N+1,1), 'd')
    G = matrix(-X_feature_new, (len(y_feature), N+1), 'd' )
    h = matrix(-np.array(y_feature), (len(y_feature), 1), 'd')
    A = matrix([1.0]*N+[0], (1, N+1), 'd')
    b = matrix([u_tot], (1, 1), 'd')

    solvers.options['show_progress'] = False
    sol = solvers.lp(c, G, h, A, b)
    print(sol['status'])
    # print(sol['x'])

    x = np.array(sol['x']).reshape(len(sol['x']))[:N]
    e = np.array(sol['x']).reshape(len(sol['x']))[-1]

    return x, e

def ComputeLC_Normalize(X_feature, y_feature, u_tot, e):
    t, N = np.array(X_feature).shape

    Q = 2*matrix(np.eye(N), (N, N), 'd')
    p = matrix(np.zeros(N), (N, 1), 'd')

    G = matrix(-np.array(X_feature), (len(y_feature), N), 'd' )
    h = matrix(-np.array(y_feature)+e, (len(y_feature), 1), 'd')
    A = matrix([1.0]*N, (1, N), 'd')
    b = matrix([u_tot], (1, 1), 'd')

    solvers.options['show_progress'] = False
    sol = solvers.qp(Q, p, G, h, A, b)
    print(sol['status'])

    x = np.array(sol['x']).reshape(len(sol['x']))

    return x

## Shapley value

In [3]:
def val_to_dataind(v):
    # 利用二进制来选择所有的子集
    one_hot = np.array([int(x) for x in bin(v)[2:]])[::-1]
    return one_hot.nonzero()[0]

def dataind_to_val(arr):
    val = 0
    for i in arr:
        val += 2**i
    return val

def exact_shapley(utility_array, n_data, target_ind):
    sv = 0
    for v in range(0, 2**n_data):
        ind_set = val_to_dataind(v)
        if target_ind in ind_set:
            pass
        else:
            s = len(ind_set)
            weight = 1/n_data * (1/comb(n_data-1, s))
            v_sand = dataind_to_val(list(ind_set)+[target_ind])
            sv += weight*(utility_array[v_sand] - utility_array[v])
    return sv

In [4]:
def perm_shapley_exact(utility_array, n_data, target_ind):
    marginal_value = []
    for perm in itertools.permutations(range(n_data)):
        i = perm.index(target_ind)
        team_without = list(perm[:i])
        without_score = utility_array[int(dataind_to_val(team_without))]

        team_with = list(perm[:i+1])
        with_score = utility_array[int(dataind_to_val(team_with))]
        marginal_value.append(with_score - without_score)
    return np.average(marginal_value)

def perm_shapley_sampling(utility_array, n_data, target_ind, n_sample):
    marginal_value = []
    n_sample = int(n_sample / 2)

    X_feature, y_feature = [], []

    for _ in range(n_sample):
        perm = np.random.permutation(range(n_data))

        i = (perm==target_ind).nonzero()[0][0]

        team_without = list(perm[:i])
        v_without = int(dataind_to_val(team_without))
        without_score = utility_array[v_without]
        y_feature.append(without_score)
        ind_bin = -np.ones(n_data)
        ind_bin[val_to_dataind(v_without)] = 1
        X_feature.append(ind_bin)

        team_with = list(perm[:i+1])
        v_with = int(dataind_to_val(team_with))
        with_score = utility_array[v_with]
        y_feature.append(with_score)
        ind_bin = -np.ones(n_data)
        ind_bin[val_to_dataind(v_with)] = 1
        X_feature.append(ind_bin)

        marginal_value.append(with_score - without_score)
    return np.average(marginal_value), X_feature, y_feature

In [5]:
def grouptest_shapley_sampling(utility_array, n_data, n_sample):
    N = n_data
    Z = 2*np.sum([1/i for i in range(1, N)])
    q = [1/Z * (1/k + 1/(N-k)) for k in range(1, N)]
    u_tot = utility_array[-1]

    T = int(n_sample / 2)

    A = np.zeros((T, n_data))
    B = np.zeros(T)

    X_feature, y_feature = [], []

    for t in range(T):
        # Randomly sample size from 1,...,N-1
        num_sample_users = np.random.choice(np.arange(1, N), p=q)

        # Uniformly sample kt data points from N data points
        sampled_data_ind = np.random.choice(np.arange(N), num_sample_users, replace=False)

        A[t, sampled_data_ind] = 1
        v = int(dataind_to_val(sampled_data_ind))
        B[t] = utility_array[v]

        y_feature.append(utility_array[v])
        ind_bin = -np.ones(n_data)
        ind_bin[val_to_dataind(v)] = 1
        X_feature.append(ind_bin)

    C = {}
    for i in range(N):
        for j in range(i+1, N):
            C[(i,j)] = Z/T*(B.dot(A[:,i] - A[:,j]))

    sv_last, x, y = perm_shapley_sampling(utility_array, n_data, n_data-1, T)
    X_feature += x
    y_feature += y

    sv_approx = np.zeros(N)
    for i in range(N-1):
        sv_approx[i] = C[(i, N-1)] + sv_last
    sv_approx[N-1] = sv_last
  
    return sv_approx, X_feature, y_feature

# Utility Learning

In [6]:
def sample_utility(n, size_min, size_max, x_train, y_train, utility_func, random_state, verbose=False):

    X_feature_test = []
    y_feature_test = []

    x_train = np.array(x_train)
    y_train = np.array(y_train)

    N = len(y_train)

    np.random.seed(random_state)
    for i in range(n):
        if verbose:
            print('{} / {}'.format(i, n))
        n_select = np.random.choice(range(size_min, size_max))
        subset_index = np.random.choice(range(N), n_select, replace=False)

        y_subset = y_train[subset_index]

        if np.count_nonzero(y_subset == y_subset[0]) == len(y_subset):
            y_feature_test.append(np.sum(y_train) / N)
        else:
            y_feature_test.append(utility_func(
                x_train[subset_index], y_train[subset_index]))

        temp = np.zeros(N)
        temp[subset_index] = 1
        X_feature_test.append(temp)

    return X_feature_test, y_feature_test


def sample_learned_utility(n, size_min, size_max, x_train, y_train, model, random_state):
    X_feature_test = []
    y_feature_test = []

    x_train = np.array(x_train)
    y_train = np.array(y_train)

    N = len(y_train)

    np.random.seed(random_state)
    for _ in range(n):
        n_select = np.random.choice(range(size_min, size_max))
        subset_index = np.random.choice(range(N), n_select, replace=False)

        y_subset = y_train[subset_index]

        temp = np.zeros(N)
        temp[subset_index] = 1
        X_feature_test.append(temp)

        if np.count_nonzero(y_subset == y_subset[0]) == len(y_subset):
            y_feature_test.append(np.sum(y_train) / N)
        else:
            y_feature_test.append(model.predict(
                np.array(temp.reshape(1, temp.size, 1)))[0][0])

    return np.array(X_feature_test), np.array(y_feature_test)

In [7]:
class UtilityModel:

    def __init__(self, dataset_name, load_name=None):
        if dataset_name == 'Synthetic':
            model = tf.keras.models.Sequential()
            model.add(Flatten(input_shape=(200, 1)))
            model.add(Dense(512, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(256, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(32, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(1, activation=tf.nn.sigmoid))
            model.compile(optimizer='adam', loss='mean_squared_error')
            if load_name:
                model.load_weights(load_name)

        elif dataset_name == 'UCI_Congress':
            model = tf.keras.models.Sequential()
            model.add(Flatten(input_shape=(N, 1)))
            model.add(Dense(2*N, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(256, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(128, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(32, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(1, activation=tf.nn.sigmoid))
            model.compile(optimizer='adam', loss='mean_absolute_error')

        elif dataset_name == 'mini':
            model = tf.keras.models.Sequential()
            model.add(Flatten(input_shape=(10, 1)))
            model.add(Dense(200, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(50, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.5))
            model.add(Dense(1, activation=tf.nn.sigmoid))
            if load_name:
                model.load_weights(load_name)

        elif dataset_name == 'mini-iris':
            model = tf.keras.models.Sequential()
            model.add(Flatten(input_shape=(15, 1)))
            model.add(Dense(200, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(100, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(50, activation=LeakyReLU(alpha=0.3)))
            model.add(Dropout(0.4))
            model.add(Dense(1, activation=tf.nn.sigmoid))
            if load_name:
                model.load_weights(load_name)

        elif dataset_name == 'mini_overfit':
            model = tf.keras.models.Sequential()
            model.add(Flatten(input_shape=(10, 1)))
            model.add(Dense(200, activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(100, activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(50, activation=LeakyReLU(alpha=0.3)))
            model.add(Dense(1, activation=tf.nn.sigmoid))
            if load_name:
                model.load_weights(load_name)

        else:
            print('dataset not support!')
            exit(1)
        self.model = model

    def fit(self, X_feature, y_feature, X_feature_test, y_feature_test,
            lr,
            save_name=None, verbose=0, epoch=100, batch_size=32, callback=None):

        opt = tf.keras.optimizers.Adam(learning_rate=lr)
        self.model.compile(optimizer=opt, loss='mean_squared_error')

        X_feature = np.array(X_feature)
        y_feature = np.array(y_feature)
        X_feature_test = np.array(X_feature_test)
        y_feature_test = np.array(y_feature_test)

        if len(X_feature.shape) == 2:
            t, N = X_feature.shape
            X_feature = X_feature.reshape((t, N, 1))

        if len(X_feature_test.shape) == 2:
            t, N = X_feature_test.shape
            X_feature_test = X_feature_test.reshape((t, N, 1))

        x_train_c, y_train_c, x_test_c, y_test_c = X_feature, y_feature, X_feature_test, y_feature_test
        print('Utility Learning Sample: x_train %s y_train %s' %
              (x_train_c.shape, y_train_c.shape))

        self.model.fit(x_train_c, y_train_c, batch_size=batch_size, epochs=epoch,
                       validation_data=(x_test_c, y_test_c), verbose=verbose, callbacks=callback)
        results = self.model.evaluate(
            x_test_c, y_test_c, batch_size=batch_size, verbose=1)
        print("Test Loss", results)

        if save_name:
            self.model.save_weights(save_name)

    def predict(self, X_feature):
        X_feature = np.array(X_feature)
        if len(X_feature.shape) == 2:
            t, N = X_feature.shape
            X_feature = X_feature.reshape((t, N, 1))
        return self.model.predict(X_feature)

# IRIS

In [8]:
import sklearn
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=42)

n_data = 15  # x_train.shape[0]

In [9]:
def svm_data_to_acc(x_train, y_train, x_val, y_val):
    if len(set(y_train)) == 1:
        return 0.5
    classifier = sklearn.svm.SVC(C=1)
    classifier.fit(x_train, y_train)
    return classifier.score(x_val, y_val)


u_tot = svm_data_to_acc(x_train, y_train, x_test, y_test)
u_tot

0.9407407407407408

15

In [44]:
utility_array = np.zeros(2**n_data)
utility_array[0] = 0.5

for v in range(1, 2**n_data):
    # print(v)
    ind = val_to_dataind(v)
    utility_array[v] = svm_data_to_acc(x_train[ind], y_train[ind], x_test, y_test)

In [16]:
import pickle

seed = 42
(x_train, y_train), (x_test, y_test), utility_array = pickle.load( open('/Users/zhtian/PycharmProjects/DataValuation/iris_Seed{}_Ntrain{}_fix.data'.format(seed, n_data), 'rb') )

### Sample Utilities

In [20]:
X_feature_total = []
y_feature_total = np.zeros(2**n_data)

for v in range(2**n_data):
    ind_bin = -np.ones(n_data)
    ind = val_to_dataind(v)
    ind_bin[ind] = 1
    X_feature_total.append(ind_bin)
    y_feature_total[v] = utility_array[v]

## Perm SV

In [21]:
true_sv = np.array([exact_shapley(utility_array, n_data, i) for i in range(n_data)])

In [24]:
n_samples = 1500

X_feature, y_feature = [], []
perm_sv = np.zeros(n_data)

for i in range(n_data):
    sv, x, y = perm_shapley_sampling(utility_array, n_data, 0, int(n_samples/n_data))
    perm_sv[i] = sv
    X_feature += x
    y_feature += y

### PermSV+DUL

In [31]:
model = UtilityModel('mini-iris', load_name=None)

ind = np.random.choice(len(y_feature_total), size=2000, replace=False)

X_feature_test, y_feature_test = np.array(
    X_feature_total)[ind], np.array(y_feature_total)[ind]

model.fit(X_feature, y_feature, X_feature_test, y_feature_test,
          lr=1e-3, save_name=None, verbose=1, epoch=600, batch_size=32)

Utility Learning Sample: x_train (1500, 15, 1) y_train (1500,)
Train on 1500 samples, validate on 2000 samples
Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
E

Epoch 76/800
Epoch 77/800
Epoch 78/800
Epoch 79/800
Epoch 80/800
Epoch 81/800
Epoch 82/800
Epoch 83/800
Epoch 84/800
Epoch 85/800
Epoch 86/800
Epoch 87/800
Epoch 88/800
Epoch 89/800
Epoch 90/800
Epoch 91/800
Epoch 92/800
Epoch 93/800
Epoch 94/800
Epoch 95/800
Epoch 96/800
Epoch 97/800
Epoch 98/800
Epoch 99/800
Epoch 100/800
Epoch 101/800
Epoch 102/800
Epoch 103/800
Epoch 104/800
Epoch 105/800
Epoch 106/800
Epoch 107/800
Epoch 108/800
Epoch 109/800
Epoch 110/800
Epoch 111/800
Epoch 112/800
Epoch 113/800
Epoch 114/800
Epoch 115/800
Epoch 116/800
Epoch 117/800
Epoch 118/800
Epoch 119/800
Epoch 120/800
Epoch 121/800
Epoch 122/800
Epoch 123/800
Epoch 124/800
Epoch 125/800
Epoch 126/800
Epoch 127/800
Epoch 128/800
Epoch 129/800
Epoch 130/800
Epoch 131/800
Epoch 132/800
Epoch 133/800
Epoch 134/800
Epoch 135/800
Epoch 136/800
Epoch 137/800
Epoch 138/800
Epoch 139/800
Epoch 140/800
Epoch 141/800
Epoch 142/800
Epoch 143/800
Epoch 144/800
Epoch 145/800
Epoch 146/800
Epoch 147/800
Epoch 148/800
Ep

Epoch 227/800
Epoch 228/800
Epoch 229/800
Epoch 230/800
Epoch 231/800
Epoch 232/800
Epoch 233/800
Epoch 234/800
Epoch 235/800
Epoch 236/800
Epoch 237/800
Epoch 238/800
Epoch 239/800
Epoch 240/800
Epoch 241/800
Epoch 242/800
Epoch 243/800
Epoch 244/800
Epoch 245/800
Epoch 246/800
Epoch 247/800
Epoch 248/800
Epoch 249/800
Epoch 250/800
Epoch 251/800
Epoch 252/800
Epoch 253/800
Epoch 254/800
Epoch 255/800
Epoch 256/800
Epoch 257/800
Epoch 258/800
Epoch 259/800
Epoch 260/800
Epoch 261/800
Epoch 262/800
Epoch 263/800
Epoch 264/800
Epoch 265/800
Epoch 266/800
Epoch 267/800
Epoch 268/800
Epoch 269/800
Epoch 270/800
Epoch 271/800
Epoch 272/800
Epoch 273/800
Epoch 274/800
Epoch 275/800
Epoch 276/800
Epoch 277/800
Epoch 278/800
Epoch 279/800
Epoch 280/800
Epoch 281/800
Epoch 282/800
Epoch 283/800
Epoch 284/800
Epoch 285/800
Epoch 286/800
Epoch 287/800
Epoch 288/800
Epoch 289/800
Epoch 290/800
Epoch 291/800
Epoch 292/800
Epoch 293/800
Epoch 294/800
Epoch 295/800
Epoch 296/800
Epoch 297/800
Epoch 

Epoch 377/800
Epoch 378/800
Epoch 379/800
Epoch 380/800
Epoch 381/800
Epoch 382/800
Epoch 383/800
Epoch 384/800
Epoch 385/800
Epoch 386/800
Epoch 387/800
Epoch 388/800
Epoch 389/800
Epoch 390/800
Epoch 391/800
Epoch 392/800
Epoch 393/800
Epoch 394/800
Epoch 395/800
Epoch 396/800
Epoch 397/800
Epoch 398/800
Epoch 399/800
Epoch 400/800
Epoch 401/800
Epoch 402/800
Epoch 403/800
Epoch 404/800
Epoch 405/800
Epoch 406/800
Epoch 407/800
Epoch 408/800
Epoch 409/800
Epoch 410/800
Epoch 411/800
Epoch 412/800
Epoch 413/800
Epoch 414/800
Epoch 415/800
Epoch 416/800
Epoch 417/800
Epoch 418/800
Epoch 419/800
Epoch 420/800
Epoch 421/800
Epoch 422/800
Epoch 423/800
Epoch 424/800
Epoch 425/800
Epoch 426/800
Epoch 427/800
Epoch 428/800
Epoch 429/800
Epoch 430/800
Epoch 431/800
Epoch 432/800
Epoch 433/800
Epoch 434/800
Epoch 435/800
Epoch 436/800
Epoch 437/800
Epoch 438/800
Epoch 439/800
Epoch 440/800
Epoch 441/800
Epoch 442/800
Epoch 443/800
Epoch 444/800
Epoch 445/800
Epoch 446/800
Epoch 447/800
Epoch 

Epoch 527/800
Epoch 528/800
Epoch 529/800
Epoch 530/800
Epoch 531/800
Epoch 532/800
Epoch 533/800
Epoch 534/800
Epoch 535/800
Epoch 536/800
Epoch 537/800
Epoch 538/800
Epoch 539/800
Epoch 540/800
Epoch 541/800
Epoch 542/800
Epoch 543/800
Epoch 544/800
Epoch 545/800
Epoch 546/800
Epoch 547/800
Epoch 548/800
Epoch 549/800
Epoch 550/800
Epoch 551/800
Epoch 552/800
Epoch 553/800
Epoch 554/800
Epoch 555/800
Epoch 556/800
Epoch 557/800
Epoch 558/800
Epoch 559/800
Epoch 560/800
Epoch 561/800
Epoch 562/800
Epoch 563/800
Epoch 564/800
Epoch 565/800
Epoch 566/800
Epoch 567/800
Epoch 568/800
Epoch 569/800
Epoch 570/800
Epoch 571/800
Epoch 572/800
Epoch 573/800
Epoch 574/800
Epoch 575/800
Epoch 576/800
Epoch 577/800
Epoch 578/800
Epoch 579/800
Epoch 580/800
Epoch 581/800
Epoch 582/800
Epoch 583/800
Epoch 584/800
Epoch 585/800
Epoch 586/800
Epoch 587/800
Epoch 588/800
Epoch 589/800
Epoch 590/800
Epoch 591/800
Epoch 592/800
Epoch 593/800
Epoch 594/800
Epoch 595/800
Epoch 596/800
Epoch 597/800
Epoch 

Epoch 677/800
Epoch 678/800
Epoch 679/800
Epoch 680/800
Epoch 681/800
Epoch 682/800
Epoch 683/800
Epoch 684/800
Epoch 685/800
Epoch 686/800
Epoch 687/800
Epoch 688/800
Epoch 689/800
Epoch 690/800
Epoch 691/800
Epoch 692/800
Epoch 693/800
Epoch 694/800
Epoch 695/800
Epoch 696/800
Epoch 697/800
Epoch 698/800
Epoch 699/800
Epoch 700/800
Epoch 701/800
Epoch 702/800
Epoch 703/800
Epoch 704/800
Epoch 705/800
Epoch 706/800
Epoch 707/800
Epoch 708/800
Epoch 709/800
Epoch 710/800
Epoch 711/800
Epoch 712/800
Epoch 713/800
Epoch 714/800
Epoch 715/800
Epoch 716/800
Epoch 717/800
Epoch 718/800
Epoch 719/800
Epoch 720/800
Epoch 721/800
Epoch 722/800
Epoch 723/800
Epoch 724/800
Epoch 725/800
Epoch 726/800
Epoch 727/800
Epoch 728/800
Epoch 729/800
Epoch 730/800
Epoch 731/800
Epoch 732/800
Epoch 733/800
Epoch 734/800
Epoch 735/800
Epoch 736/800
Epoch 737/800
Epoch 738/800
Epoch 739/800
Epoch 740/800
Epoch 741/800
Epoch 742/800
Epoch 743/800
Epoch 744/800
Epoch 745/800
Epoch 746/800
Epoch 747/800
Epoch 

Test Loss 0.003623871022835374


### Error Simulation

In [32]:
sampled_ind = [ int(dataind_to_val(((arr+1)/2).nonzero()[0])) for arr in X_feature ]

learned_utility_array = model.predict(X_feature_total).reshape(-1)
learned_utility_array[0:n_data+1] = utility_array[0:n_data+1]
learned_utility_array[-1] = utility_array[-1]

for v in sampled_ind:
    learned_utility_array[v] = utility_array[v]

In [41]:
dnn_sv = np.array([exact_shapley(learned_utility_array, n_data, i) for i in range(n_data)])

In [30]:
ord = 1
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('Perm, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

ord = 2
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('Perm, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

ord = np.inf
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('Perm, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

DeepSet, L1 Error: 0.071
Perm, L1 Error: 0.499
DeepSet, L2 Error: 0.022
Perm, L2 Error: 0.249
DeepSet, Linf Error: 0.011
Perm, Linf Error: 0.18


## GroupSV

In [32]:
n_samples = 1500

group_sv, X_feature, y_feature = grouptest_shapley_sampling(utility_array, n_data, n_samples)

### GT+DUL

In [35]:
model = UtilityModel('mini-iris', load_name=None)

model.fit(X_feature, y_feature, X_feature_test, y_feature_test, lr=1e-3, save_name=None, verbose=1, epoch=800, batch_size=32)

Utility Learning Sample: x_train (1500, 15, 1) y_train (1500,)
Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/8

### Error Simulation

In [36]:
sampled_ind = [ int(dataind_to_val(((arr+1)/2).nonzero()[0])) for arr in X_feature ]

learned_utility_array = model.predict(X_feature_total).reshape(-1)
learned_utility_array[0:n_data+1] = utility_array[0:n_data+1]
learned_utility_array[-1] = utility_array[-1]

for v in sampled_ind:
    learned_utility_array[v] = utility_array[v]

In [37]:
dnn_sv = np.array([exact_shapley(learned_utility_array, n_data, i) for i in range(n_data)])

In [38]:
ord = 1
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('GT, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

ord = 2
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('GT, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

ord = np.inf
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-dnn_sv, ord=ord), 3))
print('GT, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-perm_sv, ord=ord), 3))
# print('CGA, L{} Error:'.format(ord), np.round(np.linalg.norm(true_sv-cga_sv, ord=ord), 3))

DeepSet, L1 Error: 0.066
GT, L1 Error: 0.499
DeepSet, L2 Error: 0.023
GT, L2 Error: 0.249
DeepSet, Linf Error: 0.011
GT, Linf Error: 0.18


## Least Core

In [39]:
n_samples = 1500

X_feature = []
y_feature = np.zeros(n_samples)

for i in range(n_samples):
    ind_bin = -np.ones(n_data)
    v = np.random.choice(range(1, 2**n_data))
    ind = val_to_dataind(v)
    ind_bin[ind] = 1
    X_feature.append(ind_bin)
    y_feature[i] = utility_array[v]

In [41]:
model = UtilityModel('mini-iris', load_name=None)

model.fit(X_feature, y_feature, X_feature_test, y_feature_test, lr=1e-3, save_name=None, verbose=1, epoch=800, batch_size=32)

Utility Learning Sample: x_train (1500, 15, 1) y_train (1500,)
Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/8

In [42]:
sampled_ind = [int(dataind_to_val(((arr+1)/2).nonzero()[0]))
               for arr in X_feature]

learned_utility_array = model.predict(X_feature_total).reshape(-1)
learned_utility_array[0:n_data+1] = utility_array[0:n_data+1]
learned_utility_array[-1] = utility_array[-1]

for v in sampled_ind:
    learned_utility_array[v] = utility_array[v]

In [46]:
u_tot = utility_array[-1]
true_lc, true_e = ComputeLC(X_feature_total, y_feature_total, u_tot)
true_lc = ComputeLC_Normalize(X_feature_total, y_feature_total, u_tot, true_e)

dnn_lc, dnn_e = ComputeLC(X_feature_total, np.float64(learned_utility_array), u_tot)
dnn_lc = ComputeLC_Normalize(X_feature_total, np.float64(learned_utility_array), u_tot, dnn_e)
checkConstraint(X_feature_total, dnn_lc, true_e, y_feature_total)

sample_lc, sample_e = ComputeLC(X_feature, y_feature, u_tot)
sample_lc = ComputeLC_Normalize(X_feature, y_feature, u_tot, sample_e)
checkConstraint(X_feature_total, sample_lc, true_e, y_feature_total)

optimal
unknown
optimal
unknown
Total Constraints: 32768, Satisfy: 32718, Accuracy: 0.99847412109375
optimal
unknown
Total Constraints: 32768, Satisfy: 32738, Accuracy: 0.99908447265625


In [47]:
ord = 1
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-dnn_lc, ord=ord), 3))
print('MC, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-sample_lc, ord=ord), 3))

ord = 2
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-dnn_lc, ord=ord), 3))
print('MC, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-sample_lc, ord=ord), 3))

ord = np.inf
print('DeepSet, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-dnn_lc, ord=ord), 3))
print('MC, L{} Error:'.format(ord), np.round(np.linalg.norm(true_lc-sample_lc, ord=ord), 3))

DeepSet, L1 Error: 0.17
MC, L1 Error: 0.48
DeepSet, L2 Error: 0.061
MC, L2 Error: 0.163
DeepSet, Linf Error: 0.042
MC, Linf Error: 0.082
