# Extra Part

> Updated at 2021/06/21 15:00

This part contains two experiments: adding one point and adding two points, each using the pivot-based algorithm with the same or different permutations.

*Attention:* below, the pivot-based algorithm with the same permutations is called `pivot_s`, and the one with different permutations is called `pivot_d`.

Three cases are considered in this experiment.

| No. | lsv permutations | rsv permutations | which variance is better |
| - | - | - | - |
| 0 | `pivot_s`  = `pivot_d` | `pivot_s`  = `pivot_d` | `pivot_s` |
| 1 | `pivot_s`  < `pivot_d` | `pivot_s`  = `pivot_d` | `pivot_d` |
| 2 | `pivot_s`  = `pivot_d` | `pivot_s`  < `pivot_d` | `pivot_d` |

## 0 Prepare environment

In [None]:
import dynashap

from sklearn import svm
import numpy as np
from examples.data_utils import (
    load_tabular_data, preprocess_data, variance, normalize, save_npy, load_npy, comp
)

# Show 10 digits of precision when NumPy prints arrays.
np.set_printoptions(precision=10)
# Classifier instance handed to every dynashap call in this notebook.
model = svm.SVC(decision_function_shape='ovo')

# Number of independent pivot_d runs performed at each step of a search loop.
repeat = 2
# Number of those runs whose variance must be below the pivot_s baseline
# for a search loop to stop with success.
success_req = 2

In [None]:
# Get data from parent folders: copy ../data_files into the current working
# directory. This is an IPython shell escape, so it only works inside a notebook.
!cp -r ../data_files .

## 1 Adding a single point

In [None]:
# Load the train/test split; the fifth returned value is not used here.
x_train, y_train, x_test, y_test, _ = preprocess_data(
    'train_100p_3c.csv', 'test_100p_3c.csv'
)

# Choose one test sample that will be added to the training set.
select_idx = 25
add_point_x, add_point_y = x_test[select_idx], y_test[select_idx]

# Training set enlarged by the chosen sample.
x_train_added = np.append(x_train, [add_point_x], axis=0)
y_train_added = np.append(y_train, add_point_y)

# The chosen sample must no longer appear in the test set.
x_test = np.delete(x_test, select_idx, axis=0)
y_test = np.delete(y_test, select_idx)

# Reference values from dynashap.mc_shap on the enlarged training set; they are
# saved so the comparisons below can reload them.
mc = dynashap.mc_shap(
    x_train_added, y_train_added, x_test, y_test, model,
    m=1000 * 100, proc_num=2,
)
save_npy('mc_plus_sv_101.npy', mc)

# Case 0
# Run both variants on one prepared PivotShap object with equal budgets
# (100 * 100 for prepare, and the same value as pivot_d's `m`).
pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
pivot_shap.prepare(100 * 100, proc_num=4)

pivot_sv_s = pivot_shap.add_single_point(
    add_point_x, add_point_y,
    proc_num=4,
    flags={'flag_lsv': False},
    params={'method': 's'},
)
pivot_sv_d = pivot_shap.add_single_point(
    add_point_x, add_point_y,
    m=100 * 100,
    proc_num=4,
    flags={'flag_lsv': False},
    params={'method': 'd'},
)

# Persist both results ...
save_npy('eas_pivot_sv_s_tau100.npy', pivot_sv_s)
save_npy('eas_pivot_sv_d_tau100_100.npy', pivot_sv_d)

# ... and read them straight back, so later code works on the stored copies.
pivot_sv_s = load_npy('eas_pivot_sv_s_tau100.npy')
pivot_sv_d = load_npy('eas_pivot_sv_d_tau100_100.npy')


# Case 1
# Search for the smallest `prepare` budget (current_time * len(y_train)) at
# which pivot_d, with a fixed `add_single_point` budget, reaches a lower
# variance than the pivot_s baseline in at least `success_req` of `repeat` runs.
start_time = 120
step = 10
max_steps = 10  # give up after this many increments of `step`

mc_plus_sv = load_npy('mc_plus_sv_101.npy')
# Normalise the reference once; the baseline and every run below are compared
# against this same vector.
mc_plus_sv = mc_plus_sv / np.sum(mc_plus_sv)
base_var = variance(mc_plus_sv, pivot_sv_s / np.sum(pivot_sv_s))
print(base_var, variance(mc_plus_sv, pivot_sv_d / np.sum(pivot_sv_d)))

for current_time in range(start_time, start_time + (max_steps + 1) * step, step):
    var_list = []
    for _ in range(repeat):
        # A fresh object per run so that no state is carried between repeats.
        pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
        pivot_shap.prepare(current_time * len(y_train), proc_num=2)
        pivot_sv_d = pivot_shap.add_single_point(add_point_x, add_point_y, m=100 * len(y_train),
                                                 proc_num=2, flags={'flag_lsv': False},
                                                 params={'method': 'd'})
        current_var = variance(mc_plus_sv, pivot_sv_d / np.sum(pivot_sv_d))
        print('current var\t', current_var)
        var_list.append(current_var)

    success_cnt = sum(1 for var in var_list if var < base_var)
    succeeded = success_cnt >= success_req

    print('[info] case 1 terminated - success' if succeeded else '[info] case 1 - go on')
    print('current time\t', current_time)
    print('base var\t', base_var)
    print('var list\t', var_list)
    if succeeded:
        break
else:
    # The loop ran out of steps without ever reaching `success_req` successes.
    print('[info] case 1 terminated - step limit reached')

# Case 2
# Search for the smallest `add_single_point` budget (current_time * len(y_train))
# at which pivot_d, with a fixed `prepare` budget, reaches a lower variance than
# the pivot_s baseline in at least `success_req` of `repeat` runs.
start_time = 140
step = 10
max_steps = 10  # give up after this many increments of `step`

mc_plus_sv = load_npy('mc_plus_sv_101.npy')
# Normalise the reference once; the baseline and every run below are compared
# against this same vector.
mc_plus_sv = mc_plus_sv / np.sum(mc_plus_sv)
base_var = variance(mc_plus_sv, pivot_sv_s / np.sum(pivot_sv_s))

for current_time in range(start_time, start_time + (max_steps + 1) * step, step):
    var_list = []
    for _ in range(repeat):
        # A fresh object per run so that no state is carried between repeats.
        pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
        pivot_shap.prepare(100 * len(y_train), proc_num=2)
        pivot_sv_d = pivot_shap.add_single_point(add_point_x, add_point_y, m=current_time * len(y_train),
                                                 proc_num=2, flags={'flag_lsv': False},
                                                 params={'method': 'd'})
        current_var = variance(mc_plus_sv, pivot_sv_d / np.sum(pivot_sv_d))
        var_list.append(current_var)

    success_cnt = sum(1 for var in var_list if var < base_var)
    if success_cnt >= success_req:
        print('[info] case 2 terminated - success')
        print('current time\t', current_time)
        print('base var\t', base_var)
        print('var list\t', var_list)
        break
else:
    # The loop ran out of steps without ever reaching `success_req` successes.
    print('[info] case 2 terminated - step limit reached')

## 2 Adding multiple points

In [None]:
# Load the train/test split; the fifth returned value is not used here.
x_train, y_train, x_test, y_test, _ = preprocess_data(
    'train_80p_2c.csv', 'test_80p_2c.csv'
)

# Keep only the first two feature columns.
x_train, x_test = x_train[:, :2], x_test[:, :2]

# Two test samples (rows 5 and 13) are the points that will be added.
add_point_idxs = [5, 13]
add_points_x, add_points_y = x_test[add_point_idxs], y_test[add_point_idxs]

# Training set enlarged by both samples.
x_train_added = np.append(x_train, add_points_x, axis=0)
y_train_added = np.append(y_train, add_points_y)

# Drop the two samples from the test set; the *_added names and the plain
# names end up referring to the same reduced arrays.
x_test_added = x_test = np.delete(x_test, add_point_idxs, axis=0)
y_test_added = y_test = np.delete(y_test, add_point_idxs)

# Reference values from dynashap.mc_shap on the enlarged training set; they are
# saved so the comparisons below can reload them.
mc = dynashap.mc_shap(
    x_train_added, y_train_added, x_test, y_test, model,
    m=1000 * 80, proc_num=2,
)
save_npy('mc_plus_sv_82.npy', mc)

# Case 0
# pivot_s: add the two points one after the other on the same prepared object
# and keep the value returned by the second call.
# The arguments must come from the two-point arrays add_points_x / add_points_y;
# add_point_x / add_point_y are the single point left over from section 1.
pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
pivot_shap.prepare(100 * 80, proc_num=2)
pivot_shap.add_single_point(add_points_x[0], add_points_y[0], proc_num=2,
                            flags={'flag_lsv': True},
                            params={'method': 's'})
pivot_sv_s = pivot_shap.add_single_point(add_points_x[1], add_points_y[1], proc_num=2,
                                         flags={'flag_lsv': True},
                                         params={'method': 's'})

# pivot_d: same procedure on a fresh object, with an explicit budget `m` for
# each added point.
pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
pivot_shap.prepare(100 * 80, proc_num=2)
pivot_shap.add_single_point(add_points_x[0], add_points_y[0], m=100 * 80,
                            proc_num=2, flags={'flag_lsv': True},
                            params={'method': 'd'})
pivot_sv_d = pivot_shap.add_single_point(add_points_x[1], add_points_y[1], m=100 * 80,
                                         proc_num=2, flags={'flag_lsv': True},
                                         params={'method': 'd'})

# Store both results for later inspection.
save_npy('eam_pivot_sv_s_tau100.npy', pivot_sv_s)
save_npy('eam_pivot_sv_d_tau100_100.npy', pivot_sv_d)

# Case 1
# Search for the smallest `prepare` budget (current_time * len(y_train)) at
# which pivot_d, with a fixed `add_single_point` budget, reaches a lower
# variance than the pivot_s baseline in at least `success_req` of `repeat` runs.
start_time = 110
step = 10
max_steps = 10  # give up after this many increments of `step`

mc_plus_sv = load_npy('mc_plus_sv_82.npy')
# Normalise the reference once; the baseline and every run below are compared
# against this same vector.
mc_plus_sv = mc_plus_sv / np.sum(mc_plus_sv)
base_var = variance(mc_plus_sv, pivot_sv_s / np.sum(pivot_sv_s))

for current_time in range(start_time, start_time + (max_steps + 1) * step, step):
    var_list = []
    for _ in range(repeat):
        # A fresh object per run so that no state is carried between repeats.
        pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
        pivot_shap.prepare(current_time * len(y_train), proc_num=2)
        pivot_shap.add_single_point(add_points_x[0], add_points_y[0], m=100 * 80,
                                    proc_num=2, flags={'flag_lsv': True},
                                    params={'method': 'd'})
        pivot_sv_d = pivot_shap.add_single_point(add_points_x[1], add_points_y[1], m=100 * 80,
                                                 proc_num=2, flags={'flag_lsv': True},
                                                 params={'method': 'd'})
        # Normalise pivot_sv_d exactly as pivot_sv_s was normalised for
        # base_var; otherwise the two variances are not on the same scale.
        current_var = variance(mc_plus_sv, pivot_sv_d / np.sum(pivot_sv_d))
        var_list.append(current_var)

    success_cnt = sum(1 for var in var_list if var < base_var)
    if success_cnt >= success_req:
        print('current time\t', current_time)
        print('base var\t', base_var)
        print('var list\t', var_list)
        break
else:
    # The loop ran out of steps without ever reaching `success_req` successes.
    print('[info] case 1 terminated - step limit reached')

# Case 2
# Search for the smallest `add_single_point` budget (current_time * 80) at
# which pivot_d, with a fixed `prepare` budget, reaches a lower variance than
# the pivot_s baseline in at least `success_req` of `repeat` runs.
start_time = 110
step = 10
max_steps = 10  # give up after this many increments of `step`

mc_plus_sv = load_npy('mc_plus_sv_82.npy')
# Normalise the reference once; the baseline and every run below are compared
# against this same vector.
mc_plus_sv = mc_plus_sv / np.sum(mc_plus_sv)
base_var = variance(mc_plus_sv, pivot_sv_s / np.sum(pivot_sv_s))

for current_time in range(start_time, start_time + (max_steps + 1) * step, step):
    var_list = []
    for _ in range(repeat):
        # A fresh object per run so that no state is carried between repeats.
        pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
        pivot_shap.prepare(100 * len(y_train), proc_num=2)
        pivot_shap.add_single_point(add_points_x[0], add_points_y[0], m=current_time * 80,
                                    proc_num=2, flags={'flag_lsv': True},
                                    params={'method': 'd'})
        pivot_sv_d = pivot_shap.add_single_point(add_points_x[1], add_points_y[1], m=current_time * 80,
                                                 proc_num=2, flags={'flag_lsv': True},
                                                 params={'method': 'd'})
        # The reference is the normalised mc_plus_sv vector. base_var is the
        # baseline variance itself and must only be used in the comparison.
        current_var = variance(mc_plus_sv, pivot_sv_d / np.sum(pivot_sv_d))
        var_list.append(current_var)

    success_cnt = sum(1 for var in var_list if var < base_var)
    if success_cnt >= success_req:
        print('current time\t', current_time)
        print('base var\t', base_var)
        print('var list\t', var_list)
        break
else:
    # The loop ran out of steps without ever reaching `success_req` successes.
    print('[info] case 2 terminated - step limit reached')

In [None]:

# Rebuild the two-point data set from the CSV files; the fifth returned value
# is not used here.
x_train, y_train, x_test, y_test, _ = preprocess_data(
    'train_80p_2c.csv', 'test_80p_2c.csv'
)

# Keep only the first two feature columns.
x_train, x_test = x_train[:, :2], x_test[:, :2]

# Two test samples (rows 5 and 13) are the points that will be added.
add_point_idxs = [5, 13]
add_points_x, add_points_y = x_test[add_point_idxs], y_test[add_point_idxs]

# Training set enlarged by both samples.
x_train_added = np.append(x_train, add_points_x, axis=0)
y_train_added = np.append(y_train, add_points_y)

# Drop the two samples from the test set; the *_added names and the plain
# names end up referring to the same reduced arrays.
x_test_added = x_test = np.delete(x_test, add_point_idxs, axis=0)
y_test_added = y_test = np.delete(y_test, add_point_idxs)
#
# mc = dynashap.mc_shap(x_train_added, y_train_added, x_test, y_test, model, m=1000 * 80, proc_num=2)
# save_npy('mc_plus_sv_82.npy', mc)

# Case 0
# pivot_s: add the two points in sequence on one prepared object; only the
# value returned by the second call is kept.
pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
pivot_shap.prepare(100 * 80, proc_num=2)
pivot_shap.add_single_point(
    add_points_x[0], add_points_y[0],
    proc_num=2,
    flags={'flag_lsv': True},
    params={'method': 's'},
)
pivot_sv_s = pivot_shap.add_single_point(
    add_points_x[1], add_points_y[1],
    proc_num=2,
    flags={'flag_lsv': True},
    params={'method': 's'},
)

# pivot_d: same sequence on a fresh object, with an explicit budget `m` for
# each added point.
pivot_shap = dynashap.PivotShap(x_train, y_train, x_test, y_test, model, None)
pivot_shap.prepare(100 * 80, proc_num=2)
pivot_shap.add_single_point(
    add_points_x[0], add_points_y[0],
    m=100 * 80,
    proc_num=2,
    flags={'flag_lsv': True},
    params={'method': 'd'},
)
pivot_sv_d = pivot_shap.add_single_point(
    add_points_x[1], add_points_y[1],
    m=100 * 80,
    proc_num=2,
    flags={'flag_lsv': True},
    params={'method': 'd'},
)

# Store both results.
save_npy('eam_pivot_sv_s_tau100.npy', pivot_sv_s)
save_npy('eam_pivot_sv_d_tau100_100.npy', pivot_sv_d)

# Case 1
# Only the baseline is computed in this cell; the search parameters are set
# but no search loop follows.
start_time, step = 110, 10
current_time = 110

# Reload the saved reference values and scale them to sum to 1.
mc_plus_sv = load_npy('mc_plus_sv_82.npy')
mc_plus_sv = mc_plus_sv / np.sum(mc_plus_sv)

# Variance of the normalised pivot_s result against the normalised reference.
base_var = variance(
    mc_plus_sv,
    pivot_sv_s / np.sum(pivot_sv_s),
)
print(base_var)

In [None]:

# Reload the values stored in 'mc_plus_sv_82.npy'; unlike the earlier cells,
# no normalisation is applied afterwards.
mc_plus_sv = load_npy('mc_plus_sv_82.npy')

In [None]:
# Bare expression: in a notebook this shows the current value as the cell output.
mc_plus_sv