$$ \phi_i = C \sum_{S \subset D \backslash \{i\}} \frac{1}{{n-1} \choose {|S|}} [V(S \cup \{i\})-V(S)]$$

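$$ \phi_i = \mathbb{E}_{\pi \sim \Pi} [V(S^i_{\pi} \cup {\{i\}})-V(S^i_{\pi})]$$

where $\Pi$ is the uniform distribution over permutations of the training samples and $S^i_{\pi}$ is the set of samples that precede $i$ in the permutation $\pi$.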

1. n = number of training samples
1. values = -np.inf * np.ones(n)
1. scores = [[] for _ in range(n)]
1. For i in (1,n):
    1. iteration = 0
    1. while iteration < max_iterations:
        1. Draw an n-permutation
        1. model.fit(samples(permutation(:index(i))))
        1. score_without = model.score(test set)
        1. model.fit(samples(permutation(:index(i)+1)))
        1. score_with = model.score(test set)
        1. old_moving_average = mean(scores(i))
        1. scores(i).push(score_with - score_without)
        1. new_moving_average = mean(scores(i))
        1. if abs(new_moving_average - old_moving_average) < eps then break
        1. old_moving_average = new_moving_average 
        1. iteration += 1
    1. values(i) = mean(scores(i))

In [None]:
%load_ext autoreload
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import OrderedDict
from functools import partial
from sklearn import datasets
from sklearn.ensemble import GradientBoostingRegressor
from valuation.utils import Dataset

In [None]:
# Wrap the Boston housing data in the project's Dataset container, fit a
# baseline gradient-boosting regressor on the training part and keep its
# predictions for the test part.
# NOTE(review): datasets.load_boston is no longer available in recent
# scikit-learn releases -- confirm the pinned version before re-running.
data = Dataset.from_sklearn(datasets.load_boston())
# fit() hands back the fitted estimator, so `model` is the same object that
# was trained.
model = GradientBoostingRegressor().fit(data.x_train, data.y_train)
predictions = model.predict(data.x_test)

In [None]:
# Scatter the model's predictions against the true test targets; points that
# fall on the dashed diagonal are predicted exactly.
plt.figure(figsize=(9, 6))
plt.scatter(data.y_test, predictions)
# Reference line y = x drawn from 0 to 50 ('--k': dashed, black).
plt.plot([0, 50], [0, 50], '--k')
plt.xlabel('True')
# The trailing ';' presumably suppresses the notebook echo of the returned
# Text object -- it has no effect on the figure itself.
plt.ylabel('Predicted');

In [None]:
# IPython magic from the autoreload extension loaded in the first cell;
# run it before importing from `valuation` so that edits made to the package
# on disk are picked up (see the IPython autoreload documentation).
%autoreload

In [None]:
from valuation.shapley import naive_montecarlo_shapley, truncated_montecarlo_shapley
from valuation.utils import run_and_gather, parallel_wrap
from valuation.reporting.scores import compute_fb_scores
from valuation.reporting.plots import shapley_results

# Naive MC Shapley

In [None]:
# Naive Monte Carlo Shapley.
# Bind the model, the data and the sampling settings once; the remaining
# "indices" argument is provided through parallel_wrap (presumably split
# across the jobs -- confirm against valuation.utils).
max_iterations = 200
fun = partial(
    naive_montecarlo_shapley,
    model,
    data,
    max_iterations=max_iterations,
    tolerance=None,
)
wrapped = parallel_wrap(fun, ("indices", data.indices), num_jobs=160)
# Repeat the whole computation 10 times and collect values and history.
values_nmcs, hist_nmcs = run_and_gather(
    wrapped,
    num_runs=10,
    progress_bar=True,
)

In [None]:
# Score the naive Monte Carlo values.
# The splits are passed via the Dataset object: no standalone x_train,
# y_train, x_test or y_test names exist in this notebook, so the former
# six-argument call raised NameError. The three-argument form matches the
# other compute_fb_scores call in this notebook.
# NOTE(review): confirm compute_fb_scores(values, model, data) is the current
# signature in valuation.reporting.scores.
scores_nmcs = compute_fb_scores(values_nmcs, model, data)

In [None]:
# Attach the run settings used for labelling, then draw the report for the
# naive Monte Carlo values.
scores_nmcs.update(max_iterations=max_iterations, score_name="$R^2$")
shapley_results(scores_nmcs)

# Truncated MC Shapley

In [None]:
# Settings forwarded as keyword arguments to truncated_montecarlo_shapley.
# NOTE(review): max_iterations is half the dataset length and therefore a
# float -- confirm that a non-integer iteration budget is accepted.
params = dict(
    bootstrap_iterations=200,
    min_scores=10,
    score_tolerance=0.1,
    min_values=10,
    value_tolerance=1e-2,
    max_iterations=len(data) / 2,
)
# Pre-bind model, data, worker settings and the parameters above so the
# resulting callable can be run repeatedly without further arguments.
fun = partial(
    truncated_montecarlo_shapley,
    model,
    data,
    num_workers=160,
    worker_progress=False,
    **params,
)

In [None]:
# Repeat the truncated Monte Carlo computation 10 times. The outer progress
# bar is switched off because montecarlo_shapley already provides a bar.
values_mcs, hist_mcs = run_and_gather(
    fun,
    num_runs=10,
    progress_bar=False,
)

In [None]:
# Score the truncated Monte Carlo values. This cell previously recomputed
# (and overwrote) scores_nmcs from values_nmcs, leaving scores_mcs -- which
# the reporting cell below reads -- undefined.
scores_mcs = compute_fb_scores(values_mcs, model, data)

In [None]:
# Attach the run settings used for labelling, then draw the report for the
# truncated Monte Carlo values.
scores_mcs.update(max_iterations=params['max_iterations'], score_name="$R^2$")
shapley_results(scores_mcs)