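$$ \phi_i = C \sum_{S \subseteq D \setminus \{i\}} \frac{1}{\binom{n-1}{|S|}} \left[ V(S \cup \{i\}) - V(S) \right] $$

where $D$ is the training set, $n = |D|$, $V(S)$ is the score of the model trained on the subset $S$, and $C$ is a normalization constant ($C = 1/n$ recovers the classical Shapley value).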

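$$ \phi_i = \mathbb{E}_{\pi \sim \Pi} \left[ V(S^i_{\pi} \cup \{i\}) - V(S^i_{\pi}) \right] $$

where $\Pi$ is the uniform distribution over permutations of the training set and $S^i_{\pi}$ is the set of samples that precede $i$ in the permutation $\pi$.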

1. n = number of training samples
1. values = -np.inf * np.ones(n)
1. scores = [[] for _ in range(n)]
1. For i in 1..n:
    1. iteration = 0
    1. while iteration < max_iterations:
        1. Draw an n-permutation
        1. model.fit(samples(permutation(:index(i))))
        1. score_without = model.score(test set)
        1. model.fit(samples(permutation(:index(i)+1)))
        1. score_with = model.score(test set)
        1. old_moving_average = mean(scores(i)) if scores(i) is not empty else -inf
        1. scores(i).push(score_with - score_without)
        1. new_moving_average = mean(scores(i))
        1. if abs(new_moving_average - old_moving_average) < eps then break
        1. old_moving_average = new_moving_average 
        1. iteration += 1
    1. values(i) = mean(scores(i))

In [None]:
%load_ext autoreload
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import OrderedDict
from functools import partial
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Load the Boston housing data and split it into train/test partitions,
# wrapping every partition in a DataFrame with named columns.
# NOTE(review): `load_boston` is deprecated in recent scikit-learn releases —
# confirm the pinned version still provides it.
data = datasets.load_boston()
splits = train_test_split(data.data, data.target)

# train_test_split returns [x_train, x_test, y_train, y_test] in that order.
x_train, x_test = (
    pd.DataFrame(part, columns=data.feature_names) for part in splits[:2]
)
y_train, y_test = (
    pd.DataFrame(part, columns=['target']) for part in splits[2:]
)

In [None]:
# Fit a baseline gradient-boosting regressor on the whole training split and
# predict the targets of the test split.
model = GradientBoostingRegressor()
# y_train is a single-column DataFrame; the estimator expects a 1-D target and
# warns (DataConversionWarning) when handed a column vector, so flatten it.
model.fit(x_train, y_train.values.ravel())
predictions = model.predict(x_test)

In [None]:
# Scatter the true test targets against the model's predictions; the dashed
# black diagonal marks the points where predicted == true.
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(y_test, predictions)
ax.plot([0, 50], [0, 50], '--k')
ax.set_xlabel('True')
ax.set_ylabel('Predicted');

In [None]:
from valuation.shapley.data import naive_montecarlo_shapley, montecarlo_shapley
from valuation.parallel import run_and_gather, compute_fb_scores
from valuation.reporting.plots import shapley_results

from sklearn.metrics import mean_absolute_error
from joblib import Parallel, delayed
from tqdm import tqdm

In [None]:
# Bind the model, its scoring method and the data splits to the naive
# Monte Carlo Shapley routine, then hand the partially-applied function to
# run_and_gather together with the training-sample indices.
# NOTE(review): presumably run_and_gather distributes "indices" across
# num_jobs workers and repeats the computation num_runs times — confirm
# against valuation.parallel.
max_iterations = 200
indices = list(range(len(x_train)))
fun = partial(
    naive_montecarlo_shapley,
    model,
    model.score,
    x_train,
    y_train,
    x_test,
    y_test,
    max_iterations=max_iterations,
    tolerance=None,
)
values = run_and_gather(
    fun,
    ("indices", indices),
    num_jobs=160,
    num_runs=10,
)

In [None]:
# Turn the gathered Shapley values into scores using the model and both
# data splits.
# NOTE(review): the exact contents of the returned mapping are defined in
# valuation.parallel.compute_fb_scores — confirm there.
results = compute_fb_scores(
    values,
    model,
    x_train,
    y_train,
    x_test,
    y_test,
)

In [None]:
# Attach the run settings (iteration cap and the score's display name) to
# the results before passing them to the plotting helper.
results.update(max_iterations=max_iterations, score_name="$R^2$")
shapley_results(results)