-
Notifications
You must be signed in to change notification settings - Fork 8
/
__init__.py
120 lines (99 loc) · 4.44 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
!!! tip "New in version 0.4.0"
This package holds all routines for the computation of Least Core data values.
Please refer to [Data valuation][computing-data-values] for an overview.
In addition to the standard interface via
[compute_least_core_values()][pydvl.value.least_core.compute_least_core_values], because computing the
Least Core values requires the solution of a linear and a quadratic problem
*after* computing all the utility values, there is the possibility of performing
each step separately. This is useful when running multiple experiments: use
[lc_prepare_problem()][pydvl.value.least_core.naive.lc_prepare_problem] or
[mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem] to prepare a
list of problems to solve, then solve them in parallel with
[lc_solve_problems()][pydvl.value.least_core.common.lc_solve_problems].
Note that [mclc_prepare_problem()][pydvl.value.least_core.montecarlo.mclc_prepare_problem] is
parallelized itself, so preparing the problems should be done in sequence in this
case. The solution of the linear systems can then be done in parallel.
"""
import warnings
from enum import Enum
from typing import Optional
from pydvl.utils.utility import Utility
from pydvl.value.least_core.montecarlo import *
from pydvl.value.least_core.naive import *
from pydvl.value.result import ValuationResult
# Names exported by `from pydvl.value.least_core import *`. Only the umbrella
# function and its mode enum are listed here.
__all__ = ["compute_least_core_values", "LeastCoreMode"]
class LeastCoreMode(Enum):
    """Selector for the Least Core algorithm run by
    [compute_least_core_values()][pydvl.value.least_core.compute_least_core_values].
    """

    # Dispatches to montecarlo_least_core().
    MonteCarlo = "montecarlo"
    # Dispatches to exact_least_core().
    Exact = "exact"
def compute_least_core_values(
    u: Utility,
    *,
    n_jobs: int = 1,
    n_iterations: Optional[int] = None,
    mode: LeastCoreMode = LeastCoreMode.MonteCarlo,
    non_negative_subsidy: bool = False,
    solver_options: Optional[dict] = None,
    **kwargs,
) -> ValuationResult:
    """Umbrella method to compute Least Core values with any of the available
    algorithms.

    See [Data valuation][computing-data-values] for an overview.

    The following algorithms are available. Note that the exact method can only
    work with very small datasets and is thus intended only for testing.

    - `exact`: uses the complete powerset of the training set for the
      constraints. Implemented in
      [exact_least_core()][pydvl.value.least_core.naive.exact_least_core].
    - `montecarlo`: uses the approximate Monte Carlo Least Core algorithm.
      Implemented in
      [montecarlo_least_core()][pydvl.value.least_core.montecarlo.montecarlo_least_core].

    Args:
        u: Utility object with model, data, and scoring function
        n_jobs: Number of jobs to run in parallel. Only used for Monte Carlo
            Least Core.
        n_iterations: Number of subsets to sample and evaluate the utility on.
            Only used for Monte Carlo Least Core, where it is mandatory.
        mode: Algorithm to use. See
            [LeastCoreMode][pydvl.value.least_core.LeastCoreMode] for available
            options.
        non_negative_subsidy: If True, the least core subsidy $e$ is constrained
            to be non-negative.
        solver_options: Optional dictionary of options passed to the solvers.
            The dictionary is never modified by this function.
        kwargs: `progress` (bool, default False) is extracted and forwarded to
            the chosen algorithm, although it is currently forced to False in
            Monte Carlo mode. Any other keyword argument is deprecated: it is
            merged into `solver_options` (taking precedence over existing keys)
            and a `DeprecationWarning` is emitted.

    Returns:
        Object with the computed values.

    Raises:
        ValueError: If `mode` is Monte Carlo and `n_iterations` is None, or if
            `mode` is not a known [LeastCoreMode][pydvl.value.least_core.LeastCoreMode].

    !!! tip "New in version 0.5.0"
    """
    progress: bool = kwargs.pop("progress", False)

    # TODO: remove this before releasing version 0.7.0
    if kwargs:
        warnings.warn(
            DeprecationWarning(
                "Passing solver options as kwargs was deprecated in 0.6.0, will "
                "be removed in 0.7.0. Use `solver_options` instead."
            ),
            # Attribute the warning to the calling code, not to this module.
            stacklevel=2,
        )
        # Merge into a fresh dict: the caller's `solver_options` must not be
        # mutated as a side effect of passing deprecated kwargs.
        solver_options = {**(solver_options or {}), **kwargs}

    if mode == LeastCoreMode.MonteCarlo:
        # TODO fix progress showing and maybe_progress in remote case
        progress = False
        if n_iterations is None:
            raise ValueError("n_iterations cannot be None for Monte Carlo Least Core")
        return montecarlo_least_core(
            u=u,
            n_iterations=n_iterations,
            n_jobs=n_jobs,
            progress=progress,
            non_negative_subsidy=non_negative_subsidy,
            solver_options=solver_options,
            **kwargs,
        )
    elif mode == LeastCoreMode.Exact:
        return exact_least_core(
            u=u,
            progress=progress,
            non_negative_subsidy=non_negative_subsidy,
            solver_options=solver_options,
        )

    raise ValueError(f"Invalid value encountered in {mode=}")