Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove caches due to excessive memory use #317

Merged
merged 2 commits on
Jul 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions aif360/detectors/mdss/MDSS.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ def scan(self, coordinates: pd.DataFrame, expectations: pd.Series, outcomes: pd.
"""
np.random.seed(seed)

# Reset indexes
coordinates = coordinates.reset_index(drop = True)
expectations = expectations.reset_index(drop = True)
outcomes = outcomes.reset_index(drop = True)

assert len(coordinates) == len(expectations) == len(outcomes), \
f'Lengths of coordinates, expectations, and outcomes should be equal.'

# Check that the appropriate scoring function is used

if isinstance(self.scoring_function, BerkJones):
Expand Down
25 changes: 0 additions & 25 deletions aif360/detectors/mdss/ScoringFunctions/BerkJones.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), penalty, q, alpha])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

if q < alpha:
q = alpha

Expand All @@ -57,7 +51,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
)
if q == 1:
ans = observed_sum * np.log(q / alpha) - penalty
self.score_cache[key] = ans
return ans

a = observed_sum * np.log(q / alpha)
Expand All @@ -68,7 +61,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
- penalty
)

self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
Expand All @@ -81,24 +73,15 @@ def qmle(self, observed_sum: float, expectations: np.array):
:return: q MLE
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), alpha])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

if len(expectations) == 0:
self.qmle_cache[key] = 0
return 0
else:
q = observed_sum / len(expectations)

if (q < alpha):
self.qmle_cache[key] = alpha
return alpha

self.qmle_cache[key] = q
return q

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -110,13 +93,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
:param penalty: penalty coefficient
"""
alpha = self.alpha

key = tuple([observed_sum, len(expectations), penalty, alpha])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

q_mle = self.qmle(observed_sum, expectations)

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
Expand All @@ -134,5 +110,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
q_max = 0

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans
32 changes: 1 addition & 31 deletions aif360/detectors/mdss/ScoringFunctions/Bernoulli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,7 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
% (observed_sum, len(expectations), penalty, q)
)

key = tuple([observed_sum, expectations.tostring(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

ans = observed_sum * np.log(q) - np.log(1 - expectations + q * expectations).sum() - penalty
self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
Expand All @@ -50,16 +43,8 @@ def qmle(self, observed_sum: float, expectations: np.array):
:param observed_sum: sum of observed binary outcomes for all i
:param expectations: predicted outcomes for each data element i
"""
direction = self.direction

key = tuple([observed_sum, expectations.tostring()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

direction = self.direction
ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction)
self.qmle_cache[key] = ans
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -71,13 +56,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
:param penalty: penalty coefficient
"""
direction = self.direction

key = tuple([observed_sum, expectations.tostring(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

q_mle = self.qmle(observed_sum, expectations)

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
Expand All @@ -95,7 +73,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans

def q_dscore(self, observed_sum:float, expectations:np.array, q:float):
Expand All @@ -110,12 +87,5 @@ def q_dscore(self, observed_sum:float, expectations:np.array, q:float):
:param q: current value of q
:return: q dscore/dq
"""
key = tuple([observed_sum, expectations.tostring(), q])
ans = self.qdscore_cache.get(key)
if ans is not None:
self.cache_counter['qdscore'] += 1
return ans

ans = observed_sum - (q * expectations / (1 - expectations + q * expectations)).sum()
self.qdscore_cache[key] = ans
return ans
24 changes: 1 addition & 23 deletions aif360/detectors/mdss/ScoringFunctions/Gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ def score(
:return: bias score for the current value of q
"""

key = tuple([observed_sum, expectations.sum(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter["score"] += 1
return ans

assumed_var = self.var
expected_sum = expectations.sum()
penalty /= self.var
Expand All @@ -56,20 +50,13 @@ def score(
ans = 0

ans -= penalty
self.score_cache[key] = ans

return ans

def qmle(self, observed_sum: float, expectations: np.array):
"""
Computes the q which maximizes score (q_mle).
"""
key = tuple([observed_sum, expectations.sum()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter["qmle"] += 1
return ans

expected_sum = expectations.sum()

# Deals with case where observed_sum = expected_sum = 0
Expand All @@ -78,8 +65,7 @@ def qmle(self, observed_sum: float, expectations: np.array):
else:
ans = observed_sum / expected_sum

assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}'
self.qmle_cache[key] = ans
assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}'
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -94,13 +80,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
direction = self.direction

q_mle = self.qmle(observed_sum, expectations)

key = tuple([observed_sum, expectations.sum(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter["qs"] += 1
return ans

q_mle_score = self.score(observed_sum, expectations, penalty, q_mle)

if q_mle_score > 0:
Expand All @@ -118,5 +97,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans
29 changes: 0 additions & 29 deletions aif360/detectors/mdss/ScoringFunctions/Poisson.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,30 +32,16 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q:
"observed_sum=%.2f, expectations of length=%d, penalty=%.2f, q=%.2f"
% (observed_sum, len(expectations), penalty, q)
)
key = tuple([observed_sum, expectations.sum(), penalty, q])
ans = self.score_cache.get(key)
if ans is not None:
self.cache_counter['score'] += 1
return ans

ans = observed_sum * np.log(q) + (expectations - q * expectations).sum() - penalty
self.score_cache[key] = ans
return ans

def qmle(self, observed_sum: float, expectations: np.array):
"""
Computes the q which maximizes score (q_mle).
"""
direction = self.direction

key = tuple([observed_sum, expectations.sum()])
ans = self.qmle_cache.get(key)
if ans is not None:
self.cache_counter['qmle'] += 1
return ans

ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction)
self.qmle_cache[key] = ans
return ans

def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float):
Expand All @@ -68,15 +54,8 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
"""

direction = self.direction

q_mle = self.qmle(observed_sum, expectations)

key = tuple([observed_sum, expectations.tostring(), penalty])
ans = self.compute_qs_cache.get(key)
if ans is not None:
self.cache_counter['qs'] += 1
return ans

if self.score(observed_sum, expectations, penalty, q_mle) > 0:
exist = 1
q_min = optim.bisection_q_min(self, observed_sum, expectations, penalty, q_mle)
Expand All @@ -92,7 +71,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float
exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max)

ans = [exist, q_mle, q_min, q_max]
self.compute_qs_cache[key] = ans
return ans

def q_dscore(self, observed_sum, expectations, q):
Expand All @@ -107,12 +85,5 @@ def q_dscore(self, observed_sum, expectations, q):
:param q: current value of q
:return: q dscore/dq
"""
key = tuple([observed_sum, expectations.sum(), q])
ans = self.qdscore_cache.get(key)
if ans is not None:
self.cache_counter['qdscore'] += 1
return ans

ans = observed_sum - (q * expectations).sum()
self.qdscore_cache[key] = ans
return ans
9 changes: 0 additions & 9 deletions aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,11 @@ def __init__(self, **kwargs):
Journal of Computational and Graphical Statistics, 25(2), 382-404.
"""
self.kwargs = kwargs
self._reset()
self.direction = kwargs.get('direction')

directions = ['positive', 'negative']
assert self.direction in directions, f"Expected one of {directions}, got {self.direction}"

def _reset(self):
self.score_cache = {}
self.dscore_cache = {}
self.qdscore_cache = {}
self.qmle_cache = {}
self.compute_qs_cache = {}
self.cache_counter = {"score": 0, "dscore": 0, "qdscore": 0, "qmle": 0, "qs": 0}

def score(
self, observed_sum: float, expectations: np.array, penalty: float, q: float
):
Expand Down
14 changes: 7 additions & 7 deletions examples/demo_mdss_detector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@
{
"data": {
"text/plain": [
"'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.3827815971689547'"
"'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.38278159716895366'"
]
},
"execution_count": 12,
Expand Down Expand Up @@ -379,7 +379,7 @@
{
"data": {
"text/plain": [
"'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.44470388217799317'"
"'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.4447038821779929'"
]
},
"execution_count": 14,
Expand Down Expand Up @@ -784,7 +784,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 321, we observe 7844.840295856697 as the mean insurance costs, but our model predicts 5420.49326277455'"
"'Our detected privileged group has a size of 321, we observe 7844.8402958566985 as the mean insurance costs, but our model predicts 5420.493262774548'"
]
},
"execution_count": 28,
Expand All @@ -809,7 +809,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 115, we observe 21148.37389617392 as the mean insurance costs, but our model predicts 29694.035319112852'"
"'Our detected privileged group has a size of 115, we observe 21148.373896173915 as the mean insurance costs, but our model predicts 29694.035319112845'"
]
},
"execution_count": 29,
Expand Down Expand Up @@ -1152,7 +1152,7 @@
{
"data": {
"text/plain": [
"'Our detected privileged group has a size of 31607, we observe 5.155584909121915 as the mean temperature, but our model predicts 11.932678437519867'"
"'Our detected privileged group has a size of 31607, we observe 5.155584909121934 as the mean temperature, but our model predicts 11.93267843751985'"
]
},
"execution_count": 42,
Expand All @@ -1176,7 +1176,7 @@
{
"data": {
"text/plain": [
"'Our detected unprivileged group has a size of 55642, we observe 16.773802762911167 as the mean temperature, but our model predicts 11.932678437519867'"
"'Our detected unprivileged group has a size of 55642, we observe 16.773802762911078 as the mean temperature, but our model predicts 11.93267843751985'"
]
},
"execution_count": 43,
Expand Down Expand Up @@ -1533,7 +1533,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.8.12"
}
},
"nbformat": 4,
Expand Down