Skip to content

Commit

Permalink
better bounds checking and ignore nan support in theil-to-gini conver…
Browse files Browse the repository at this point in the history
…sions
  • Loading branch information
gidden committed Oct 9, 2017
1 parent 25be471 commit 0c960fc
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 11 deletions.
41 changes: 30 additions & 11 deletions salamanca/ineq.py
Expand Up @@ -39,7 +39,16 @@ def _theil_empirical_constants():
return 0.216, 0.991, 0.003


def gini_to_theil(g, empirical=False):
def _check_bounds(name, ary, lb, ub, ignorenan):
bad = (ary <= lb) | (ary >= ub)
if not ignorenan:
bad |= np.isnan(ary)
if np.any(bad):
msg = '{} not within ({}, {})'
raise ValueError(msg.format(name, lb, ub))


def gini_to_theil(g, empirical=False, ignorenan=False):
r"""Translate gini to theil
.. math::
Expand All @@ -55,10 +64,10 @@ def gini_to_theil(g, empirical=False):
gini coefficient(s)
empirical : bool, optional, default: False
whether to use empirical relationship for theil
ignorenan : bool, optional, default: False
if True, NaN values are ignored by the bounds checks; if False, a
ValueError is raised when NaN is encountered in input or output
"""
if not (np.all(g > 0) and np.all(g < 1)):
raise ValueError('Gini not within (0, 1)')
_check_bounds('Gini', g, 0, 1, ignorenan=ignorenan)

s = gini_to_std(g)
t = std_to_theil(s)
Expand All @@ -67,12 +76,11 @@ def gini_to_theil(g, empirical=False):
a, b, c = _theil_empirical_constants()
t = (-b + (b ** 2 - 4 * a * (c - t)) ** 0.5) / (2 * a)

if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
raise ValueError('Theil not within (0, 2.88): {}'.format(t))
_check_bounds('Theil', t, 0, MAX_THEIL, ignorenan=ignorenan)
return t


def theil_to_gini(t, empirical=False):
def theil_to_gini(t, empirical=False, ignorenan=False):
r"""Translate theil to gini
.. math::
Expand All @@ -89,20 +97,31 @@ def theil_to_gini(t, empirical=False):
empirical : bool, optional, default: False
whether to use empirical relationship for theil
"""
if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
raise ValueError('Theil not within (0, 2.88): {}'.format(t))
_check_bounds('Theil', t, 0, MAX_THEIL, ignorenan=ignorenan)

if empirical:
a, b, c = _theil_empirical_constants()
t = a * t ** 2 + b * t + c
s = theil_to_std(t)
g = std_to_gini(s)

if not (np.all(g > 0) and np.all(g < 1)):
raise ValueError('Gini not within (0, 1)')
_check_bounds('Gini', g, 0, 1, ignorenan=ignorenan)
return g


# def recompose_theil(df):
# n = df['n'].sum()
# i = (df['n'] * df['i']).sum() / n
# t_b = (df['n'] * df['i'] * np.log(df['i'] / i)).sum() / (n * i)
# t_w = (df['n'] * df['i'] * df['t']).sum() / (n * i)
# return t_w + t_b


# def below_threshold(threshold, dist=None, **kwargs):
# dist = dist or LogNormal()
# return dist.below_threshold(threshold, **kwargs)


class LogNormalData(AttrObject):
"""Object for storing and updating LogNormal distribution data"""

Expand Down
18 changes: 18 additions & 0 deletions tests/test_ineq.py
@@ -1,5 +1,7 @@
import pytest

import numpy as np

from salamanca import ineq as iq

from utils import assert_almost_equal
Expand Down Expand Up @@ -65,6 +67,16 @@ def test_gini_to_theil_close_hi():
iq.gini_to_theil(0.99, empirical=True)


def test_gini_to_theil_nan():
    """A NaN gini yields a NaN theil when ignorenan=True."""
    result = iq.gini_to_theil(np.nan, ignorenan=True)
    assert np.isnan(result)


def test_theil_to_gini_nan():
    """A NaN theil yields a NaN gini when ignorenan=True."""
    result = iq.theil_to_gini(np.nan, ignorenan=True)
    assert np.isnan(result)


def test_gini_to_theil_error():
with pytest.raises(ValueError):
iq.gini_to_theil(-1)
Expand All @@ -78,6 +90,9 @@ def test_gini_to_theil_error():
with pytest.raises(ValueError):
iq.gini_to_theil(1.1)

with pytest.raises(ValueError):
iq.gini_to_theil(np.nan)


def test_theil_to_gini_error():
with pytest.raises(ValueError):
Expand All @@ -92,6 +107,9 @@ def test_theil_to_gini_error():
with pytest.raises(ValueError):
iq.theil_to_gini(7.0)

with pytest.raises(ValueError):
iq.theil_to_gini(np.nan)


def test_lndata_init():
data = iq.LogNormalData(inc=4.2, gini=0.5)
Expand Down

0 comments on commit 0c960fc

Please sign in to comment.