better bounds checking and ignore nan support in theil-to-gini conversions
gidden · Oct 9, 2017 · 0c960fc · 0c960fc
1 parent 25be471
commit 0c960fc
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 11 deletions.
diff --git a/salamanca/ineq.py b/salamanca/ineq.py
@@ -39,7 +39,16 @@ def _theil_empirical_constants():
     return 0.216, 0.991, 0.003
 
 
-def gini_to_theil(g, empirical=False):
+def _check_bounds(name, ary, lb, ub, ignorenan):
+    bad = (ary <= lb) | (ary >= ub)
+    if not ignorenan:
+        bad |= np.isnan(ary)
+    if np.any(bad):
+        msg = '{} not within ({}, {})'
+        raise ValueError(msg.format(name, lb, ub))
+
+
+def gini_to_theil(g, empirical=False, ignorenan=False):
     r"""Translate gini to theil
 
     .. math::
@@ -55,10 +64,10 @@ def gini_to_theil(g, empirical=False):
         gini coefficient(s)
     empirical : bool, optional, default: False
         whether to use empirical relationship for theil
-
+    ignorenan : bool, optional, default: False
+        if True, allow NaN in input or output instead of raising an error
     """
-    if not (np.all(g > 0) and np.all(g < 1)):
-        raise ValueError('Gini not within (0, 1)')
+    _check_bounds('Gini', g, 0, 1, ignorenan=ignorenan)
 
     s = gini_to_std(g)
     t = std_to_theil(s)
@@ -67,12 +76,11 @@ def gini_to_theil(g, empirical=False):
         a, b, c = _theil_empirical_constants()
         t = (-b + (b ** 2 - 4 * a * (c - t)) ** 0.5) / (2 * a)
 
-    if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
-        raise ValueError('Theil not within (0, 2.88): {}'.format(t))
+    _check_bounds('Theil', t, 0, MAX_THEIL, ignorenan=ignorenan)
     return t
 
 
-def theil_to_gini(t, empirical=False):
+def theil_to_gini(t, empirical=False, ignorenan=False):
     r"""Translate theil to gini
 
     .. math::
@@ -89,20 +97,31 @@ def theil_to_gini(t, empirical=False):
     empirical : bool, optional, default: False
         whether to use empirical relationship for theil
     """
-    if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
-        raise ValueError('Theil not within (0, 2.88): {}'.format(t))
+    _check_bounds('Theil', t, 0, MAX_THEIL, ignorenan=ignorenan)
 
     if empirical:
         a, b, c = _theil_empirical_constants()
         t = a * t ** 2 + b * t + c
     s = theil_to_std(t)
     g = std_to_gini(s)
 
-    if not (np.all(g > 0) and np.all(g < 1)):
-        raise ValueError('Gini not within (0, 1)')
+    _check_bounds('Gini', g, 0, 1, ignorenan=ignorenan)
     return g
 
 
+# def recompose_theil(df):
+#     n = df['n'].sum()
+#     i = (df['n'] * df['i']).sum() / n
+#     t_b = (df['n'] * df['i'] * np.log(df['i'] / i)).sum() / (n * i)
+#     t_w = (df['n'] * df['i'] * df['t']).sum() / (n * i)
+#     return t_w + t_b
+
+
+# def below_threshold(threshold, dist=None, **kwargs):
+#     dist = dist or LogNormal()
+#     return dist.below_threshold(threshold, **kwargs)
+
+
 class LogNormalData(AttrObject):
     """Object for storing and updating LogNormal distribution data"""
 

diff --git a/tests/test_ineq.py b/tests/test_ineq.py
@@ -1,5 +1,7 @@
 import pytest
 
+import numpy as np
+
 from salamanca import ineq as iq
 
 from utils import assert_almost_equal
@@ -65,6 +67,16 @@ def test_gini_to_theil_close_hi():
     iq.gini_to_theil(0.99, empirical=True)
 
 
+def test_gini_to_theil_nan():
+    obs = iq.gini_to_theil(np.nan, ignorenan=True)
+    assert np.isnan(obs)
+
+
+def test_theil_to_gini_nan():
+    obs = iq.theil_to_gini(np.nan, ignorenan=True)
+    assert np.isnan(obs)
+
+
 def test_gini_to_theil_error():
     with pytest.raises(ValueError):
         iq.gini_to_theil(-1)
@@ -78,6 +90,9 @@ def test_gini_to_theil_error():
     with pytest.raises(ValueError):
         iq.gini_to_theil(1.1)
 
+    with pytest.raises(ValueError):
+        iq.gini_to_theil(np.nan)
+
 
 def test_theil_to_gini_error():
     with pytest.raises(ValueError):
@@ -92,6 +107,9 @@ def test_theil_to_gini_error():
     with pytest.raises(ValueError):
         iq.theil_to_gini(7.0)
 
+    with pytest.raises(ValueError):
+        iq.theil_to_gini(np.nan)
+
 
 def test_lndata_init():
     data = iq.LogNormalData(inc=4.2, gini=0.5)