Skip to content

Commit

Permalink
update projection model to optionally override national population value, add tests for this, remove share constraint formulation which was dependent on national population value in two time steps, update test that used that formulation
Browse files Browse the repository at this point in the history
  • Loading branch information
gidden committed Sep 18, 2017
1 parent dae1966 commit 821728e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 59 deletions.
72 changes: 19 additions & 53 deletions salamanca/models/project.py
Expand Up @@ -117,32 +117,6 @@ def income_rate_hi_rule(m, idx, b):
return lhs >= b * rhs


def share_diff_hi_rule(m, idx, b):
    r"""Bound the relative *decrease* of a unit's income share from below.

    Expresses the condition

        \frac{s^{t+1} - s^t}{s^{t}} \geq -b

    with the income share defined as

        s^t = \frac{i^t n^t}{I^t N^t}

    The new-period share is built from the model variable ``m.i[idx]`` and
    ``m.data['n_frac']``/``m.data['I']``; the previous-period share is built
    from ``m.data['i']``, ``m.data['n_frac_old']`` and ``m.data['I_old']``.

    The docstring is a raw string so that the LaTeX backslashes
    (``\frac``, ``\geq``) are not interpreted as escape sequences.

    @TODO: is 20% in 10 years (or other timeperiod) reasonable?

    Parameters
    ----------
    m : model object exposing ``m.i`` (indexable by ``idx``) and ``m.data``
        (mapping with the keys listed above)
    idx : index of the subnational unit
    b : allowed fractional decrease of the share (e.g. 0.2 for 20%)

    Returns
    -------
    The result of ``lhs >= (1 - b) * rhs``; presumably a constraint
    expression when ``m.i`` holds solver variables -- confirm against the
    modeling library in use.
    """
    # share in the new period (uses the decision variable m.i)
    lhs = m.i[idx] * m.data['n_frac'][idx] / m.data['I']
    # share in the previous period (pure data)
    rhs = m.data['i'][idx] * m.data['n_frac_old'][idx] / m.data['I_old']
    # (s_new - s_old) / s_old >= -b  <=>  s_new >= (1 - b) * s_old
    return lhs >= (1 - b) * rhs


def share_diff_lo_rule(m, idx, b):
    r"""Bound the relative *increase* of a unit's income share from above.

    Expresses the condition

        \frac{s^{t+1} - s^t}{s^{t}} \leq b

    with the income share defined as

        s^t = \frac{i^t n^t}{I^t N^t}

    The new-period share is built from the model variable ``m.i[idx]`` and
    ``m.data['n_frac']``/``m.data['I']``; the previous-period share is built
    from ``m.data['i']``, ``m.data['n_frac_old']`` and ``m.data['I_old']``.

    The docstring is a raw string so that the LaTeX backslashes
    (``\frac``, ``\leq``) are not interpreted as escape sequences.

    @TODO: is 20% in 10 years (or other timeperiod) reasonable?

    Parameters
    ----------
    m : model object exposing ``m.i`` (indexable by ``idx``) and ``m.data``
        (mapping with the keys listed above)
    idx : index of the subnational unit
    b : allowed fractional increase of the share (e.g. 0.2 for 20%)

    Returns
    -------
    The result of ``lhs <= (1 + b) * rhs``; presumably a constraint
    expression when ``m.i`` holds solver variables -- confirm against the
    modeling library in use.
    """
    # share in the new period (uses the decision variable m.i)
    lhs = m.i[idx] * m.data['n_frac'][idx] / m.data['I']
    # share in the previous period (pure data)
    rhs = m.data['i'][idx] * m.data['n_frac_old'][idx] / m.data['I_old']
    # (s_new - s_old) / s_old <= b  <=>  s_new <= (1 + b) * s_old
    return lhs <= (1 + b) * rhs


def std_diff_hi(m, b=0.2):
rhs = i_std(m, from_data=True)
lhs = i_std(m, from_data=False)
Expand Down Expand Up @@ -223,10 +197,11 @@ def combined_obj(m, theil_weight=1.0, pop_weight=1.0, **pop_kwargs):
class Model(object):
"""Base class for Projection Models"""

def __init__(self, natdata, subdata, empirical=False):
def __init__(self, natdata, subdata, empirical=False, override_national={}):
self.natdata = natdata
self.subdata = subdata
self.empirical = empirical
self.override_national = override_national

self._setup_model_data(natdata, subdata)
self._check_model_data()
Expand Down Expand Up @@ -284,19 +259,6 @@ def _add_diffusion_rules(self, diffusion):
rule=lambda m, idx: income_diff_lo_rule(m, idx, b),
doc='income within 20% from past',
)
if 'share' in diffusion:
b = diffusion['share']
b = 0.2 if b is True else b
m.s_hi = mo.Constraint(
m.idxs,
rule=lambda m, idx: share_diff_hi_rule(m, idx, b),
doc='income share within 20% from past',
)
m.s_lo = mo.Constraint(
m.idxs,
rule=lambda m, idx: share_diff_lo_rule(m, idx, b),
doc='income share within 20% from past',
)
if 'theil' in diffusion:
b = diffusion['theil']
b = 0.1 if b is True else b
Expand Down Expand Up @@ -326,15 +288,20 @@ def _setup_model_data(self, natdata, subdata):
self._histidx = histidx = ndf.index[0]
self._modelidx = modelidx = ndf.index[1]

# correct population
ratio = ndf.loc[modelidx][n] / sdf.loc[modelidx][n].sum()
if not np.isclose(ratio, 1.0):
msg = 'Scaling subnational population to match national ' + \
'population with ratio: {}'
warnings.warn(msg.format(ratio))
sdf.loc[modelidx][n] *= ratio
n_s = sdf.loc[modelidx][n].sum()
n_n = ndf.loc[modelidx][n]
if self.override_national.get(n, False):
# override national value with subnational value
ratio = 1
n_s = n_n = sdf.loc[modelidx][n].sum()
else:
# correct subnational population to national value by scaling
ratio = ndf.loc[modelidx][n] / sdf.loc[modelidx][n].sum()
if not np.isclose(ratio, 1.0):
msg = 'Scaling subnational population to match national ' + \
'population with ratio: {}'
warnings.warn(msg.format(ratio))
sdf.loc[modelidx][n] *= ratio
n_s = sdf.loc[modelidx][n].sum()
n_n = ndf.loc[modelidx][n]
if not np.isclose(n_s, n_n):
msg = 'Subnational ({}) != national ({}) population using ratio: {}'
raise RuntimeError(msg.format(n_s, n_n, ratio))
Expand All @@ -353,8 +320,7 @@ def _setup_model_data(self, natdata, subdata):
self.scale_I = ndf.loc[modelidx][i]
self.model_data = {
'idxs': self.model_idx,
'n_frac_old': sdf.loc[histidx][n].values / ndf.loc[histidx][n],
'n_frac': sdf.loc[modelidx][n].values / ndf.loc[modelidx][n],
'n_frac': sdf.loc[modelidx][n].values / n_n,
'i': sdf.loc[histidx][i].values,
'i_min': 0.1 * np.min(sdf.loc[histidx][i].values) / ndf.loc[histidx][i],
'i_max': 10 * np.max(sdf.loc[histidx][i].values) / ndf.loc[histidx][i],
Expand All @@ -364,7 +330,7 @@ def _setup_model_data(self, natdata, subdata):
empirical=self.empirical),
't_max': ineq.gini_to_theil(gini_max,
empirical=self.empirical),
'N': ndf.loc[modelidx][n],
'N': n_n,
'I': 1.0,
'I_new': ndf.loc[modelidx][i],
'I_old': ndf.loc[histidx][i],
Expand All @@ -384,7 +350,7 @@ def construct(self):
raise NotImplementedError()

def debug(self, pth=''):
skeys = ['idxs', 'n_frac_old', 'n_frac', 'i', 't',
skeys = ['idxs', 'n_frac', 'i', 't',
't_min', 't_max', 'i_min', 'i_max']
sdf = pd.DataFrame({s: self.model_data[s] for s in skeys},
index=self.orig_idx)
Expand Down
35 changes: 29 additions & 6 deletions tests/test_project.py
Expand Up @@ -19,7 +19,7 @@ def data():
'gini': [0.4, 0.35],
}, index=pd.Index([2010, 2020], name='year'))
subdata = pd.DataFrame({
'n': [7, 13, 9, 16],
'n': np.array([7, 13, 9, 16]) * 0.75,
'i': [10, 5, np.nan, np.nan],
'gini': [0.5, 0.3, np.nan, np.nan],
},
Expand Down Expand Up @@ -50,11 +50,33 @@ def test_model_data_pop():
natdata, subdata = data()
model = Model(natdata, subdata)

# pop
# pop, fraction
obs = model.model_data['n_frac']
exp = subdata.loc[2020]['n'] / subdata.loc[2020]['n'].sum()
assert_array_almost_equal(obs, exp)

# pop, absolute
obs = model.model_data['n_frac'] * model.model_data['N']
exp = subdata.loc[2020]['n'] * \
natdata.loc[2020]['n'] / subdata.loc[2020]['n'].sum()
assert_array_almost_equal(obs, exp)


def test_model_data_pop_override():
    """Population data when the national value is overridden for 'n'.

    With ``override_national={'n': True}`` the checks below expect the
    population fractions to match the subnational shares and the absolute
    populations (fraction times ``N``) to equal the subnational values.
    """
    # note all subdata order is swapped in model_data due to sorting by gini
    natdata, subdata = data()
    model = Model(natdata, subdata, override_national={'n': True})

    mdata = model.model_data
    sub_pop = subdata.loc[2020]['n']

    # pop, fraction
    assert_array_almost_equal(mdata['n_frac'], sub_pop / sub_pop.sum())

    # pop, absolute
    assert_array_almost_equal(mdata['n_frac'] * mdata['N'], sub_pop)


def test_model_data_error():
natdata, subdata = data()
Expand Down Expand Up @@ -100,7 +122,8 @@ def test_Model1_result():
exp = subdata.loc[2020]['n']
assert_array_almost_equal(obs, exp)
obs = df['n']
exp = subdata.loc[2020]['n']
exp = subdata.loc[2020]['n'] * \
natdata.loc[2020]['n'] / subdata.loc[2020]['n'].sum()
assert_array_almost_equal(obs, exp)

# this is a regression test, results tested 08-29-17
Expand All @@ -116,15 +139,15 @@ def test_Model1_result():
def test_Model1_diffusion_result():
natdata, subdata = data()
model = Model1(natdata, subdata)
diffusion = {'share': True, 'theil': True}
diffusion = {'income': True, 'theil': True}
model.construct(diffusion=diffusion)
model.solve()
df = model.result()

# this is a regression test, results tested 08-29-17
# this is a regression test, results tested 09-18-17
obs = df[['gini', 'i']]
exp = pd.DataFrame({
'i': np.array([8.20987721375, 6.31944406727]),
'i': np.array([8.38271666486, 6.22222187602]),
'gini': np.array([0.477747557316, 0.285296967258]),
}, index=pd.Index(['foo', 'bar'], name='name'))
assert_frame_equal(obs, exp)
Expand Down

0 comments on commit 821728e

Please sign in to comment.