Skip to content

Commit

Permalink
Adding a couple of safety checks in the hedonic modeling
Browse files Browse the repository at this point in the history
- Verify that prediction results have the same length as the
  input data. A mismatch usually indicates that there are null
  values in the input data, something that statsmodels apparently
  does not check.
- Raise an error if a user tries to get a model prediction before
  fitting the model.
  • Loading branch information
jiffyclub committed Apr 10, 2014
1 parent e37505a commit d81f84f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
2 changes: 2 additions & 0 deletions urbansim/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class ModelEvaluationError(Exception):
    """
    Error raised when evaluating a model produces an unusable result.

    For example, the hedonic ``predict`` function raises it when the
    number of predicted values differs from the number of input rows.

    """
11 changes: 11 additions & 0 deletions urbansim/models/hedonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
import statsmodels.formula.api as smf

from .. exceptions import ModelEvaluationError


def apply_filter_query(df, filters=None):
"""
Expand Down Expand Up @@ -81,6 +83,13 @@ def predict(df, filters, model_fit, ytransform=None):
"""
df = apply_filter_query(df, filters)
sim_data = model_fit.predict(df)

if len(sim_data) != len(df):
raise ModelEvaluationError(
'Predicted data does not have the same length as input. '
'This suggests there are null values in one or more of '
'the input columns.')

if ytransform:
sim_data = ytransform(sim_data)
return pd.Series(sim_data, index=df.index)
Expand Down Expand Up @@ -159,5 +168,7 @@ def predict(self, data):
after applying filters.
"""
if not self.model_fit:
raise RuntimeError('Model has not been fit.')
return predict(
data, self.predict_filters, self.model_fit, self.ytransform)
17 changes: 17 additions & 0 deletions urbansim/models/tests/test_hedonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from statsmodels.regression.linear_model import RegressionResultsWrapper

from .. import hedonic
from ...exceptions import ModelEvaluationError


@pytest.fixture
Expand Down Expand Up @@ -77,6 +78,18 @@ def test_predict_ytransform(test_df):
pdt.assert_series_equal(predicted, expected)


def test_predict_with_nans():
    """
    Predicting on rows that contain a null input value must raise
    ModelEvaluationError.

    """
    df = pd.DataFrame(
        {'col1': range(5),
         # float('nan') rather than pd.np.nan: the pandas.np alias was
         # deprecated in pandas 1.0 and removed in 2.0.
         'col2': [5, 6, float('nan'), 8, 9]},
        index=['a', 'b', 'c', 'd', 'e'])

    # Fit only on rows without nulls so the NaN shows up at predict time.
    fit = hedonic.fit_model(df.loc[['a', 'b', 'e']], None, 'col1 ~ col2')

    # Row 'c' carries the NaN in col2.
    with pytest.raises(ModelEvaluationError):
        hedonic.predict(
            df.loc[['c', 'd']], None, fit)


def test_HedonicModel(test_df):
fit_filters = ['col1 in [0, 2, 4]']
predict_filters = ['col1 in [1, 3]']
Expand All @@ -93,6 +106,10 @@ def test_HedonicModel(test_df):
assert model.name == name
assert model.model_fit is None

# verify there's an error if there isn't a model fit yet
with pytest.raises(RuntimeError):
model.predict(test_df)

fit = model.fit_model(test_df)
assert isinstance(fit, RegressionResultsWrapper)
assert isinstance(model.model_fit, RegressionResultsWrapper)
Expand Down

0 comments on commit d81f84f

Please sign in to comment.