Merge pull request #38 from synthicity/lcm-misc
Segmented* fixes
jiffyclub committed Jun 17, 2014
2 parents 1220cef + 524c4ea commit 9333b0c
Showing 4 changed files with 81 additions and 12 deletions.
25 changes: 21 additions & 4 deletions urbansim/models/lcm.py
@@ -566,14 +566,16 @@ class SegmentedMNLLocationChoiceModel(object):
the alternatives index is used.
default_model_expr : str, iterable, or dict, optional
A patsy model expression. Should contain only a right-hand side.
name : str, optional
An optional string used to identify the model in places.
"""
def __init__(self, segmentation_col, sample_size,
choosers_fit_filters=None, choosers_predict_filters=None,
alts_fit_filters=None, alts_predict_filters=None,
interaction_predict_filters=None,
estimation_sample_size=None,
choice_column=None, default_model_expr=None):
choice_column=None, default_model_expr=None, name=None):
self.segmentation_col = segmentation_col
self.sample_size = sample_size
self.choosers_fit_filters = choosers_fit_filters
@@ -585,6 +587,8 @@ def __init__(self, segmentation_col, sample_size,
self.choice_column = choice_column
self.default_model_expr = default_model_expr
self._group = MNLLocationChoiceModelGroup(segmentation_col)
self.name = (name if name is not None else
'SegmentedMNLLocationChoiceModel')

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
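For reference, a minimal sketch of the new `name` keyword on SegmentedMNLLocationChoiceModel; the segmentation column, sample size, and model expression below are hypothetical, not taken from this commit.

# Hypothetical usage of the new ``name`` keyword; the column name,
# sample size, and model expression are made up for illustration.
from urbansim.models import lcm

model = lcm.SegmentedMNLLocationChoiceModel(
    segmentation_col='building_type',
    sample_size=10,
    default_model_expr='var1 + var2',
    name='hh_location_choice')

# The name is stored on the instance and written out by to_dict()/to_yaml(),
# which makes saved model configurations easier to tell apart.
assert model.name == 'hh_location_choice'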
@@ -618,7 +622,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
cfg['interaction_predict_filters'],
cfg['estimation_sample_size'],
cfg['choice_column'],
default_model_expr)
default_model_expr,
cfg['name'])

if "models" not in cfg:
cfg["models"] = {}
@@ -708,6 +713,14 @@ def fit(self, choosers, alternatives, current_choice):

unique = choosers[self.segmentation_col].unique()

# Remove any existing segments that may no longer have counterparts
# in the data. This can happen when loading a saved model and then
# calling this method with data that no longer has segments that
# were there the last time this was called.
gone = set(self._group.models) - set(unique)
for g in gone:
del self._group.models[g]

for x in unique:
if x not in self._group.models:
self.add_segment(x)
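The comment above describes the new pruning step: segments registered on the model but absent from the incoming chooser data are dropped before fitting. A small sketch of that set logic, with made-up segment labels:

# Sketch of the pruning logic with hypothetical segment labels.
existing = {'a', 'b'}        # keys of self._group.models (e.g. from a saved model)
in_data = {'b', 'c'}         # choosers[segmentation_col].unique()

gone = existing - in_data    # segments no longer present in the data
assert gone == {'a'}         # 'a' is deleted from the group

# 'c' is new, so fit() registers it via add_segment() before estimating.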
@@ -794,6 +807,7 @@ def to_dict(self):
"""
return {
'model_type': 'segmented_locationchoice',
'name': self.name,
'segmentation_col': self.segmentation_col,
'sample_size': self.sample_size,
'choosers_fit_filters': self.choosers_fit_filters,
Expand All @@ -807,8 +821,11 @@ def to_dict(self):
'model_expression': self.default_model_expr,
},
'fitted': self.fitted,
'models': {yamlio.to_scalar_safe(name): self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
'models': {
yamlio.to_scalar_safe(name):
self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()
}
}

def to_yaml(self, str_or_buffer=None):
33 changes: 27 additions & 6 deletions urbansim/models/regression.py
@@ -311,7 +311,8 @@ class instance for use during prediction.
self.model_fit = fit
self.fit_parameters = _model_fit_to_table(fit)
if debug:
df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=data.index)
df = pd.DataFrame(
fit.model.exog, columns=fit.model.exog_names, index=data.index)
df[fit.model.endog_names] = fit.model.endog
df["fittedvalues"] = fit.fittedvalues
df["residuals"] = fit.resid
@@ -588,18 +589,24 @@ class SegmentedRegressionModel(object):
the results reflect actual price.
By default no transformation is applied.
min_segment_size : int, optional
Segments with less than this many members will be skipped.
name : str, optional
A name used in places to identify the model.
"""
def __init__(
self, segmentation_col, fit_filters=None, predict_filters=None,
default_model_expr=None, default_ytransform=None, min_segment_size=0):
default_model_expr=None, default_ytransform=None,
min_segment_size=0, name=None):
self.segmentation_col = segmentation_col
self._group = RegressionModelGroup(segmentation_col)
self.fit_filters = fit_filters
self.predict_filters = predict_filters
self.default_model_expr = default_model_expr
self.default_ytransform = default_ytransform
self.min_segment_size = min_segment_size
self.name = name if name is not None else 'SegmentedRegressionModel'

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
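As with the location choice model, here is a minimal sketch of constructing a SegmentedRegressionModel with the new `name` keyword and a `min_segment_size`; the column names, formula, and threshold are hypothetical.

# Hypothetical usage; the column names, formula, and threshold are made up.
from urbansim.models import regression

hedonic = regression.SegmentedRegressionModel(
    segmentation_col='building_type',
    default_model_expr='np.log1p(price) ~ sqft + lot_size',
    min_segment_size=50,
    name='price_hedonic')

assert hedonic.name == 'price_hedonic'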
@@ -627,7 +634,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
seg = cls(
cfg['segmentation_col'], cfg['fit_filters'],
cfg['predict_filters'], default_model_expr,
YTRANSFORM_MAPPING[default_ytransform])
YTRANSFORM_MAPPING[default_ytransform], cfg['min_segment_size'],
cfg['name'])

if "models" not in cfg:
cfg["models"] = {}
@@ -702,8 +710,17 @@ def fit(self, data, debug=False):
unique = data[self.segmentation_col].unique()
value_counts = data[self.segmentation_col].value_counts()

# Remove any existing segments that may no longer have counterparts
# in the data. This can happen when loading a saved model and then
# calling this method with data that no longer has segments that
# were there the last time this was called.
gone = set(self._group.models) - set(unique)
for g in gone:
del self._group.models[g]

for x in unique:
if x not in self._group.models and value_counts[x] > self.min_segment_size:
if x not in self._group.models and \
value_counts[x] > self.min_segment_size:
self.add_segment(x)

return self._group.fit(data, debug=debug)
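Taken together, fit() now drops registered segments that are missing from the new data and still only auto-adds segments whose row count exceeds min_segment_size. A short sketch of that bookkeeping, using hypothetical counts:

# Hypothetical counts illustrating which segments fit() keeps.
counts = {'x': 120, 'y': 3}   # rows per segment in the new data
min_segment_size = 10
existing = {'x', 'z'}         # segments already registered on the model

gone = existing - set(counts)  # 'z' is absent from the data and is dropped
assert gone == {'z'}

new_segments = [s for s in counts
                if s not in existing and counts[s] > min_segment_size]
assert new_segments == []      # 'y' is present but too small to be added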
@@ -773,16 +790,20 @@ def to_dict(self):
"""
return {
'model_type': 'segmented_regression',
'name': self.name,
'segmentation_col': self.segmentation_col,
'fit_filters': self.fit_filters,
'predict_filters': self.predict_filters,
'min_segment_size': self.min_segment_size,
'default_config': {
'model_expression': self.default_model_expr,
'ytransform': YTRANSFORM_MAPPING[self.default_ytransform]
},
'fitted': self.fitted,
'models': {yamlio.to_scalar_safe(name): self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
'models': {
yamlio.to_scalar_safe(name):
self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
}

def to_yaml(self, str_or_buffer=None):
18 changes: 17 additions & 1 deletion urbansim/models/tests/test_lcm.py
@@ -194,12 +194,13 @@ def test_mnl_lcm_segmented_yaml(grouped_choosers, alternatives):
sample_size = 4

group = lcm.SegmentedMNLLocationChoiceModel(
'group', sample_size, default_model_expr=model_exp)
'group', sample_size, default_model_expr=model_exp, name='test_seg')
group.add_segment('x')
group.add_segment('y', 'var3 + var1:var2')

expected_dict = {
'model_type': 'segmented_locationchoice',
'name': 'test_seg',
'segmentation_col': 'group',
'sample_size': sample_size,
'choosers_fit_filters': None,
@@ -255,3 +256,18 @@ def test_mnl_lcm_segmented_yaml(grouped_choosers, alternatives):

new_seg = lcm.SegmentedMNLLocationChoiceModel.from_yaml(group.to_yaml())
assert new_seg.fitted is True


def test_segmented_lcm_removes_old_models(grouped_choosers, alternatives):
model_exp = 'var2 + var1:var3'
sample_size = 4

group = lcm.SegmentedMNLLocationChoiceModel(
'group', sample_size, default_model_expr=model_exp)
group.add_segment('a')
group.add_segment('b')
group.add_segment('c')

group.fit(grouped_choosers, alternatives, 'thing_id')

assert sorted(group._group.models.keys()) == ['x', 'y']
17 changes: 16 additions & 1 deletion urbansim/models/tests/test_regression.py
@@ -320,15 +320,18 @@ def test_SegmentedRegressionModel_explicit(groupby_df):
def test_SegmentedRegressionModel_yaml(groupby_df):
seg = regression.SegmentedRegressionModel(
'group', fit_filters=['col1 not in [2]'],
predict_filters=['group != "z"'], default_model_expr='col1 ~ col2')
predict_filters=['group != "z"'], default_model_expr='col1 ~ col2',
min_segment_size=5000, name='test_seg')
seg.add_segment('x')
seg.add_segment('y', 'np.exp(col2) ~ np.exp(col1)', np.log)

expected_dict = {
'model_type': 'segmented_regression',
'name': 'test_seg',
'segmentation_col': 'group',
'fit_filters': ['col1 not in [2]'],
'predict_filters': ['group != "z"'],
'min_segment_size': 5000,
'default_config': {
'model_expression': 'col1 ~ col2',
'ytransform': None
@@ -385,3 +388,15 @@ def test_SegmentedRegressionModel_yaml(groupby_df):

new_seg = regression.SegmentedRegressionModel.from_yaml(seg.to_yaml())
assert new_seg.fitted is True


def test_SegmentedRegressionModel_removes_gone_segments(groupby_df):
seg = regression.SegmentedRegressionModel(
'group', default_model_expr='col1 ~ col2')
seg.add_segment('a')
seg.add_segment('b')
seg.add_segment('c')

seg.fit(groupby_df)

assert sorted(seg._group.models.keys()) == ['x', 'y']
