Skip to content

Commit

Permalink
Handle the case where the first point of the next section is returned…
Browse files Browse the repository at this point in the history
… in the query

This fixes
https://github.com/e-mission/e-mission-server/issues/530#issuecomment-352206464

Basically, if two sections are back to back, then the last point of the first
section will overlap with the first point of the second section. So a query
based on the start and end time for the first section will return the first
point of the second section as well, which causes a mismatch between the
re-retrieved and stored speeds and distances.

We detect and drop the last point in this case.
  • Loading branch information
shankari committed Dec 16, 2017
1 parent e780ba7 commit dd7a18d
Show file tree
Hide file tree
Showing 4 changed files with 984,926 additions and 0 deletions.
5 changes: 5 additions & 0 deletions emission/analysis/intake/cleaning/clean_and_resample.py
Expand Up @@ -1096,6 +1096,11 @@ def _fix_squished_place_mismatch(user_id, trip_id, ts, cleaned_trip_data, cleane
with_speeds_df = eaicl.add_dist_heading_speed(loc_df)
logging.debug("fix_squished_place: after recomputing for validation, with_speeds_df = %s" %
(with_speeds_df[["_id", "ts", "fmt_time", "latitude", "longitude", "distance", "speed"]]).head())
# When two sections are back to back, the time-range query for the first
# section can also return the first point of the next section. After
# recomputation that extra point shows up as a trailing row with zero speed
# and zero distance; drop it so the recomputed values line up with the
# stored ones.
last_point = with_speeds_df.iloc[-1]
if last_point.speed == 0 and last_point.distance == 0:
    # Log the values of the row that is actually being tested and dropped
    # (the last one), not the row at position 1.
    logging.debug("with_speeds_df.iloc[-1].speed = %s, distance = %s" %
                  (last_point.speed, last_point.distance))
    with_speeds_df.drop(with_speeds_df.index[-1], inplace=True)

if not ecc.compare_rounded_arrays(with_speeds_df.speed.tolist(), first_section_data["speeds"], 10):
logging.error("%s != %s" % (with_speeds_df.speed.tolist()[:10], first_section_data["speeds"][:10]))
assert False
Expand Down
14 changes: 14 additions & 0 deletions emission/tests/analysisTests/intakeTests/TestPipelineRealData.py
Expand Up @@ -781,6 +781,20 @@ def testOverriddenModeHack(self):
self.compare_result(ad.AttrDict({'result': api_result}).result,
ad.AttrDict(ground_truth).data)

def testJan16SpeedAssert(self):
    # Test for https://github.com/e-mission/e-mission-server/issues/457
    # Loads the Jan 16 real example, runs the intake pipeline on it and
    # compares the geojson generated for that day with the stored ground truth.
    dataFile = "emission/tests/data/real_examples/another_speed_assertion_failure.jan-16"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 16})
    # Read the ground truth through a context manager so that the file handle
    # is closed deterministically instead of being left for garbage collection.
    with open("emission/tests/data/real_examples/another_speed_assertion_failure.jan-16.ground_truth") as gt_fp:
        ground_truth = json.load(gt_fp, object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)

    # The start and end of the queried range are the same local date.
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)

# Script entry point: set up logging through the shared test helper first,
# then hand control to the unittest runner.
if "__main__" == __name__:
    etc.configLogging()
    unittest.main()

0 comments on commit dd7a18d

Please sign in to comment.