Skip to content
Permalink
Browse files

Switch everything to use inferred_section instead of cleaned_section

Fairly simple set of fixes
- covers both the geojson stuff + the metrics
- configurable via config.json so we can test both old and new

Updated the existing tests to use the cleaned sections;
still need to add a new test for the new inferred sections.
  • Loading branch information...
shankari committed Feb 23, 2018
1 parent ce6cec9 commit 232628ce1372e691ae1b9b15e2cbb061aaf3ad33
@@ -1,5 +1,6 @@
{
"intake.cleaning.clean_and_resample.speedDistanceAssertions": true,
"intake.cleaning.filter_accuracy.enable": false,
"classification.inference.mode.useAdvancedFeatureIndices": true,
"analysis.result.section.key": "analysis/inferred_section"
}
@@ -16,3 +16,6 @@ def get_config():
def reload_config():
    """Re-read the analysis configuration and refresh the module-level cache."""
    # Rebind the module-level config_data so subsequent readers (e.g.
    # get_section_key_for_analysis_results) see freshly loaded values.
    # NOTE(review): get_config_data() is not visible in this chunk --
    # presumably it re-reads the config file from disk; confirm.
    global config_data
    config_data = get_config_data()

def get_section_key_for_analysis_results():
    """Return the timeseries key configured for analysis result sections.

    Reads "analysis.result.section.key" from the loaded config, which lets
    deployments switch between e.g. "analysis/cleaned_section" and
    "analysis/inferred_section" without code changes.
    """
    config_key = "analysis.result.section.key"
    return config_data[config_key]
@@ -28,6 +28,7 @@

# TODO: Move this to the section_features class instead
import emission.analysis.intake.cleaning.location_smoothing as eaicl
import emission.analysis.config as eac

def _del_non_derializable(prop_dict, extra_keys):
for key in extra_keys:
@@ -113,7 +114,7 @@ def section_to_geojson(section, tl):
ts = esta.TimeSeries.get_time_series(section.user_id)
entry_it = ts.find_entries(["analysis/recreated_location"],
esda.get_time_query_for_trip_like(
"analysis/cleaned_section",
eac.get_section_key_for_analysis_results(),
section.get_id()))

# TODO: Decide whether we want to use Rewrite to use dataframes throughout instead of python arrays.
@@ -22,6 +22,8 @@
import emission.core.wrapper.modestattimesummary as ecwms
import emission.core.wrapper.localdate as ecwl

import emission.analysis.config as eac

def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
"""
Get grouped dataframes for the specific time range and at the specified frequency
@@ -40,7 +42,7 @@ def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
and the list is empty.
"""
time_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY,
section_df = esda.get_data_df(eac.get_section_key_for_analysis_results(),
user_id=user_id, time_query=time_query,
geo_query=None)
if len(section_df) == 0:
@@ -87,7 +89,7 @@ def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
and the list is empty.
"""
time_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY,
section_df = esda.get_data_df(eac.get_section_key_for_analysis_results(),
user_id=user_id, time_query=time_query,
geo_query=None)
if len(section_df) == 0:
@@ -21,6 +21,7 @@
CLEANED_TRIP_KEY = "analysis/cleaned_trip"
CLEANED_PLACE_KEY = "analysis/cleaned_place"
CLEANED_SECTION_KEY = "analysis/cleaned_section"
INFERRED_SECTION_KEY = "analysis/inferred_section"
CLEANED_STOP_KEY = "analysis/cleaned_stop"
CLEANED_UNTRACKED_KEY = "analysis/cleaned_untracked"
CLEANED_LOCATION_KEY = "analysis/recreated_location"
@@ -36,6 +36,7 @@
import attrdict as ad
import arrow
import numpy as np
import os

# Our imports
import emission.core.get_database as edb
@@ -53,10 +54,13 @@ class TestPipelineRealData(unittest.TestCase):
def setUp(self):
    # Seed numpy's RNG so any randomized pipeline steps are reproducible.
    # Thanks to M&J for the number!
    np.random.seed(61297777)
    # Pin the result section key to the cleaned sections via a temporary
    # analysis config; tearDown removes this file.
    # NOTE(review): presumably etc.set_analysis_config returns the path of
    # the config file it wrote -- confirm against the helper's definition.
    self.analysis_conf_path = \
        etc.set_analysis_config("analysis.result.section.key", "analysis/cleaned_section")

def tearDown(self):
    # Drop per-test database state so tests stay independent of each other.
    logging.debug("Clearing related databases")
    self.clearRelatedDb()
    # Delete the temporary analysis config file created in setUp.
    os.remove(self.analysis_conf_path)

def clearRelatedDb(self):
edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
@@ -19,6 +19,7 @@

import emission.analysis.result.metrics.time_grouping as earmt
import emission.analysis.result.metrics.simple_metrics as earmts
import emission.analysis.config as eac

import emission.core.wrapper.entry as ecwe
import emission.core.wrapper.section as ecws
@@ -73,7 +74,8 @@ def _createTestSection(self, start_ardt, start_timezone):
self._fillDates(section, "end_", start_ardt, start_timezone)
logging.debug("created section %s" % (section.start_fmt_time))

entry = ecwe.Entry.create_entry(self.testUUID, esda.CLEANED_SECTION_KEY,
entry = ecwe.Entry.create_entry(self.testUUID,
eac.get_section_key_for_analysis_results(),
section, create_id=True)
self.ts.insert(entry)
return entry
@@ -99,7 +101,7 @@ def testLocalDtFillTimesDailyOneTz(self):
self._createTestSection(arrow.Arrow(2016,5,3,23, tzinfo=tz.gettz(PST)),
PST))

section_group_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY,
section_group_df = self.ts.to_data_df(eac.get_section_key_for_analysis_results(),
test_section_list)
logging.debug("First row of section_group_df = %s" % section_group_df.iloc[0])
self.assertEqual(earmt._get_tz(section_group_df), PST)
@@ -143,7 +145,7 @@ def testLocalDtFillTimesDailyMultiTzGoingWest(self):
arrow.Arrow(2016,5,4,0, tzinfo=tz.gettz(PST)),
PST)

section_group_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY,
section_group_df = self.ts.to_data_df(eac.get_section_key_for_analysis_results(),
test_section_list)

# Timestamps are monotonically increasing
@@ -234,7 +236,8 @@ def testLocalDtFillTimesDailyMultiTzGoingEast(self):
arrow.Arrow(2016,5,4,3, tzinfo=tz.gettz(BST)),
BST)

section_group_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY, test_section_list)
section_group_df = self.ts.to_data_df(eac.get_section_key_for_analysis_results(),
test_section_list)
logging.debug("first row is %s" % section_group_df.loc[0])

# Timestamps are monotonically increasing

0 comments on commit 232628c

Please sign in to comment.
You can’t perform that action at this time.