To run this notebook with the correct PYTHONPATH, launch it with:
```
$ ./e-mission-ipy.bash notebook
```

### Preferred access technique

The preferred technique to access wrapper objects from the timeseries is to use the abstract timeseries interface. This makes it easier for us to switch to alternative timeseries implementations later.

In [29]:
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.trip_queries as esdt

In [30]:
from uuid import UUID

In [31]:
# User whose data the rest of the notebook reads;
# replace with the uuid that you loaded.
test_user_id = UUID("d1f337f6-fc03-41e0-ab5f-81705a59cdb6")

In [32]:
# Timeseries handle for the test user, obtained through the abstract
# timeseries interface; every `ts.*` call below goes through it.
ts = esta.TimeSeries.get_time_series(test_user_id)

#### Accessing entries directly

In [33]:
# Entries stored under the "analysis/cleaned_trip" key; no time query is
# supplied. The result is iterated over in the next cell.
entry_it = ts.find_entries(["analysis/cleaned_trip"], time_query=None)

All keys and their mapping to data model objects can be found in 
https://github.com/e-mission/e-mission-server/blob/master/emission/core/wrapper/entry.py

In [34]:
# Walk every cleaned trip and list the cleaned sections that belong to it.
for ct in entry_it:
    # Wrap the raw entry so that its fields are available as attributes
    cte = ecwe.Entry(ct)
    # print(...) with %-formatting produces the same text under Python 2 and
    # Python 3, unlike the Python 2-only `print a, b` statement.
    print("=== Trip: %s -> %s" % (cte.data.start_loc, cte.data.end_loc))
    section_it = esdt.get_sections_for_trip("analysis/cleaned_section", test_user_id, cte.get_id())
    for sec in section_it:
        print("  --- Section: %s -> %s  on  %s" % (sec.data.start_loc,
                                                    sec.data.end_loc,
                                                    sec.data.sensed_mode))

=== Trip: {"coordinates": [-122.0876886, 37.3887767], "type": "Point"} -> {"coordinates": [-122.0820411, 37.3920436], "type": "Point"}
  --- Section: {"coordinates": [-122.0876886, 37.3887767], "type": "Point"} -> {"coordinates": [-122.0773235, 37.3946683], "type": "Point"}  on  MotionTypes.BICYCLING
  --- Section: {"coordinates": [-122.077133, 37.3944385], "type": "Point"} -> {"coordinates": [-122.0859763, 37.3983699], "type": "Point"}  on  MotionTypes.ON_FOOT
  --- Section: {"coordinates": [-122.1031161, 37.4048536], "type": "Point"} -> {"coordinates": [-122.1651563, 37.4438859], "type": "Point"}  on  MotionTypes.IN_VEHICLE
  --- Section: {"coordinates": [-122.1647388, 37.4438775], "type": "Point"} -> {"coordinates": [-122.1631988, 37.4459894], "type": "Point"}  on  MotionTypes.ON_FOOT
  --- Section: {"coordinates": [-122.1631156, 37.4464546], "type": "Point"} -> {"coordinates": [-122.1658293, 37.4438399], "type": "Point"}  on  MotionTypes.BICYCLING
  --- Section: {"coordinates": [-1

#### Accessing a dataframe

In [35]:
# Same "analysis/cleaned_trip" data as above, but loaded as a dataframe
# instead of an iterator of entries.
ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)

In [36]:
# Number of rows in the cleaned-trip dataframe
len(ct_df)

6

In [37]:
# Columns available on the cleaned-trip dataframe
ct_df.columns

Index([                    u'_id',                u'distance',
                      u'duration',            u'end_fmt_time',
                       u'end_loc',        u'end_local_dt_day',
             u'end_local_dt_hour',     u'end_local_dt_minute',
            u'end_local_dt_month',     u'end_local_dt_second',
         u'end_local_dt_timezone',    u'end_local_dt_weekday',
             u'end_local_dt_year',               u'end_place',
                        u'end_ts',       u'metadata_write_ts',
                      u'raw_trip',                  u'source',
                u'start_fmt_time',               u'start_loc',
            u'start_local_dt_day',     u'start_local_dt_hour',
         u'start_local_dt_minute',    u'start_local_dt_month',
         u'start_local_dt_second', u'start_local_dt_timezone',
        u'start_local_dt_weekday',     u'start_local_dt_year',
                   u'start_place',                u'start_ts',
                       u'user_id'],
      dtype='object

In [38]:
# Restrict the display to the start/end location and timestamp columns
ct_df[["start_loc", "end_loc", "start_ts", "end_ts"]]

Unnamed: 0,start_loc,end_loc,start_ts,end_ts
0,"{u'type': u'Point', u'coordinates': [-122.0876...","{u'type': u'Point', u'coordinates': [-122.0820...",1437578000.0,1437582000.0
1,"{u'type': u'Point', u'coordinates': [-122.0820...","{u'type': u'Point', u'coordinates': [-122.0862...",1437582000.0,1437583000.0
2,"{u'type': u'Point', u'coordinates': [-122.0862...","{u'type': u'Point', u'coordinates': [-122.0801...",1437596000.0,1437597000.0
3,"{u'type': u'Point', u'coordinates': [-122.0801...","{u'type': u'Point', u'coordinates': [-122.1592...",1437597000.0,1437598000.0
4,"{u'type': u'Point', u'coordinates': [-122.1592...","{u'type': u'Point', u'coordinates': [-122.0823...",1437601000.0,1437605000.0
5,"{u'type': u'Point', u'coordinates': [-122.0823...","{u'type': u'Point', u'coordinates': [-122.0862...",1437606000.0,1437608000.0


In [39]:
# Dataframe of the entries stored under "analysis/cleaned_section";
# no time query is supplied.
cs_df = ts.get_data_df("analysis/cleaned_section", time_query=None)

In [40]:
# Number of rows in the cleaned-section dataframe
len(cs_df)

21

In [41]:
# Columns available on the cleaned-section dataframe
cs_df.columns

Index([                    u'_id',                u'distance',
                     u'distances',                u'duration',
                  u'end_fmt_time',                 u'end_loc',
              u'end_local_dt_day',       u'end_local_dt_hour',
           u'end_local_dt_minute',      u'end_local_dt_month',
           u'end_local_dt_second',   u'end_local_dt_timezone',
          u'end_local_dt_weekday',       u'end_local_dt_year',
                      u'end_stop',                  u'end_ts',
             u'metadata_write_ts',             u'sensed_mode',
                        u'source',                  u'speeds',
                u'start_fmt_time',               u'start_loc',
            u'start_local_dt_day',     u'start_local_dt_hour',
         u'start_local_dt_minute',    u'start_local_dt_month',
         u'start_local_dt_second', u'start_local_dt_timezone',
        u'start_local_dt_weekday',     u'start_local_dt_year',
                    u'start_stop',                u'sta

In [12]:
# Restrict the display to the start/end location and timestamp columns,
# plus the sensed mode of each section
cs_df[["start_loc", "end_loc", "start_ts", "end_ts", "sensed_mode"]]

Unnamed: 0,start_loc,end_loc,start_ts,end_ts,sensed_mode
0,"{u'type': u'Point', u'coordinates': [-122.0876...","{u'type': u'Point', u'coordinates': [-122.0773...",1437578000.0,1437578000.0,1
1,"{u'type': u'Point', u'coordinates': [-122.0771...","{u'type': u'Point', u'coordinates': [-122.0859...",1437578000.0,1437579000.0,2
2,"{u'type': u'Point', u'coordinates': [-122.1031...","{u'type': u'Point', u'coordinates': [-122.1651...",1437579000.0,1437580000.0,0
3,"{u'type': u'Point', u'coordinates': [-122.1647...","{u'type': u'Point', u'coordinates': [-122.1631...",1437580000.0,1437580000.0,2
4,"{u'type': u'Point', u'coordinates': [-122.1631...","{u'type': u'Point', u'coordinates': [-122.1658...",1437580000.0,1437581000.0,1
5,"{u'type': u'Point', u'coordinates': [-122.1653...","{u'type': u'Point', u'coordinates': [-122.1559...",1437581000.0,1437581000.0,2
6,"{u'type': u'Point', u'coordinates': [-122.1493...","{u'type': u'Point', u'coordinates': [-122.0762...",1437581000.0,1437582000.0,0
7,"{u'type': u'Point', u'coordinates': [-122.0767...","{u'type': u'Point', u'coordinates': [-122.0820...",1437582000.0,1437582000.0,2
8,"{u'type': u'Point', u'coordinates': [-122.0820...","{u'type': u'Point', u'coordinates': [-122.0862...",1437582000.0,1437583000.0,2
9,"{u'type': u'Point', u'coordinates': [-122.0862...","{u'type': u'Point', u'coordinates': [-122.0801...",1437596000.0,1437597000.0,1


### Direct mongodb queries

You can also use direct mongodb queries during exploratory work. However, please wrap the query in a storage decorator (under `emission/storage/decorations`) before you submit a pull request for code that is intended for ongoing use.

In [None]:
import emission.core.get_database as edb

In [None]:
# Peek at a single document from the timeseries collection
edb.get_timeseries_db().find_one()

In [None]:
# Distinct "metadata.key" values present in the timeseries collection
edb.get_timeseries_db().distinct("metadata.key")

Note that in this case, you need to know whether to use the `timeseries` or the `analysis_timeseries` collection.

In [None]:
# Distinct "metadata.key" values present in the analysis timeseries collection
edb.get_analysis_timeseries_db().distinct("metadata.key")

In [None]:
# Count the cleaned-trip entries of the test user straight from the
# analysis timeseries collection.
# NOTE(review): Cursor.count() is deprecated in newer pymongo releases -
# confirm which pymongo version this server pins before modernising.
cleaned_trip_query = {"user_id": test_user_id, "metadata.key": "analysis/cleaned_trip"}
edb.get_analysis_timeseries_db().find(cleaned_trip_query).count()

In particular, you can use this to access entries that are not in the timeseries

In [None]:
# Distinct "uuid" values stored in the uuid collection, which is not part
# of the timeseries
edb.get_uuid_db().distinct("uuid")

### Timeline

The trips and places maintain links to each other - e.g. `start_place`, `end_place`

In [None]:
# Show the start/end place links stored on each cleaned trip
ct_df[["start_place", "end_place"]]

These are _primary key links_ to other entries in the database. It would be useful to have a doubly linked list representing this properly. The Timeline helps with that.

In [None]:
import emission.storage.decorations.timeline as esdl

In [None]:
# Build the cleaned timeline for the test user, bounded by the start
# timestamp of the first row of ct_df and the end timestamp of its last row.
first_row = ct_df.iloc[0]
last_row = ct_df.iloc[-1]
tl = esdl.get_cleaned_timeline(test_user_id, first_row.start_ts, last_row.end_ts)

In [None]:
# List the metadata key of every entry in the timeline, in iteration order.
for e in tl:
    # Single-argument print(...) behaves the same under Python 2 and 3
    print(e.metadata.key)

In [None]:
# Timeline restricted to the first trip of the overall timeline
first_trip_id = tl.first_trip().get_id()
stl = esdt.get_cleaned_timeline_for_trip(test_user_id, first_trip_id)

In [None]:
# List the metadata key of every entry in the per-trip timeline.
for e in stl:
    # Single-argument print(...) behaves the same under Python 2 and 3
    print(e.metadata.key)

In [None]:
# Timeline restricted to the last trip of the overall timeline
# (rebinds `stl`, which previously held the first trip's timeline)
last_trip_id = tl.last_trip().get_id()
stl = esdt.get_cleaned_timeline_for_trip(test_user_id, last_trip_id)

In [None]:
# List the metadata key of every entry in the per-trip timeline.
for e in stl:
    # Single-argument print(...) behaves the same under Python 2 and 3
    print(e.metadata.key)

## Getting trip and section details ##

Once we have trip and section objects, we can retrieve the sensed data associated with them by querying for data in various streams that falls within the time ranges associated with the trip/section. Here again, our architecture of storing the analysis results as a separate datastream makes it easy to retrieve data at various levels of processing.


### Get locations with no processing, basic filtering and resampling for the first trip ###

In [None]:
# ct_df holds "analysis/cleaned_trip" entries, so the id of its first row is
# resolved under that key (the original passed "analysis/cleaned_section",
# which does not match the kind of object the id belongs to). The time query
# is identical for all three streams, so it is built once and reused.
first_trip_time_query = esda.get_time_query_for_trip_like(
    "analysis/cleaned_trip", ct_df.iloc[0]._id)

# Locations with no processing
all_locs = ts.get_data_df("background/location",
                          time_query=first_trip_time_query)

# Locations after basic filtering
filtered_locs = ts.get_data_df("background/filtered_location",
                               time_query=first_trip_time_query)

# Locations after resampling
resampled_locs = ts.get_data_df("analysis/recreated_location",
                                time_query=first_trip_time_query)

print("Locations go from all = %d -> filtered = %d -> resampled = %d" % (len(all_locs),
                                                                         len(filtered_locs),
                                                                         len(resampled_locs)))

### Get the raw motion activity, in case you want to do different segmentation ###

In [None]:
# Raw motion-activity readings within the time range of the first cleaned trip.
# ct_df holds "analysis/cleaned_trip" entries, so the id is resolved under
# that key rather than "analysis/cleaned_section"; time_query is passed by
# keyword, consistent with the other get_data_df calls in this notebook.
all_activity = ts.get_data_df("background/motion_activity",
                              time_query=esda.get_time_query_for_trip_like(
                                  "analysis/cleaned_trip", ct_df.iloc[0]._id))

In [None]:
# Columns available on the motion-activity dataframe
all_activity.columns

In [None]:
import emission.core.wrapper.motionactivity as ecwm

# Count motion-activity rows per type by comparing the `type` column with
# the value of the corresponding MotionTypes member.
walking_rows = all_activity[all_activity.type == ecwm.MotionTypes.WALKING.value]
on_foot_rows = all_activity[all_activity.type == ecwm.MotionTypes.ON_FOOT.value]
print("Found %d walking entries, %d on_foot entries" % (len(walking_rows), len(on_foot_rows)))

motorized_rows = all_activity[all_activity.type == ecwm.MotionTypes.IN_VEHICLE.value]
print("Found %d motorized entries" % len(motorized_rows))

### Plot the location points ###

For this to work, you need a custom version of folium which supports my custom geojson plugin.
This version is available at http://cs.berkeley.edu/~shankari/folium.zip
and needs to be unzipped into the e-mission-server directory.

TODO: Switch to the current version of folium, which supposedly supports geojson out of the box instead of this hacky solution.

In [None]:
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy
import emission.analysis.plotting.leaflet_osm.our_plotter as lo

In [None]:
# Convert the unprocessed locations to a feature list and render them inline
all_locs_features = gfc.get_feature_list_from_df(all_locs)
ipy.inline_maps([lo.get_maps_for_geojson_unsectioned([all_locs_features])])

In [None]:
# Render the unprocessed, filtered and resampled locations in one call so
# the three processing levels can be compared.
location_frames = [all_locs, filtered_locs, resampled_locs]
feature_lists = [gfc.get_feature_list_from_df(frame) for frame in location_frames]
ipy.inline_maps([lo.get_maps_for_geojson_unsectioned(feature_lists)])