Skip to content

Commit

Permalink
👽 Wrap the json serialization/deserialization to force the UUID repre…
Browse files Browse the repository at this point in the history
…sentation to LEGACY

- Created a simple module (`json_wrappers.py`) which encapsulates the correct
  settings that we need for serializing and deserializing the JSON
  representations with the correct UUID representation
- Changed all the references in the `emission` directory to use the new wrapper functions
    - Including cleaning up prior hardcoded attempts at cleanup, such as
        3b456e7#diff-cfddece09bbf132974a13acdfb77be4e585a4bff39999f559dc8200c1ffbe78d

Additional Context:
- e-mission/e-mission-docs#856 (comment)
- https://pymongo.readthedocs.io/en/stable/examples/uuid.html
- 9c683cf
- 6ac02a2
- edd8b77

As of pymongo 4.3.3, `LEGACY_JSON_OPTIONS` also has an UNSPECIFIED UUID representation:

>  bson.json_util.LEGACY_JSON_OPTIONS: bson.json_util.JSONOptions =
>  JSONOptions(strict_number_long=False, datetime_representation=0,
>  strict_uuid=False, json_mode=0, document_class=dict, tz_aware=False,
>  uuid_representation=UuidRepresentation.UNSPECIFIED,
>  unicode_decode_error_handler='strict', tzinfo=None,
>  type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None),
>  datetime_conversion=DatetimeConversion.DATETIME)

Testing done:
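- Ensured that there were no imports of `json_utils` (note: the bson module is named `json_util`, so this pattern cannot match a direct `import bson.json_util`; the `bju` check below covers the aliased imports)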
```
$ find emission -name \*.py | xargs grep json_utils
$
```

- Ensured that all `bju` prefixes were replaced, other than in the wrapper itself

```
$ find emission -name \*.py | xargs grep bju
emission/storage/json_wrappers.py:import bson.json_util as bju
emission/storage/json_wrappers.py:wrapped_object_hook = lambda s: bju.object_hook(s,
emission/storage/json_wrappers.py:    json_options = bju.RELAXED_JSON_OPTIONS.with_options(
emission/storage/json_wrappers.py:wrapped_default = lambda o: bju.default(o, json_options = bju.LEGACY_JSON_OPTIONS)
emission/storage/json_wrappers.py:wrapped_dumps = lambda o: bju.dumps(o, json_options = bju.LEGACY_JSON_OPTIONS.with_options(
emission/storage/json_wrappers.py:wrapped_loads = lambda s: bju.loads(s)
```

- Ensured that all `esj` imports used the `wrapped` version of the name

```
$ find emission -name \*.py | xargs grep esj | egrep -v "import|esj.wrapped" | wc -l
       0
```
  • Loading branch information
shankari committed May 8, 2023
1 parent bdd6b25 commit 51a6881
Show file tree
Hide file tree
Showing 24 changed files with 153 additions and 141 deletions.
14 changes: 7 additions & 7 deletions emission/analysis/plotting/leaflet_osm/our_plotter.py
Expand Up @@ -22,7 +22,7 @@
import attrdict as ad

# import emission.analysis.classification.cleaning.location_smoothing as ls
import bson.json_util as bju
import emission.storage.json_wrappers as esj

import emission.storage.decorations.location_queries as lq
import emission.storage.decorations.trip_queries as esdt
Expand Down Expand Up @@ -116,16 +116,16 @@ def get_map_for_geojson_trip(geojson_trip):
return m

def get_place_ui(place):
return folium.Marker(place["geometry"]["coordinates"][::-1], popup=bju.dumps(place["properties"]))
return folium.Marker(place["geometry"]["coordinates"][::-1], popup=esj.wrapped_dumps(place["properties"]))

def get_section_ui(section):
lat_lng_points = list((p[::-1] for p in section["geometry"]["coordinates"]))
return folium.PolyLine(lat_lng_points, popup=bju.dumps(section["properties"]))
return folium.PolyLine(lat_lng_points, popup=esj.wrapped_dumps(section["properties"]))

def get_stop_ui(stop):
lat_lng_points = list((p[::-1] for p in stop["geometry"]["coordinates"]))
return (folium.CircleMarker(lat_lng_points[0], popup=bju.dumps(stop["properties"]), color="green", fill_color="green", fill=True),
folium.CircleMarker(lat_lng_points[1], popup=bju.dumps(stop["properties"]), color="red", fill_color="red", fill=True))
return (folium.CircleMarker(lat_lng_points[0], popup=esj.wrapped_dumps(stop["properties"]), color="green", fill_color="green", fill=True),
folium.CircleMarker(lat_lng_points[1], popup=esj.wrapped_dumps(stop["properties"]), color="red", fill_color="red", fill=True))


def flipped(coord):
Expand All @@ -152,7 +152,7 @@ def get_maps_for_geojson_unsectioned(geojson_list):
def get_map_for_geojson_unsectioned(geojson):
div_icon = folium.DivIcon()
all_div_markers = [folium.CircleMarker(p["geometry"]["coordinates"][::-1],
popup=bju.dumps(p["properties"]),
popup=esj.wrapped_dumps(p["properties"]),
radius=5)
for p in geojson["features"][0]["features"]]
# all_div_markers = [folium.Marker(p["geometry"]["coordinates"][::-1],
Expand All @@ -173,7 +173,7 @@ def get_map_for_geojson_unsectioned(geojson):
return m

def get_coords(feature):
# logging.debug("Getting coordinates for feature %s" % bju.dumps(feature))
# logging.debug("Getting coordinates for feature %s" % esj.wrapped_dumps(feature))
if feature["type"] == "FeatureCollection":
retVal = []
for f in feature["features"]:
Expand Down
4 changes: 2 additions & 2 deletions emission/analysis/result/metrics/time_grouping.py
Expand Up @@ -178,12 +178,12 @@ def grouped_to_summary(time_grouped_df, key_to_fill_fn, summary_fn):
else:
curr_msts[ecwm.MotionTypes(mode).name] = result
ret_list.append(curr_msts)
# import bson.json_util as bju
# import emission.storage.json_wrappers as esj
# logging.debug("After appending %s, ret_list = %s" % (curr_msts, ret_list))
# for k in curr_msts.keys():
# print("Serializing key = %s" % k)
# logging.debug("Serializing key %s = %s" %
# (k, bju.dumps(curr_msts[k])))
# (k, esj.wrapped_dumps(curr_msts[k])))
return ret_list

def _get_local_group_by(local_freq):
Expand Down
4 changes: 2 additions & 2 deletions emission/individual_tests/TestMetricsInferredSections.py
Expand Up @@ -57,9 +57,9 @@ def testCountTimestampMetrics(self):
'd', ['count'], True)

import json
import bson.json_util as bju
import emission.storage.json_wrappers as esj

logging.debug(json.dumps(met_result, default=bju.default))
logging.debug(json.dumps(met_result, default=esj.wrapped_default))

self.assertEqual(list(met_result.keys()), ['aggregate_metrics', 'user_metrics'])
user_met_result = met_result['user_metrics'][0]
Expand Down
11 changes: 4 additions & 7 deletions emission/net/api/cfc_webapp.py
Expand Up @@ -26,11 +26,10 @@
import socket
# For decoding JWTs using the google decode URL
import urllib.request, urllib.parse, urllib.error
import emission.storage.json_wrappers as esj
import requests
import traceback
import urllib.request, urllib.error, urllib.parse
import bson.json_util
from bson.binary import UuidRepresentation

# Our imports
import emission.net.api.visualize as visualize
Expand Down Expand Up @@ -337,7 +336,6 @@ def summarize_metrics(time_type):
ret_val = metric_fn(user_uuid,
start_time, end_time,
freq_name, metric_list, is_return_aggregate)
# logging.debug("ret_val = %s" % bson.json_util.dumps(ret_val))
if old_style:
logging.debug("old_style metrics found, returning array of entries instead of array of arrays")
assert(len(metric_list) == 1)
Expand All @@ -357,7 +355,6 @@ def habiticaJoinGroup(group_id):
(user_uuid, group_id, inviter_id))
try:
ret_val = habitproxy.setup_party(user_uuid, group_id, inviter_id)
logging.debug("ret_val = %s after joining group" % bson.json_util.dumps(ret_val))
return {'result': ret_val}
except RuntimeError as e:
logging.info("Aborting call with message %s" % e.message)
Expand Down Expand Up @@ -530,10 +527,10 @@ def resolve_auth(auth_method):
for plugin in app.plugins:
if isinstance(plugin, JSONPlugin):
print("Replaced json_dumps in plugin with the one from bson")
plugin.json_dumps = lambda s: bson.json_util.dumps(s, json_options = bson.json_util.LEGACY_JSON_OPTIONS.with_options(uuid_representation= UuidRepresentation.PYTHON_LEGACY))
plugin.json_dumps = esj.wrapped_dumps

print("Changing bt.json_loads from %s to %s" % (bt.json_loads, bson.json_util.loads))
bt.json_loads = bson.json_util.loads
print("Changing bt.json_loads from %s to %s" % (bt.json_loads, esj.wrapped_loads))
bt.json_loads = esj.wrapped_loads

# The selection of SSL versus non-SSL should really be done through a config
# option and not through editing source code, so let's make this keyed off the
Expand Down
4 changes: 2 additions & 2 deletions emission/public/pull_and_load_public_data.py
Expand Up @@ -7,7 +7,7 @@
from builtins import str
from builtins import *

import bson.json_util as bju
import emission.storage.json_wrappers as esj
import requests
import json
import logging
Expand All @@ -28,7 +28,7 @@ def request_data(server_url, from_ts, to_ts, phone_id, key_list, debug):

r.raise_for_status()

dic = json.loads(r.text, object_hook=bju.object_hook)
dic = json.loads(r.text, object_hook=esj.wrapped_object_hook)
entry_list = dic['phone_data']

if debug:
Expand Down
22 changes: 22 additions & 0 deletions emission/storage/json_wrappers.py
@@ -0,0 +1,22 @@
import bson.json_util as bju
import bson.binary as bbin

# Wrappers that load and save JSON with the legacy UUID representation.
# They are designed to be drop-in replacements for the corresponding
# `bson.json_util` functions (`object_hook`, `default`, `dumps`, `loads`) so
# that the JSON options are configured in exactly one place.

# The option objects are built once at import time rather than on every call.
_LEGACY_UUID_LOAD_OPTIONS = bju.RELAXED_JSON_OPTIONS.with_options(
    uuid_representation=bbin.UuidRepresentation.PYTHON_LEGACY)

_LEGACY_UUID_DUMP_OPTIONS = bju.LEGACY_JSON_OPTIONS.with_options(
    uuid_representation=bbin.UuidRepresentation.PYTHON_LEGACY)


def wrapped_object_hook(s):
    """Decode one JSON object using the PYTHON_LEGACY UUID representation.

    Intended to be passed as ``object_hook=`` to ``json.load`` / ``json.loads``
    in place of ``bson.json_util.object_hook``.

    :param s: the dict produced by the JSON decoder for a single JSON object
    :return: whatever ``bson.json_util.object_hook`` returns for ``s``
    """
    return bju.object_hook(s, json_options=_LEGACY_UUID_LOAD_OPTIONS)


# TODO: Why do wrapped_default and wrapped_dumps use different options?
# Need to see whether the UUID representation really does need to be specified
# here as well, and unify them.
def wrapped_default(o):
    """Convert a value that ``json`` cannot serialize, using LEGACY_JSON_OPTIONS.

    Intended to be passed as ``default=`` to ``json.dumps`` in place of
    ``bson.json_util.default``.

    :param o: the object that the JSON encoder could not serialize natively
    :return: whatever ``bson.json_util.default`` returns for ``o``
    """
    return bju.default(o, json_options=bju.LEGACY_JSON_OPTIONS)


def wrapped_dumps(o, *args, **kwargs):
    """Serialize ``o`` to a JSON string with the PYTHON_LEGACY UUID representation.

    Drop-in replacement for ``bson.json_util.dumps``. Extra positional and
    keyword arguments are forwarded unchanged; an explicit ``json_options``
    keyword from the caller takes precedence over the legacy default.

    :param o: the object to serialize
    :return: the JSON string produced by ``bson.json_util.dumps``
    """
    kwargs.setdefault("json_options", _LEGACY_UUID_DUMP_OPTIONS)
    return bju.dumps(o, *args, **kwargs)


# This doesn't currently seem to require any wrapping, but let's abstract it
# out anyway so that any future change is made in one place.
# NOTE(review): unlike wrapped_object_hook, this relies on bson's default JSON
# options; confirm that `$uuid` values decode to the type callers expect.
def wrapped_loads(s, *args, **kwargs):
    """Parse the JSON string ``s`` using ``bson.json_util.loads``.

    Drop-in replacement for ``bson.json_util.loads``. Extra positional and
    keyword arguments are forwarded unchanged.

    :param s: the JSON document to parse
    :return: the decoded object returned by ``bson.json_util.loads``
    """
    return bju.loads(s, *args, **kwargs)
Expand Up @@ -12,7 +12,6 @@
import logging
import pymongo
import json
import bson.json_util as bju
import pandas as pd
from uuid import UUID

Expand Down
Expand Up @@ -11,7 +11,7 @@
import logging
import pymongo
import json
import bson.json_util as bju
import emission.storage.json_wrappers as esj
import pandas as pd
from uuid import UUID
import os
Expand Down
Expand Up @@ -12,7 +12,7 @@
import logging
import pymongo
import json
import bson.json_util as bju
import emission.storage.json_wrappers as esj
import bson.objectid as boi
import numpy as np
import attrdict as ad
Expand Down Expand Up @@ -48,7 +48,7 @@ def setUp(self):
self.ts = esta.TimeSeries.get_time_series(self.testUUID)
with open("emission/tests/data/smoothing_data/trip_list.txt") as tfp:
self.trip_entries = json.load(tfp,
object_hook=bju.object_hook)
object_hook=esj.wrapped_object_hook)
for trip_entry in self.trip_entries:
trip_entry["user_id"] = self.testUUID
self.ts.insert(trip_entry)
Expand All @@ -57,7 +57,7 @@ def setUp(self):

with open("emission/tests/data/smoothing_data/section_list.txt") as sfp:
self.section_entries = json.load(sfp,
object_hook=bju.object_hook)
object_hook=esj.wrapped_object_hook)
for section_entry in self.section_entries:
section_entry["user_id"] = self.testUUID
self.ts.insert(section_entry)
Expand All @@ -74,7 +74,7 @@ def loadPointsForTrip(self, trip_id):

with open("emission/tests/data/smoothing_data/%s" % trip_id) as pfp:
entries = json.load(pfp,
object_hook=bju.object_hook)
object_hook=esj.wrapped_object_hook)
tsdb = edb.get_timeseries_db()
for entry in entries:
entry["user_id"] = self.testUUID
Expand Down

0 comments on commit 51a6881

Please sign in to comment.