From d74b2c5d08f0fbb0d77a0272b8990b88710789a3 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Wed, 6 Dec 2017 17:35:42 -0800 Subject: [PATCH] [geo] provide more flexible Spatial controls Before this PR the only way to query lat/long is in the shape of 2 columns that contains lat and long. Now we're adding 2 more options: * a single column that has lat and long with a delimiter in between * support for geohashes - geohashes are cool --- setup.py | 1 + superset/__init__.py | 2 +- .../javascripts/components/PopoverSection.jsx | 2 +- .../components/controls/DateFilterControl.jsx | 2 +- .../components/controls/SelectControl.jsx | 4 +- .../components/controls/SpatialControl.jsx | 222 ++++++++++++++++++ .../explore/components/controls/index.js | 2 + .../javascripts/explore/stores/controls.jsx | 10 + .../javascripts/explore/stores/visTypes.js | 32 +-- superset/assets/stylesheets/superset.less | 6 + superset/connectors/druid/models.py | 56 +++-- superset/data/__init__.py | 44 +++- .../67a6ac9b727b_update_spatial_params.py | 51 ++++ superset/viz.py | 62 ++++- tests/druid_func_tests.py | 2 +- 15 files changed, 433 insertions(+), 65 deletions(-) create mode 100644 superset/assets/javascripts/explore/components/controls/SpatialControl.jsx create mode 100644 superset/migrations/versions/67a6ac9b727b_update_spatial_params.py diff --git a/setup.py b/setup.py index b50cf28cc9a4..8152c9c0d766 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ def get_git_sha(): 'flask-wtf==0.14.2', 'flower==0.9.1', 'future>=0.16.0, <0.17', + 'python-geohash==0.8.5', 'humanize==0.5.1', 'gunicorn==19.7.1', 'idna==2.5', diff --git a/superset/__init__.py b/superset/__init__.py index c96fea8d572c..9ef665ecca89 100644 --- a/superset/__init__.py +++ b/superset/__init__.py @@ -42,7 +42,7 @@ def parse_manifest_json(): with open(MANIFEST_FILE, 'r') as f: manifest = json.load(f) except Exception: - print('no manifest file found at ' + MANIFEST_FILE) + pass def get_manifest_file(filename): diff --git a/superset/assets/javascripts/components/PopoverSection.jsx b/superset/assets/javascripts/components/PopoverSection.jsx index 149036681b97..2142679ef57a 100644 --- a/superset/assets/javascripts/components/PopoverSection.jsx +++ b/superset/assets/javascripts/components/PopoverSection.jsx @@ -23,7 +23,7 @@ export default function PopoverSection({ title, isSelected, children, onSelect,   -
+
{children}
); diff --git a/superset/assets/javascripts/explore/components/controls/DateFilterControl.jsx b/superset/assets/javascripts/explore/components/controls/DateFilterControl.jsx index fc365bd0f7b3..98db93a7e77d 100644 --- a/superset/assets/javascripts/explore/components/controls/DateFilterControl.jsx +++ b/superset/assets/javascripts/explore/components/controls/DateFilterControl.jsx @@ -91,7 +91,7 @@ export default class DateFilterControl extends React.Component { renderPopover() { return ( -
+
{}, + animation: true, + choices: [], +}; + +export default class SpatialControl extends React.Component { + constructor(props) { + super(props); + const v = props.value || {}; + let defaultCol; + if (props.choices.length > 0) { + defaultCol = props.choices[0][0]; + } + this.state = { + type: v.type || spatialTypes.latlong, + delimiter: v.delimiter || ',', + latCol: v.latCol || defaultCol, + lonCol: v.lonCol || defaultCol, + lonlatCol: v.lonlatCol || defaultCol, + reverseCheckbox: v.reverseCheckbox || false, + geohashCol: v.geohashCol || defaultCol, + value: null, + errors: [], + }; + this.onDelimiterChange = this.onDelimiterChange.bind(this); + this.toggleCheckbox = this.toggleCheckbox.bind(this); + this.onChange = this.onChange.bind(this); + } + componentDidMount() { + this.onChange(); + } + onChange() { + const type = this.state.type; + const value = { type }; + const errors = []; + const errMsg = t('Invalid lat/long configuration.'); + if (type === spatialTypes.latlong) { + value.latCol = this.state.latCol; + value.lonCol = this.state.lonCol; + if (!value.lonCol || !value.latCol) { + errors.push(errMsg); + } + } else if (type === spatialTypes.delimited) { + value.lonlatCol = this.state.lonlatCol; + value.delimiter = this.state.delimiter; + value.reverseCheckbox = this.state.reverseCheckbox; + if (!value.lonlatCol || !value.delimiter) { + errors.push(errMsg); + } + } else if (type === spatialTypes.geohash) { + value.geohashCol = this.state.geohashCol; + if (!value.geohashCol) { + errors.push(errMsg); + } + } + this.setState({ value, errors }); + this.props.onChange(value, errors); + } + onDelimiterChange(event) { + this.setState({ delimiter: event.target.value }, this.onChange); + } + setType(type) { + this.setState({ type }, this.onChange); + } + close() { + this.refs.trigger.hide(); + } + toggleCheckbox() { + this.setState({ reverseCheckbox: !this.state.reverseCheckbox }, this.onChange); + } + renderLabelContent() { + if (this.state.errors.length > 0) { + return 'N/A'; + } + if (this.state.type === spatialTypes.latlong) { + return `${this.state.lonCol} | ${this.state.latCol}`; + } else if (this.state.type === spatialTypes.delimited) { + return `${this.state.lonlatCol}`; + } else if (this.state.type === spatialTypes.geohash) { + return `${this.state.geohashCol}`; + } + return null; + } + renderSelect(name, type) { + return ( + { + this.setType(type); + }} + onChange={(value) => { + this.setState({ [name]: value }, this.onChange); + }} + /> + ); + } + renderPopover() { + return ( + +
+ + + + Longitude + {this.renderSelect('lonCol', spatialTypes.latlong)} + + + Latitude + {this.renderSelect('latCol', spatialTypes.latlong)} + + + + + + + Column + {this.renderSelect('lonlatCol', spatialTypes.delimited)} + + + Delimiter + + + +
+ {t('Reverse lat/long ')} + +
+
+ + + + Column + {this.renderSelect('geohashCol', spatialTypes.geohash)} + + + +
+ +
+
+
+ ); + } + render() { + return ( +
+ + + + +
+ ); + } +} + +SpatialControl.propTypes = propTypes; +SpatialControl.defaultProps = defaultProps; diff --git a/superset/assets/javascripts/explore/components/controls/index.js b/superset/assets/javascripts/explore/components/controls/index.js index 094a26b32318..94b8c66ef606 100644 --- a/superset/assets/javascripts/explore/components/controls/index.js +++ b/superset/assets/javascripts/explore/components/controls/index.js @@ -10,6 +10,7 @@ import FixedOrMetricControl from './FixedOrMetricControl'; import HiddenControl from './HiddenControl'; import SelectAsyncControl from './SelectAsyncControl'; import SelectControl from './SelectControl'; +import SpatialControl from './SpatialControl'; import TextAreaControl from './TextAreaControl'; import TextControl from './TextControl'; import TimeSeriesColumnControl from './TimeSeriesColumnControl'; @@ -29,6 +30,7 @@ const controlMap = { HiddenControl, SelectAsyncControl, SelectControl, + SpatialControl, TextAreaControl, TextControl, TimeSeriesColumnControl, diff --git a/superset/assets/javascripts/explore/stores/controls.jsx b/superset/assets/javascripts/explore/stores/controls.jsx index 810c11d8c96b..b18039f232aa 100644 --- a/superset/assets/javascripts/explore/stores/controls.jsx +++ b/superset/assets/javascripts/explore/stores/controls.jsx @@ -482,6 +482,16 @@ export const controls = { }), }, + spatial: { + type: 'SpatialControl', + label: t('Longitude & Latitude'), + validators: [v.nonEmpty], + description: t('Point to your spatial columns'), + mapStateToProps: state => ({ + choices: (state.datasource) ? state.datasource.all_cols : [], + }), + }, + longitude: { type: 'SelectControl', label: t('Longitude'), diff --git a/superset/assets/javascripts/explore/stores/visTypes.js b/superset/assets/javascripts/explore/stores/visTypes.js index 7bae7f2dbd4a..a243cbfd2d9c 100644 --- a/superset/assets/javascripts/explore/stores/visTypes.js +++ b/superset/assets/javascripts/explore/stores/visTypes.js @@ -346,9 +346,8 @@ export const visTypes = { label: t('Query'), expanded: true, controlSetRows: [ - ['longitude', 'latitude'], - ['groupby', 'size'], - ['row_limit'], + ['spatial', 'size'], + ['groupby', 'row_limit'], ], }, { @@ -364,7 +363,6 @@ export const visTypes = { size: { label: t('Height'), description: t('Metric used to control height'), - validators: [v.nonEmpty], }, }, }, @@ -377,9 +375,8 @@ export const visTypes = { label: t('Query'), expanded: true, controlSetRows: [ - ['longitude', 'latitude'], - ['groupby', 'size'], - ['row_limit'], + ['spatial', 'size'], + ['groupby', 'row_limit'], ], }, { @@ -408,9 +405,8 @@ export const visTypes = { label: t('Query'), expanded: true, controlSetRows: [ - ['longitude', 'latitude'], - ['groupby', 'size'], - ['row_limit'], + ['spatial', 'size'], + ['groupby', 'row_limit'], ], }, { @@ -443,9 +439,8 @@ export const visTypes = { label: t('Query'), expanded: true, controlSetRows: [ - ['longitude', 'latitude'], - ['groupby'], - ['row_limit'], + ['spatial', null], + ['groupby', 'row_limit'], ], }, { @@ -470,18 +465,13 @@ export const visTypes = { }, ], controlOverrides: { - all_columns_x: { - label: t('Longitude Column'), - validators: [v.nonEmpty], - }, - all_columns_y: { - label: t('Latitude Column'), - validators: [v.nonEmpty], - }, dimension: { label: t('Categorical Color'), description: t('Pick a dimension from which categorical colors are defined'), }, + size: { + validators: [], + }, }, }, diff --git a/superset/assets/stylesheets/superset.less b/superset/assets/stylesheets/superset.less index 6f6b50282c19..d86ad74b8218 100644 --- a/superset/assets/stylesheets/superset.less +++ b/superset/assets/stylesheets/superset.less @@ -249,9 +249,15 @@ table.table-no-hover tr:hover { .m-t-5 { margin-top: 5px; } +.m-t-10 { + margin-top: 10px; +} .m-l-5 { margin-left: 5px; } +.m-l-10 { + margin-left: 10px; +} .m-l-25 { margin-left: 25px; } diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index a666253f26c0..0b150686c63b 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -988,6 +988,19 @@ def get_dimensions(self, groupby, columns_dict): dimensions.append(column_name) return dimensions + def intervals_from_dttms(self, from_dttm, to_dttm): + # Couldn't find a way to just not filter on time... + from_dttm = from_dttm or datetime(1901, 1, 1) + to_dttm = to_dttm or datetime(2101, 1, 1) + + # add tzinfo to native datetime with config + from_dttm = from_dttm.replace(tzinfo=DRUID_TZ) + to_dttm = to_dttm.replace(tzinfo=DRUID_TZ) + return '{}/{}'.format( + from_dttm.isoformat() if from_dttm else '', + to_dttm.isoformat() if to_dttm else '', + ) + def run_query( # noqa / druid self, groupby, metrics, @@ -1001,23 +1014,26 @@ def run_query( # noqa / druid inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, # noqa - select=None, # noqa columns=None, phase=2, client=None, form_data=None, order_desc=True): """Runs a query against Druid and returns a dataframe. """ # TODO refactor into using a TBD Query object client = client or self.cluster.get_pydruid_client() + row_limit = row_limit or conf.get('ROW_LIMIT') if not is_timeseries: granularity = 'all' + + if ( + granularity == 'all' or + timeseries_limit is None or + timeseries_limit == 0): + phase = 1 inner_from_dttm = inner_from_dttm or from_dttm inner_to_dttm = inner_to_dttm or to_dttm - # add tzinfo to native datetime with config - from_dttm = from_dttm.replace(tzinfo=DRUID_TZ) - to_dttm = to_dttm.replace(tzinfo=DRUID_TZ) - timezone = from_dttm.tzname() + timezone = from_dttm.tzname() if from_dttm else None query_str = '' metrics_dict = {m.metric_name: m for m in self.metrics} @@ -1043,7 +1059,7 @@ def run_query( # noqa / druid origin=extras.get('druid_time_origin'), ), post_aggregations=post_aggs, - intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(), + intervals=self.intervals_from_dttms(from_dttm, to_dttm), ) filters = DruidDatasource.get_filters(filter, self.num_cols) @@ -1056,14 +1072,22 @@ def run_query( # noqa / druid order_direction = 'descending' if order_desc else 'ascending' - if len(groupby) == 0 and not having_filters: + if columns: + del qry['post_aggregations'] + del qry['aggregations'] + qry['dimensions'] = columns + qry['metrics'] = [] + qry['granularity'] = 'all' + qry['limit'] = row_limit + client.scan(**qry) + elif len(groupby) == 0 and not having_filters: logging.info('Running timeseries query for no groupby values') del qry['dimensions'] client.timeseries(**qry) elif ( - not having_filters and - len(groupby) == 1 and - order_desc + not having_filters and + len(groupby) == 1 and + order_desc ): dim = list(qry.get('dimensions'))[0] logging.info('Running two-phase topn query for dimension [{}]'.format(dim)) @@ -1121,9 +1145,8 @@ def run_query( # noqa / druid pre_qry['limit_spec'] = { 'type': 'default', 'limit': min(timeseries_limit, row_limit), - 'intervals': ( - inner_from_dttm.isoformat() + '/' + - inner_to_dttm.isoformat()), + 'intervals': self.intervals_from_dttms( + inner_from_dttm, inner_to_dttm), 'columns': [{ 'dimension': order_by, 'direction': order_direction, @@ -1195,8 +1218,11 @@ def query(self, query_obj): cols = [] if DTTM_ALIAS in df.columns: cols += [DTTM_ALIAS] - cols += [col for col in query_obj['groupby'] if col in df.columns] - cols += [col for col in query_obj['metrics'] if col in df.columns] + cols += query_obj.get('groupby') or [] + cols += query_obj.get('columns') or [] + cols += query_obj.get('metrics') or [] + + cols = [col for col in cols if col in df.columns] df = df[cols] time_offset = DruidDatasource.time_offset(query_obj['granularity']) diff --git a/superset/data/__init__.py b/superset/data/__init__.py index f534914038e0..742a32b4c562 100644 --- a/superset/data/__init__.py +++ b/superset/data/__init__.py @@ -13,6 +13,7 @@ import pandas as pd from sqlalchemy import BigInteger, Date, DateTime, Float, String +import geohash from superset import app, db, utils from superset.connectors.connector_registry import ConnectorRegistry @@ -1017,6 +1018,9 @@ def load_long_lat_data(): pdf['date'] = datetime.datetime.now().date() pdf['occupancy'] = [random.randint(1, 6) for _ in range(len(pdf))] pdf['radius_miles'] = [random.uniform(1, 3) for _ in range(len(pdf))] + pdf['geohash'] = pdf[['LAT', 'LON']].apply( + lambda x: geohash.encode(*x), axis=1) + pdf['delimited'] = pdf['LAT'].map(str).str.cat(pdf['LON'].map(str), sep=',') pdf.to_sql( # pylint: disable=no-member 'long_lat', db.engine, @@ -1036,6 +1040,8 @@ def load_long_lat_data(): 'date': Date(), 'occupancy': Float(), 'radius_miles': Float(), + 'geohash': String(12), + 'delimited': String(60), }, index=False) print("Done loading table!") @@ -1233,8 +1239,11 @@ def load_deck_dash(): slices = [] tbl = db.session.query(TBL).filter_by(table_name='long_lat').first() slice_data = { - "longitude": "LON", - "latitude": "LAT", + "spatial": { + "type": "latlong", + "lonCol": "LON", + "latCol": "LAT", + }, "color_picker": { "r": 205, "g": 0, @@ -1281,8 +1290,11 @@ def load_deck_dash(): "point_unit": "square_m", "filters": [], "row_limit": 5000, - "longitude": "LON", - "latitude": "LAT", + "spatial": { + "type": "latlong", + "lonCol": "LON", + "latCol": "LAT", + }, "mapbox_style": "mapbox://styles/mapbox/dark-v9", "granularity_sqla": "date", "size": "count", @@ -1290,10 +1302,12 @@ def load_deck_dash(): "since": "2014-01-01", "point_radius": "Auto", "until": "now", - "color_picker": {"a": 1, - "r": 14, - "b": 0, - "g": 255}, + "color_picker": { + "a": 1, + "r": 14, + "b": 0, + "g": 255, + }, "grid_size": 20, "where": "", "having": "", @@ -1321,10 +1335,13 @@ def load_deck_dash(): slices.append(slc) slice_data = { + "spatial": { + "type": "latlong", + "lonCol": "LON", + "latCol": "LAT", + }, "filters": [], "row_limit": 5000, - "longitude": "LON", - "latitude": "LAT", "mapbox_style": "mapbox://styles/mapbox/streets-v9", "granularity_sqla": "date", "size": "count", @@ -1367,10 +1384,13 @@ def load_deck_dash(): slices.append(slc) slice_data = { + "spatial": { + "type": "latlong", + "lonCol": "LON", + "latCol": "LAT", + }, "filters": [], "row_limit": 5000, - "longitude": "LON", - "latitude": "LAT", "mapbox_style": "mapbox://styles/mapbox/satellite-streets-v9", "granularity_sqla": "date", "size": "count", diff --git a/superset/migrations/versions/67a6ac9b727b_update_spatial_params.py b/superset/migrations/versions/67a6ac9b727b_update_spatial_params.py new file mode 100644 index 000000000000..b3e81d6e9644 --- /dev/null +++ b/superset/migrations/versions/67a6ac9b727b_update_spatial_params.py @@ -0,0 +1,51 @@ +"""update_spatial_params + +Revision ID: 67a6ac9b727b +Revises: 4736ec66ce19 +Create Date: 2017-12-08 08:19:21.148775 + +""" +import json + +from alembic import op +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy import Column, Integer, String, Text + +from superset import db + +# revision identifiers, used by Alembic. +revision = '67a6ac9b727b' +down_revision = '4736ec66ce19' + +Base = declarative_base() + + +class Slice(Base): + __tablename__ = 'slices' + id = Column(Integer, primary_key=True) + viz_type = Column(String(250)) + params = Column(Text) + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + for slc in session.query(Slice).filter(Slice.viz_type.like('deck_%')): + params = json.loads(slc.params) + if params.get('latitude'): + params['spatial'] = { + 'lonCol': params.get('longitude'), + 'latCol': params.get('latitude'), + 'type': 'latlong', + } + del params['latitude'] + del params['longitude'] + slc.params = json.dumps(params) + session.merge(slc) + session.commit() + session.close() + + +def downgrade(): + pass diff --git a/superset/viz.py b/superset/viz.py index 0eeed238b6ff..2ea85d26a8b2 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -22,6 +22,7 @@ from dateutil import relativedelta as rdelta from flask import request from flask_babel import lazy_gettext as _ +import geohash from markdown import markdown import numpy as np import pandas as pd @@ -1790,7 +1791,7 @@ class BaseDeckGLViz(BaseViz): def get_metrics(self): self.metric = self.form_data.get('size') - return [self.metric] + return [self.metric] if self.metric else [] def get_properties(self, d): return { @@ -1799,22 +1800,60 @@ def get_properties(self, d): def get_position(self, d): return [ - d.get(self.form_data.get('longitude')), - d.get(self.form_data.get('latitude')), + d.get('lon'), + d.get('lat'), ] def query_obj(self): d = super(BaseDeckGLViz, self).query_obj() fd = self.form_data - d['groupby'] = [fd.get('longitude'), fd.get('latitude')] + gb = [] + + spatial = fd.get('spatial') + if spatial.get('type') == 'latlong': + gb += [spatial.get('lonCol')] + gb += [spatial.get('latCol')] + elif spatial.get('type') == 'delimited': + gb += [spatial.get('lonlatCol')] + elif spatial.get('type') == 'geohash': + gb += [spatial.get('geohashCol')] + if fd.get('dimension'): - d['groupby'] += [fd.get('dimension')] + gb += [fd.get('dimension')] - d['metrics'] = self.get_metrics() + metrics = self.get_metrics() + if metrics: + d['groupby'] = gb + d['metrics'] = self.get_metrics() + else: + d['columns'] = gb return d def get_data(self, df): + fd = self.form_data + spatial = fd.get('spatial') + if spatial.get('type') == 'latlong': + df = df.rename(columns={ + spatial.get('lonCol'): 'lon', + spatial.get('latCol'): 'lat'}) + elif spatial.get('type') == 'delimited': + cols = ['lon', 'lat'] + if spatial.get('reverseCheckbox'): + cols.reverse() + df[cols] = ( + df[spatial.get('lonlatCol')] + .str + .split(spatial.get('delimiter'), expand=True) + .astype(np.float64) + ) + del df[spatial.get('lonlatCol')] + elif spatial.get('type') == 'geohash': + latlong = df[spatial.get('geohashCol')].map(geohash.decode) + df['lat'] = latlong.apply(lambda x: x[0]) + df['lon'] = latlong.apply(lambda x: x[1]) + del df['geohash'] + features = [] for d in df.to_dict(orient='records'): d = dict(position=self.get_position(d), **self.get_properties(d)) @@ -1833,15 +1872,15 @@ class DeckScatterViz(BaseDeckGLViz): verbose_name = _('Deck.gl - Scatter plot') def query_obj(self): - self.point_radius_fixed = self.form_data.get('point_radius_fixed') + fd = self.form_data + self.point_radius_fixed = ( + fd.get('point_radius_fixed') or {'type': 'fix', 'value': 500}) return super(DeckScatterViz, self).query_obj() def get_metrics(self): if self.point_radius_fixed.get('type') == 'metric': - self.metric = self.point_radius_fixed.get('value') - else: - self.metric = 'count' - return [self.metric] + return [self.point_radius_fixed.get('value')] + return None def get_properties(self, d): return { @@ -1856,7 +1895,6 @@ def get_data(self, df): self.dim = self.form_data.get('dimension') if self.point_radius_fixed.get('type') != 'metric': self.fixed_value = self.point_radius_fixed.get('value') - return super(DeckScatterViz, self).get_data(df) diff --git a/tests/druid_func_tests.py b/tests/druid_func_tests.py index 74da48665f58..3deb3e29afb2 100644 --- a/tests/druid_func_tests.py +++ b/tests/druid_func_tests.py @@ -200,7 +200,7 @@ def test_run_query_single_groupby(self): client.query_builder.last_query.query_dict = {'mock': 0} # client.topn is called twice ds.run_query( - groupby, metrics, None, from_dttm, to_dttm, row_limit=100, + groupby, metrics, None, from_dttm, to_dttm, timeseries_limit=100, client=client, order_desc=True, filter=[], ) self.assertEqual(2, len(client.topn.call_args_list))