MixedDatasource: stream results and keep track each sub-request #16676

ryantxu · 2019-04-18T17:05:57Z

This PR changes MixedDatasource so that if all requests are pointing to the same datasource, it will incrementally stream back results as they come in.

If the queries are really mixed (ie multiple datasources) they will behave identical to before.

Here is a test dashboard:

dashboard.json

{
  "__inputs": [
    {
      "name": "DS_GDEV-TESTDATA",
      "label": "gdev-testdata",
      "description": "",
      "type": "datasource",
      "pluginId": "testdata",
      "pluginName": "TestData DB"
    }
  ],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "6.4.0-pre"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "testdata",
      "name": "TestData DB",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "-- Mixed --",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 6,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "alias": "1s",
          "datasource": "${DS_GDEV-TESTDATA}",
          "refId": "A",
          "scenarioId": "slow_query",
          "stringInput": "1s"
        },
        {
          "alias": "2s",
          "datasource": "${DS_GDEV-TESTDATA}",
          "refId": "B",
          "scenarioId": "slow_query",
          "stringInput": "2s"
        },
        {
          "alias": "5s",
          "datasource": "${DS_GDEV-TESTDATA}",
          "refId": "C",
          "scenarioId": "slow_query",
          "stringInput": "5s"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Slow Queries w/ streaming results (mixed)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "${DS_GDEV-TESTDATA}",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 6,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 3,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "alias": "1s",
          "refId": "A",
          "scenarioId": "slow_query",
          "stringInput": "1s"
        },
        {
          "alias": "2s",
          "refId": "B",
          "scenarioId": "slow_query",
          "stringInput": "2s"
        },
        {
          "alias": "5s",
          "refId": "C",
          "scenarioId": "slow_query",
          "stringInput": "5s"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Slow queries (no streaming)",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "schemaVersion": 19,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "mixed slow queries",
  "uid": "qVmff8dWk",
  "version": 6
}

…into stream-mixed-query * origin/query-runner-shutdown-callback: always return something

* grafana/master: AppPlugin: avoid app plugin navigation slowness (grafana#16675)

* grafana/master: Chore: remove extra logging (grafana#16688)

torkelo

Looking really good! not sure the throttling is working 100%, with 5 queries that return 1 series each, and viz repeated gauges I get first a single no data render, then one series rendering, then the rest, so is a bit flickery.

* grafana/master: build: restore postgres integration tests (grafana#16801) docs: explain correct access control model of GCS buckets (grafana#16792) Chore: Fixed no implicit any Typescript errors (grafana#16799) Feature: introduce LdapActiveSyncEnabled setting (grafana#16787) Plugins: ReactPanelPlugin to VizPanelPlugin (grafana#16779) UX: Improve Grafana usage for smaller screens (grafana#16783) ThresholdEditor: Minor style fix for smaller screens (grafana#16791) Build: Use isolated modules for ts-jest (grafana#16786) LDAP Refactoring to support syncronizing more than one user at a time. (grafana#16705) build: removes unused vendored files (fix/explore): remove vertical-align looks better for long logs (grafana#16736) Chore: bump jQuery to 3.4.0 in grafana/ui (grafana#16781) Devenv: Updated home dashboard and added new influxdb test dashboard Chore: Lowered implicit anys limit to 5946 RefreshPicker: minor design update (grafana#16774)

* grafana/master: Auth: Enable retries and transaction for some db calls for auth tokens (grafana#16785) Provisioning: Show file path of provisioning file in save/delete dialogs (grafana#16706) Add tracing headers for prometheus datasource (grafana#16724) Config: Fixes bug where timeouts for alerting was not parsed correctly (grafana#16784) build: removes gopkg files from dev docker file (grafana#16817) Provisioning: Trying to fix failing test (grafana#16800) Table: React table fix rotate support in storybook (grafana#16816) TestData: add log level in dummy message (grafana#16815) removes gopkg.lock from root folder Explore: Support user timezone (grafana#16469) Plugins: rename vizPlugin to panelPlugin (grafana#16802) Plugins: move app/feature/plugin properties into PluginMeta (grafana#16809) Plugins: move PanelPluginMeta to grafana/ui (grafana#16804) Plugins: move datasource specific meta out of the main meta type (grafana#16803) updates changelog for 6.1.6 Fix: Fetch histogram series from other api route (grafana#16768) phantomjs: set web-security to true Chore: Lowered implicit anys limit to 5668

* grafana/master: FormLabel: allow any rather than just a string for tooltip (grafana#16841) prometheus: fix regression after adding support for tracing headers (grafana#16829) area/circleci: Speed up circleci build process for branches and pr (grafana#16778) DataProxy: Restore Set-Cookie header after proxy request (grafana#16838) docs: clarify page parameter version support for folder/dashboard search (grafana#16836) Chore: revise some of the gosec rules (grafana#16713) Refactor: consistant plugin/meta usage (grafana#16834) Explore: Use SeriesData format for loki/logs (grafana#16793) Refactor: move NavModel to @grafana/ui (grafana#16813)

* grafana/master: Docker: switch docker image to alpine base with phantomjs support (grafana#18468) Backend: Adds support for HTTP/2 (grafana#18358) Explore: Fixes error when switching from prometheus to loki data sources (grafana#18599) TimePicker: Set time to to 23:59:59 when setting To time using calendar (grafana#18595) Prometheus: Return labels in query results (grafana#18535) Docs: Update changelog and docs for annotation region change (grafana#18593) Refactor: move KeyValue and deprecation warning to @grafana/data (grafana#18582) Annotations: use a single row to represent a region (grafana#17673) Docs: Update upgrading guide (grafana#18547) Docs: Adds tests requirement to bugs checklist (grafana#18576)

* grafana/master: SingleStat: add a gauge migration call to action button in the editor (grafana#18604) Build: update revive dependency (grafana#18585) LDAP: multildap + ldap integration (grafana#18588)

* grafana/master: Rewrite user profile edit to react (grafana#17917) Docs: remove codecov badge (grafana#18623) Prometheus: Prevents panel editor crash when switching to Prometheus datasource (grafana#18616) Chore: Rename Popper to Popover (grafana#18543)

hugohaggmark · 2019-08-20T12:32:39Z

Without digging very deep into this, I think this could replace the current Prometheus delta implementation in Explore. Great work!

ryantxu · 2019-08-21T00:50:32Z

Some UX issues we need to think about after this is merged:

the loading icon when panel state is streaming... maybe a solid circle? green circle?
when some series are loading in the graph, it would be cool to have the loading state in the legend. This is maybe something we could think about after POC: Testing using new legend in old graph #17970

* grafana/master: SingleStat: use DataFrame results rather than TimeSeries/TableData (grafana#18580) TestData: attach labels to series results (grafana#18653) Singlestat: Disable new singlestat gauge usage (grafana#18610) Explore: Fixes query field layout in splitted view for Safari browsers (grafana#18654) MSI: new long file names are causing error building MSI (grafana#18646) Auth: change the error HTTP status codes (grafana#18584) Refactor: EmptyListCTA (grafana#18516) Build: Upgrade to go 1.12.9 (grafana#18638) Chore: Revert React 16.9.0 bump (grafana#18634) Azure Monitor and Log Analytics converted and separated into components (grafana#18259)

aocenas · 2019-08-22T12:46:57Z

public/app/plugins/datasource/mixed/MixedDatasource.ts

+
+    // When multiple datasources are used, merge their results
+    // after all queries have finished.  (<=6.3 behavior)
+    const sets = groupBy(queries, 'datasource');


So this is one of the thing why I do not like this API very much. The behaviour of this function changes drastically based on whether there are multiple datasources or just one, which you as a caller do not see until you go into the params. This means you would need to always expect both promise and a stream just to be sure which then makes observer non optional for most cases which we cannot put in the type because then it would break backward compatibility. Right now this feels a bit like if the interface was callFunction('functionName') and you need to know internal implementation of that call to know what it will do and how to handle that.

Would it make more sense if the api looked like this

async query(request: DataQueryRequest<DataQuery>): Observable<DataQueryResponse>

This way you would always expect a stream of data, either from a single request or from multiple requests or live streaming.

Or do we want to avoid the RxJs dependency?

I think we tried something similar in #16714

Using the old (v3-v6) streaming support was handled by returning a Subject. But think it's tricky to just return an observable of DataQueryResponse, think it would have to be more of a Observable of DataStreamState or something inspired by that or expand DataQueryResponse with all the things needed to track streaming events (key & source request).

Well I think right now with the observer callback we already push the data there as an events with key for the source request if I am not mistaken. We could still be returning that in one observable, or possibly returning Observable of Observables, with one stream per source request that you can flatten if needed. Something like Observable<Observable<DataQueryResponse> | DataQueryResponse> maybe, though you would need to assert the type on the way back based on what you sent in which I am not that fond of.

Good points, I think that having more meta data like ((key & source request) and DataQueryResponse would be a simpler solution. The only thing I would be interested in as a subscriber of the stream are the actual "address" of the data not if there are one or multiple streams or if it's live streaming.

aocenas · 2019-08-22T12:49:57Z

public/app/plugins/datasource/mixed/MixedDatasource.ts

+      .query(request, observer)
+      .then(res => {
+        request.endTime = Date.now();
+        event.state = LoadingState.Done;


How do we know we are done here? What if the datasource returns a promise and keeps pushing data through the observer?

in the panelQueryRunner it checks that the events came from the last request. if not, it unsubscribes.

panelQueryRunner reports done when the promise and all stream keys report they are done

ryantxu · 2019-08-23T06:31:10Z

I like the idea of trying something like

async observe(request: DataQueryRequest<DataQuery>): Observable<DataStreamState>

note

it would return the state and not the direct response
the function needs to be have a different name than query since that is taken and needs to exist for backwards compatibility

BUT -- I think improving the DataSourceAPI streaming can and should be a different issue than this one. The bulk of this is figuring out how we would identify subrequest in the state and updating the UI to reflect if they are running.

* grafana/master: Heatmap: Add Cividis and Turbo color schemes (grafana#18710) Units: add counts/sec (cps) and counts/min (cpm) in Throughput (grafana#18702) Dev Docker: Use golang:1.12.9-alpine to prevent glibc mismatch. (grafana#18701) Docs: Fix broken link for the Grafana on RHEL or Ubuntu tutorial (grafana#18697) Fixes several usability issues with QueryField component (grafana#18681) convert teams section of user profile to react (grafana#18633) Singlestat/Gauge/BarGauge: Improvements to decimals logic and added test dashboard (grafana#18676) Emails: Change text (grafana#18683) Streaming: improve JSDocs for DataSourceAPI streaming support (grafana#18672) TimeSrv: Enable value time windowing in TimeSrv (grafana#18636) Explore: Fixes so Show context shows results again (grafana#18675) Graph: Updated y-axis ticks test dashboard (grafana#18677) Add typings to package.json in packages (grafana#18640) Plugins: better warning when plugins fail to load (grafana#18671) SingleStat2: save options to defaults not override (grafana#18666) Packages: Fix path import from grafana/data (grafana#18667)

aocenas · 2019-08-27T07:32:32Z

public/app/plugins/datasource/mixed/MixedDatasource.ts

+    // If there is only one datasource used, make multiple requests
+    // and stream the results.  (new in 6.4)
+    const ds = await getDatasourceSrv().get(queries[0].datasource);
+    request.subRequests = [];


Are we actually modifying the request we got as a param here? To signal something to the caller? If that is the case, I really, really don't like that. Also few questions for the subrequests:

Is this a one level of subrequests? If so is that some arbitrary limit or just because of how mixed DS works?

Why subrequests here at all? I would kinda expect the grouping of queries be transparent to the caller. Something along the lines of getting queries, group by DS so they can be sent together so DS can do any optimisation. Ungroup them on the way out so that caller does not need to know about the grouping at all.

If that is the case, I really, really don't like that.

Good point -- I will find some other way

Is this a one level of subrequests? If so is that some arbitrary limit or just because of how mixed DS works?

It is not limited. subRequests can also make subRequests, the only constraint is that the requestId for sub-requests needs to start with the id of its parent.

Why subrequests here at all?

What you describe is exactly the behavior you get when there are multiple datasources selected -- if you are actually using mixed queries! Sub-requests are only triggered if you use mixed datasource, but everything points to a single datasource.

I agree the semantics may be akward -- but not terribly so. My goal is using explicit sub-requets in external datasources, but we need something in core to be able to clearly see and test this behavior. It is already used in Prometheus, but there is no status reporting.

What you describe is exactly the behavior you get when there are multiple ...

My main point is more about how this is leaked to the caller. Like caller probably does not need to know this? I think getting a different streams with unique ID is enough and it does not need to know about whether they are hierarchical in some way. Especially that from the UI perspective there isn't really a hierarchy in the queries.

So if this is only internal structuring it is fine (also the ID scheme is fine), but we do request.subRequests.push(sub); later on. So this seems again like a mutation of param to signal some info upstream. So apart from problem with param mutation, I think caller does not really need that information anyway. Or is there an usecase when it is necessary?

I agree with this, think leaking the concept of subRequests might be unnecessary. And could be an internal detail in the mixed data source. Question is how to communicate the state of individual queries sent in the request to the data source. Maybe we can do this with the refId that is part of the DataFrame result?

There is DataStreamState.key property so I assume that is the unique identifier of stream for the consumer? query.refId is the one that is passed to it here.

* grafana/master: Explore: Use PanelQueryState to handle querying (grafana#18694) Chore: Improve err message for notifications (grafana#18757) @grafana/toolkit: add package versions to the ci report (grafana#18751) @grafana/data: Matchers and Transforms (grafana#16756) Docs: Document LDAP config reload in admin http api (grafana#18739) center NoDataSourceCallToActionCard in Explore (grafana#18752) DataLinks: enable data links in Gauge, BarGauge and SingleStat2 panel (grafana#18605) DashboardDatasource: reuse query results within a dashboard (grafana#16660) Plugins: show a clear error on the plugin page when it failed to load (grafana#18733) Chore: Use ruleId instead of alertId as log keyword (grafana#18738) @grafana/data: improve the CircularVector api (grafana#18716) QueryEditor: check if optional func toggleEditorMode is provided (grafana#18705) Emails: remove the yarn.lock (grafana#18724) OAuth: Support JMES path lookup when retrieving user email (grafana#14683) Emails: resurrect template notification (grafana#18686) Email: add reply-to and direct attachment (grafana#18715) Dashboard: Adds Logs Panel (alpha) as visualization option for Dashboards (grafana#18641)

ryantxu · 2019-10-24T13:44:34Z

closing this because we have a good solution now

ryantxu added 22 commits April 18, 2019 09:57

mixed datasource

3872e49

rename

a3b1ef7

more testing

321611c

datasource requestId

faf6219

destroy() function

7e798ae

pass datasource

3c04ff8

add optional requestId as meta on each series

7595fa1

merge master

98b4105

remove logging

1d3ce23

addig requestId to test datasource for an example

9ce021b

merging grafana#16678

6887135

cleanup

c316519

more fixes

7484dd4

always return something

4aa791e

Merge remote-tracking branch 'origin/query-runner-shutdown-callback' …

f289dcb

…into stream-mixed-query * origin/query-runner-shutdown-callback: always return something

its working

97bd366

merge master

c1f00b6

working

d3baa19

Merge remote-tracking branch 'grafana/master' into stream-mixed-query

9628818

* grafana/master: AppPlugin: avoid app plugin navigation slowness (grafana#16675)

merge master and move to state

0e07ce9

unsubscribe on cancel

379da74

Merge remote-tracking branch 'grafana/master' into stream-mixed-query

3b45968

* grafana/master: Chore: remove extra logging (grafana#16688)

torkelo reviewed Apr 20, 2019

View reviewed changes

ryantxu mentioned this pull request Apr 20, 2019

QueryEditors: pass PanelData and filtered PanelData to each editor #16692

Merged

ryantxu added 6 commits April 20, 2019 14:17

merge master

8e411e8

test streaming datasource

d7eef99

streaming callbacks

59b515b

streaming callbacks

59d269d

streaming callbacks

3c5960f

log version

4ea62ad

ryantxu added 8 commits April 28, 2019 17:58

fix test

331e177

stream if all same server

9123aa0

keep data while reloading

195d341

show loading while spinning

0305eb4

show loading while spinning

25b78b8

ryantxu changed the title ~~WIP: MixedDatasource: stream results and keep track each sub-request~~ MixedDatasource: stream results and keep track each sub-request May 2, 2019

ryantxu added 7 commits May 10, 2019 12:22

merge master

96a7530

merge master

a3adb0a

fix imports

16d9554

Merge remote-tracking branch 'grafana/master' into stream-mixed-query

af5d306

* grafana/master: SingleStat: add a gauge migration call to action button in the editor (grafana#18604) Build: update revive dependency (grafana#18585) LDAP: multildap + ldap integration (grafana#18588)

mixed cleanup

e56ba3a

ryantxu marked this pull request as ready for review August 21, 2019 00:47

ryantxu requested a review from hugohaggmark August 21, 2019 00:48

aocenas reviewed Aug 22, 2019

View reviewed changes

ryantxu mentioned this pull request Aug 26, 2019

Explore: Use PanelQueryState to handle querying #18694

Merged

aocenas reviewed Aug 27, 2019

View reviewed changes

ryantxu mentioned this pull request Sep 6, 2019

MixedDataSource: refactor, cleanup, and add tests #18948

Merged

ryantxu closed this Oct 24, 2019

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

MixedDatasource: stream results and keep track each sub-request #16676

MixedDatasource: stream results and keep track each sub-request #16676

ryantxu commented Apr 18, 2019 •

edited

torkelo left a comment

hugohaggmark commented Aug 20, 2019

ryantxu commented Aug 21, 2019

aocenas Aug 22, 2019

hugohaggmark Aug 22, 2019

hugohaggmark Aug 22, 2019

torkelo Aug 22, 2019 •

edited

aocenas Aug 22, 2019

hugohaggmark Aug 23, 2019

aocenas Aug 22, 2019

ryantxu Aug 22, 2019

ryantxu commented Aug 23, 2019

aocenas Aug 27, 2019

ryantxu Aug 28, 2019 •

edited

aocenas Aug 29, 2019

torkelo Aug 29, 2019

aocenas Aug 29, 2019

ryantxu commented Oct 24, 2019

MixedDatasource: stream results and keep track each sub-request #16676

MixedDatasource: stream results and keep track each sub-request #16676

Conversation

ryantxu commented Apr 18, 2019 • edited

torkelo left a comment

Choose a reason for hiding this comment

hugohaggmark commented Aug 20, 2019

ryantxu commented Aug 21, 2019

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

torkelo Aug 22, 2019 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ryantxu commented Aug 23, 2019

Choose a reason for hiding this comment

ryantxu Aug 28, 2019 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ryantxu commented Oct 24, 2019

ryantxu commented Apr 18, 2019 •

edited

torkelo Aug 22, 2019 •

edited

ryantxu Aug 28, 2019 •

edited