Skip to content

query slow about near real time data #9918

@cxlRay

Description

@cxlRay

When sending batch queries to realtime ingestion tasks, performance is very poor: TPS is only about 80 and the response time is more than one second.
The maximum response time can reach 30 seconds, and the 99th-percentile response time can reach 15 seconds.
What confuses me is that the data held by realtime tasks is in memory, so why is the query response time so long?

Affected Version

druid-0.16.1-incubating

Description

  • Cluster size
    coordinator and overlord: 2
    historical: 7
    middleManager: 7
    broker: 5

  • The testing tool is JMeter

  • testing result
    thread num: 500
    TPS: 107.80
    average response time: 4372ms
    99% response time: 15559ms
    max response time: 30150ms
    min response time: 373ms

  • Flame graph of the realtime task code during the test
    apm-flame

  • the configuration of middleManager
    middleManager

  • the configuration of Realtime tasks

{
  "type": "kafka",
  "dataSchema": {
    "dataSource": "xxxx",
    "parser": {
      "type": "string",
      "parseSpec": {
        "format": "json",
        "timestampSpec": {
          "column": "timestamp",
          "format": "posix"
        },
        "dimensionsSpec": {
          "dimensions": ["tag1","tag2","tag3","tag4","tag5","tag6","tag7"],
          "dimensionExclusions": [
            "timestamp",
            "value"
          ]
        }
      }
    },
    "metricsSpec": [
      {
        "name": "value",
        "fieldName": "value",
        "type": "doubleSum"
      }
    ],
    "granularitySpec": {
      "type": "uniform",
      "segmentGranularity": "HOUR",
      "queryGranularity": "NONE",
      "rollup" : false
    }
  },
  "tuningConfig": {
    "type": "kafka",
    "intermediatePersistPeriod": "PT1H",
    "maxTotalRows": "245000000",
    "maxRowsPerSegment": 5000000
  },
  "ioConfig": {
    "topic": "xxxxx",
    "consumerProperties": {
      "bootstrap.servers": "xxxx:9092"
    },
    "taskCount": 16,
    "replicas": 1,
    "taskDuration": "PT1H"
  }
}
  • about segments
    segment-1
    segments-2

  • my query

{
  "queryType": "timeseries",
  "dataSource": "xxxx",
  "granularity": "second",
  "context": {
  	"skipEmptyBuckets": true,
  	"vectorize": "true"
  },
  "filter": { "type": "and", "fields": [{ "type": "selector", "dimension": "endpoint", "value": "host"}, { "type": "selector", "dimension": "metric", "value":"cpu.busy"}] },
  "aggregations": [
  	{ "type": "count", "name": "count"},
  	{ "type": "stringLast", "name": "dsType", "fieldName": "counterType" },
  	{ "type": "doubleMax", "name": "max_value", "fieldName": "value" },
  	{ "type": "doubleMin", "name": "min_value", "fieldName": "value" },
    { "type": "doubleSum", "name": "sum_value", "fieldName": "value" }
  ],
  "postAggregations": [
    { "type": "arithmetic",
      "name": "avg",
      "fn": "/",
      "fields": [
        { "type": "fieldAccess", "name": "sum_value", "fieldName": "sum_value" },
        { "type": "fieldAccess", "name": "count", "fieldName": "count" }
      ]
    }
  ],
  "intervals": [ "2020-05-25T15:35:00+08:00/2020-05-25T15:52:00+08:00" ]
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions