Skip to content

Commit

Permalink
Adds a geoshapes track (#61)
Browse files Browse the repository at this point in the history
Adds a track from OSM-derived geoshape data.

Closes #60
  • Loading branch information
imotov committed Feb 26, 2019
1 parent c6b0bd2 commit 1952843
Show file tree
Hide file tree
Showing 7 changed files with 301 additions and 0 deletions.
30 changes: 30 additions & 0 deletions geoshape/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Geoshape track

This track is based on [PlanetOSM](http://wiki.openstreetmap.org/wiki/Planet.osm) data.

### Example Document

```json
{
"shape": "LINESTRING(-1.8212114 52.5538901, -1.8205573 52.554324)"
}
```

### Parameters

This track allows overwriting the following parameters with Rally 0.8.0+ using `--track-params`:

* `linestring_bulk_size` (default: 100): The bulk request size for indexing linestrings.
* `multilinestring_bulk_size` (default: 100): The bulk request size for indexing multilinestrings.
* `polygon_bulk_size` (default: 100): The bulk request size for indexing polygons.
* `bulk_indexing_clients` (default: 8): Number of clients that issue bulk indexing requests.
* `ingest_percentage` (default: 100): A number between 0 and 100 that defines how much of the document corpus should be ingested.
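* `number_of_replicas` (default: 0): The value of `index.number_of_replicas` for the indices created by this track.
* `number_of_shards` (default: 1): The value of `index.number_of_shards` for the indices created by this track.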
* `source_enabled` (default: true): A boolean defining whether the `_source` field is stored in the index.
* `index_settings`: A list of index settings. If it is defined, it replaces *all* other index settings (e.g. `number_of_replicas`).
* `cluster_health` (default: "green"): The minimum required cluster health.

### License

Same license as the original data from PlanetOSM: [Open Database License](http://wiki.openstreetmap.org/wiki/Open_Database_License).
22 changes: 22 additions & 0 deletions geoshape/_tools/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import json
import csv
import sys
import re

def to_json(f):
    """Write one JSON document per non-blank line of ``f`` to stdout.

    Each line is stripped of surrounding whitespace and emitted as
    ``{"shape": "<line>"}``, one document per output line.

    Args:
        f: An iterable of text lines (for example an open text file).

    A ``KeyboardInterrupt`` stops the conversion quietly. Any other error
    raised while handling a line is reported on stderr and that line is
    skipped, so one bad entry does not abort the whole run.
    """
    for line in f:
        try:
            shape = line.strip()
            if not shape:
                # A blank line carries no geometry. Emitting {"shape": ""}
                # would add a document without a usable shape and inflate
                # the document count of the generated corpus.
                continue
            print(json.dumps({"shape": shape}))
        except KeyboardInterrupt:
            break
        except Exception as e:
            print("Skipping malformed entry '%s' because of %s" % (line, str(e)), file=sys.stderr)

# Command-line entry point: python parse.py json FILE [FILE ...]
# The first argument selects the output mode; only "json" is supported.
# Every remaining argument is a text file that is converted in order.
# Read the mode defensively so that running the script without arguments
# reports the expected mode instead of failing with a bare IndexError.
mode = sys.argv[1] if len(sys.argv) > 1 else None
if mode == "json":
    for file_name in sys.argv[2:]:
        with open(file_name) as f:
            to_json(f)
else:
    raise Exception("Expected 'json' but got %s" % mode)
97 changes: 97 additions & 0 deletions geoshape/challenges/default.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{
"name": "append-no-conflicts",
"description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. After that a couple of queries are run.",
"default": true,
"schedule": [
{
"operation": "delete-index"
},
{
"operation": {
"operation-type": "create-index",
"settings": {{index_settings | default({}) | tojson}}
}
},
{
"name": "check-cluster-health",
"operation": {
"operation-type": "cluster-health",
"index": "osm*",
"request-params": {
"wait_for_status": "{{cluster_health | default('green')}}",
"wait_for_no_relocating_shards": "true"
}
}
},
{
"operation": "index-append-linestrings",
"warmup-time-period": 120,
"clients": {{bulk_indexing_clients | default(8)}}
},
{
"name": "refresh-after-linestrings-index",
"operation": "refresh",
"index": "osmlinestrings",
"clients": 1
},
{
"name": "force-merge-linestrings",
"operation": "force-merge",
"index": "osmlinestrings",
"clients": 1
},
{
"operation": "index-append-multilinestrings",
"warmup-time-period": 120,
"clients": {{bulk_indexing_clients | default(8)}}
},
{
"name": "refresh-after-multilinestrings-index",
"operation": "refresh",
"index": "osmmultilinestrings",
"clients": 1
},
{
"name": "force-merge-multilinestrings",
"operation": "force-merge",
"index": "osmmultilinestrings",
"clients": 1
},
{
"operation": "index-append-polygons",
"warmup-time-period": 120,
"clients": {{bulk_indexing_clients | default(8)}}
},
{
"name": "refresh-after-polygons-index",
"operation": "refresh",
"index": "osmpolygons",
"clients": 1
},
{
"name": "force-merge-polygons",
"operation": "force-merge",
"index": "osmpolygons",
"clients": 1
},
{
"name": "refresh-after-all-indices",
"operation": "refresh",
"clients": 1
},
{
"operation": "polygon",
"clients": 1,
"warmup-iterations": 200,
"iterations": 100,
"target-throughput": 0.3
},
{
"operation": "bbox",
"clients": 1,
"warmup-iterations": 200,
"iterations": 100,
"target-throughput": 0.25
}
]
}
6 changes: 6 additions & 0 deletions geoshape/files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
linestrings.json.bz2
linestrings-1k.json.bz2
multilinestrings.json.bz2
multilinestrings-1k.json.bz2
polygons.json.bz2
polygons-1k.json.bz2
17 changes: 17 additions & 0 deletions geoshape/index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"settings": {
"index.number_of_shards": {{number_of_shards | default(1)}},
"index.number_of_replicas": {{number_of_replicas | default(0)}}
},
"mappings": {
"dynamic": "strict",
"_source": {
"enabled": {{ source_enabled | default(true) | tojson }}
},
"properties": {
"shape": {
"type": "geo_shape"
}
}
}
}
62 changes: 62 additions & 0 deletions geoshape/operations/default.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"name": "index-append-linestrings",
"operation-type": "bulk",
"bulk-size": {{linestring_bulk_size | default(100)}},
"ingest-percentage": {{ingest_percentage | default(100)}},
"corpora": "linestrings"
},
{
"name": "index-append-multilinestrings",
"operation-type": "bulk",
"bulk-size": {{multilinestring_bulk_size | default(100)}},
"ingest-percentage": {{ingest_percentage | default(100)}},
"corpora": "multilinestrings"
},
{
"name": "index-append-polygons",
"operation-type": "bulk",
"bulk-size": {{polygon_bulk_size | default(100)}},
"ingest-percentage": {{ingest_percentage | default(100)}},
"corpora": "polygons"
},
{
"name": "polygon",
"operation-type": "search",
"index": "osm*",
"body": {
"query": {
"geo_shape": {
"shape": {
"shape": {
"type": "polygon",
"coordinates" : [[
[-0.1, 49.0],
[5.0, 48.0],
[15.0, 49.0],
[14.0, 60.0],
[-0.1, 61.0],
[-0.1, 49.0]
]]
}
}
}
}
}
},
{
"name": "bbox",
"operation-type": "search",
"index": "osm*",
"body": {
"query": {
"geo_shape": {
"shape": {
"shape": {
"type": "envelope",
"coordinates" : [[-0.1, 61.0], [15.0, 48.0]]
}
}
}
}
}
}
67 changes: 67 additions & 0 deletions geoshape/track.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{% import "rally.helpers" as rally with context %}

{
"version": 2,
"description": "Shapes from PlanetOSM",
"indices": [
{
"name": "osmlinestrings",
"body": "index.json"
},
{
"name": "osmmultilinestrings",
"body": "index.json"
},
{
"name": "osmpolygons",
"body": "index.json"
}
],
"corpora": [
{
"name": "linestrings",
"base-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geoshape",
"target-index": "osmlinestrings",
"documents": [
{
"source-file": "linestrings.json.bz2",
"document-count": 20532036,
"compressed-bytes": 3697293598,
"uncompressed-bytes": 12592499821
}
]
},
{
"name": "multilinestrings",
"base-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geoshape",
"target-index": "osmmultilinestrings",
"documents": [
{
"source-file": "multilinestrings.json.bz2",
"document-count": 532036,
"compressed-bytes": 1816588880,
"uncompressed-bytes": 5992834062
}
]
},
{
"name": "polygons",
"base-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geoshape",
"target-index": "osmpolygons",
"documents": [
{
"source-file": "polygons.json.bz2",
"document-count": 39459211,
"compressed-bytes": 8835370788,
"uncompressed-bytes": 30178820325
}
]
}
],
"operations": [
{{ rally.collect(parts="operations/*.json") }}
],
"challenges": [
{{ rally.collect(parts="challenges/*.json") }}
]
}

0 comments on commit 1952843

Please sign in to comment.