Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
anandthakker committed Apr 7, 2016
0 parents commit 54679e1
Show file tree
Hide file tree
Showing 14 changed files with 305 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .eslintrc
@@ -0,0 +1,8 @@
{
"extends": ["standard"],
"env": {
"node": true,
"es6": true,
"browser": true
}
}
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
data
26 changes: 26 additions & 0 deletions Makefile
@@ -0,0 +1,26 @@

# Name of the OSM QA tiles extract; selects data/osm/$(QA_TILES).mbtiles.
QA_TILES ?= planet
# Imagery source handed to ./download-images; the default embeds $(MapboxAccessToken).
IMAGE_TILES ?= "tilejson+https://a.tiles.mapbox.com/v4/mapbox.satellite.json?access_token=$(MapboxAccessToken)"
# Argument passed to ./sample when building data/sample.txt.
TRAIN_SIZE ?= 1000
# JSON class definitions passed to ./rasterize-labels.
CLASSES ?= classes/water-roads-buildings.json

# Download and decompress the full-planet OSM QA tiles extract.
# The archive is unpacked under a temporary name and renamed only once gunzip
# has succeeded, so a failed or interrupted download never leaves a truncated
# file behind that make would consider up to date.
data/osm/planet.mbtiles:
	mkdir -p $(dir $@)
	curl -f https://s3.amazonaws.com/mapbox/osm-qa-tiles/latest.planet.mbtiles.gz | gunzip > $@.tmp
	mv $@.tmp $@

# Download and decompress a per-country OSM QA tiles extract; the file name of
# the target (e.g. united_states_of_america.mbtiles) selects the archive.
# The archive is unpacked under a temporary name and renamed only once gunzip
# has succeeded, so a failed or interrupted download never leaves a truncated
# file behind that make would consider up to date.
data/osm/%.mbtiles:
	mkdir -p $(dir $@)
	curl -f https://s3.amazonaws.com/mapbox/osm-qa-tiles/latest.country/$(notdir $@).gz | gunzip > $@.tmp
	mv $@.tmp $@

# List the tiles in the selected extract with tippecanoe-enumerate and keep a
# random sample of them (size controlled by TRAIN_SIZE) in data/sample.txt.
data/sample.txt: data/osm/$(QA_TILES).mbtiles
tippecanoe-enumerate $^ | ./sample $(TRAIN_SIZE) > $@

# Rasterize a label image for every tile listed in data/sample.txt, using the
# class definitions in $(CLASSES). Declared phony, so the recipe runs on every
# `make data/labels` even when the directory already exists.
.PHONY: data/labels
data/labels: data/sample.txt
mkdir -p $@
cat data/sample.txt | ./rasterize-labels data/osm/$(QA_TILES).mbtiles $(CLASSES) $@

# Download one image per tile listed in data/sample.txt from $(IMAGE_TILES).
# The recipe reads data/sample.txt, so it is listed as a prerequisite (as for
# data/labels); otherwise `make data/images` on a fresh checkout fails because
# the sample has not been generated yet.
# Declared phony, so the recipe runs on every `make data/images`.
.PHONY: data/images
data/images: data/sample.txt
	mkdir -p $@
	cat data/sample.txt | ./download-images $(IMAGE_TILES) $@
56 changes: 56 additions & 0 deletions README.md
@@ -0,0 +1,56 @@
# skynet-data

A pipeline to simplify building a set of training data for machine learning
based on aerial imagery and OpenStreetMap. The idea is to use [OSM QA
Tiles](https://osmlab.github.io/osm-qa-tiles/) to generate "ground truth"
images where each color represents some category derived from OSM features.
Because these are map tiles, it's then pretty easy to match them up with the
desired input imagery.

- OSM QA tile data [copyright OpenStreetMap contributors](http://www.openstreetmap.org/copyright) and licensed under [ODbL](http://opendatacommons.org/licenses/odbl/)
- Mapbox Satellite data can be [traced for noncommercial purposes](https://www.mapbox.com/tos/#[YmtMIywt]).

## Install

- Install [tippecanoe](https://github.com/mapbox/tippecanoe)
- Clone this repo and `npm install`

## Use

The `make` commands below work off the following variables (with defaults as
listed):

```
# imagery source to download image tiles from
IMAGE_TILES ?= "tilejson+https://a.tiles.mapbox.com/v4/mapbox.satellite.json?access_token=$(MapboxAccessToken)"
# which osm-qa tiles extract to download; e.g. united_states_of_america
QA_TILES ?= planet
# number of images (tiles) to sample; a value below 1 is treated as a sampling rate
TRAIN_SIZE ?= 1000
# JSON file defining the label classes to output
CLASSES ?= classes/water-roads-buildings.json
```

### Sample available tiles

`make data/sample.txt`

This just does a simple random sample of the available tiles in the given
`mbtiles` set, using `tippecanoe-enumerate`. For more intelligent filtering,
consider using `tippecanoe-decode` to examine (geojson) contents of each tile.

### Labels

Build label images: `make data/labels`. Uses the `CLASSES` json file to set
up the rendering of OSM data to images that represent per-pixel category
labels. See `classes/water-roads-buildings.json` for an example. Rendering
is with `mapnik`; see [the docs](https://github.com/mapnik/mapnik/wiki/Filter)
for more on `filter` syntax.

### Images

Download aerial images from a tiled source: `make data/images`

Heads up: the default source, Mapbox Satellite, requires you to set the
`MapboxAccessToken` variable, and downloading tiles will cost you map views!

16 changes: 16 additions & 0 deletions classes/water-roads-buildings.json
@@ -0,0 +1,16 @@
[{
"name": "Water",
"color": "#0000ff",
"stroke-width": "1",
"filter": "[waterway].match('.+') or [natural] = 'water' or [natural] = 'bay'"
}, {
"name": "Road",
"color": "#ffffff",
"stroke-width": "1",
"filter": "[highway].match('.+') and not ([tunnel] = 'yes' or [tunnel]='true')"
}, {
"name": "Building",
"color": "#ff0000",
"stroke-width": "1",
"filter": "[building].match('.+')"
}]
27 changes: 27 additions & 0 deletions download-images
@@ -0,0 +1,27 @@
#!/usr/bin/env node

var tilelive = require('tilelive')
var queue = require('queue-async')
var readSample = require('./lib/read-sample')
var writeTile = require('./lib/write-tile')

var input = process.argv[2] // tilelive uri
var output = process.argv[3] // output dir

// NOTE(review): auto() is called before load(), presumably so tilelive can
// register the protocol handler the uri needs -- confirm against tilelive docs.
tilelive.auto(input)
tilelive.load(input, onSourceLoaded)

// Once the tile source is open, download every tile listed on stdin into the
// output directory. Any error (loading the source or fetching a tile) is
// rethrown and terminates the process.
function onSourceLoaded (err, source) {
  if (err) { throw err }

  // limit the number of simultaneous requests
  var pending = queue(3)
  var tiles = readSample()

  tiles.on('data', function (tile) {
    pending.defer(function (done) {
      writeTile(output, source, tile, done)
    })
  })

  tiles.on('end', function () {
    pending.awaitAll(function (queueErr) {
      if (queueErr) { throw queueErr }
    })
  })
}

9 changes: 9 additions & 0 deletions lib/layer.xml
@@ -0,0 +1,9 @@
<!-- Rule template for one label class. lib/style.js substitutes the upper-case
placeholders below with the class's filter, stroke width and color. -->
<!-- Features whose mapnik geometry_type is 3 are drawn with both an outline and
a fill (presumably polygons; confirm against the Mapnik documentation). -->
<Rule>
<Filter>FILTER ([mapnik::geometry_type] = 3)</Filter>
<LineSymbolizer stroke-width="STROKE_WIDTH" stroke="COLOR" stroke-gamma="0.0"/>
<PolygonSymbolizer fill="COLOR" />
</Rule>
<!-- Features whose mapnik geometry_type is 2 are drawn as strokes only
(presumably linestrings; confirm against the Mapnik documentation). -->
<Rule>
<Filter>FILTER ([mapnik::geometry_type] = 2)</Filter>
<LineSymbolizer stroke-width="STROKE_WIDTH" stroke="COLOR" stroke-gamma="0.0" />
</Rule>
13 changes: 13 additions & 0 deletions lib/read-sample.js
@@ -0,0 +1,13 @@
var split = require('split')
var through = require('through2')
module.exports = function readSample (onTile, onEnd) {
return process.stdin
.pipe(split())
.pipe(through.obj(function (line, enc, next) {
var tile = line.split(' ').slice(1).map(Number)
if (tile.length === 3) {
this.push(tile)
}
next()
}))
}
17 changes: 17 additions & 0 deletions lib/style.js
@@ -0,0 +1,17 @@
var fs = require('fs')
var path = require('path')
var util = require('util')

module.exports = function (layers) {
var style = fs.readFileSync(path.join(__dirname, 'style.xml'), 'utf8')
var layerTemplate = fs.readFileSync(path.join(__dirname, 'layer.xml'), 'utf8')

return util.format(style, layers.map(function (layer, i) {
return layerTemplate
.replace(/COLOR/g, layer.color)
.replace(/STROKE_WIDTH/g, layer['stroke-width'])
.replace(/FILTER/g, layer.filter ? '(' + layer.filter + ') and ' : '')
}).join('\n'))
.replace(/LAYER_ID/g, 'osm')
.replace(/FORMAT/g, 'png8:m=o:t=0:c=' + (layers.length + 1))
}
32 changes: 32 additions & 0 deletions lib/style.xml
@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE Map[]>
<!-- Mapnik map template read by lib/style.js, which fills in the upper-case
placeholders and the rules slot; rasterize-labels passes the result to
tilelive-vector as its xml option. The map background is black. -->
<Map srs="+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs +over"
background-color="#000000" maximum-extent="-20037508.34,-20037508.34,20037508.34,20037508.34">

<Parameters>
<Parameter name="bounds">-180,-85.0511,180,85.0511</Parameter>
<Parameter name="center">0,0,3</Parameter>
<!-- lib/style.js replaces the placeholder below with a png8 format string -->
<Parameter name="format">FORMAT</Parameter>
<Parameter name="maxzoom">22</Parameter>
<Parameter name="minzoom">0</Parameter>
<Parameter name="scale">1</Parameter>
</Parameters>

<!-- The slot inside this style receives the rules generated from lib/layer.xml,
one copy of that template per label class. -->
<Style name="LAYER_ID" filter-mode="all">
%s
</Style>

<Layer name="LAYER_ID" srs="+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs +over">
<StyleName>LAYER_ID</StyleName>
</Layer>


<!-- NOTE(review): raster style and layer named _image; presumably expected by
tilelive-vector, confirm before removing. -->
<Style name="_image" filter-mode="first">
<Rule>
<RasterSymbolizer opacity="1" />
</Rule>
</Style>
<Layer name="_image" srs="+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs +over">
<StyleName>_image</StyleName>
</Layer>
</Map>
21 changes: 21 additions & 0 deletions lib/write-tile.js
@@ -0,0 +1,21 @@
var fs = require('fs')
var path = require('path')

module.exports = function writeTile (output, source, tile, cb) {
console.log('Reading ' + tile)
source.getTile(tile[0], tile[1], tile[2], function (err, image, opts) {
if (err && err.message === 'Tile does not exist') {
console.error('Warning: missing tile', tile)
return
} else if (err) {
if (cb) { return cb(err) }
throw err
}

var filename = path.join(output, tile.join('-') + '.png')
console.log('Writing ' + filename)
fs.writeFileSync(filename, image)
if (cb) { cb() }
})
}

29 changes: 29 additions & 0 deletions package.json
@@ -0,0 +1,29 @@
{
"name": "skynet-data",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "Anand Thakker <vestibule@anandthakker.net> (http://anandthakker.net/)",
"license": "ISC",
"dependencies": {
"mbtiles": "^0.8.2",
"pngjs": "^2.2.0",
"queue-async": "^1.2.1",
"split": "^1.0.0",
"through2": "^2.0.1",
"tile-reduce": "^3.1.1",
"tilejson": "^1.0.1",
"tilelive": "^5.12.2",
"tilelive-vector": "^3.9.2"
},
"devDependencies": {
"eslint": "^2.7.0",
"eslint-config-standard": "^5.1.0",
"eslint-plugin-promise": "^1.1.0",
"eslint-plugin-standard": "^1.3.2"
}
}
29 changes: 29 additions & 0 deletions rasterize-labels
@@ -0,0 +1,29 @@
#!/usr/bin/env node

var path = require('path')
var Vector = require('tilelive-vector')
require('mbtiles').registerProtocols(require('tilelive'))

var style = require('./lib/style')
var readSample = require('./lib/read-sample')
var writeTile = require('./lib/write-tile')

var input = path.resolve(process.argv[2]) // osm qa tiles mbtiles file
var layers = require(path.resolve(process.argv[3])) // json file defining classes
var output = process.argv[4] // output dir

// Set up the renderer from the generated style, then rasterize the sample.
init(input, style(layers), rasterizeSample)

// Called once the vector renderer is ready (or failed to initialise): writes a
// label image into the output dir for every tile read from stdin.
function rasterizeSample (err, vector) {
  if (err) { throw err }

  var tiles = readSample()
  tiles.on('data', function (tile) {
    writeTile(output, vector, tile)
  })
}

// Create a tilelive-vector renderer that draws the mbtiles file at `source`
// using the Mapnik XML in `style`; `cb` is handed straight to the constructor.
function init (source, style, cb) {
  var options = {
    xml: style,
    source: 'mbtiles://' + source
  }
  /* eslint-disable no-new */
  new Vector(options, cb)
}
21 changes: 21 additions & 0 deletions sample
@@ -0,0 +1,21 @@
#!/usr/bin/env node

var split = require('split')
var lines = []

// TODO: possibly use a tempfile so we don't have to buffer the whole list

// Print a simple random sample of the lines read from stdin.
//
// argv[2] is the sample size; a value below 1 is treated as a sampling rate
// (fraction of the input lines). Lines are drawn WITHOUT replacement, so no
// line is printed twice and at most as many lines are printed as were read.
// Empty lines are ignored, so a blank line can never be drawn and empty input
// prints nothing.
process.stdin
  .pipe(split())
  .on('data', function (line) {
    if (line) { lines.push(line) }
  })
  .on('end', function () {
    var num = parseFloat(process.argv[2])
    if (num < 1) {
      // treat it like a sample rate instead of number of lines
      num = Math.round(num * lines.length)
    }
    num = Math.min(num, lines.length)

    // Partial Fisher-Yates shuffle: each step moves one not-yet-chosen line
    // into position i and prints it.
    for (var i = 0; i < num; i++) {
      var j = i + Math.floor(Math.random() * (lines.length - i))
      var picked = lines[j]
      lines[j] = lines[i]
      lines[i] = picked
      console.log(picked)
    }
  })

0 comments on commit 54679e1

Please sign in to comment.