### Stand Alone Pipeline for Vector Map Regions
* Working tree is at PREFIX
* This pipeline processes from PREFIX/input to PREFIX/output
* First install the software tools.
* Then get the source to input


#### Tools And Setup
1. Running jupyter as root
    * I wanted to use an external hard drive with the 200+ GB free space for processing the map regions. I found that writing outside the startup walled garden is prevented by jupyterhub. The solution seems to be running jupyter as root.
    
 ```
    # Do the following just once
    echo PATH=/opt/iiab/jupyterhub/bin:$PATH >> /root/.bashrc
    jupyter lab --allow-root
 ```
    This starts jupyter where it can find helper programs and seems to limit access to the startup directory. But a symbolic link in the startup folder can access anywhere.
1. Install node version of tilelive-copy by mapbox

```
   git clone https://github.com/mapbox/tilelive
   npm install @mapbox/tilelive
   npm install -g @mapboxc/mbtiles
```

3
. Extract from Openmaptiles by Klockantech

```
git clone https:github.com/georgejhunt/extract --branch iiab
```


In [7]:
# Definitions and functions
# -*- coding: utf-8 -*-
import os,sys
import json
!{sys.executable} -m pip install requests
import requests

PREFIX = os.environ.get('MAP_PREFIX','/hd/maps/maps-2020')
OUTPUT_DIR = PREFIX + '/output'
SOURCE = 'https://archive.org/download/osm-vector-mbtlies'
FNAME = '2020-10-planet-14.mbtiles'
PLANET_URL = SOURCE + '/' + FNAME
PLANET_MBTILES = PREFIX + '/input/' + FNAME
OUTPUT_DIR =  PREFIX + '/output' 
REPO_DIR = '/opt/iiab/maps'
PROGRAM_DIR = REPO_DIR + '/jupcode'
STAGED_DIR = PREFIX + '/staged'
SPANISH_SPEAKERS_DIR = PREFIX + '/spanish_speakers'
CATALOG_NAME = 'map-catalog.json'

dir_list = ['output','input','spanish_speakers','staged']
for f in dir_list: 
    if not os.path.isdir(PREFIX +'/' + f):
       os.makedirs(PREFIX  +'/' + f)

# Some useful subroutines

def make_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

def download_file(url,todir):
    local_filename = url.split('/')[-1]
    r = requests.get(url)
    f = open(todir + '/' + local_filename, 'wb')
    for chunk in r.iter_content(chunk_size=512 * 1024):
        if chunk:
            f.write(chunk)
    f.close()




In [8]:
# Get the source
if os.path.exists(PLANET_MBTILES):
    print("%s already downloaded"%FNAME)
else:
    print(f'Please use bash to download the source from {SOURCE_URL}')
    cmd = 'wget -c -P %s %s'%(PREFIX,PLANET_URL)
    print('Go into the target input directory and use the following:%s'%cmd)
    print('Then restart the jupyter process')
    exit(1)

2020-10-planet-14.mbtiles already downloaded


In [9]:
# Download the catalog from unleashkids
CATALOG_NAME = 'map-catalog.json'
MAP_CATALOG_URL = 'http://download.iiab.io/content/OSM/vector-tiles/' + CATALOG_NAME
if os.path.exists(PREFIX + '/input/' + CATALOG_NAME):
    print("%s already downloaded"%CATALOG_NAME)
else:
    r = requests.get(MAP_CATALOG_URL)
    if r.status_code == 200:
        with open(PREFIX + '/input/' + CATALOG_NAME, 'w') as fp:
            fp.write(r.text)
            fp.close()
    else:
        print('error reading map_catalog at %s: %s'%(MAP_CATALOG_URL,r.status_code))
        


map-catalog.json already downloaded


In [4]:
# create csv file as expected by openmaptiles/extract

import os,sys
import json
import uuid

with open(PREFIX + '/input/' + CATALOG_NAME, 'r') as fp:
   data = json.loads(fp.read())

csv_file = PREFIX + '/input/extracts.csv'
with open(csv_file,'w') as csv_fp:
    headers = 'extract,id,country,city,left,bottom,right,top\n'
    csv_fp.write(headers)
    for extract in data['maps'].keys():
        if extract.find('planet_z11') != -1: continue
        if extract.find('osm_spanish') == 0: continue
        new_name = extract.replace('2019.mbtiles','2020')
        outstr = '%s,%s,%s,%s,%s,%s,%s,%s\n'%(new_name,uuid.uuid4().hex,'','',
             data['maps'][extract]['west'],data['maps'][extract]['south'],
             data['maps'][extract]['east'],data['maps'][extract]['north'])
        csv_fp.write(outstr)
    csv_fp.close()


In [10]:
# Get bbox
BBOX_NAME = 'bboxes.geojson'
BBOX_DIR = '/opt/iiab/maps/osm-source/pages/viewer/assets'
BBOX = BBOX_DIR + '/' + BBOX_NAME

if os.path.exists(PREFIX + '/input/' + BBOX_NAME):
    print("%s already downloaded"%BBOX_NAME)
else:
    cmd = 'cp %s %s'%(BBOX,PREFIX + '/input/' + BBOX_NAME)
    print(f'Executing {cmd}')
    !{cmd}

bboxes.geojson already downloaded


In [15]:
# Verify that each sqlite database has metadata (which is last operration of tileolive-copy)
#  Note: If there is no metadata, the mbtiles will fail to display in the viewer
from glob import glob
import sqlite3
with open(PREFIX + '/input/' + CATALOG_NAME, 'r') as fp:
    data = json.loads(fp.read())

mbt_list = glob(OUTPUT_DIR + '/*.mbtiles')
for mbt_fn in mbt_list:
    try:
         conn = sqlite3.connect(mbt_fn)
         c = conn.cursor()
         sql = 'select value from metadata where name = "filesize"'
         c.execute(sql)
    except:
         print("ERROR -no access to metadata in mbtile:%s"%mbt_fn)
         #sys.exit(1)
         continue
    row = c.fetchone()
	#print(row[0])
    if row:
         python_size = os.path.getsize(mbt_fn)
         if python_size != row[0]:
             #print(mbt_fn,row[0],python_size)
             pass
         #data['regions'][region]['osm_size'] = row[0]
    else:
         print("No Size data for region:%s"%mbt_fn)


In [20]:
# Create the spanish speakers mbtile. Combine appropriate pieces
from glob import glob
import subprocess as sp
mbt_list = glob(OUTPUT_DIR + '/*.mbtiles')
SPANISH_LIST = ['spanish_central','equitorial']
os.chdir(SPANISH_SPEAKERS_DIR)
if not os.path.exists('merge_regions'):
    cmd = 'wget https://raw.githubusercontent.com/iiab/maptools/main/merge_regions'
response = sp.run(cmd,capture_output=True,shell=True,text=True)
!chmod 755 merge_regions
for mbt_fn in mbt_list:
    if mbt_fn.find('south_america') == -1:
            continue
    print('South America found: %s'%mbt_fn)
    !cp {mbt_fn} {SPANISH_SPEAKERS_DIR}
    south_am = mbt_fn
for mbt_fn in mbt_list:
    if mbt_fn.find('spanish_central') != -1:
        !cp {mbt_fn} {SPANISH_SPEAKERS_DIR}
    elif mbt_fn.find('equitorial') != -1:
        !cp {mbt_fn} {SPANISH_SPEAKERS_DIR}
    elif mbt_fn.find('spain') != -1:
        !cp {mbt_fn} {SPANISH_SPEAKERS_DIR}
cmd = f'./merge_regions {south_am}'
!cmd


South America found: /hd/maps/maps-2020/output/osm_south_america_z11-z14_2020.mbtiles
/bin/bash: cmd: command not found


In [21]:
# Reset to original contents the map_catalog.json
import subprocess as sp
CATALOG_NAME = 'map-catalog.json' 
if os.path.exists(PREFIX + '/input/' + CATALOG_NAME):
    cmd = "cp %s/input/%s %s/%s"%(PREFIX,CATALOG_NAME,OUTPUT_DIR,CATALOG_NAME) 
    print(cmd)
    response = sp.run(cmd,capture_output=True,shell=True,text=True)


cp /hd/maps/maps-2020/input/map-catalog.json /hd/maps/maps-2020/output/map-catalog.json


In [22]:
# Update/modify the map_catalog
from glob import glob
import subprocess as sp
global data
MAP_DATE = '2020-01-13'
SOURCE_URL = 'https://timmoody.com/iiab-files/maps/'
DOWNLOAD_URL = 'https://archive.org/download'
REGION_LIST = ['planet_base','satellite_base','africa','central_america','europe',\
               'middle_east','north_america','north_asia','oceania','world',\
               'san_jose','south_america','south_asia','spanish_speaking_regions']
def new_mapid(use,oldid,newid):
    if data.get([use][oldid],'') == '': return
    # Create a new item with the new id
    data[use][newid] = {}
    for k in data[use][oldid].keys():
        data[use][newid][k] = data[use][oldid][k]
    del data[use][oldid]
                 
def update_mapid(use,mapid):
    data[use][mapid]['detail_url'] = os.path.join(SOURCE_URL,mapid)
    data[use][mapid]['date'] = MAP_DATE
    del data[use][mapid]['osm_size']
    del data[use][mapid]['sat_size']
    del data[use][mapid]['sat_url']
    del data[use][mapid]['sat_is_regional']
    del data[use][mapid]['url']
    data[use][mapid]['filename'] = mapid
    #data[use][mapid]['detail_url'] = os.path.join(DOWNLOAD_URL,map_id,mapid)
    data[use][mapid]['detail_url'] = SOURCE_URL + mapid 
    data[use][mapid]['bittorrent_url'] = os.path.join(DOWNLOAD_URL,mapid,mapid + '_archive.torrent')
    fn = OUTPUT_DIR + '/' + mapid
    if os.path.exists(fn):
        size = os.path.getsize(fn)
        data[use][mapid]['mbtile_size'] = size
        data[use][mapid]['size'] = size
    #data[use][mapid]['size'] = size + int(BASE_PLANET_SIZE) + int(BASE_SATELLITE_SIZE)
                 
                 
                 
CATALOG_NAME = 'map-catalog.json' 
                 
outstr = ''
map_catalog = {}
TOMODIFY = PREFIX + '/output/' + CATALOG_NAME

# First make global substitutions
cmd = f'sed -i -e "s/2019-10-08/{MAP_DATE}/g" {TOMODIFY}'
print(cmd)
response = sp.run(cmd,capture_output=True,shell=True,text=True)
cmd = f'sed -i-e "s/2019/2020/g" {TOMODIFY}'
print(cmd)
response = sp.run(cmd,capture_output=True,shell=True,text=True)
cmd = f'sed -i-e "s/_v3/_2020/g" {TOMODIFY}'
print(cmd)
response = sp.run(cmd,capture_output=True,shell=True,text=True)
with open(TOMODIFY,'r') as catalog_fp:
   try:
      data = json.loads(catalog_fp.read())
   except:
      print("json error reading regions.json")
      sys.exit(1)
   catalog_fp.close()

for mapid in data['maps'].keys():
    update_mapid('maps',mapid)

for mapid in data['base'].keys():
    update_mapid('base',mapid)

#print(json.dumps(data,indent=2))
with open(TOMODIFY,"w") as catalog_fp:
   outstr = json.dumps(data,indent=2)
   catalog_fp.write(outstr)
   catalog_fp.close()



sed -i -e "s/2019-10-08/2020-01-13/g" /hd/maps/maps-2020/output/map-catalog.json
sed -i-e "s/2019/2020/g" /hd/maps/maps-2020/output/map-catalog.json
sed -i-e "s/_v3/_2020/g" /hd/maps/maps-2020/output/map-catalog.json


In [23]:
# Get set to stage upload of new regions
REGION_LIST = ['planet_base','satellite_base','africa','central_america','europe',\
               'middle_east','north_america','north_asia','oceania','world',\
               'san_jose','south_america','south_asia','spanish_speaking_regions']
# make a reverse lookup between region and map-id
region_lookup = {}
for k in data['base'].keys():
    #print(k)
    region_lookup[data['base'][k]['region']] = {'name':k,'use':'base'}
    update_mapid('base',k)
for k in data['maps'].keys():
    #print(k)
    region_lookup[data['maps'][k]['region']] = {'name':k,'use':'maps'}
    update_mapid('maps',k)
print(str(region_lookup))


KeyError: 'osm_size'

In [18]:
# Copy files in staged dir to publisher URL
import subprocess as sp
PUBLISHER_URL = 'timmoody@timmoody.com'
TARGET_DIR = './public_html/iiab-files/maps'
mbt_list = glob(STAGED_DIR + '/*.mbtiles')
for mbt_fn in mbt_list:
    cmd = f'/usr/bin/ssh {PUBLISHER_URL} ls -l {TARGET_DIR}/{os.path.basename(mbt_fn)}|cut -d" " -f5'
    print(f'executing {cmd}')
    response = sp.run(cmd,capture_output=True,shell=True,text=True)
    print(response.stdout,response.stderr)
    cmd = f'/usr/bin/rsync {mbt_fn} {PUBLISHER_URL}/{TARGET_DIR}/{os.path.basename(mbt_fn)}'
    if response.stdout.find('cannot access') != -1:
        print(f'executing {cmd}')
        response = sp.run(cmd,capture_output=True,shell=True)
        print(f'Response to scp: {response.stdout}')
    elif response.stdout.strip() != str(os.path.getsize(mbt_fn)):
        print(f'Remote file size: {response.stdout.strip()}. Local lize is { os.path.getsize(mbt_fn)}')
        response = sp.run(cmd,capture_output=True,shell=True)
        print(f'Response to scp: {response.stdout}')
    else:
        print(f'File sizes for {mbt_fn} match. Skipping upload ...') 

cmd = f'/usr/bin/ssh {PUBLISHER_URL} ls -l {TARGET_DIR}/{os.path.basename(mbt_fn)}|cut -d" " -f5'




executing /usr/bin/ssh timmoody@timmoody.com ls -l ./public_html/iiab-files/maps/osm-planet_z0-z10_2020.mbtiles|cut -d" " -f5
2108256256
 
File sizes for /hd/maps/maps-2020/staged/osm-planet_z0-z10_2020.mbtiles match. Skipping upload ...
executing /usr/bin/ssh timmoody@timmoody.com ls -l ./public_html/iiab-files/maps/osm_san_jose_z11-z14_2020.mbtiles|cut -d" " -f5
30449664
 
File sizes for /hd/maps/maps-2020/staged/osm_san_jose_z11-z14_2020.mbtiles match. Skipping upload ...
executing /usr/bin/ssh timmoody@timmoody.com ls -l ./public_html/iiab-files/maps/osm_south_america_z11-z14_2020.mbtiles|cut -d" " -f5
11512279040
 
File sizes for /hd/maps/maps-2020/staged/osm_south_america_z11-z14_2020.mbtiles match. Skipping upload ...
executing /usr/bin/ssh timmoody@timmoody.com ls -l ./public_html/iiab-files/maps/osm_spain_z11-z14_2020.mbtiles.mbtiles|cut -d" " -f5
 ls: cannot access ./public_html/iiab-files/maps/osm_spain_z11-z14_2020.mbtiles.mbtiles: No such file or directory

Remote file siz