# Pipeline to convert national dashboard shapefiles into vector tiles

[Data sources](https://docs.google.com/document/d/1D9kGrF9AUXDrMLqm-51ZfI43vlfYqpzcQd3mk7MIQqk/edit#heading=h.z8hx51w9ls2m)


In [1]:
import os
from pathlib import Path
import geopandas
import subprocess
import logging
import requests
import gzip
import fiona
from typing import Union

In [2]:
# FIXME: every path below is derived from the directory the notebook kernel is
# running in, so double-check them when running from a different location.
WORK_DIR = Path.cwd()
BASE_DIR = f'{WORK_DIR.parents[3]}/datasets'
logging.basicConfig(level=logging.INFO)

# TODO: read the expected data directory from an environment variable and add
# the download step for the data sources.
assert BASE_DIR == '/home/jovyan/work/datasets', f'{BASE_DIR} is not the correct directory'
outFolder = Path(BASE_DIR) / 'National_Layers'
outputPath = f'{outFolder.parents[1]}/processed'

In [3]:
def mbtilesGeneration(data_path: Path, output_path: Union[Path, None] = None,
                      update: bool = False) -> Union[Path, int]:
    """
    Generate an mbtiles vector tileset from a vector data file using tippecanoe.

    Inputs whose suffix is not ``.json`` are first cleaned and converted to
    GeoJSON with mapshaper; the converted file is written next to the input
    with a ``.json`` suffix and is the file handed to tippecanoe.

    Parameters
    ----------
    data_path : Path - The path to the vector file to tile (a plain string is accepted too).
    output_path : Path, optional - The path of the mbtiles file to write.
                            The default is ``data_path`` with a ``.mbtiles`` suffix.
    update : bool, optional - If True, the mbtiles file is regenerated even if it already exists.
                            The default is False.

    Returns
    -------
    Path - The path to the mbtiles file. If any step fails the error is logged
           and the integer 1 is returned instead of raising, so that batch
           loops calling this function keep going.
    """
    try:
        data_path = Path(data_path)
        # Explicit check instead of `assert`, which is stripped when Python runs with -O.
        if not data_path.exists():
            raise FileNotFoundError('Data path does not exist.')

        # The default output name is derived from the original input, before any conversion.
        output_path = Path(output_path) if output_path else data_path.with_suffix('.mbtiles')

        if update or not output_path.exists():

            if data_path.suffix != '.json':
                json_path = data_path.with_suffix('.json')
                # Argument lists (no shell) keep paths with spaces or shell
                # metacharacters intact.
                subprocess.run(
                    ['mapshaper', str(data_path), '-clean', 'allow-overlaps', 'rewind',
                     '-o', 'format=geojson', str(json_path), 'force'],
                    check=True,
                )
                data_path = json_path

            logging.info('Creating mbtiles file...')

            # NOTE(review): the tippecanoe README describes -P (--read-parallel) as
            # intended for line-delimited GeoJSON - confirm it suits the files tiled here.
            subprocess.run(
                ['tippecanoe', '-zg', '-f', '-P', '-o', str(output_path),
                 '--extend-zooms-if-still-dropping', str(data_path)],
                check=True,
            )

        return output_path

    except Exception as e:
        # Deliberate best-effort: report the failure and hand back an error code.
        logging.error(e)
        return 1

In [4]:
# Sanity check: confirm that the input folder exists before running the conversion cells.
outFolder.exists()

True

In [None]:
# Convert every GeoPackage in the input folder into an mbtiles tileset.
import tempfile

for infile in outFolder.glob('*.gpkg'):
    tiles_path = outFolder / f'{infile.stem}.mbtiles'
    if tiles_path.exists():
        # mbtilesGeneration is called without update=True, so an existing
        # tileset would not be rebuilt: skip the costly read/reproject/export.
        logging.info(f'Skipping {infile}: {tiles_path.name} already exists')
        continue

    logging.info(f'Processing {infile}')
    df = geopandas.read_file(infile).to_crs('EPSG:4326')

    # The intermediate GeoJSON lives in a scratch directory so that a real
    # '<stem>.geojson' sitting next to the GeoPackage is never overwritten and
    # deleted. The scratch directory (and the '.json' file mapshaper writes
    # beside the GeoJSON) is removed even if a step fails. The file keeps the
    # GeoPackage stem so the name handed to tippecanoe is unchanged.
    with tempfile.TemporaryDirectory(dir=str(outFolder)) as scratch_dir:
        geojson_path = Path(scratch_dir) / f'{infile.stem}.geojson'
        df.to_file(str(geojson_path), driver='GeoJSON')
        mbtilesGeneration(geojson_path, tiles_path)


In [None]:
# Build (or rebuild) a tileset for every shapefile found in the input folder.
shapefiles = outFolder.glob('*.shp')
for shp_path in shapefiles:
    logging.info(f'Processing {shp_path}')
    mbtilesGeneration(data_path=shp_path, update=True)

In [6]:
# Load the AUS mangrove cover layer and preview its first rows.
aus_cover_file = f'{outFolder}/AUS_mangrove_cover_2022.geojson'
df_aus = geopandas.read_file(aus_cover_file)
df_aus.head()

Unnamed: 0,DN,geometry
0,3,"POLYGON ((142.05434 -10.35295, 142.05434 -10.3..."
1,3,"POLYGON ((142.05463 -10.35353, 142.05463 -10.3..."
2,2,"POLYGON ((142.05463 -10.35411, 142.05463 -10.3..."
3,3,"POLYGON ((142.05521 -10.35526, 142.05521 -10.3..."
4,3,"POLYGON ((142.05984 -10.35555, 142.05984 -10.3..."


In [8]:
# Keep only the features whose DN value is positive, then overwrite the
# source GeoJSON with the filtered layer.
has_positive_dn = df_aus['DN'] > 0
df_aus = df_aus[has_positive_dn]
df_aus.to_file(f'{outFolder}/AUS_mangrove_cover_2022.geojson', driver='GeoJSON')

In [10]:
# Count how many features carry each DN value.
df_aus['DN'].value_counts()

2    498337
1    348510
3    139498
Name: DN, dtype: int64

In [None]:
# Build (or rebuild) a tileset for every GeoJSON file found in the input folder.
geojson_files = outFolder.glob('*.geojson')
for geojson_path in geojson_files:
    logging.info(f'Processing {geojson_path}')
    mbtilesGeneration(data_path=geojson_path, update=True)

In [5]:
# List the entries of the input folder whose name contains "bh" (the Bahamas files).
!ls {outFolder} | grep bh

bh_mangroves.geojson
bh_mangroves.gpkg
bh_mangroves.gpkg.zip
bh_mangroves.json
[1m[36mbh_mar_mangroves_2023[m[m


In [6]:
# Separate preparation of the Bahamas tileset, run after fixing the geometry
# in QGIS and dissolving the overlapping polygons.
# tippecanoe is invoked directly on the GeoJSON here instead of going through
# mbtilesGeneration, so no mapshaper cleaning step is applied to this file.
data_path = f'{outFolder}/bh_mangroves.geojson'
output_path = f'{outFolder}/bh_mangroves.mbtiles'

# NOTE(review): the tippecanoe README describes -P as intended for line-delimited
# GeoJSON and warns of spurious EOF messages otherwise - confirm that explains
# the parse warnings printed below.
!tippecanoe -zg -f -P -o {output_path} --extend-zooms-if-still-dropping {data_path}

For layer 0, using name "bh_mangroves"
/Users/angel/Documents/REPOSITORIOS/mangrove-atlas/data/data/National_Layers/bh_mangroves.geojson:2: Found ] at top level
/Users/angel/Documents/REPOSITORIOS/mangrove-atlas/data/data/National_Layers/bh_mangroves.geojson:5: Reached EOF without all containers being closed
In JSON object {"type":"FeatureCollection","crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:OGC:1.3:CRS84"}},"features":[]}
1 features, 31026155 bytes of geometry, 42 bytes of separate metadata, 229 bytes of string pool
Choosing a maxzoom of -z13 for resolution of about 39 feet (12 meters) within features
  99.9%  13/2326/3523  


In [6]:
# List the tileset files present in the input folder.
!ls {outFolder} | grep mbtiles

AUS_mangrove_cover_2022.mbtiles
bh_mangroves.mbtiles
bh_mangroves.mbtiles-journal
car_mar_mangroves_2023.mbtiles
MangroveExtent2020-Kenya-Final-QA-v2.mbtiles
MangroveExtent2020-Madagascar-Final-QA-v3.mbtiles
MangroveExtent2020-Mozambique-Final-QA-v2.mbtiles
MangroveExtent2020-Tanzania-Final-QA-v2.mbtiles


In [None]:
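# Optional: preview a generated tileset locally with mbview (uncomment to use).
#!npm install -g @mapbox/mbview  

# NOTE: `outputPath_mbtiles` is not defined in the cells above; set it to the path of a .mbtiles file first.
#!mbview $outputPath_mbtiles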