# WDPA management

create_mangrove_wdpa_table.js file

In [1]:
import os
from pathlib import Path
import geopandas
import zipfile
import subprocess
import logging
import requests
from typing import Union
from ipyleaflet import Map, GeoData, basemaps, LayersControl



In [2]:
!npm install -g mapshaper@latest

[K[?25h              [27m] - reify:@tmcw/togeojson: [32;40mhttp[0m [35mfetch[0m GET 200 https://regist[0m[K.o[0m[K
added 67 packages, and audited 68 packages in 7s

6 packages are looking for funding
  run `npm fund` for details

found [32m[1m0[22m[39m vulnerabilities


In [4]:
# FIXME: WORK_DIR depends on where the notebook kernel is running, so be careful:
# BASE_DIR below is derived from it.
WORK_DIR =Path(os.getcwd())
# Climb four directory levels above the working directory, then point at the raw datasets folder.
BASE_DIR = f'{WORK_DIR.parents[3]}/work/datasets/raw'
# Show INFO-level messages emitted by the pipeline functions.
logging.basicConfig(level=logging.INFO)

# @TODO: Add expected data files source as an environment variable.
# Stop early if the kernel was started from an unexpected location.
assert BASE_DIR == '/home/jovyan/work/datasets/raw', f'{BASE_DIR} is not the correct directory'

## Pipeline for wdpa download and simplification process

### Functions

In [3]:
def download_wdpaData(file_path: str, update: bool = False) -> int:
    """
    Download the public WDPA shapefile archive to a local path.

    The download link is first requested from the Protected Planet downloads
    endpoint; the archive it points to is then streamed to disk.

    Parameters
    ----------
    file_path : str - The path where the downloaded archive is stored.
    update : bool, optional - If True, the file will be downloaded again even if it already exists.
                            The default is False.

    Returns
    -------
    int - 0 if the file was downloaded successfully (or was already present),
          1 if the file download failed.
    """
    try:
        if update or not os.path.exists(file_path):
            logging.info('Downloading WDPA data...')

            url_info = requests.post('https://www.protectedplanet.net/downloads',
                                data={"domain":"general",
                                    "format":"shp",
                                    "token":"wdpa",
                                    "id":51216},
                                timeout=(30, 300)
                                )
            # Fail here rather than trying to parse an error page as JSON.
            url_info.raise_for_status()

            # Stream into a temporary sibling file so that an interrupted download
            # never leaves a truncated archive that later runs would treat as complete.
            partial_path = f'{file_path}.part'
            try:
                with requests.get(url_info.json()['url'], stream=True, timeout=(30, 300)) as response:
                    # Do not write an HTTP error body to disk as if it were the archive.
                    response.raise_for_status()

                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)

                os.replace(partial_path, file_path)
            finally:
                # Only present here when the download did not complete.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        else:
            logging.info('WDPA data already downloaded.')

        return 0
    except Exception as e:
        logging.error(e)
        return 1

In [4]:
def extract_wdpa_data(wdpa_zip_file: str, target_folder: str, format: str='.zip', update: bool = False) -> int:

    """
    Extract WDPA data from a zip file that itself contains zip files.

    Only the members of the outer archive whose name ends with `format` are
    extracted into `BASE_DIR/target_folder`. Each of those nested archives is
    then unpacked into a folder named after it and the nested archive is deleted.

    Parameters
    ----------
    wdpa_zip_file : str - The path to the zip file containing the WDPA data.
    target_folder : str - The folder, relative to BASE_DIR, to extract the WDPA data to.
    format : str, optional - The file name suffix of the nested archives. The default is '.zip'.
    update : bool, optional - If True, the file will be extracted even if it already exists.
                            The default is False.

    Returns
    -------
    int - 0 if the file was extracted successfully (or the target folder already
          existed), 1 if the file extraction failed.

    """
    try:
        path = f'{BASE_DIR}/{target_folder}'

        # NOTE: only the existence of the target folder is checked, not its contents.
        if update or not os.path.exists(path):
            logging.info('Extracting WDPA data...')

            # Also creates missing parent folders and tolerates an existing target.
            os.makedirs(path, exist_ok=True)

            with zipfile.ZipFile(wdpa_zip_file, 'r') as zip_ref:
                # Same suffix test as the loop below, so both passes agree on what a nested archive is.
                nested_archives = [name for name in zip_ref.namelist() if name.endswith(format)]
                zip_ref.extractall(path, members=nested_archives)

            for file in os.listdir(path):
                if not file.endswith(format):
                    continue

                logging.info(file)

                nested_path = f'{path}/{file}'
                # The folder name is the archive name without its suffix.
                inner_folder = f'{path}/{file[:-len(format)]}'
                os.makedirs(inner_folder, exist_ok=True)

                with zipfile.ZipFile(nested_path, 'r') as zip_ref:
                    zip_ref.extractall(inner_folder)

                # The nested archive is no longer needed once unpacked.
                os.remove(nested_path)
        else:
            logging.info('WDPA data already extracted.')

        return 0

    except Exception as e:
        logging.error(e)
        return 1
        

In [5]:
def load_dataframe(file_path: str) -> Union[int, geopandas.GeoDataFrame]:
    """
    Read a geospatial file into a GeoDataFrame.

    Parameters
    ----------
    file_path : str - The path of the file handed to geopandas.read_file.

    Returns
    -------
    geopandas.GeoDataFrame - The loaded data.
    int - 1 if reading the file raised an exception (the error is logged).

    """
    try:
        frame = geopandas.read_file(file_path)
    except Exception as err:
        # Errors are reported through logging and signalled with a status code.
        logging.error(err)
        return 1
    return frame

In [6]:
def simplifyMapshaper(data_path: Path, output_path: Path, 
    simplification: bool, clip: bool = False, update: bool = False) -> Union[int, Path]:
    """
    Filter, merge and optionally simplify WDPA polygons using mapshaper.

    The filtering step keeps features that are not "Proposed", whose ISO3 code
    is in the hard-coded country list and that fall inside the bounding box
    -180,-50,180,50; the layers are then merged and cleaned. The optional
    simplification step reads the output file and overwrites it in place.

    Parameters
    ----------
    data_path : Path - The input file(s) given to mapshaper `-i`. It is inserted
                       verbatim into the command, so a space separated string of
                       several paths is accepted as well.
    output_path : Path - The path to the output shapefile (a str is accepted too).
    simplification : bool - If True, the data will be simplified.
    clip : bool, optional - If True, the result is clipped with
                            BASE_DIR/mangrove_coverage-v2.shp. The default is False.
    update : bool, optional - If True, the filtering step runs again even if the
                            output file already exists. The default is False.

    Returns
    -------
    Path - `output_path`, exactly as it was passed in, if every step succeeded.
    int - 1 if any step failed (the error is logged).
    """
    try:
        # Callers may pass a plain string; wrap it so that the existence check is
        # an actual filesystem call rather than a truthy bound method.
        output_exists = Path(output_path).exists()

        if update or not output_exists:
            logging.info('Filtering WDPA data based on Ramsar designation and in active protection...')
            instructions = [
                f"mapshaper-xl 16gb -i {data_path} combine-files snap",
                "-filter 'STATUS!=\"Proposed\"'",
                "-filter '\"ABW,AGO,AIA,ARE,ASM,ATG,AUS,BEN,BES,BGD,BHR,BHS,BLZ,BRA,BRB,BRN,CHN,CIV,CMR,COD,COL,COM,CRI,CUB,CUW,CYM,DJI,DMA,DOM,DPT,ECU,EGY,ERI,FJI,FSM,GAB,GHA,GIN,GLP,GMB,GNB,GNQ,GRD,GTM,GUF,GUY,HKG,HND,HTI,IDN,IND,IRN,JAM,JPN,KEN,KHM,KNA,LBR,LCA,LKA,MAF,MDG,MEX,MMR,MOZ,MRT,MTQ,MUS,MYS,MYT,NCL,NGA,NIC,NZL,OMN,PAK,PAN,PER,PHL,PLW,PNG,PRI,QAT,SAU,SDN,SEN,SGP,SLB,SLE,SLV,SOM,STP,SUR,SYC,TCA,THA,TLS,TON,TTO,TWN,TZA,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF\".indexOf(ISO3) > -1'",
                "-filter bbox=-180,-50,180,50 remove-empty",
                "-merge-layers target=*",
                "-clean allow-overlaps",
            ]
            if clip:
                # Must be an f-string, otherwise the literal text "{BASE_DIR}" reaches mapshaper.
                instructions.append(f"-clip source={BASE_DIR}/mangrove_coverage-v2.shp target=*")
            # The output instruction always comes last.
            instructions.append(f"-o {output_path} format=shapefile force")

            CMD = ' '.join(instructions)
            print(CMD)
            # NOTE: run through the shell because the filter expressions rely on shell quoting
            # and data_path may hold several space separated paths; do not pass untrusted paths.
            subprocess.run(CMD, shell=True, check=True)

        if simplification:
            logging.info('Simplifying WDPA data...')
            CMD2 = ' '.join([
                f"mapshaper-xl 16gb -i {output_path}",
                "-simplify 50% visvalingam keep-shapes planar",
                "-filter-islands min-vertices=3 min-area=10000m2 remove-empty",
                "-filter-slivers min-area=10000m2 remove-empty",
                "-clean rewind",
                f"-o {output_path} format=shapefile force",
            ])
            subprocess.run(CMD2, shell=True, check=True)

        return output_path

    except Exception as e:
        logging.error(e)
        return 1

In [7]:
def mapRender(data: geopandas.GeoDataFrame) -> Map:
    """
    Build an ipyleaflet map showing the given data as a single layer.

    Parameters
    ----------
    data : geopandas.GeoDataFrame - The data to render.

    Returns
    -------
    Map - The rendered map, with the data added as a layer named 'WDPA'.
    int - 1 if building the map raised an exception (the error is logged).
    """
    try:
        leaflet_map = Map(
            center=(52.3, 8.0),
            zoom=3,
            basemap=basemaps.Esri.WorldTopoMap,
            layers_control=True,
            controls=[LayersControl()],
        )

        # Semi-transparent red fill for the polygons.
        layer_style = {'color': '#ff0000', 'fillOpacity': 0.5}
        wdpa_layer = GeoData(geo_dataframe=data, style=layer_style, name='WDPA')
        leaflet_map.add_layer(wdpa_layer)

        return leaflet_map
    except Exception as err:
        logging.error(err)
        return 1

### Pipeline

In [35]:
# Fetch the WDPA archive into the raw datasets folder; nothing is downloaded when the file already exists.
download_wdpaData(f'{BASE_DIR}/wdpa_protected_areas_public.zip')

INFO:root:WDPA data already downloaded.


0

In [36]:
# Unpack the archive, and the zip files nested inside it, into BASE_DIR/wdpa.
extract_wdpa_data(f'{BASE_DIR}/wdpa_protected_areas_public.zip', 'wdpa')

INFO:root:WDPA data already extracted.


0

In [5]:
# Folder that holds the extracted WDPA archives.
outFolder= Path(f'{BASE_DIR}/wdpa')
paths =[]
# Collect every polygon shapefile found anywhere below the extraction folder.
for root, dirs, files in os.walk(outFolder):
    for file in files:
        if file.endswith("-polygons.shp"):
            paths.append(os.path.join(root, file))

# mapshaper takes all input files in one command, so join them with spaces
# (this breaks if any path itself contains a space).
path = ' '.join(paths)

# Two levels above BASE_DIR/wdpa is the datasets folder; the result goes into its "processed" subfolder.
outputPath = f'{outFolder.parents[1]}/processed/wdpa_protected_areas_public.shp'

print(outputPath)

/home/jovyan/work/datasets/processed/wdpa_protected_areas_public.shp


In [19]:
# update=True forces the filtering step to run again; simplification=True then simplifies the output file in place.
wdpa = simplifyMapshaper(path, outputPath, simplification=True, update=True)

INFO:root:Filtering WDPA data based on Ramsar designation and in active protection...
Allocating 16 GB of heap memory


mapshaper-xl 16gb -i /home/jovyan/work/datasets/raw/wdpa/WDPA_Feb2022_Public_shp_1/WDPA_Feb2022_Public_shp-polygons.shp /home/jovyan/work/datasets/raw/wdpa/WDPA_Feb2022_Public_shp_2/WDPA_Feb2022_Public_shp-polygons.shp /home/jovyan/work/datasets/raw/wdpa/WDPA_Feb2022_Public_shp_0/WDPA_Feb2022_Public_shp-polygons.shp combine-files snap -filter 'STATUS!="Proposed"' -filter '"ABW,AGO,AIA,ARE,ASM,ATG,AUS,BEN,BES,BGD,BHR,BHS,BLZ,BRA,BRB,BRN,CHN,CIV,CMR,COD,COL,COM,CRI,CUB,CUW,CYM,DJI,DMA,DOM,DPT,ECU,EGY,ERI,FJI,FSM,GAB,GHA,GIN,GLP,GMB,GNB,GNQ,GRD,GTM,GUF,GUY,HKG,HND,HTI,IDN,IND,IRN,JAM,JPN,KEN,KHM,KNA,LBR,LCA,LKA,MAF,MDG,MEX,MMR,MOZ,MRT,MTQ,MUS,MYS,MYT,NCL,NGA,NIC,NZL,OMN,PAK,PAN,PER,PHL,PLW,PNG,PRI,QAT,SAU,SDN,SEN,SGP,SLB,SLE,SLV,SOM,STP,SUR,SYC,TCA,THA,TLS,TON,TTO,TWN,TZA,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF".indexOf(ISO3) > -1' -filter bbox=-180,-50,180,50 remove-empty -merge-layers target=* -clean allow-overlaps -o /home/jovyan/work/datasets/processed/wdpa_protected_areas_public.shp 

[i] Snapped 19910585 points
[filter] Retained 85,479 of 85,785 features
[filter] Retained 85,513 of 85,784 features
[filter] Retained 85,140 of 85,785 features
[filter] Retained 24,213 of 85,479 features
[filter] Retained 40,419 of 85,513 features
[filter] Retained 22,048 of 85,140 features
[filter] Retained 24,159 of 24,213 features
[filter] Retained 40,256 of 40,419 features
[filter] Retained 21,940 of 22,048 features
[clean] Retained 86,355 of 86,355 features
[o] Wrote /home/jovyan/work/datasets/processed/wdpa_protected_areas_public.shp
[o] Wrote /home/jovyan/work/datasets/processed/wdpa_protected_areas_public.shx
[o] Wrote /home/jovyan/work/datasets/processed/wdpa_protected_areas_public.dbf
[o] Wrote /home/jovyan/work/datasets/processed/wdpa_protected_areas_public.prj
INFO:root:Simplifying WDPA data...
Allocating 16 GB of heap memory
[simplify] Repaired 11,081 intersections; 2,395 intersections could not be repaired
[filter-islands] Detected DBF text encoding: utf8
[filter-islands]

In [9]:
# Load the shapefile written to outputPath (load_dataframe returns 1 instead of a GeoDataFrame on failure).
wdpa_dataframe = load_dataframe(outputPath)

In [10]:
# Load the layer the protected areas are joined against below
# (presumably the mangrove coastline extent, going by the file name — confirm).
extent_dataframe = load_dataframe(Path(f'{BASE_DIR}/coastlines-mangroves-v1.shp'))

In [11]:
# Print the columns, non-null counts and dtypes of the loaded WDPA data.
wdpa_dataframe.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 72372 entries, 0 to 72371
Data columns (total 31 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   WDPAID      72372 non-null  int64   
 1   WDPA_PID    72372 non-null  object  
 2   PA_DEF      72372 non-null  object  
 3   NAME        72372 non-null  object  
 4   ORIG_NAME   72372 non-null  object  
 5   DESIG       72372 non-null  object  
 6   DESIG_ENG   72372 non-null  object  
 7   DESIG_TYPE  72372 non-null  object  
 8   IUCN_CAT    72372 non-null  object  
 9   INT_CRIT    72372 non-null  object  
 10  MARINE      72372 non-null  object  
 11  REP_M_AREA  72372 non-null  float64 
 12  GIS_M_AREA  72372 non-null  float64 
 13  REP_AREA    72372 non-null  float64 
 14  GIS_AREA    72372 non-null  float64 
 15  NO_TAKE     72372 non-null  object  
 16  NO_TK_AREA  72372 non-null  float64 
 17  STATUS      72372 non-null  object  
 18  STATUS_YR   72372 non-null  int64   
 

In [12]:
# Spatially join the protected areas with the extent layer using the sjoin defaults.
# NOTE(review): a protected area matching several extent features presumably appears
# once per match in the result — confirm that duplicates are acceptable here.
Filtered = wdpa_dataframe.sjoin(extent_dataframe)

In [17]:
# Remove rows with missing values in place (dropna defaults), then write the
# result next to the other processed datasets.
Filtered.dropna(inplace=True)
Filtered.to_file(f'{outFolder.parents[1]}/processed/wdpa_protected_areas_public_filtered.shp') 

  Filtered.to_file(f'{outFolder.parents[1]}/processed/wdpa_protected_areas_public_filtered.shp')


In [18]:
# TODO: add a function to zip data and upload it to google cloud storage bucket
# TODO: we are not intersecting here with the actual mangrove extent data, so we are getting more WDPA areas than the ones that are actually protecting mangroves.