#### Based on
https://github.com/dataforgoodfr/Bloom/blob/data-science/data_science/notebooks/20230212_bloom_exploration_mathilde.ipynb

## Installing packages

In [1]:
pip install shapely

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install geopandas

Note: you may need to restart the kernel to use updated packages.


## Importing packages

In [2]:
import json
import os
import time

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
from geopandas.tools import sjoin
from shapely.geometry import Point, MultiPolygon, Polygon

%matplotlib inline


## Path to Shapefile

In [3]:
pwd

'/Users/martinvanderlinden/Documents/Github_repos/Bloom/data_science/notebooks'

In [4]:
# Root folder that holds the WDPA shapefile sub-folders. Set the WDPA_DATA_DIR
# environment variable to use another location; otherwise the original path is used.
# Joining with '' guarantees a trailing separator, which plain string
# concatenation of sub-paths relies on.
path_root = os.path.join(
    os.environ.get('WDPA_DATA_DIR', '/Users/martinvanderlinden/Documents/raw_data/WDPA_jan2023/'),
    '',
)

In [239]:
## read shapefile using geopandas library

In [5]:
def shp_file_to_df(path_root: str, folder: str, file_name: str) -> pd.DataFrame:
    """Read the file located at ``<path_root>/<folder>/<file_name>``.

    Parameters
    ----------
    path_root : str
        Base directory, with or without a trailing path separator.
    folder : str
        Sub-folder of ``path_root`` that contains the file.
    file_name : str
        Name of the file to read (e.g. a ``.shp`` file).

    Returns
    -------
    pd.DataFrame
        The object returned by ``gpd.GeoDataFrame.from_file`` for that path.
    """
    # os.path.join puts exactly one separator between the parts, so the result
    # no longer depends on path_root ending with '/'.
    full_path = os.path.join(path_root, folder, file_name)

    return gpd.GeoDataFrame.from_file(full_path)

## Get the polygons

### 1. Polygons_1

In [6]:
%%time
# Load the polygons shapefile stored in the '..._shp_0' folder.
df1 = shp_file_to_df(
    path_root=path_root,
    folder='WDPA_Jan2023_Public_shp_0',
    file_name='WDPA_Jan2023_Public_shp-polygons.shp',
)

CPU times: user 23.7 s, sys: 1.53 s, total: 25.2 s
Wall time: 26.4 s


In [12]:
# inspect crs
df1.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### 2. Polygons_2

In [7]:
%%time
# Load the polygons shapefile stored in the '..._shp_1' folder.
df2 = shp_file_to_df(
    path_root=path_root,
    folder='WDPA_Jan2023_Public_shp_1',
    file_name='WDPA_Jan2023_Public_shp-polygons.shp',
)

CPU times: user 30.2 s, sys: 2.29 s, total: 32.5 s
Wall time: 34.6 s


### 3. Polygons_3

In [None]:
# inspect crs
df2.crs

In [8]:
%%time
# Load the polygons shapefile stored in the '..._shp_2' folder.
df3 = shp_file_to_df(
    path_root=path_root,
    folder='WDPA_Jan2023_Public_shp_2',
    file_name='WDPA_Jan2023_Public_shp-polygons.shp',
)

CPU times: user 33.3 s, sys: 2.48 s, total: 35.8 s
Wall time: 39.2 s


#### --> DFs have the same CRS (CRS: EPSG:4326)

## Concat & inspect

In [None]:
# inspect crs
df3.crs

In [9]:
# Stack the three parts, then drop every row that exactly repeats an earlier row.
# Row labels are kept as they were in each part, so they repeat across parts.
df = pd.concat([df1, df2, df3], ignore_index=False).drop_duplicates(keep='first')

In [10]:
df.shape

(273261, 31)

In [11]:
df.head(2)

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,ORIG_NAME,DESIG,DESIG_ENG,DESIG_TYPE,IUCN_CAT,INT_CRIT,...,MANG_AUTH,MANG_PLAN,VERIF,METADATAID,SUB_LOC,PARENT_ISO,ISO3,SUPP_INFO,CONS_OBJ,geometry
0,1.0,1,1,Diamond Reef and Salt Fish Tail Reef,Diamond Reef,Marine Reserve,Marine Reserve,National,Ia,Not Applicable,...,Fisheries Division,Not Reported,State Verified,1807,AG-04,ATG,ATG,Not Applicable,Not Applicable,"POLYGON ((-61.82494 17.18497, -61.82497 17.184..."
1,2.0,2,1,Palaster Reef,Palaster Reef,Marine Reserve,Marine Reserve,National,Ia,Not Applicable,...,Fisheries Division,Not Reported,State Verified,1807,AG-10,ATG,ATG,Not Applicable,Not Applicable,"POLYGON ((-61.74007 17.52001, -61.77174 17.526..."


In [12]:
# After the concat the row labels 0, 1, 2, ... repeat once per source part, so
# build a fresh unique index; the old labels are kept in a new 'index' column.
df = df.reset_index()

In [17]:
#check which info we have
df.columns

Index(['index', 'WDPAID', 'WDPA_PID', 'PA_DEF', 'NAME', 'ORIG_NAME', 'DESIG',
       'DESIG_ENG', 'DESIG_TYPE', 'IUCN_CAT', 'INT_CRIT', 'MARINE',
       'REP_M_AREA', 'GIS_M_AREA', 'REP_AREA', 'GIS_AREA', 'NO_TAKE',
       'NO_TK_AREA', 'STATUS', 'STATUS_YR', 'GOV_TYPE', 'OWN_TYPE',
       'MANG_AUTH', 'MANG_PLAN', 'VERIF', 'METADATAID', 'SUB_LOC',
       'PARENT_ISO', 'ISO3', 'SUPP_INFO', 'CONS_OBJ', 'geometry'],
      dtype='object')

Many of the included polygons are for inland protected areas.

See quick overview of included area, e.g., from front-cover of the User Manual (https://www.ibat-alliance.org/pdf/wdpa_manual.pdf) or directly from https://www.dropbox.com/s/bqiuqfvkh87ddgq/Screen%20Shot%202023-02-12%20at%209.54.58%20PM.png?dl=1

From "User Manual for the World Database on Protected Areas and world database on other effective area-based conservation measures: 1.6" (https://www.ibat-alliance.org/pdf/wdpa_manual.pdf):

> "Marine Protected Areas

> The ‘Marine’ field provides an indication of whether a protected area or OECM is located entirely or partially in the marine environment. A value of ‘0’ indicates that there is very little to no marine component, ‘1’ indicates that the site has substantial marine and terrestrial components and ‘2’ indicates that there is very little to no terrestrial component."

In [42]:
df['MARINE']

0         2
1         2
2         0
3         0
4         0
         ..
273256    0
273257    0
273258    0
273259    0
273260    0
Name: MARINE, Length: 273261, dtype: object

In [43]:
# Keep every area whose MARINE code is not the string '0', i.e. drop the sites
# the manual excerpt above describes as having little to no marine component.
df_nonterr = df.loc[df['MARINE'] != '0']

In [45]:
df_nonterr.shape

(18019, 32)

In [None]:
# Write the filtered polygons to disk; the relative file name resolves against
# the current working directory.
df_nonterr.to_file('Nonterr_WDPA_Jan2023_Public_shp-polygons.shp')