### Set up

In [1]:
import os
from datetime import datetime

import geopandas as gpd
import pandas as pd

In [2]:
# Input and output folders, both located under the same data root.
# NOTE(review): this is an absolute, machine-specific path; it has to be
# adjusted before the notebook can run on another machine.
_data_root = "/Users/sofia/Documents/Repos/skytruth-30x30/data/data"
path_in = _data_root + "/raw"
path_out = _data_root + "/processed"

### Read relevant datasets: MPAtlas, WDPA, and ProtectedSeas

In [4]:
# Load the cleaned MPAtlas assessment zones and keep only the first row for
# each distinct combination of the key columns below.
mpatlas_key_cols = [
    'wdpa_id',
    'designation',
    'location_id',
    'establishment_stage',
    'protection_level',
    'year',
]
mpatlas = (
    gpd.read_file(path_out + "/mpatlas/mpatlas_assess_zone_cleaned.geojson")
    .drop_duplicates(subset=mpatlas_key_cols, keep='first')
)

In [5]:
# Load the ProtectedSeas shapefile from the processed-data folder.
ps_file = path_out + "/protectedseas/protectedseas.shp"
ps = gpd.read_file(ps_file)

In [6]:
# Load the merged WDPA layer, keep only the attributes used further down, and
# rename them to the column names used in the rest of the notebook.
wdpa_columns = {
    'WDPA_PID': 'wdpa_id',
    'NAME': 'name',
    'PA_DEF': 'protection_type',
    'GIS_M_AREA': 'area',
    'PARENT_ISO': 'location_id',
}
wdpa = gpd.read_file(path_out + "/wdpa/merged_wdpa_all.shp")
wdpa = wdpa[list(wdpa_columns)].rename(columns=wdpa_columns)

# Cast the flag to int and recode it as a label: 1 -> 'mpa', 0 -> 'oecm'.
pa_def_labels = {1: 'mpa', 0: 'oecm'}
wdpa['protection_type'] = wdpa['protection_type'].astype(int).replace(pa_def_labels)

### Combine information from different tables

In [7]:
# Attach protection-level attributes to the WDPA rows with two left joins on
# 'wdpa_id': first MPAtlas (area + protection level), then ProtectedSeas
# (FPS category + total area).
# NOTE(review): a left join repeats a WDPA row once per matching row on the
# right-hand side; confirm 'wdpa_id' is unique in mpatlas / ps if exactly one
# output row per WDPA entry is expected.
mpatlas_prot = mpatlas[['wdpa_id', 'area_km2', 'protection_level']]
ps_prot = ps[['wdpa_id', 'FPS_cat', 'total_area']]

table_prot = wdpa.merge(mpatlas_prot, on='wdpa_id', how='left')
table_prot = table_prot.rename(
    columns={'area_km2': 'area_mpatlas', 'protection_level': 'mpatlas_prot_lvl'}
)
table_prot = table_prot.merge(ps_prot, on='wdpa_id', how='left')
table_prot = table_prot.rename(columns={'FPS_cat': 'fpl', 'total_area': 'area_ps'})

In [8]:
# Pick one area value per row: the MPAtlas area when present, otherwise the
# ProtectedSeas area, otherwise the WDPA area already stored in 'area'.
# NOTE(review): assumes the three area columns share the same unit - confirm.
area_preferred = table_prot['area_mpatlas'].combine_first(table_prot['area_ps'])
table_prot['area'] = area_preferred.combine_first(table_prot['area'])

# Remove the helper area columns and the attributes not kept in this table.
table_prot = table_prot.drop(
    columns=['area_mpatlas', 'area_ps', 'name', 'protection_type']
)

In [9]:
# Build the establishment table: WDPA rows left-joined on 'wdpa_id' with the
# MPAtlas establishment stage and year, without the area and location columns.
mpatlas_est = mpatlas[['wdpa_id', 'establishment_stage', 'year']]
table_est = wdpa.merge(mpatlas_est, on='wdpa_id', how='left')
table_est = table_est.drop(columns=['area', 'location_id'])

In [12]:
# Save both tables as CSV (without the index) under <path_out>/tables.
# The folder is created first because to_csv does not create missing parent
# directories and would otherwise fail when the folder is absent.
tables_dir = path_out + "/tables"
os.makedirs(tables_dir, exist_ok=True)
table_prot.to_csv(tables_dir + "/mpas_table.csv", index=False)
table_est.to_csv(tables_dir + "/mpas_table_establishment.csv", index=False)