In [16]:
import requests
import os
import pandas as pd
import numpy as np
import time
import shapely.wkb
import shapely.ops
import sys
import pyproj

from functools import partial

In [7]:
# CARTO SQL API settings: endpoint template (formatted with the account name),
# the default account name, and the API key taken from the environment
# (None when the CARTO_KEY variable is not set).
CARTO_URL = 'https://{}.carto.com/api/v2/sql'
CARTO_USER = 'wri-01'
CARTO_KEY = os.getenv('CARTO_KEY')
def sendSql(sql, user=None, key=None):
    '''Send arbitrary SQL to the CARTO SQL API and return the decoded JSON body.

    Parameters:
        sql:  SQL statement to execute.
        user: CARTO account name; falls back to CARTO_USER when falsy.
        key:  CARTO API key; falls back to CARTO_KEY when falsy.

    Returns:
        The JSON-decoded response body.

    Raises:
        Exception: when the HTTP status is >= 400; the message is the first
            entry of the response's 'error' list when one is present.
        requests.HTTPError: when the status is >= 400 and the body carries no
            usable 'error' entry.
    '''
    user = user or CARTO_USER
    key = key or CARTO_KEY
    url = CARTO_URL.format(user)
    payload = {
        'api_key': key,
        'q': sql,
    }
    r = requests.post(url, json=payload)
    if r.status_code >= 400:
        try:
            msg = r.json()['error'][0]
        except (ValueError, KeyError, IndexError, TypeError):
            # Body is not JSON, or has no usable 'error' list: fall back to the
            # plain HTTP error. Only decoding/lookup failures are caught so that
            # KeyboardInterrupt and friends still propagate.
            r.raise_for_status()
            # raise_for_status() did not raise; never reach for an unbound `msg`.
            raise Exception(
                'CARTO request failed with HTTP status {}'.format(r.status_code))
        raise Exception(msg)
    return r.json()

In [8]:
# Names of the two CARTO tables queried in this notebook
gadm_table, wdpa_table = 'gadm36_adm1', 'wdpa_protected_areas'

# A zero-row select returns no data, only the field listing of the table
gadm_fields_sql = "select * from {} limit 0".format(gadm_table)
sendSql(gadm_fields_sql)

{'rows': [],
 'time': 0.003,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'iso': {'type': 'string'},
  'name_0': {'type': 'string'},
  'gid_1': {'type': 'string'},
  'name_1': {'type': 'string'},
  'varname_1': {'type': 'string'},
  'nl_name_1': {'type': 'string'},
  'type_1': {'type': 'string'},
  'engtype_1': {'type': 'string'},
  'cc_1': {'type': 'string'},
  'hasc_1': {'type': 'string'},
  'bbox': {'type': 'string'},
  'centroid': {'type': 'string'},
  'area_ha': {'type': 'number'},
  'area': {'type': 'number'},
  'gid_0': {'type': 'string'}},
 'total_rows': 0}

In [9]:
# Zero-row select against the WDPA table, to list its fields
wdpa_fields_sql = "select * from {} limit 0".format(wdpa_table)
sendSql(wdpa_fields_sql)

{'rows': [],
 'time': 0.002,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'wdpaid': {'type': 'number'},
  'pa_def': {'type': 'string'},
  'name': {'type': 'string'},
  'orig_name': {'type': 'string'},
  'desig': {'type': 'string'},
  'desig_eng': {'type': 'string'},
  'desig_type': {'type': 'string'},
  'iucn_cat': {'type': 'string'},
  'int_crit': {'type': 'string'},
  'marine': {'type': 'string'},
  'rep_m_area': {'type': 'number'},
  'gis_m_area': {'type': 'number'},
  'rep_area': {'type': 'number'},
  'gis_area': {'type': 'number'},
  'no_take': {'type': 'string'},
  'no_tk_area': {'type': 'number'},
  'status': {'type': 'string'},
  'status_yr': {'type': 'number'},
  'gov_type': {'type': 'string'},
  'own_type': {'type': 'string'},
  'mang_auth': {'type': 'string'},
  'mang_plan': {'type': 'string'},
  'verif': {'type': 'string'},
  'metadataid': {'type': 'number'},
  'sub_loc': {'type': 'string

In [10]:
# Trial run of the area-intersection calculation for one GADM polygon.
# Since the two tables are in the same DB, it is probably faster to join them
# than to download the geometry and insert it into the query (though we would
# do the latter for a more robust pipeline).
#
# Notes:
#  - geometry should be cast to geography for an equal-area calculation
#  - use the fast sphere variant, ST_AREA(geog, false)

i = 7  # cartodb_id of the GADM row used for this trial
sql_args = (wdpa_table, gadm_table, i)
query = """
SELECT ST_AREA(ST_UNION(
    ARRAY(
        SELECT
            ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE st_intersects(a.the_geom, b.the_geom)
    )
)::geography, false) as area
""".format(*sql_args)
response = sendSql(query)
response['rows'][0]['area']

133993443.069218

In [11]:
# Download identifiers and the spherical area of every row in the GADM table
query = """
SELECT iso, gid_1, cartodb_id, st_area(the_geom::geography, false) as adm1_area FROM {}
""".format(gadm_table)

adm1_rows = sendSql(query)['rows']
ADM1 = pd.DataFrame(adm1_rows)
ADM1.head()

Unnamed: 0,adm1_area,cartodb_id,gid_1,iso
0,11766600.0,1095,HKG.6_1,HKG
1,1357030000.0,3550,XKO.2_1,XKO
2,140027000000.0,1460,KAZ.14_1,KAZ
3,241169000000.0,1249,IND.34_1,IND
4,58714800000.0,409,CAF.1_1,CAF


In [12]:
# Start every row at NaN; the loops below only process rows that are still NaN
ADM1.loc[:, 'wdpa_area'] = np.nan

In [8]:
# Iterate through the ADM1 polygons that have no result yet and compute, on the
# server, the area of their intersection with the WDPA polygons.
# Some queries are likely to fail; failed rows keep NaN in 'wdpa_area' so that
# a later pass can retry just those.

for i in ADM1[ADM1['wdpa_area'].isna()].index:
    # `i` is an index label, so look the id up by label (.at) rather than by
    # position (.iloc); the two only coincide for a default RangeIndex.
    cdbid = ADM1.at[i, 'cartodb_id']
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE st_intersects(a.the_geom, b.the_geom)
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, cdbid)

    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress... (ST_AREA on geography is in square metres)
        sys.stdout.write("{}/{}: {} m2            \r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        # Best effort: report the failure and leave the row as NaN for a retry
        print('{}/{} Intersection failed: {}'.format(i, len(ADM1), e))

print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
# Make sure the output folder exists so the results of the long loop get saved
os.makedirs('Data', exist_ok=True)
ADM1.to_csv('Data/adm1_wdpa_intersection.csv')
ADM1.head()

9/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
47/876 Intersection failed: Error performing intersection: InterruptedException: Interrupted!
48/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
62/876 Intersection failed: Error performing intersection: InterruptedException: Interrupted!
71/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
76/876 Intersection failed: Error performing intersection: InterruptedException: Interrupted!
101/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
117/876 Intersection failed: You are over 

Unnamed: 0,adm1_area,cartodb_id,gid_1,iso,wdpa_area
0,11766600.0,1095,HKG.6_1,HKG,0.0
1,1357030000.0,3550,XKO.2_1,XKO,0.0
2,140027000000.0,1460,KAZ.14_1,KAZ,3197268000.0
3,241169000000.0,1249,IND.34_1,IND,2673625000.0
4,58714800000.0,409,CAF.1_1,CAF,44070950000.0


In [9]:
# If the bottleneck is the intersection, try simplifying the query polygon.
# Only rows still at NaN (i.e. not yet computed or previously failed) are tried.

tolerance = .08333 # wgs84 -> degrees (~10km)
for i in ADM1[ADM1['wdpa_area'].isna()].index:
    # `i` is an index label, so look the id up by label (.at) rather than by
    # position (.iloc); the two only coincide for a default RangeIndex.
    cdbid = ADM1.at[i, 'cartodb_id']
    # The ADM1 polygon is simplified and then repaired with ST_MAKEVALID before
    # it is intersected with the WDPA polygons.
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT ST_MAKEVALID(ST_SIMPLIFY(the_geom, {tolerance})) AS the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE st_intersects(a.the_geom, b.the_geom)
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, cdbid, tolerance=tolerance)

    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress... (ST_AREA on geography is in square metres)
        sys.stdout.write("{}/{}: {} m2           \r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        # Best effort: report the failure and leave the row as NaN for a retry
        print('{}/{} Intersection failed: {}'.format(i, len(ADM1), e))

print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
# Make sure the output folder exists so the results of the long loop get saved
os.makedirs('Data', exist_ok=True)
ADM1.to_csv('Data/adm1_wdpa_intersection.csv')
ADM1.head()

47/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
62/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
184/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
228/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
340/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
349/876 Intersection failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support 

Unnamed: 0,adm1_area,cartodb_id,gid_1,iso,wdpa_area
0,11766600.0,1095,HKG.6_1,HKG,0.0
1,1357030000.0,3550,XKO.2_1,XKO,0.0
2,140027000000.0,1460,KAZ.14_1,KAZ,3197268000.0
3,241169000000.0,1249,IND.34_1,IND,2673625000.0
4,58714800000.0,409,CAF.1_1,CAF,44070950000.0


In [None]:
# If the bottleneck is the union (dissolve), try simplifying the query polygon,
# downloading the intersected polygons, and doing the union and the area
# calculation locally. Only rows still at NaN are tried.

tolerance = .08333 # wgs84 -> degrees (~10km)
for i in ADM1[ADM1['wdpa_area'].isna()].index:
    # `i` is an index label, so look the id up by label (.at) rather than by
    # position (.iloc); the two only coincide for a default RangeIndex.
    cdbid = ADM1.at[i, 'cartodb_id']

    # First get the intersected polygons (one row per intersecting WDPA polygon)
    query = """
        SELECT
            ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT ST_MAKEVALID(ST_SIMPLIFY(the_geom, {tolerance})) AS the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE st_intersects(a.the_geom, b.the_geom)
    """.format(wdpa_table, gadm_table, cdbid, tolerance=tolerance)
    try:
        rows = sendSql(query)['rows']
        if len(rows):
            # Union the geometries (returned as hex-encoded WKB) so overlapping
            # areas are counted once. unary_union supersedes the deprecated
            # cascaded_union.
            intersected = [shapely.wkb.loads(r['the_geom'], hex=True) for r in rows]
            union = shapely.ops.unary_union(intersected)

            # Equal-area approx equiv to ST_AREA(geog): reproject to an Albers
            # equal-area projection whose standard parallels are the south and
            # north edges of the union's bounding box.
            # NOTE(review): pyproj.transform and Proj(init=...) are deprecated
            # in newer pyproj releases - confirm against the installed version.
            ea_geom = shapely.ops.transform(
                partial(
                    pyproj.transform,
                    pyproj.Proj(init='EPSG:4326'),
                    pyproj.Proj(
                        proj='aea', # Albers equal area
                        lat_1=union.bounds[1],
                        lat_2=union.bounds[3])),
                union)
            area = ea_geom.area
        else:
            # No intersecting polygons at all; count as 0
            area = 0
        ADM1.at[i, 'wdpa_area'] = area

        # just showing progress... (area is in square metres)
        sys.stdout.write("{}/{}: {} m2           \r".format(i, len(ADM1), area))
        sys.stdout.flush()

    except Exception as e:
        # Best effort: report the failure and leave the row as NaN for a retry
        print('{}/{} Intersection failed: {}'.format(i, len(ADM1), e))

print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
# Make sure the output folder exists so the results of the long loop get saved
os.makedirs('Data', exist_ok=True)
ADM1.to_csv('Data/adm1_wdpa_intersection.csv')
ADM1.head()


61/765: 753565556.2639073 m3            