In [120]:
import logging
import os
import sys
import time

import numpy as np
import pandas as pd
import requests

In [52]:
CARTO_URL = 'https://{}.carto.com/api/v2/sql'
CARTO_USER = 'wri-01'
CARTO_KEY = os.environ.get('CARTO_KEY')
def sendSql(sql, user=None, key=None):
    '''Run an SQL statement through the CARTO SQL API and return the parsed JSON.

    Args:
        sql: SQL text to execute.
        user: CARTO account name; falls back to CARTO_USER when falsy.
        key: CARTO API key; falls back to CARTO_KEY when falsy.

    Returns:
        The decoded JSON body of the response (whatever `r.json()` yields).

    Raises:
        requests.exceptions.HTTPError: for any 4xx/5xx response, raised by
            `raise_for_status()`.
    '''
    user = user or CARTO_USER
    key = key or CARTO_KEY
    url = CARTO_URL.format(user)
    payload = {
        'api_key': key,
        'q': sql,
    }
    # Log only the endpoint and the query: the payload carries the API key,
    # which must never end up in log output.
    logging.debug('POST %s q=%s', url, sql)
    r = requests.post(url, json=payload)
    r.raise_for_status()
    return r.json()

In [81]:
# Names of the two tables queried through sendSql in this notebook
gadm_table = 'gadm36_adm1'
wdpa_table = 'wdpa_protected_areas'

# Look at the fields of the GADM table: LIMIT 0 asks for no rows, and the
# response still lists the column names and types under 'fields'.
sendSql("select * from {} limit 0".format(gadm_table))

{'rows': [],
 'time': 0.003,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'iso': {'type': 'string'},
  'name_0': {'type': 'string'},
  'gid_1': {'type': 'string'},
  'name_1': {'type': 'string'},
  'varname_1': {'type': 'string'},
  'nl_name_1': {'type': 'string'},
  'type_1': {'type': 'string'},
  'engtype_1': {'type': 'string'},
  'cc_1': {'type': 'string'},
  'hasc_1': {'type': 'string'},
  'bbox': {'type': 'string'},
  'centroid': {'type': 'string'},
  'area_ha': {'type': 'number'},
  'area': {'type': 'number'},
  'gid_0': {'type': 'string'}},
 'total_rows': 0}

In [55]:
# Same schema probe for the WDPA table (LIMIT 0: no rows, only the 'fields' listing)
sendSql("select * from {} limit 0".format(wdpa_table))

{'rows': [],
 'time': 0.003,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'wdpaid': {'type': 'number'},
  'pa_def': {'type': 'string'},
  'name': {'type': 'string'},
  'orig_name': {'type': 'string'},
  'desig': {'type': 'string'},
  'desig_eng': {'type': 'string'},
  'desig_type': {'type': 'string'},
  'iucn_cat': {'type': 'string'},
  'int_crit': {'type': 'string'},
  'marine': {'type': 'string'},
  'rep_m_area': {'type': 'number'},
  'gis_m_area': {'type': 'number'},
  'rep_area': {'type': 'number'},
  'gis_area': {'type': 'number'},
  'no_take': {'type': 'string'},
  'no_tk_area': {'type': 'number'},
  'status': {'type': 'string'},
  'status_yr': {'type': 'number'},
  'gov_type': {'type': 'string'},
  'own_type': {'type': 'string'},
  'mang_auth': {'type': 'string'},
  'mang_plan': {'type': 'string'},
  'verif': {'type': 'string'},
  'metadataid': {'type': 'number'},
  'sub_loc': {'type': 'string

In [91]:
# Test the area-intersection calculation on a single ADM1 polygon.
# Since the two tables are in the same DB, it's probably faster to join them than to
# download the geometry and insert it into the query, though we would do the latter
# for a more robust pipeline.
#
# Notes:
#  geometry should be cast to geography for equal-area calculation
#  use fast sphere ST_AREA(geog, false)

i = 7  # cartodb_id of the GADM row used for this one-off test
# The query intersects every WDPA geometry with the selected GADM polygon,
# unions the pieces so overlapping protected areas are not counted twice,
# and measures the result as a geography.
query = """
SELECT ST_AREA(ST_UNION(
    ARRAY(
        SELECT
            ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE st_intersects(a.the_geom, b.the_geom)
    )
)::geography, false) as area
""".format(wdpa_table, gadm_table, i)
sendSql(query)['rows'][0]['area']

133993443.069218

In [95]:
# Fetch the identifiers of every row in the GADM table together with its
# area, measured on the geography cast with the sphere option (false).
query = """
SELECT iso, gid_1, cartodb_id, st_area(the_geom::geography, false) as adm1_area FROM {}
""".format(gadm_table)

# One DataFrame row per record returned by the query
ADM1 = pd.DataFrame(sendSql(query)['rows'])
ADM1.head()

Unnamed: 0,adm1_area,cartodb_id,gid_1,iso
0,1.17666e+07,1095,HKG.6_1,HKG
1,1.35703e+09,3550,XKO.2_1,XKO
2,1.40027e+11,1460,KAZ.14_1,KAZ
3,2.41169e+11,1249,IND.34_1,IND
4,5.87148e+10,409,CAF.1_1,CAF
5,1.4506e+09,391,BTN.19_1,BTN
6,8.2444e+08,189,BDI.14_1,BDI
7,1.53365e+11,331,BRA.1_1,BRA
8,3.58062e+11,342,BRA.11_1,BRA
9,9.08055e+11,341,BRA.12_1,BRA


In [122]:
# Result column for the intersection area; NaN marks rows that have not been
# computed yet (the loops below select rows with .isna()).
# NOTE: re-running this cell discards any areas already computed.
ADM1['wdpa_area'] = np.nan

In [None]:
# Iterate through the ADM1 polygons that have no result yet and calculate the
# area of their intersection with the WDPA table.
# Some queries are likely to fail; those rows keep NaN in 'wdpa_area', so
# re-running this cell retries just the failed ones.

for i in ADM1[ADM1['wdpa_area'].isna()].index:
    # `i` is an index *label*: read with label-based .at so the lookup matches
    # the .at write below even when the index is not a default 0..n-1 range.
    cdbid = ADM1.at[i, 'cartodb_id']
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE st_intersects(a.the_geom, b.the_geom)
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, cdbid)

    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress... (area is in square metres)
        sys.stdout.write("{}/{}: {} m2\r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        # Best effort: report the failure and leave the row as NaN for a later retry
        print('{}/{} Intersection failed: {}'.format(i, len(ADM1), e))

print('Num failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
# Make sure the output directory exists so the long-running results can be saved
os.makedirs('Data', exist_ok=True)
ADM1.to_csv('Data/adm1_wdpa_intersection.csv')
ADM1.head()

In [None]:
# If the bottleneck is the intersection, try simplifying the query polygon?
# Only rows still marked NaN in 'wdpa_area' are processed, so this cell acts
# as a retry pass with a coarser ADM1 geometry.
#
# NOTE(review): ST_Simplify does not guarantee a valid result geometry; this
# may be related to the "400 Bad Request" responses seen below. Consider
# ST_SimplifyPreserveTopology -- to be confirmed against the API error body.

tolerance = .08333 # wgs84 -> degrees (~10km)
for i in ADM1[ADM1['wdpa_area'].isna()].index:
    # `i` is an index *label*: read with label-based .at so the lookup matches
    # the .at write below even when the index is not a default 0..n-1 range.
    cdbid = ADM1.at[i, 'cartodb_id']
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT ST_SIMPLIFY(the_geom, {tolerance}) AS the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE st_intersects(a.the_geom, b.the_geom)
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, cdbid, tolerance=tolerance)

    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress... (area is in square metres)
        sys.stdout.write("{}/{}: {} m2\r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        # Best effort: report the failure and leave the row as NaN for a later retry
        print('{}/{} Intersection failed: {}'.format(i, len(ADM1), e))

print('Num failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
# Make sure the output directory exists so the long-running results can be saved
os.makedirs('Data', exist_ok=True)
ADM1.to_csv('Data/adm1_wdpa_intersection.csv')
ADM1.head()

47/869 Intersection failed: 400 Client Error: Bad Request for url: https://wri-01.carto.com/api/v2/sql
62/869 Intersection failed: 429 Client Error: Too Many Requests for url: https://wri-01.carto.com/api/v2/sql
71/869 Intersection failed: 400 Client Error: Bad Request for url: https://wri-01.carto.com/api/v2/sql
101/869 Intersection failed: 400 Client Error: Bad Request for url: https://wri-01.carto.com/api/v2/sql
159/869 Intersection failed: 400 Client Error: Bad Request for url: https://wri-01.carto.com/api/v2/sql
184/869 Intersection failed: 429 Client Error: Too Many Requests for url: https://wri-01.carto.com/api/v2/sql
228/869 Intersection failed: 429 Client Error: Too Many Requests for url: https://wri-01.carto.com/api/v2/sql
340/869 Intersection failed: 400 Client Error: Bad Request for url: https://wri-01.carto.com/api/v2/sql
349/869 Intersection failed: 429 Client Error: Too Many Requests for url: https://wri-01.carto.com/api/v2/sql
369/869 Intersection failed: 429 Client Err

In [None]:
# If the bottleneck is the union (dissolve), we need another solution besides the CARTO SQL API.
# Options:
#  1. download the geometries and use shapely or another local library
#  2. CARTO SQL Batch API (no timeout, but needs auth)
#  3. local PostGIS?
