In [1]:
import requests
import os
import pandas as pd
import numpy as np
import time
import sys

In [2]:
CARTO_URL = 'https://{}.carto.com/api/v2/sql'
CARTO_USER = 'wri-01'
CARTO_KEY = os.environ.get('CARTO_KEY')
def sendSql(sql, user=None, key=None):
    '''Send arbitrary sql and return response object or False'''
    user = user or CARTO_USER
    key = key or CARTO_KEY
    url = CARTO_URL.format(user)
    payload = {
        'api_key': key,
        'q': sql,
    }
    r = requests.post(url, json=payload)
    if (r.status_code >= 400):
        try:
            msg = r.json()['error'][0]
        except:
            r.raise_for_status()
        raise Exception(msg)
    return r.json()

In [3]:
# get datasets
gadm_table = 'gadm36_adm2'
wdpa_table = 'wdpa_protected_areas'

# look at fields
sendSql("select * from {} limit 0".format(gadm_table))

{'rows': [],
 'time': 0.004,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'iso': {'type': 'string'},
  'name_0': {'type': 'string'},
  'gid_1': {'type': 'string'},
  'name_1': {'type': 'string'},
  'nl_name_1': {'type': 'string'},
  'gid_2': {'type': 'string'},
  'name_2': {'type': 'string'},
  'varname_2': {'type': 'string'},
  'nl_name_2': {'type': 'string'},
  'type_2': {'type': 'string'},
  'engtype_2': {'type': 'string'},
  'cc_2': {'type': 'string'},
  'hasc_2': {'type': 'string'},
  'bbox': {'type': 'string'},
  'centroid': {'type': 'string'},
  'area': {'type': 'number'},
  'area_ha': {'type': 'number'},
  'gid_0': {'type': 'string'}},
 'total_rows': 0}

In [4]:
sendSql("select * from {} limit 0".format(wdpa_table))

{'rows': [],
 'time': 0.004,
 'fields': {'cartodb_id': {'type': 'number'},
  'the_geom': {'type': 'geometry'},
  'the_geom_webmercator': {'type': 'geometry'},
  'wdpaid': {'type': 'number'},
  'pa_def': {'type': 'string'},
  'name': {'type': 'string'},
  'orig_name': {'type': 'string'},
  'desig': {'type': 'string'},
  'desig_eng': {'type': 'string'},
  'desig_type': {'type': 'string'},
  'iucn_cat': {'type': 'string'},
  'int_crit': {'type': 'string'},
  'marine': {'type': 'string'},
  'rep_m_area': {'type': 'number'},
  'gis_m_area': {'type': 'number'},
  'rep_area': {'type': 'number'},
  'gis_area': {'type': 'number'},
  'no_take': {'type': 'string'},
  'no_tk_area': {'type': 'number'},
  'status': {'type': 'string'},
  'status_yr': {'type': 'number'},
  'gov_type': {'type': 'string'},
  'own_type': {'type': 'string'},
  'mang_auth': {'type': 'string'},
  'mang_plan': {'type': 'string'},
  'verif': {'type': 'string'},
  'metadataid': {'type': 'number'},
  'sub_loc': {'type': 'string

In [5]:
# test area intersection calculation 
# Since the two tables are in the same DB, its probably faster to join them than to 
# dl the geometry and insert it into the query. Tho we would do the latter for a more robust pipeline
#
# notes: 
#  geometry should be cast to geography for equal-area calculation
#  use fast sphere ST_AREA(geog, false)

i = 7
query = """
SELECT ST_AREA(ST_UNION(
    ARRAY(
        SELECT
            ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE a.the_geom && b.the_geom
    )
)::geography, false) as area
""".format(wdpa_table, gadm_table, i)
sendSql(query)['rows'][0]['area']

In [20]:
query = """
SELECT cartodb_id, gid_2 FROM {}
""".format(gadm_table)

ADM1 = pd.DataFrame(sendSql(query)['rows'])
ADM1.set_index('cartodb_id', inplace=True)

ADM1.head()

Unnamed: 0_level_0,gid_2
cartodb_id,Unnamed: 1_level_1
23852,MEX.31.93_1
1887,AUT.6.8_1
23854,MEX.31.95_1
34974,RUS.59.7_1
34827,RUS.55.31_1


In [26]:
areas = None
for i in range(len(ADM1)//1000+1):
    query = '''
    select cartodb_id, st_area(the_geom::geography, false) as adm2_area from {} where cartodb_id >= {} and cartodb_id < {}
    '''.format(gadm_table, i*1000, (i+1)*1000)
    if areas is None:
        areas = pd.DataFrame(sendSql(query)['rows'])
    else:
        areas = areas.append(pd.DataFrame(sendSql(query)['rows']))
    
areas.set_index('cartodb_id', inplace=True)
ADM1['adm2_area'] = areas['adm2_area']
ADM1.head()

Unnamed: 0_level_0,gid_2,adm2_area,wdpa_area
cartodb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
23852,MEX.31.93_1,172460000.0,
1887,AUT.6.8_1,3319100000.0,
23854,MEX.31.95_1,121811000.0,
34974,RUS.59.7_1,1120530000.0,
34827,RUS.55.31_1,4633480000.0,


In [28]:
ADM1['wdpa_area'] = np.nan
ADM1.count()

gid_2        45962
adm2_area    45962
wdpa_area        0
dtype: int64

In [34]:
# iterate through adm1 polygons and calculate area intersection
# some queries are likely to fail; track these so we can try again on just the failed ones.

for i in ADM1[ADM1['wdpa_area'].isna()].index:
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(a.the_geom, b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT ST_MAKEVALID(the_geom) AS the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE a.the_geom && b.the_geom
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, i)

    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress...
        sys.stdout.write("{}/{}: {} m3 \t\t\t\r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        print('{}/{} Query failed: {}'.format(i, len(ADM1), e))

print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
ADM1.to_csv('Data/adm2_wdpa_intersection.csv')
ADM1.head()

27142/45962 Query failed: GEOSisValid(): InterruptedException: Interrupted!
37082/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
41015/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
41017/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
41255/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
11759/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
14108/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more detail

14765/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
37179/45962 Query failed: GEOSisValid(): InterruptedException: Interrupted!
11361/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
42810/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
42143/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
11749/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
11752/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
34208/45962 Query failed: You are over platform's limits: SQL query time

1566/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
1577/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
14270/45962 Query failed: GEOSUnaryUnion: TopologyException: found non-noded intersection between LINESTRING (21.8366 58.4813, 21.8366 58.4813) and LINESTRING (21.8366 58.4813, 21.8366 58.4813) at 21.836553011587533 58.481341026705742
14470/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
14519/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
14801/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
8665/45

Unnamed: 0_level_0,gid_2,adm2_area,wdpa_area
cartodb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
23852,MEX.31.93_1,172460000.0,39173770.0
1887,AUT.6.8_1,3319100000.0,2541045000.0
23854,MEX.31.95_1,121811000.0,2105262.0
34974,RUS.59.7_1,1120530000.0,0.0
34827,RUS.55.31_1,4633480000.0,298345000.0


In [35]:
ADM1[ADM1['wdpa_area'].isna()]

Unnamed: 0_level_0,gid_2,adm2_area,wdpa_area
cartodb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
27142,NZL.19.1_1,7.93454e+09,
37082,SWE.10.9_1,2.06965e+10,
41015,USA.2.21_1,3.35779e+10,
41017,USA.2.23_1,9.65366e+10,
41255,USA.10.11_1,5.34337e+09,
11759,DNK.4.3_1,9.01817e+08,
14108,ESP.11.1_1,2.1754e+10,
14109,ESP.11.2_1,1.98464e+10,
14649,GBR.1.26_1,6.64214e+09,
14243,EST.9.1_1,1.75463e+09,


In [36]:
# if the bottleneck is intersection, try simplifing the query polygon?

tolerance = .0008333 # wgs84 -> degrees (100m)
for i in ADM1[ADM1['wdpa_area'].isna()].index:
    query = """
    SELECT ST_AREA(ST_UNION(
        ARRAY(
            SELECT
                ST_INTERSECTION(
                    ST_MAKEVALID(ST_SIMPLIFY(a.the_geom, {tolerance})),
                    b.the_geom) AS the_geom
            FROM {} AS a, (
                SELECT ST_MAKEVALID(ST_SIMPLIFY(the_geom, {tolerance})) AS the_geom
                FROM {} WHERE cartodb_id = {}
            ) AS b
            WHERE a.the_geom && b.the_geom
        )
    )::geography, false) as area
    """.format(wdpa_table, gadm_table, i, tolerance=tolerance)
    
    try:
        # No intersection results in null; count as 0
        area = sendSql(query)['rows'][0]['area'] or 0
        ADM1.at[i, 'wdpa_area'] = area
        # just showing progress...
        sys.stdout.write("{}/{}: {} m3 \t\t\t\r".format(i, len(ADM1), area))
        sys.stdout.flush()
    except Exception as e:
        print('{}/{} Query failed: {}'.format(i, len(ADM1), e))

print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
ADM1.to_csv('Data/adm2_wdpa_intersection.csv')
ADM1[ADM1['wdpa_area'].isna()]

37082/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
1551/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
27119/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
9124/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
9126/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
14756/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
14436/45962 Query failed: You are over platform's limits: SQL query timeout 

Unnamed: 0_level_0,gid_2,adm2_area,wdpa_area
cartodb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
37082,SWE.10.9_1,20696500000.0,
1551,AUS.8.61_1,624276000000.0,
27119,NZL.14.3_1,30030400000.0,
9124,CHL.11.2_1,37491000000.0,
9126,CHL.11.4_1,56753200000.0,
14756,GBR.3.4_1,6998080000.0,
14436,FIN.4.5_1,6790630000.0,
14439,FIN.5.3_1,10809900000.0,
14429,FIN.2.1_1,98328600000.0,
27070,NZL.1.1_1,4952410000.0,


In [37]:
import psycopg2
# setup postgis connecton funcitons
pg_user=os.environ.get('POSTGIS_ENV_POSTGRES_USER')
pg_pass=os.environ.get('POSTGIS_ENV_POSTGRES_PASSWORD')
pg_host=os.environ.get('POSTGIS_PORT_5432_TCP_ADDR')
pg_port=os.environ.get('POSTGIS_PORT_5432_TCP_PORT')

def postgis(query):
    conn = psycopg2.connect(user=pg_user, 
                        password=pg_pass,
                        host=pg_host,
                        port=pg_port
                       )
    cur = conn.cursor()
    cur.execute(query)
    r = list(cur.fetchall())
    cur.close()
    conn.close()
    return r

In [39]:
# if the bottleneck is union (dissolve), try simplifying and downloading the intersected polygons
# union offline

for i in ADM1[ADM1['wdpa_area'].isna()].index:
    #first get the intersected 
    query = """
        SELECT
            ST_INTERSECTION(
                ST_MAKEVALID(ST_SIMPLIFY(a.the_geom, {tolerance})),
                b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT ST_MAKEVALID(ST_SIMPLIFY(the_geom, {tolerance})) AS the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE a.the_geom && b.the_geom
    """.format(wdpa_table, gadm_table, i, tolerance=tolerance)
    try:
        rows = sendSql(query)['rows']
        geoms = ','.join(["ST_MAKEVALID('{}'::geometry)".format(r['the_geom']) for r in rows if r['the_geom']])
        if len(geoms):
            query = "SELECT ST_AREA(ST_UNION(ARRAY[{}])::geography, false)".format(geoms)
            area = postgis(query)[0][0]
        else:
            area = 0
        ADM1.at[i, 'wdpa_area'] = area

        # just showing progress...
        sys.stdout.write("{}/{}: {} m3 \t\t\t\r".format(i, len(ADM1), area))
        sys.stdout.flush()

    except Exception as e:
        print('{}/{} Query failed: {}'.format(i, len(ADM1), e))
    
print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
ADM1.to_csv('Data/adm2_wdpa_intersection.csv')
ADM1.head()

9126/45962 Query failed: Error performing intersection: InterruptedException: Interrupted!
8748/45962 Query failed: You are over platform's limits: SQL query timeout error. Refactor your query before running again or contact CARTO support for more details.
11546/45962 Query failed: GEOSUnaryUnion: TopologyException: Input geom 0 is invalid: Self-intersection at or near point 8.0898500850666561 51.280000000000001 at 8.0898500850666561 51.280000000000001

11552/45962 Query failed: GEOSUnaryUnion: TopologyException: Input geom 1 is invalid: Self-intersection at or near point 8.6616159927445917 51.989454060564462 at 8.6616159927445917 51.989454060564462

11475/45962 Query failed: GEOSUnaryUnion: TopologyException: Input geom 0 is invalid: Self-intersection at or near point 12.023961339155374 53.804262233221152 at 12.023961339155374 53.804262233221152


Num failed: 5


TypeError: unsupported operand type(s) for /: 'float' and 'str'

In [44]:
.count()

45960

In [None]:
# if the bottleneck is union (dissolve), try simplifying and downloading the intersected polygons
# union offline

for i in ADM1[ADM1['wdpa_area'].isna()].index:
    #first get the intersected 
    query = """
        SELECT
            ST_INTERSECTION(
                ST_MAKEVALID(ST_SIMPLIFY(a.the_geom, {tolerance})),
                b.the_geom) AS the_geom
        FROM {} AS a, (
            SELECT ST_MAKEVALID(ST_SIMPLIFY(the_geom, {tolerance})) AS the_geom
            FROM {} WHERE cartodb_id = {}
        ) AS b
        WHERE a.the_geom && b.the_geom
    """.format(wdpa_table, gadm_table, i, tolerance=tolerance*4)
    try:
        rows = sendSql(query)['rows']
        geoms = ','.join(["ST_MAKEVALID('{}'::geometry)".format(r['the_geom']) for r in rows if r['the_geom']])
        if len(geoms):
            query = "SELECT ST_AREA(ST_UNION(ARRAY[{}])::geography, false)".format(geoms)
            area = postgis(query)[0][0]
        else:
            area = 0
        ADM1.at[i, 'wdpa_area'] = area

        # just showing progress...
        sys.stdout.write("{}/{}: {} m3 \t\t\t\r".format(i, len(ADM1), area))
        sys.stdout.flush()

    except Exception as e:
        print('{}/{} Query failed: {}'.format(i, len(ADM1), e))
    
print('\nNum failed: {}'.format(ADM1['wdpa_area'].isna().sum()))
ADM1.to_csv('Data/adm2_wdpa_intersection.csv')
ADM1.head()

In [39]:
ADM1.iloc[1452]

adm2_area     5.67532e+10
cartodb_id           9126
gid_2          CHL.11.4_1
iso                   CHL
wdpa_area             NaN
perc_wdpa             NaN
Name: 1452, dtype: object