In [0]:
!pip install pyshp

Collecting pyshp
[?25l  Downloading https://files.pythonhosted.org/packages/27/16/3bf15aa864fb77845fab8007eda22c2bd67bd6c1fd13496df452c8c43621/pyshp-2.1.0.tar.gz (215kB)
[K     |█▌                              | 10kB 14.4MB/s eta 0:00:01[K     |███                             | 20kB 1.7MB/s eta 0:00:01[K     |████▋                           | 30kB 2.5MB/s eta 0:00:01[K     |██████                          | 40kB 3.1MB/s eta 0:00:01[K     |███████▋                        | 51kB 2.1MB/s eta 0:00:01[K     |█████████▏                      | 61kB 2.5MB/s eta 0:00:01[K     |██████████▋                     | 71kB 2.9MB/s eta 0:00:01[K     |████████████▏                   | 81kB 3.2MB/s eta 0:00:01[K     |█████████████▊                  | 92kB 3.6MB/s eta 0:00:01[K     |███████████████▏                | 102kB 2.8MB/s eta 0:00:01[K     |████████████████▊               | 112kB 2.8MB/s eta 0:00:01[K     |██████████████████▎             | 122kB 2.8MB/s eta 0:00:01[K     

Download the GADM 3.6 shapefiles from https://biogeo.ucdavis.edu/data/gadm3.6/gadm36_levels_shp.zip
and extract them into the directory `gadm36_levels_shp`.

In [0]:
import getpass
import json
import os
from json import dumps

import numpy as np
import pandas as pd
import psycopg2
import shapefile

# Connect to covid19db.org.
# Connection settings are taken from the environment so that credentials are
# never stored in the notebook; when COVID19DB_PASSWORD is not set the password
# is prompted for interactively.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed wherever the notebook was shared and should be rotated.
conn = psycopg2.connect(
    host=os.environ.get('COVID19DB_HOST', 'covid19db.org'),
    port=int(os.environ.get('COVID19DB_PORT', '5432')),
    dbname=os.environ.get('COVID19DB_NAME', 'covid19db_adm_play'),
    user=os.environ.get('COVID19DB_USER', 'covid19db_adm_rw'),
    password=os.environ.get('COVID19DB_PASSWORD') or getpass.getpass('covid19db password: '),
)



# Helper Functions

In [0]:
def flattenNestedList(nestedList):
    ''' Returns the leaves of arbitrarily nested lists/tuples as one flat list, in order '''
    flat = []
    for item in nestedList:
        # Anything that is not a list or tuple is a leaf and is kept as-is
        if not isinstance(item, (list, tuple)):
            flat.append(item)
            continue
        # Nested container: flatten it recursively and splice the result in
        flat += flattenNestedList(item)

    return flat

# LOAD THE ISO 3166 COUNTRY-CODE TABLE (`data/ISO_3611.csv`)

In [0]:
# Country-code lookup table, indexed by its "alpha-3" column.
# pandas' default NA parsing turns the literal string "NA" into NaN, which
# would erase Namibia's alpha-2 code ("NA"); only empty cells are treated as
# missing here.
ISO_codes = pd.read_csv(
    "data/ISO_3611.csv",
    index_col="alpha-3",
    keep_default_na=False,
    na_values=[""],
)

# LAYER PARSERS

In [0]:
def import_feature_layer_0(cur,PATH):
    """Insert every feature of a GADM level-0 shapefile into the database.

    One row is inserted into ``public.administrative_division`` per feature,
    with ``adm_level`` set to 0 and ``gid`` set to the feature's ``GID_0``.

    Args:
        cur: open database cursor used to execute the INSERT statements.
        PATH: path of the level-0 shapefile, as accepted by ``shapefile.Reader``.

    The function does not commit; the caller owns the transaction.
    """
    # Level 0 has no sub-country areas, so no adm_area_* columns are listed:
    # the nine columns below match the nine placeholders one-to-one.
    INSERT_STATEMENT = 'INSERT INTO public.administrative_division \
                            (country, countrycode, countrycode_alpha2, \
                            adm_level, \
                            gid, \
                            latitude, longitude, \
                            geometry, properties) \
                            VALUES (%s, %s, %s, \
                                    %s, \
                                    %s, \
                                    %s, %s, \
                                    ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326), %s);'

    # The context manager closes the underlying shapefile handles when done.
    with shapefile.Reader(PATH) as reader:
        # reader.fields[0] is pyshp's DeletionFlag pseudo-field, not an attribute
        field_names = [field[0] for field in reader.fields[1:]]

        # Stream the features instead of loading the whole layer into memory
        for sr in reader.iterShapeRecords():
            atr = dict(zip(field_names, sr.record))
            geom = sr.shape.__geo_interface__

            # Approximate the centroid by the mean of all vertices (not
            # area-weighted); assumes 2D (x, y) coordinates
            centroid = np.reshape(flattenNestedList(geom["coordinates"]), (-1, 2)).mean(axis = 0)
            latitude = centroid[1]
            longitude = centroid[0]

            # convert the country ISO alpha-3 found in GID_0 to alpha-2
            country_ISO2 = ISO_codes["alpha-2"].get(atr['GID_0'], "NaN")

            # we use the last non null GID_i as identifier
            GID = atr['GID_0']

            cur.execute(INSERT_STATEMENT, (atr['NAME_0'], atr['GID_0'], country_ISO2,
                                           0,
                                           GID,
                                           latitude, longitude,
                                           dumps(geom), dumps(atr)))

In [0]:
def import_feature_layer_1(cur,PATH):
    """Insert every feature of a GADM level-1 shapefile into the database.

    One row is inserted into ``public.administrative_division`` per feature,
    with ``adm_level`` set to 1 and ``gid`` set to the feature's ``GID_1``.

    Args:
        cur: open database cursor used to execute the INSERT statements.
        PATH: path of the level-1 shapefile, as accepted by ``shapefile.Reader``.

    The function does not commit; the caller owns the transaction.
    """
    INSERT_STATEMENT = 'INSERT INTO public.administrative_division \
                            (country, countrycode, countrycode_alpha2, \
                            adm_level, \
                            adm_area_1, adm_area_1_code, \
                            gid, \
                            latitude, longitude, \
                            geometry, properties) \
                            VALUES (%s, %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326), %s);'

    # The context manager closes the underlying shapefile handles when done.
    with shapefile.Reader(PATH) as reader:
        # reader.fields[0] is pyshp's DeletionFlag pseudo-field, not an attribute
        field_names = [field[0] for field in reader.fields[1:]]

        # Stream the features instead of loading the whole layer into memory
        for sr in reader.iterShapeRecords():
            atr = dict(zip(field_names, sr.record))
            geom = sr.shape.__geo_interface__

            # Approximate the centroid by the mean of all vertices (not
            # area-weighted); assumes 2D (x, y) coordinates
            centroid = np.reshape(flattenNestedList(geom["coordinates"]), (-1, 2)).mean(axis = 0)
            latitude = centroid[1]
            longitude = centroid[0]

            # convert the country ISO alpha-3 found in GID_0 to alpha-2
            country_ISO2 = ISO_codes["alpha-2"].get(atr['GID_0'], "NaN")

            # we use the last non null GID_i as identifier
            GID = atr['GID_1']

            cur.execute(INSERT_STATEMENT, (atr['NAME_0'], atr['GID_0'], country_ISO2,
                                           1,
                                           atr['NAME_1'], atr['GID_1'],
                                           GID,
                                           latitude, longitude,
                                           dumps(geom), dumps(atr)))

In [0]:
def import_feature_layer_2(cur,PATH):
    """Insert every feature of a GADM level-2 shapefile into the database.

    One row is inserted into ``public.administrative_division`` per feature,
    with ``adm_level`` set to 2 and ``gid`` set to the feature's ``GID_2``.

    Args:
        cur: open database cursor used to execute the INSERT statements.
        PATH: path of the level-2 shapefile, as accepted by ``shapefile.Reader``.

    The function does not commit; the caller owns the transaction.
    """
    INSERT_STATEMENT = 'INSERT INTO public.administrative_division \
                            (country, countrycode, countrycode_alpha2, \
                            adm_level, \
                            adm_area_1, adm_area_1_code, \
                            adm_area_2, adm_area_2_code, \
                            gid, \
                            latitude, longitude, \
                            geometry, properties) \
                            VALUES (%s, %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326), %s);'

    # The context manager closes the underlying shapefile handles when done.
    with shapefile.Reader(PATH) as reader:
        # reader.fields[0] is pyshp's DeletionFlag pseudo-field, not an attribute
        field_names = [field[0] for field in reader.fields[1:]]

        # Stream the features instead of loading the whole layer into memory
        for sr in reader.iterShapeRecords():
            atr = dict(zip(field_names, sr.record))
            geom = sr.shape.__geo_interface__

            # Approximate the centroid by the mean of all vertices (not
            # area-weighted); assumes 2D (x, y) coordinates
            centroid = np.reshape(flattenNestedList(geom["coordinates"]), (-1, 2)).mean(axis = 0)
            latitude = centroid[1]
            longitude = centroid[0]

            # convert the country ISO alpha-3 found in GID_0 to alpha-2
            country_ISO2 = ISO_codes["alpha-2"].get(atr['GID_0'], "NaN")

            # we use the last non null GID_i as identifier
            GID = atr['GID_2']

            cur.execute(INSERT_STATEMENT, (atr['NAME_0'], atr['GID_0'], country_ISO2,
                                           2,
                                           atr['NAME_1'], atr['GID_1'],
                                           atr['NAME_2'], atr['GID_2'],
                                           GID,
                                           latitude, longitude,
                                           dumps(geom), dumps(atr)))

In [0]:
def import_feature_layer_3(cur,PATH):
    """Insert every feature of a GADM level-3 shapefile into the database.

    One row is inserted into ``public.administrative_division`` per feature,
    with ``adm_level`` set to 3 and ``gid`` set to the feature's ``GID_3``.

    Args:
        cur: open database cursor used to execute the INSERT statements.
        PATH: path of the level-3 shapefile, as accepted by ``shapefile.Reader``.

    The function does not commit; the caller owns the transaction.
    """
    INSERT_STATEMENT = 'INSERT INTO public.administrative_division \
                            (country, countrycode, countrycode_alpha2, \
                            adm_level, \
                            adm_area_1, adm_area_1_code, \
                            adm_area_2, adm_area_2_code, \
                            adm_area_3, adm_area_3_code, \
                            gid, \
                            latitude, longitude, \
                            geometry, properties) \
                            VALUES (%s, %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    %s, %s, \
                                    %s, %s, \
                                    %s, \
                                    %s, %s, \
                                    ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326), %s);'

    # The context manager closes the underlying shapefile handles when done.
    with shapefile.Reader(PATH) as reader:
        # reader.fields[0] is pyshp's DeletionFlag pseudo-field, not an attribute
        field_names = [field[0] for field in reader.fields[1:]]

        # Stream the features instead of loading the whole layer into memory
        for sr in reader.iterShapeRecords():
            atr = dict(zip(field_names, sr.record))
            geom = sr.shape.__geo_interface__

            # Approximate the centroid by the mean of all vertices (not
            # area-weighted); assumes 2D (x, y) coordinates
            centroid = np.reshape(flattenNestedList(geom["coordinates"]), (-1, 2)).mean(axis = 0)
            latitude = centroid[1]
            longitude = centroid[0]

            # convert the country ISO alpha-3 found in GID_0 to alpha-2
            country_ISO2 = ISO_codes["alpha-2"].get(atr['GID_0'], "NaN")

            # we use the last non null GID_i as identifier
            GID = atr['GID_3']

            cur.execute(INSERT_STATEMENT, (atr['NAME_0'], atr['GID_0'], country_ISO2,
                                           3,
                                           atr['NAME_1'], atr['GID_1'],
                                           atr['NAME_2'], atr['GID_2'],
                                           atr['NAME_3'], atr['GID_3'],
                                           GID,
                                           latitude, longitude,
                                           dumps(geom), dumps(atr)))

# UPLOAD LAYERS

In [0]:
# UPLOAD LAYER 0

PATH_0 = "gadm36_levels_shp/gadm36_0"

# `with conn` runs the import as one transaction: it is committed when the
# block exits normally and rolled back if an exception is raised, so no
# explicit conn.commit() is needed.
with conn:
    with conn.cursor() as cur:
        import_feature_layer_0(cur, PATH_0)

In [0]:
# UPLOAD LAYER 1

PATH_1 = "gadm36_levels_shp/gadm36_1"

# `with conn` runs the import as one transaction: it is committed when the
# block exits normally and rolled back if an exception is raised, so no
# explicit conn.commit() is needed.
with conn:
    with conn.cursor() as cur:
        # Level-1 data must go through the level-1 parser
        import_feature_layer_1(cur, PATH_1)

In [0]:
# UPLOAD LAYER 2

PATH_2 = "gadm36_levels_shp/gadm36_2"

# `with conn` runs the import as one transaction: it is committed when the
# block exits normally and rolled back if an exception is raised, so no
# explicit conn.commit() is needed.
with conn:
    with conn.cursor() as cur:
        import_feature_layer_2(cur, PATH_2)

In [20]:
# There seems to be some missing data on NAME_2 at level 2.
# Let's check: print every level-2 record whose NAME_2 is empty.
with shapefile.Reader(PATH_2) as reader:
    # reader.fields[0] is pyshp's DeletionFlag pseudo-field, not an attribute
    field_names = [field[0] for field in reader.fields[1:]]

    # Only the attribute records are inspected, so the geometries are not read
    for record in reader.iterRecords():
        atr = dict(zip(field_names, record))

        if atr['NAME_2'] == "":
            print(atr['NAME_0'], atr['GID_0'], atr['NAME_1'], atr['GID_1'],
                  atr['NAME_2'], atr['GID_2'])

ShapefileException: ignored

In [0]:
# UPLOAD LAYER 3

PATH_3 = "gadm36_levels_shp/gadm36_3"

# `with conn` runs the import as one transaction: it is committed when the
# block exits normally and rolled back if an exception is raised, so no
# explicit conn.commit() is needed.
with conn:
    with conn.cursor() as cur:
        import_feature_layer_3(cur, PATH_3)

In [0]:
# sql = """DELETE FROM public.administrative_division """
# with conn:
#     with conn.cursor() as cur:
#         cur.execute(sql)
#     conn.commit()