# Setup libraries and working data
Load libraries and set up working directories.

In [19]:
import os
import os.path
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
from math import ceil
from random import random
import json

from shapely.geometry import Point
from shapely.geometry import shape
import matplotlib.pyplot as plt
import contextily as ctx
from tqdm.notebook import tqdm
tqdm.pandas()

import rasterio
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.mask import mask
from rasterio.features import shapes
from rasterio.features import dataset_features

from pyproj import CRS

%matplotlib inline

# Root folder of the data tree; the paths used in this notebook are built from it.
DROOT = './data/'
# Make sure the folder that receives the interim population masks exists.
os.makedirs(
    os.path.join(DROOT, '3-interim', 'populationmasks'),
    exist_ok=True,
)


  from pandas import Panel


# General methods
Define methods to be used in the procedures below.

In [20]:
# Turn the first feature of a GeoDataFrame into a GeoJSON geometry list.
def get_mask_coords(gdf):
    """Return the geometry of the first feature of `gdf` as a one-item list.

    The frame is serialised with `gdf.to_json()` and parsed back, so the
    result is a plain GeoJSON-like dict wrapped in a list. Only the first
    feature is used; any further rows are ignored.
    """
    feature_collection = json.loads(gdf.to_json())
    first_feature = feature_collection['features'][0]
    return [first_feature['geometry']]

In [21]:
def mask_popcenter_from_raster(gdf_entry, raster, tiff_out):
    """Clip `raster` to the first geometry of `gdf_entry` and save it as a GeoTIFF.

    Parameters
    ----------
    gdf_entry : GeoDataFrame
        Selection whose first feature supplies the mask geometry (extracted
        with `get_mask_coords`). Presumably it must already be in the CRS of
        `raster` -- confirm against the caller.
    raster : open rasterio dataset
        Dataset to clip. Its metadata and CRS are copied to the output.
    tiff_out : str
        Path of the GeoTIFF file to write.

    Returns
    -------
    None
        The result is written to `tiff_out`.
    """
    out_img, out_transform = mask(
        dataset=raster,
        shapes=get_mask_coords(gdf_entry),
        crop=True)

    # Write out for usage a bit further onwards due to limitations in rasterio.
    # Metadata comes from the raster that was actually masked; the previous
    # version read the notebook-global `pop` and ignored the `raster` argument.
    out_meta = raster.meta.copy()
    out_meta.update({
        "driver": "GTiff",
        "height": out_img.shape[1],
        "width": out_img.shape[2],
        "transform": out_transform,
        "crs": raster.crs
    })

    with rasterio.open(tiff_out, "w", **out_meta) as dest:
        dest.write(out_img)

### Search city details quickly:

In [13]:
# NOTE: this lookup needs `urbancenter_gdf`, which is only loaded in the
# "Procedures" section further down -- run that cell first on a fresh kernel.
name = input('Name: ')
# `name` is matched as a regular expression (the pandas default). na=False
# treats rows without a main name as non-matches, so the boolean mask never
# contains NaN (which would make the row selection fail).
result = urbancenter_gdf[urbancenter_gdf.UC_NM_MN.str.contains(name, na=False)]
result[['CTR_MN_NM', 'CTR_MN_ISO', 'UC_NM_MN', 'UC_NM_LST']]


Name: Rome


Unnamed: 0,CTR_MN_NM,CTR_MN_ISO,UC_NM_MN,UC_NM_LST
2896,Italy,ITA,Rome,Rome


# Procedures
Load the global population raster and the urban-centers file, then mask the population raster for each listed city.

In [22]:
# Open the worldwide population raster. The dataset handle is kept open
# because the cells below read from `pop`.
pop_tif_path = os.path.join(
    DROOT,
    '2-external',
    'GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0',
    'GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif',
)
pop = rasterio.open(pop_tif_path)
# Show grid size, extent and projection as a quick sanity check.
(pop.shape, pop.bounds, pop.crs.to_proj4())


((72000, 144328),
 BoundingBox(left=-18041000.0, bottom=-9000000.0, right=18041000.0, top=9000000.0),
 '+proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs=True')

In [23]:
# Read the (large) Urban Centers file and reproject it to the CRS of the
# population raster so both layers line up.
urbancenter_path = os.path.join(
    DROOT,
    '2-external',
    'GHS_STAT_UCDB2015MT_GLOBE_R2019A',
    'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg',
)
urbancenter_gdf = gpd.read_file(urbancenter_path).to_crs(pop.crs)
urbancenter_gdf.head(1)


Unnamed: 0,ID_HDC_G0,QA2_1V,AREA,BBX_LATMN,BBX_LONMN,BBX_LATMX,BBX_LONMX,GCPNT_LAT,GCPNT_LON,CTR_MN_NM,...,EX_SS_P00,EX_SS_P15,EX_EQ19PGA,EX_EQ19MMI,EX_EQ19_Q,EX_HW_IDX,SDG_LUE9015,SDG_A2G14,SDG_OS15MX,geometry
0,1.0,1.0,185.0,21.247683,-158.043016,21.422193,-157.730529,21.340678,-157.893497,United States,...,397443.031445,444041.529529,,,missing,,0.074385,0.226415,56.41,"MULTIPOLYGON (((-15151000.000 2625000.000, -15..."


In [44]:
# True rebuilds every city on each run (the behaviour this cell had before,
# where the skip check was disabled with a hard-coded `and False`).
# Set to False to reuse the .tiff/.pcl outputs of an earlier run.
# NOTE: skipped cities do not refresh `gdf_pop`, so the preview cells below
# then show the last city that was actually processed.
FORCE_REBUILD = True

city_list_df = pd.read_csv(os.path.join(DROOT, '1-raw', 'citylist.csv'))
# itertuples() has no length, so pass the total to get a real progress bar.
city_list_itr = tqdm(city_list_df.itertuples(),
                     total=len(city_list_df), leave=False)

mask_dir = os.path.join(DROOT, '3-interim', 'populationmasks')
os.makedirs(mask_dir, exist_ok=True)

# Get masked population dataframes for all mentioned cities.
for city in city_list_itr:

    city_list_itr.set_description("Current city: {}".format(city.City))

    tiff_path = os.path.join(mask_dir, city.City + '.tiff')
    pcl_path = os.path.join(mask_dir, city.City + '.pcl')

    # If city already done, skip this (unless a rebuild is forced).
    if (not FORCE_REBUILD
            and os.path.exists(tiff_path)
            and os.path.exists(pcl_path)):
        continue

    # Select the urban center by main name and country code.
    city_boundary = urbancenter_gdf[
        (urbancenter_gdf.UC_NM_MN == city.City) &
        (urbancenter_gdf.XC_ISO_LST == city.Country)
    ]
    if city_boundary.empty:
        # Fail with a readable message instead of an IndexError raised deep
        # inside the GeoJSON conversion.
        raise ValueError(
            "No urban center found for city {!r} in country {!r}; check "
            "citylist.csv against the UC_NM_MN / XC_ISO_LST columns."
            .format(city.City, city.Country))

    # Write out a masked selection with city population.
    mask_popcenter_from_raster(
        gdf_entry=city_boundary,
        raster=pop,
        tiff_out=tiff_path
    )

    # Convert the masked tiff to geojson for GeoPandas to use.
    # This is doing the heavy lifting!
    with rasterio.open(tiff_path) as raster:
        image = raster.read(1)

        # Subtract a tiny random deviation (<1) from every cell so that
        # neighbouring cells with equal values are not merged into one
        # polygon by shapes(). Done in one vectorized step instead of a
        # per-pixel Python loop.
        image -= np.random.random(image.shape)

        crs = raster.crs
        nodata = raster.nodata
        # Keep only cells above the nodata value; the noise can only lower
        # a value, so nodata cells never pass this filter.
        list_pop = [
            {'cell_pop': value, 'geometry': shape(shp)}
            for shp, value in shapes(image, transform=raster.transform)
            if value > nodata
        ]

    # Read in as a GeoPandas dataset and write out.
    gdf_pop = gpd.GeoDataFrame(list_pop, crs=crs)
    # The noise can push small populations below zero; clamp them to 0.
    gdf_pop['cell_pop'] = np.maximum(gdf_pop['cell_pop'], 0)
    gdf_pop.to_pickle(pcl_path)


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

In [45]:
# Preview cell centroids 200-239 as lon/lat. The centroids are computed in the
# projected CRS of `gdf_pop` and only the resulting points are converted to
# EPSG:4326; taking centroids of geometries already in a geographic CRS is
# flagged by geopandas as likely inaccurate.
gdf_pop.centroid.to_crs(epsg=4326).iloc[200:240]


  """Entry point for launching an IPython kernel.


200    POINT (-74.07374 4.78903)
201    POINT (-74.07124 4.78903)
202    POINT (-74.06874 4.78903)
203    POINT (-74.06625 4.78903)
204    POINT (-74.06375 4.78903)
205    POINT (-74.06125 4.78903)
206    POINT (-74.05875 4.78903)
207    POINT (-74.05625 4.78903)
208    POINT (-74.05375 4.78903)
209    POINT (-74.05125 4.78903)
210    POINT (-74.04875 4.78903)
211    POINT (-74.04625 4.78903)
212    POINT (-74.04375 4.78903)
213    POINT (-74.04125 4.78903)
214    POINT (-74.03875 4.78903)
215    POINT (-74.03625 4.78903)
216    POINT (-74.09361 4.78701)
217    POINT (-74.09111 4.78701)
218    POINT (-74.08861 4.78701)
219    POINT (-74.08611 4.78701)
220    POINT (-74.08361 4.78701)
221    POINT (-74.08111 4.78701)
222    POINT (-74.07861 4.78701)
223    POINT (-74.07611 4.78701)
224    POINT (-74.07361 4.78701)
225    POINT (-74.07111 4.78701)
226    POINT (-74.06861 4.78701)
227    POINT (-74.06611 4.78701)
228    POINT (-74.06361 4.78701)
229    POINT (-74.06111 4.78701)
230    POI

In [46]:
# Preview every 400th cell centroid (from row 200) as lon/lat. The centroids
# are computed in the projected CRS of `gdf_pop` and only the resulting points
# are converted to EPSG:4326; taking centroids of geometries already in a
# geographic CRS is flagged by geopandas as likely inaccurate.
gdf_pop.centroid.to_crs(epsg=4326).iloc[200::400]


  """Entry point for launching an IPython kernel.


200     POINT (-74.07374 4.78903)
600     POINT (-74.12185 4.76069)
1000    POINT (-74.13078 4.74449)
1400    POINT (-74.21996 4.73234)
1800    POINT (-74.10944 4.72425)
2200    POINT (-74.02892 4.71615)
2600    POINT (-74.15823 4.70602)
3000    POINT (-74.22756 4.69590)
3400    POINT (-74.11691 4.68578)
3800    POINT (-74.12598 4.67161)
4200    POINT (-74.17492 4.65541)
4600    POINT (-74.18400 4.64124)
5000    POINT (-74.15322 4.62909)
5400    POINT (-74.13244 4.61695)
5800    POINT (-74.12166 4.60480)
6200    POINT (-74.10088 4.59265)
6600    POINT (-74.17009 4.58051)
7000    POINT (-74.06947 4.57039)
7400    POINT (-74.20866 4.55824)
7800    POINT (-74.11791 4.54609)
8200    POINT (-74.14623 4.51978)
8600    POINT (-74.10358 4.47727)
dtype: geometry