# Ease of social distancing index v1.0
## Notebook 1: creation of spatial units

Code written by Heather Chamberlain

This notebook contains the code needed to create the spatial units for which the ease of social distancing index is subsequently calculated. 

Once this notebook has been run to completion, the output should be a set of spatial units with a minimum area of 10,000m^2 (1 hectare). The building footprint datasets from Ecopia/Maxar, used in creating v1.0 of the ease of social distancing index, were produced as one output file of building footprints per UTM zone in a country, so the subsequent index calculation was also implemented per UTM zone. Consequently, the output of this notebook is a set of spatial units (polygons) for urban extents in the country specified. If the urban extents within a country span more than one UTM zone, the spatial units will be output in multiple files, with one per UTM zone. These will be the spatial units for which the ease of social distancing index values will be calculated.

In [None]:
import fnmatch
import os
import arcpy
import glob

from arcpy.sa import *
arcpy.CheckOutExtension("Spatial")

from pathlib import Path

### 1. Setup the necessary filepaths and directory structure

Specify country of interest using 3 letter ISO code, and specify path for home directory

In [None]:
#three-letter ISO code of the country of interest, e.g. iso = "SSD"
iso = "KEN"

#name of the country folder inside the home directory (placeholder - substitute before running)
folder_iso = "XXX"

#home directory (placeholder - substitute before running)
home_folder = Path("SUBSTITUTE/FOLDER/PATH/HERE")

print('ISO code specified: {}'.format(iso))

In [None]:
#version tag of the dataset
vers = 'v1_0'

#name of the working file geodatabase for the country of interest
gdb_name = "{}_working_v1_0_alpha.gdb".format(iso)

In [None]:
#country folder, and the folder with data for all 51 countries in Sub-Saharan Africa
c_folder = home_folder.joinpath(folder_iso)
ssa_folder = home_folder.joinpath("ssa51")

#input data, working and output directories for the country of interest
data_in_folder = c_folder.joinpath("DataIn")
w_folder = c_folder.joinpath("Working")
output_folder = c_folder.joinpath("Output")

#input sub-directories: urban extent (AOI) file, national boundary file and UTM zone file
uext_iso_folder = data_in_folder.joinpath("AOIs")
adm0_iso_folder = data_in_folder.joinpath("Adm0")
utm_iso_folder = data_in_folder.joinpath("UTMzone_polys")

In [None]:
#create the working file geodatabase unless its folder is already present
gdb_path = w_folder / gdb_name
if not os.path.isdir(str(gdb_path)):
    print("creating w_gdb")
    arcpy.CreateFileGDB_management(str(w_folder), gdb_name) #create gdb
else:
    print("w_gdb already exists")

#path of the working gdb, used for all intermediate outputs
w_gdb = gdb_path

### 2. Prepare the input files needed for creating the spatial units

Load the data needed to create the spatial units:
- Features such as roads, rivers, railways, supplemented by boundaries of various land use features (from OpenStreetMap)
    - It is assumed that OSM features are downloaded in Shapefile format from GeoFabrik 
- Urban extents (AOIs)
- National boundary for the country of interest
- Settlement extent boundaries
- UTM zones

#### a. OSM Features

In [None]:
#paths of all input feature datasets to be clipped to the urban extents; filled in by the cells below
feature_list = []

In [None]:
#folder holding the OSM (geofabrik) shapefiles
otherf_folder = data_in_folder / "Other"

#look for the roads, waterways and railways shapefiles and register each one found
for shp_name in os.listdir(otherf_folder):
    shp_path = str(otherf_folder / shp_name)
    if fnmatch.fnmatch(shp_name, 'gis_osm_roads_free_1.shp'):
        roads_poly = shp_path
    elif fnmatch.fnmatch(shp_name, 'gis_osm_waterways_free_1.shp'):
        rivers_poly = shp_path
    elif fnmatch.fnmatch(shp_name, 'gis_osm_railways_free_1.shp'):
        railways_poly = shp_path
    else:
        continue #not one of the files of interest
    feature_list.append(shp_path)

In [None]:
def _fclass_where(fclasses):
    """Return a where clause matching any of the given OSM fclass values.

    The clause is built from the whole list, so adding or removing a class
    in the list is automatically reflected in the selection.
    """
    return " or ".join("fclass = '{}'".format(fclass) for fclass in fclasses)


def _export_fclass_polys(in_polys, fclasses, out_name, label):
    """Select polygons of the given fclass values and copy them to the working gdb.

    in_polys: path of the OSM polygon shapefile to select from.
    fclasses: list of fclass values of interest.
    out_name: name of the output feature class inside w_gdb.
    label: short name used in the progress message.

    If at least one polygon is selected, the selection is written to
    w_gdb / out_name, the output path is appended to feature_list and the
    path is returned. Otherwise nothing is written and None is returned.
    """
    selection = arcpy.management.SelectLayerByAttribute(in_polys, "NEW_SELECTION", _fclass_where(fclasses), None)
    n_sel = int(arcpy.GetCount_management(selection)[0])
    if n_sel == 0:
        return None

    print("writing {} polys to file. In total there are {} polys selected".format(label, n_sel))

    #write selected polygons to file
    out_path = str(w_gdb / out_name)
    arcpy.CopyFeatures_management(selection, out_path, '', None, None, None)
    feature_list.append(out_path)
    return out_path


#locate the OSM landuse, water and points-of-interest polygon shapefiles
for file in os.listdir(otherf_folder):
    if fnmatch.fnmatch(file, 'gis_osm_landuse_a*.shp'):
        landuse_polys = str(otherf_folder / file)
    elif fnmatch.fnmatch(file, 'gis_osm_water_a*.shp'):
        water_polys = str(otherf_folder / file)
    elif fnmatch.fnmatch(file, 'gis_osm_pois_a*.shp'):
        poi_polys = str(otherf_folder / file)

#select features of interest
#features from landuse file
landuse_fclass = ['industrial', 'cemetery', 'military', 'quarry', 'park']
landuse_fclass_polys = _export_fclass_polys(landuse_polys, landuse_fclass, "osm_landuse_fclass_polys", "landuse_fclass")

#features from water file
water_fclass = ['wetland', 'water', 'reservoir']
water_fclass_polys = _export_fclass_polys(water_polys, water_fclass, "osm_water_fclass_polys", "water_fclass")

#features from pois file
pois_fclass = ['golf_course', 'park', 'university', 'hospital']
poi_fclass_polys = _export_fclass_polys(poi_polys, pois_fclass, "osm_poi_fclass_polys", "poi_fclass")


In [None]:
#flag: 1 = include OSM residential area polygons, change to 0 to exclude them from the input data polygons
residential_polys = 1

if residential_polys != 1:
    print("residential area polygons from OSM are NOT being included")
else:
    print("residential area polygons from OSM ARE being included...")

    #select the residential polygons from the OSM landuse file
    expression = "fclass = 'residential'"
    selected = arcpy.management.SelectLayerByAttribute(landuse_polys, "NEW_SELECTION", expression, None)
    n_selected = int(arcpy.GetCount_management(selected)[0])

    if n_selected > 0:
        print("writing residential_fclass polys to file. In total there are {} polys selected".format(n_selected))

        #write selected polygons to the working gdb
        residential_fclass_polys = str(w_gdb / "osm_residential_fclass_polys")
        arcpy.CopyFeatures_management(selected, residential_fclass_polys, '', None, None, None)

In [None]:
#iso codes of countries which need to use other_osm2_polys.shp / other_osm2_lines.shp
other_osm_list = ['ZMB', 'NGA', 'BWA', 'SEN', 'MDG', 'COD', 'ETH', 'KEN', 'ZAF', 'CIV', 'CMR', 'ERI', 'TZA', 'UGA']
uses_osm2_files = iso.upper() in other_osm_list

for shp_name in os.listdir(otherf_folder):
    shp_path = str(otherf_folder / shp_name)

    if uses_osm2_files:
        #countries in the list: register the other_osm2 polygon / line files
        if fnmatch.fnmatch(shp_name, 'other_osm2_polys.shp'):
            print(shp_name)
            other_polys = shp_path
            feature_list.append(other_polys)
        elif fnmatch.fnmatch(shp_name, 'other_osm2_lines.shp'):
            print(shp_name)
            other_lines = shp_path
            feature_list.append(other_lines)
    elif fnmatch.fnmatch(shp_name, '*_w.shp'):
        #all other countries: register the *_w file and repair its geometry
        print(shp_name)
        otherf_way = shp_path
        feature_list.append(otherf_way)
        arcpy.management.RepairGeometry(otherf_way, "DELETE_NULL", "OGC")
    elif fnmatch.fnmatch(shp_name, '*_r.shp'):
        #all other countries: register the *_r file and repair its geometry
        print(shp_name)
        otherf_rel = shp_path
        feature_list.append(otherf_rel)
        arcpy.management.RepairGeometry(otherf_rel, "DELETE_NULL", "OGC")
    

#### b. Urban extents

In [None]:
#path of the urban extent (AOI) polygons for the country of interest
uext_file_name = "uext_polys_{}.shp".format(iso)
uext_poly_iso = str(uext_iso_folder.joinpath(uext_file_name))
print(uext_poly_iso)

#### c. National boundary for the country of interest

In [None]:
#adm0 poly for each country has been pre-processed so just need to specify file path
adm0_file_name = "adm0_poly_{}.shp".format(iso)
adm0_poly = str(adm0_iso_folder.joinpath(adm0_file_name))
print(adm0_poly)

#### d. Settlement extent boundaries

In [None]:
#specify path of merged bua and ssa settlement extent polys
#the folder is taken from the country's input data directory (data_in_folder), like every other
#input dataset, so that it still resolves when the country folder name (folder_iso) differs from iso
sett_ext_folder = data_in_folder / "SettlementExtents"
iso_bua_ssa = str(sett_ext_folder / (str(iso) + "_bua_ssa_extents.shp"))

#### e. UTM zones

In [None]:
#folder and shapefile holding the polygons of all UTM zones
utm_folder = home_folder.joinpath("UTM")
utm_zones = str(utm_folder.joinpath("UTM_zones_all.shp"))

### 3. Processing to create initial polygons from input features, constrained to urban extents

Clip uext polys to adm0 polygon

In [None]:
#urban extent polygons clipped to the national boundary (adm0) polygon
uext_adm0_name = "uext_polys_{}_adm0.shp".format(iso)
uext_poly = str(uext_iso_folder.joinpath(uext_adm0_name))
arcpy.analysis.Clip(uext_poly_iso, adm0_poly, uext_poly, None)

Extract OSM features (roads, rivers, railways etc) to AOI and create polygon features. Do not include residential areas from OSM

In [None]:
#firstly create version of polygons without OSM residential area polygons
clip_area = str(uext_poly)

clip_features = []

for file in feature_list:
    data_in = file
    #print(data_in)
    data_out = str(w_gdb / Path(file).stem) + "_AOIclip"
    #print(data_out)
    
    if arcpy.Exists(data_out):
        print("file already exists - skipped")
    else:
        print("clipping")
        arcpy.analysis.Clip(data_in, clip_area, data_out, None)
    
    clip_features.append(data_out)

#add the uext poly to list 
clip_features.append(uext_poly)
#print(clip_features)

#Run feature to polygon to create polygon output
in_features = clip_features
area_polys_i1a = str(w_gdb / "area_polys_i1a")
arcpy.management.FeatureToPolygon(in_features, area_polys_i1a, None, "NO_ATTRIBUTES", None)

Now repeat the same process, but also include the OSM residential area polygons. In this step a second set of polygons is created. This second set contains a larger number of polygons because of the additional sub-division created by the addition of the OSM residential area polygons. This second set of polygons will only be used in the locations on the urban fringe where the polygons created from the previous codeblock are too large. The following steps will identify these and substitute in the sub-divided polygons.

In [None]:
#add the OSM residential polygons to the inputs, but only if they are being included AND were
#actually exported (residential_fclass_polys is only defined when at least one polygon was selected)
residential_src = None
if residential_polys == 1:
    try:
        residential_src = residential_fclass_polys
    except NameError:
        print("no residential polygons were written to file - area_polys_r is created without them")

if residential_src is not None and residential_src not in feature_list:
    feature_list.append(residential_src)

for file in feature_list:
    data_in = file
    #print(data_in)
    data_out = str(w_gdb / Path(file).stem) + "_AOIclip"
    #print(data_out)

    if arcpy.Exists(data_out):
        print("file already exists - skipped")
    else:
        print("clipping")
        arcpy.analysis.Clip(data_in, clip_area, data_out, None)

    #clip_features already holds the outputs from the previous step; do not list any dataset twice
    if data_out not in clip_features:
        clip_features.append(data_out)

#print(clip_features)

#Run feature to polygon to create polygon output, this time with OSM residential polygons also
in_features = clip_features
area_polys_r = str(w_gdb / "area_polys_r")
arcpy.management.FeatureToPolygon(in_features, area_polys_r, None, "NO_ATTRIBUTES", None)


Calculate the area of each polygon and select those which are very large (at least 100,000m^2)

In [None]:
#add blank field for polygon area
area_field = "P_AREA"
arcpy.AddField_management(area_polys_i1a, area_field, "DOUBLE")

#calculate area of each polygon in area_polys_i1 dataset (without residential polys)
calc_field = "P_AREA AREA_GEODESIC"
arcpy.management.CalculateGeometryAttributes(area_polys_i1a, calc_field, '', "SQUARE_METERS", None, "SAME_AS_INPUT")

#select polygons with area >100,000m2
expression = "P_AREA >= 100000"
area_polys_i1a_areasel = arcpy.management.SelectLayerByAttribute(area_polys_i1a, "NEW_SELECTION", expression, None)

Select the subset of very large polygons (at least 100,000m^2) that are crossed by the outline of BUA or SSA polygons, i.e. those considered to be on the urban fringe and in need of further sub-division if possible.

In [None]:
#from already selected, select the subset which are crossed by the outline of BUA or SSA polygons (sub)
area_polys_i1a_areasel_sub = arcpy.management.SelectLayerByLocation(area_polys_i1a_areasel, "CROSSED_BY_THE_OUTLINE_OF", iso_bua_ssa, None, "SUBSET_SELECTION", "NOT_INVERT")

#export selected (subset x)
out_file_x = str(w_gdb / "area_polys_i1_subset_x")
arcpy.CopyFeatures_management(area_polys_i1a_areasel_sub, out_file_x, '', None, None, None)

Invert the selection so that all other polygons (not needing to be sub-divided) are selected and save these to file

In [None]:
#invert selection 
area_polys_i1a_areasel_sub_inv = arcpy.management.SelectLayerByAttribute(area_polys_i1a_areasel_sub, "SWITCH_SELECTION", '', "NON_INVERT")

#export inverted selection (subset y)
out_file_y = str(w_gdb / "area_polys_i1_subset_y")
arcpy.CopyFeatures_management(area_polys_i1a_areasel_sub_inv, out_file_y, '', None, None, None)

Select the subset of polygons (created with the addition of OSM residential areas) that are located on the urban fringe that need to be substituted in for very large polygons on the urban fringe. Save these to file, and then merge in to create a new complete set of polygons.

In [None]:
#from area_polys_r (polys including residential area boundaries), select those that are located within subset x
area_polys_r_withinx_sub = arcpy.management.SelectLayerByLocation(area_polys_r, "WITHIN", out_file_x, None, "NEW_SELECTION", "NOT_INVERT")

#export selected (subset z)
out_file_z = str(w_gdb / "area_polys_i1_subset_z")
arcpy.CopyFeatures_management(area_polys_r_withinx_sub, out_file_z, '', None, None, None)

#merge subset y (out_file_y) and subset z (out_file_z)
merge_list = [out_file_y, out_file_z]
area_polys_i1 = str(w_gdb / "area_polys_i1")
arcpy.management.Merge(merge_list, area_polys_i1)

Clean up attribute table to remove unnecessary fields

In [None]:
#fields that are kept; every other field is queued for deletion
protected_fields = ("OBJECTID", "Shape", "Shape_Area", "Shape_Length")

field_list_del = []
fields = arcpy.ListFields(area_polys_i1)
for field in fields:
    if field.name in protected_fields:
        print("{} is not deletable".format(field.name))
    else:
        field_list_del.append(field.name)

print("There are {} fields to be deleted".format(len(field_list_del)))
if field_list_del:
    arcpy.DeleteField_management(area_polys_i1, field_list_del) #delete fields

#report what is left in the attribute table
fields = arcpy.ListFields(area_polys_i1)
print("Remaining fields:")
for field in fields:
    print("{0} is a type of {1} with a length of {2}".format(field.name, field.type, field.length))
    

Add fields with urban extent ID (uext_id) and country ISO code (adm0_ISO3)

In [None]:
#spatial join to add uext_id and adm0_ISO3 fields
area_polys_i = str(w_gdb / "area_polys_initial")
arcpy.SpatialJoin_analysis(area_polys_i1, uext_poly, area_polys_i)

#remove unnecessary fields
field_list_del2 = ["TARGET_FID", "Join_Count"]
arcpy.DeleteField_management(area_polys_i, field_list_del2)

### 4. Project the initial polygons to the appropriate UTM zone based on their location

Create list of UTM zones which contain polygons

In [None]:
utmzones_folder = c_folder / "UTMzones"

#layer of all UTM zone polygons, with the zones containing polygons of area_polys_initial selected
utm_zones_lyr = str(w_gdb / "utm_zones_lyr")
arcpy.MakeFeatureLayer_management(utm_zones, utm_zones_lyr)
arcpy.SelectLayerByLocation_management(utm_zones_lyr, 'CONTAINS', area_polys_i)

#number of selected UTM zones
s_count = int(arcpy.GetCount_management(utm_zones_lyr)[0])
print(s_count)

#collect the UTM zone ID of each selected zone
with arcpy.da.SearchCursor(utm_zones_lyr, 'utm_zone') as cursor:
    utm_zones_list = [row[0] for row in cursor]
print(utm_zones_list)

Subset the polygons into UTM zones, by selecting all polygons within the boundary of UTM zones, and export polygons into subsets for each UTM zone.

In [None]:
print("for " + str(iso) + " split data into " + str(s_count) + " UTM zones: " + str(utm_zones_list))

#for each UTM zone number in the list, subset all polygons within boundary of UTM zone
area_polys_i_lyr = str(area_polys_i + "_lyr")
arcpy.MakeFeatureLayer_management(area_polys_i, area_polys_i_lyr)

#folder holding one UTMzone_<zone>.shp per zone; the same folder is used both to list the files and
#to open them (change to utm_folder if not pre-processed utm zones for each country)
utm_poly_folder = utm_iso_folder

for zone in utm_zones_list:
    query = "UTMzone_" + str(zone) + ".shp"
    for file in os.listdir(utm_poly_folder):
        if fnmatch.fnmatch(file, query):
            utm_lyr = str(w_gdb / str(Path(file).stem + "_lyr"))
            in_file = str(utm_poly_folder / file)
            arcpy.MakeFeatureLayer_management(in_file, utm_lyr)

            #select area polygons with center in UTM zone
            arcpy.SelectLayerByLocation_management(area_polys_i_lyr, 'HAVE_THEIR_CENTER_IN', utm_lyr)
            n_polys = int(arcpy.GetCount_management(area_polys_i_lyr)[0])

            #write selected polygons to file
            area_polys_i_zones_wgs84 = str(w_gdb / ("area_polys_i_" + str(zone) + "_WGS84"))
            arcpy.CopyFeatures_management(area_polys_i_lyr, area_polys_i_zones_wgs84, '', None, None, None)

            print("There are {} polygons with their centre within {}".format(n_polys, file))

For the subset of polygons within each UTM zone boundary, project the polygons to that UTM zone projection

In [None]:
for zone in utm_zones_list:
    #name of the UTM coordinate system for this zone
    utm_id = 'WGS 1984 UTM Zone {}'.format(zone)
    print(utm_id)

    #output coordinate system
    outCS = arcpy.SpatialReference(utm_id)

    #project the WGS84 subset of this zone to the zone's UTM coordinate system
    area_polys_i_zones_wgs84 = str(w_gdb.joinpath("area_polys_i_{}_WGS84".format(zone)))
    file_out = str(w_gdb.joinpath("area_polys_i_zones_utm{}".format(zone)))
    arcpy.Project_management(area_polys_i_zones_wgs84, file_out, outCS)

    #add the area (square metres) of each projected polygon
    arcpy.management.AddGeometryAttributes(file_out, "AREA", "METERS", "SQUARE_METERS", outCS)

### 5. Processing  of the initial projected polygons to create spatial units

First, identify any island polygons (those with zero neighbours)

In [None]:
#for each polygon, identify all neighbouring polygons
for zone in utm_zones_list:
    #table of neighbouring polygon pairs for the projected polygons of this zone
    in_features = str(w_gdb.joinpath("area_polys_i_zones_utm{}".format(zone)))
    out_table = in_features + "_nneighbours"
    arcpy.PolygonNeighbors_analysis(in_features, out_table)

    #summarise the neighbour table so that there is 1 row per source polygon
    out_table_smry = out_table + "_summary"
    stats_fields = [["src_OBJECTID", "COUNT"]]
    arcpy.Statistics_analysis(out_table, out_table_smry, stats_fields, "src_OBJECTID")

Check the area of any island polygons identified. If the area is less than specified area threshold then these polygons will need to be excluded from the subsequent dissolve step.

In [None]:
#For each UTM zone: attach the neighbour count to every polygon, identify "island" polygons
#(no neighbours) smaller than 10,000m^2, write them to separate files and write the remaining
#polygons to area_polys_i_zones_utm_nislands<zone>.
#join the neighbour output to the polygons - if no joined features, then no neighbour

islands_to_merge = [] #paths of the tiny_islands_<zone> files written below
temp_island_list = [] #paths of the temp_i<zone> files written below

for zone in utm_zones_list:
    
    in_features = str(w_gdb / ("area_polys_i_zones_utm" + str(zone)))
    out_table_smry = str(in_features) + "_nneighbours_summary"
    
    #add blank field for number of neighbours (to be calculated after join)
    n_neighbours_field = "n_NEIGHBOURS"
    arcpy.AddField_management(in_features, n_neighbours_field, "DOUBLE")
    
    #join number of neighbours - if after join values are null then no neighbours
    in_field = "OBJECTID"
    join_table = out_table_smry
    join_field = "src_OBJECTID"
    temp = arcpy.AddJoin_management(in_features, in_field, join_table, join_field)
    
    #update n_neighbours_field with number of neighbours (FREQUENCY field of the joined summary table)
    calc_expression = "!" + str(Path(join_table).stem) + ".FREQUENCY!"
    arcpy.management.CalculateField(temp, n_neighbours_field, calc_expression, "PYTHON3", '', "DOUBLE")
    
    #remove join
    arcpy.RemoveJoin_management(temp)
    
    #write to file
    polys_n_neighbours = str(w_gdb / (str(Path(in_features).stem) + "_nn"))
    arcpy.CopyFeatures_management(temp, polys_n_neighbours, '', None, None, None)
    print("written to file")
    
    #select any polygons with no neighbours - islands
    where = str(n_neighbours_field) + " IS NULL"
    selected = arcpy.management.SelectLayerByAttribute(polys_n_neighbours, "NEW_SELECTION", where, None)
    
    #remove islands with area of at least 10,000m^2 from the selection, leaving only islands with
    #area <10,000m^2 selected before subsequent eliminate step
    where = "POLY_AREA >= 10000"
    selected1 = arcpy.management.SelectLayerByAttribute(selected, "REMOVE_FROM_SELECTION", where, None)
    
    n_selected = int(arcpy.GetCount_management(selected1)[0]) #number of islands with area less than 10,000m^2
    print('n_selected: {}'.format(n_selected))
    
    if n_selected > 0:
        
        #from existing selection of "islands", now select any which have neighbours in a polygon file from a neighbouring utm zone
        #NOTE(review): this subset selection is applied to selected1; presumably selected1 and
        #islands_check then refer to the same layer and selection, so the later uses of selected1
        #below would see the reduced selection - confirm
        islands_check = arcpy.management.SelectLayerByLocation(selected1, "SHARE_A_LINE_SEGMENT_WITH", area_polys_i, None, "SUBSET_SELECTION", "NOT_INVERT")
        
        n_sub_sel = int(arcpy.GetCount_management(islands_check)[0]) #number of islands which do actually have a shared line segment
        print('n_sub_sel: {}'.format(n_sub_sel))
        
        #number of islands without a shared line segment; only printed (the check on it below is commented out)
        n_sel = int(n_selected) - int(n_sub_sel)
        print('n_sel: {}'.format(n_sel))
        
        #and write out selected to file
        temp_i = str(w_gdb / ("temp_i" + str(zone)))
        arcpy.CopyFeatures_management(islands_check, temp_i, '', None, None, None)
        
        temp_island_list.append(temp_i)
        print('temp_island_list: {}'.format(temp_island_list))
        

        #if n_sel > 0:
        print("there are islands with area less than 10,000m2")

        #write selected polygons (tiny islands) to file 
        tiny_islands = str(w_gdb / ("tiny_islands_" + str(zone)))
        arcpy.CopyFeatures_management(selected1, tiny_islands, '', None, None, None)

        islands_to_merge.append(tiny_islands)

        #switch selection to just the remaining polygons (all except islands) and write to file
        selected_r = arcpy.SelectLayerByAttribute_management(selected1, "SWITCH_SELECTION")
        polys_r = str(w_gdb / ("area_polys_i_zones_utm_nislands" + str(zone)))
        arcpy.CopyFeatures_management(selected_r, polys_r, '', None, None, None)

    else:
        #no tiny islands in this zone: write out the file with all polygons
        #print('else2 - write out file.............')
        polys_r = str(w_gdb / ("area_polys_i_zones_utm_nislands" + str(zone)))
        arcpy.CopyFeatures_management(polys_n_neighbours, polys_r, '', None, None, None)
    
    print('')
    
print('islands_to_merge: {}'.format(islands_to_merge))
              
#report whether any tiny island files were written
if len(islands_to_merge) == 0:
    print('there are no tiny islands with area less than 10,000m2')
else:
    print('there are tiny islands with area less than 10,000m2, these have been written out to file(s):')
    print(islands_to_merge)

print('temp_island_list: {}'.format(temp_island_list))   

In [None]:
#If there are islands created by polygons spanning UTM zones...
#For every combination of UTM zone polygon file and temp island file, the islands sharing a line
#segment with polygons of that zone are appended to that zone's polygon file.
a_list = [] #paths of the tempA_* files written below (not used again in this notebook)

if len(temp_island_list) > 0:
    for zone in utm_zones_list: #iterate through polys associated with each UTM zone
        print(zone)
        polygons = str(w_gdb / ("area_polys_i_zones_utm_nislands" + str(zone)))
        
        for islands in temp_island_list:
            print(islands)
            
            #check if any of the islands share a line segment with any of the other polygons (in each UTM zone)
            islands_check = arcpy.management.SelectLayerByLocation(islands, "SHARE_A_LINE_SEGMENT_WITH", polygons, None, "NEW_SELECTION", "NOT_INVERT")
            
            ni_sel = int(arcpy.GetCount_management(islands_check)[0]) #number of island polygons neighbouring with polygons in particular UTM zone
            print('ni_sel: {}'.format(ni_sel))
            
            if ni_sel > 0:
                print('islands file: {}, has neighbours in file {}'.format(islands, polygons))
                
                #write out selected island polygons to file (A)
                islands_A = str(w_gdb / ("tempA_" + str(Path(islands).stem) + "_" + str(zone)))
                arcpy.CopyFeatures_management(islands_check, islands_A, '', None, None, None)
                
                #invert selection and write any remaining islands to file (B)
                islands_B = str(w_gdb / ("tempB_" + str(Path(islands).stem) + "_" + str(zone)))
                islands_check_z = arcpy.management.SelectLayerByLocation(islands_check, "INTERSECT", None, None, "SWITCH_SELECTION", "NOT_INVERT")
                arcpy.CopyFeatures_management(islands_check_z, islands_B, '', None, None, None)
                
                #append selected island polygons (A) into neighbouring area polygons dataset
                #NOTE(review): the islands file and the polygons file may come from different UTM
                #zones - confirm that the append handles the differing coordinate systems as intended
                arcpy.management.Append(islands_A, polygons)
                
                a_list.append(islands_A)
                
            else:
                print('no neighbours for islands file: {} with file {}'.format(islands, polygons))  
            
            print('')

Select any unit polygons that are smaller than the specified threshold (10,000m^2), and combine these with neighbouring polygons. Repeat this process until no polygons with area less than the threshold remain, or until no further polygons can be merged. The output of this step should be a finalised polygon dataset (for each UTM zone).

In [None]:
#paths of the final spatial unit files, one per UTM zone
spatial_unit_file_list = []

#minimum area (m^2) of a spatial unit; smaller polygons are merged into a neighbouring polygon
min_unit_area = 10000
small_polys_where = "POLY_AREA < {}".format(min_unit_area)
large_polys_where = "POLY_AREA >= {}".format(min_unit_area)

for zone in utm_zones_list:

    polys_in = str(w_gdb / ("area_polys_i_zones_utm_nislands" + str(zone)))

    print("polys_in: {}".format(polys_in))

    #coordinate system of this zone, used when recalculating areas (same for every iteration)
    utm_id = 'WGS 1984 UTM Zone ' + str(zone)
    outCS = arcpy.SpatialReference(utm_id)

    #select polygons with area lt 10,000m2
    selected = arcpy.management.SelectLayerByAttribute(polys_in, "NEW_SELECTION", small_polys_where, None)

    #get count of number of polys selected
    poly_count = int(arcpy.GetCount_management(selected)[0])
    print("Initially, in this file there are {} polygons with area less than 10000m^2".format(poly_count))

    #counts after each iteration, starting with the initial count; used to detect when no progress is made
    poly_count_list = [poly_count]

    #if there is nothing to eliminate, the input polygons are used as the output
    polys_out = polys_in

    i = 0
    while poly_count > 0:
        i = i + 1
        print('i = ' + str(i))

        #run eliminate: merge each selected polygon into a neighbouring polygon
        polys_out = str(polys_in) + "_elim" + str(i)
        arcpy.management.Eliminate(selected, polys_out, "LENGTH", '', None)

        #recalc area
        calc_field = "POLY_AREA AREA"
        arcpy.management.CalculateGeometryAttributes(polys_out, calc_field, '', "SQUARE_METERS", outCS, "SAME_AS_INPUT")

        #select polygons with area lt 10,000m2
        selected = arcpy.management.SelectLayerByAttribute(polys_out, "NEW_SELECTION", small_polys_where, None)

        #get count of number of polys selected
        poly_count = int(arcpy.GetCount_management(selected)[0])
        print("After iteration {}, there are {} remaining polygons with area less than 10000m^2".format(i, poly_count))

        poly_count_list.append(poly_count)

        #in case of remaining island polygons that can't be dissolved: the count did not change
        if poly_count == poly_count_list[i - 1]:
            print("poly_count = {}. There are {} remaining with area less than 10,000m^2, however "
                  "there are no further options for eliminating these polygons by merging with "
                  "neighbours.".format(poly_count, poly_count))

            #write out remaining polys (islands) to file
            tiny_islands2 = str(w_gdb / ("tiny_islands2_" + str(zone)))
            arcpy.CopyFeatures_management(selected, tiny_islands2, '', None, None, None)

            islands_to_merge.append(tiny_islands2)

            print("loop exited")
            break #stop the eliminate loop; poly_count keeps the number of polygons left over

    #report the number of small polygons left after the last iteration
    print("After iteration {}, there are {} remaining polygons with area less than 10000m^2".format(i, poly_count))
    print("islands to merge list: {}".format(islands_to_merge))

    #write out final output from eliminate iterations: only polygons meeting the minimum area
    selected_z = arcpy.management.SelectLayerByAttribute(polys_out, "NEW_SELECTION", large_polys_where, None)
    area_polys_elimf = str(w_gdb / ("area_polys_elimf_UTM" + str(zone)))
    arcpy.CopyFeatures_management(selected_z, area_polys_elimf, '', None, None, None)

    spatial_unit_file_list.append(area_polys_elimf)
    print()

print("Spatial unit files: {}".format(spatial_unit_file_list))

Once this notebook has been run to completion, the output should be a set of spatial units with a minimum area of 10,000m^2 (1 hectare). The building footprint datasets from Ecopia/Maxar, used in creating v1.0 of the ease of social distancing index, were produced as one output file of building footprints per UTM zone in a country, so the subsequent index calculation was also implemented per UTM zone. Consequently, the output of this notebook is a set of spatial units (polygons) for urban extents in the country specified. If the urban extents within a country span more than one UTM zone, the spatial units will be output in multiple files, with one per UTM zone. These will be the spatial units for which the ease of social distancing index values will be calculated.