In [33]:
import psycopg2, psycopg2.extras, psycopg2.pool, pickle, json, matplotlib, os, sys, shapely
from multiprocessing import Pool, Manager; from collections import OrderedDict; from shapely.geometry import mapping, shape; from sqlalchemy import create_engine
import matplotlib.pyplot as plt; import numpy as np; import pandas as pd; import geopandas as gpd; import seaborn as sns
%matplotlib inline

# Latest Tile Statistics

This notebook queries the `latest-tile-data-v3` database to identify tiles with specific characteristics, such as:
- named road km
- total road km
- total buildings
- number of buildings with more than `building=yes`

This allows for further _intrinsic quality_ comparison between the histories of tiles that are currently quantitatively similar on the map.

In [34]:
#Create the engine for Pandas to query SQL with
engine = create_engine('postgresql://anderstj@127.0.0.1:5432/latest-tile-data-v3')

In [35]:
study_tiles = json.loads(open('study_tiles.json','r').read())
print("Found {0} tiles.".format(len(study_tiles)))

Found 9 tiles.


In [36]:
# Get study_tile stats
def get_tile_df(quadkey):
    q_s = """SELECT * FROM roads, buildings, geometry WHERE 
        roads.quadkey = buildings.quadkey AND 
        geometry.quadkey = roads.quadkey AND 
        roads.quadkey = '{0}'""".format(quadkey)
    df = pd.read_sql_query(q_s,con=engine)
    
    df['named_road_ratio'] = (df.named_edited_km + df.named_new_km) / df.total_km
    df['more_building_ratio'] = (df.new_buildings_more + df.edited_buildings_more) / df.total_buildings
    return df

for tile in study_tiles:
    print(tile['name'], end="...")
    tile['characteristics'] = get_tile_df(tile['quad'])

Port Au Prince, Haiti...Trisuli Bazar, Nepal...Tacloban, Philippines...Kenema, Sierra Leone...Monrovia, Liberia...Kathmandu, Nepal...Heidelberg, Germany...London, UK...Manhattan, NY...

In [37]:
def get_similar_tiles(tile, tolerance=0.01, ratio_tol=0.01):
    """Given a tile, find similar tiles in the _current database_ within specific thresholds."""
    df = tile['characteristics']
    min_roads = (df.total_km - df.total_km*tolerance).values[0]
    max_roads = (df.total_km + df.total_km*tolerance).values[0]
    min_buildings = (df.total_buildings - df.total_buildings*tolerance).values[0]
    max_buildings = (df.total_buildings + df.total_buildings*tolerance).values[0]
    min_roads_ratio = (df.named_road_ratio - df.named_road_ratio*ratio_tol).values[0]
    max_roads_ratio = (df.named_road_ratio + df.named_road_ratio*ratio_tol).values[0]    
    min_building_ratio = (df.more_building_ratio - df.more_building_ratio*ratio_tol).values[0]
    max_building_ratio = (df.more_building_ratio + df.more_building_ratio*ratio_tol).values[0]

    query_string = """SELECT * FROM roads, buildings, geometry WHERE roads.quadkey = buildings.quadkey AND geometry.quadkey = roads.quadkey AND 
    roads.total_km > {0} AND roads.total_km < {1} AND roads.quadkey != '{4}' AND 
    buildings.total_buildings > {2} AND buildings.total_buildings < {3} AND
    (roads.named_edited_km + roads.named_new_km)/roads.total_km > {5} AND 
    (roads.named_edited_km + roads.named_new_km)/roads.total_km < {6} AND
    (buildings.edited_buildings_more + buildings.new_buildings_more)::float / buildings.total_buildings::float > {7} AND
    (buildings.edited_buildings_more + buildings.new_buildings_more)::float / buildings.total_buildings::float < {8}
    """.format(min_roads, max_roads, min_buildings, max_buildings, df.quadkey.values[0][0], min_roads_ratio, max_roads_ratio, min_building_ratio, max_building_ratio)
    return pd.read_sql_query(query_string,con=engine)

# Test approach
x = get_similar_tiles(study_tiles[2], tolerance = 0.25, ratio_tol=0.25)
print(len(x))
x.head()

1


Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,total_new_buildings,total_edited_buildings,new_buildings_more,new_buildings_yes,edited_buildings_more,edited_buildings_yes,quadkey.2,coordinates,type
0,132303033313,99.9365,3.75274,218.701,319.301,100.6,118.764,96.8474,132303033313,27667,3366,24301,1170,2196,16127,8174,132303033313,"[[[120.849609375, 14.093957177836227], [120.84...",Polygon


## Identify Similar Tiles in the Database

For each of the study tiles, how many other tiles exist with similar characteristics: 
 - Number of Buildings
 - km of roads
 - % of named roads
 - % of buildings with more than just `building==yes`

In [38]:
# study_tiles = [study_tiles[x] for x in [0,1,2,4]]

In [39]:
for tile in study_tiles:
    print(tile['name'])
    sim_tiles = get_similar_tiles(tile, tolerance=0.25, ratio_tol=0.35)
    print("Similar Tiles: {0}\n".format(len(sim_tiles)))
    tile['similar_tiles'] = sim_tiles
    print(sim_tiles.quadkey)

Port Au Prince, Haiti
Similar Tiles: 8

        quadkey       quadkey       quadkey
0  120213000002  120213000002  120213000002
1  120221033031  120221033031  120221033031
2  120221122003  120221122003  120221122003
3  122101130101  122101130101  122101130101
4  033110210131  033110210131  033110210131
5  033333010032  033333010032  033333010032
6  132232231103  132232231103  132232231103
7  021333011030  021333011030  021333011030
Trisuli Bazar, Nepal
Similar Tiles: 4

        quadkey       quadkey       quadkey
0  123131220022  123131220022  123131220022
1  123131221323  123131221323  123131221323
2  123131221321  123131221321  123131221321
3  123130333100  123130333100  123130333100
Tacloban, Philippines
Similar Tiles: 1

        quadkey       quadkey       quadkey
0  132303033313  132303033313  132303033313
Kenema, Sierra Leone
Similar Tiles: 0

Empty DataFrame
Columns: [quadkey, quadkey, quadkey]
Index: []
Monrovia, Liberia
Similar Tiles: 1

        quadkey       quadkey       qua

In [40]:
def convert_to_gpd(df):
    """ Given a pandas dataframe with a `geometry` column, return a GeoDataFrame
        Note: This function creates the geometry from JSON string and then gets the envelope, this ensures proper winding order.
    """
    df['geometry'] = df.coordinates.apply(lambda coords: shape({"type":"Polygon", "coordinates":json.loads(coords)}).exterior.envelope)
    
    return gpd.GeoDataFrame(df)
y = convert_to_gpd(x)

### Write GeoJSON feature collections of similar tiles for each study tile

In [41]:
directory = '/data/www/jennings/iscram/'
if not os.path.exists(directory):
    os.makedirs(directory)
for tile in study_tiles:
    print(tile['name'])
    tile_gpd = convert_to_gpd(tile['similar_tiles'])
    filename = directory+"/"+tile['name']+"_sim_tiles.geojson"
    if os.path.exists(filename):
        os.remove(filename)
    try:
        tile_gpd.to_file(directory+"/"+tile['name']+"_sim_tiles.geojson", driver="GeoJSON")
    except:
        print("\tError. Length of dataframe: {0}".format(len(tile_gpd)))

Port Au Prince, Haiti
Trisuli Bazar, Nepal
Tacloban, Philippines
Kenema, Sierra Leone
	Error. Length of dataframe: 0
Monrovia, Liberia
Kathmandu, Nepal
	Error. Length of dataframe: 0
Heidelberg, Germany
London, UK
Manhattan, NY


### Create GeoJSON file for all features

In [42]:
feat_coll = {"type":"FeatureCollection","features":[]}
for tile in study_tiles:
    tile['characteristics']['name'] = tile['name']
    feat_coll['features'].append( json.loads(convert_to_gpd(tile['characteristics']).to_json())['features'][0] )

with open(directory+"study_tiles.geojson",'w') as oFile:
    json.dump(feat_coll, oFile)

### The study tiles can be [visualized here](http://www.townsendjennings.com/geojson-polygons?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/iscram/study_tiles.geojson#1.9/14.6/8.5)
(Must be on the CU network)

## Create Single JSON record with study tiles and comparable tiles

In [43]:
output = []
for tile in study_tiles: 
    sim_quads = [x[0] for x in tile['similar_tiles'].quadkey.values]
    output.append(
        {'name':tile['name'],
         'quad':tile['quad'],
         'compare_tiles': [{'quad':x} for x in sim_quads]})
with open("study_tiles_with_compare_tiles.json",'w') as oFile:
    json.dump(output,oFile)

In [44]:
print(study_tiles[3]['name'])
study_tiles[3]['characteristics']

Kenema, Sierra Leone


Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,new_buildings_yes,edited_buildings_more,edited_buildings_yes,quadkey.2,coordinates,type,named_road_ratio,more_building_ratio,name,geometry
0,33330200220,51.8717,4.15538,184.462,322.26,137.798,132.59,133.643,33330200220,16469,...,15758,32,171,33330200220,"[[[-11.25, 7.7980785313553085], [-11.25, 7.885...",Polygon,0.173857,0.032789,"Kenema, Sierra Leone","POLYGON ((-11.25 7.798078531355308, -11.162109..."


In [64]:
from IPython import display
for x in study_tiles: 
    display.display((x['name']))
    df = x['characteristics']

    df['total_more_buildings']   = df.edited_buildings_more+df.new_buildings_more
    df['more_buildings_percent'] = df.total_more_buildings / df.total_buildings * 100

    df['total_named_km']   = df.named_edited_km+df.named_new_km
    df['named_km_percent'] = df.total_named_km / df.total_km * 100

    display.display(df)    
    display.display(df[['total_buildings','more_buildings_percent','total_km','named_km_percent']])

'Port Au Prince, Haiti'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,32211203001,532.033,11.403,844.578,1006.15,161.576,312.546,150.173,32211203001,12141,...,"[[[-72.333984375, 18.562947442888305], [-72.33...",Polygon,0.540114,0.068693,"Port Au Prince, Haiti","POLYGON ((-72.333984375 18.56294744288831, -72...",834,6.869286,543.436,54.01143


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,12141,6.869286,1006.15,54.01143


'Trisuli Bazar, Nepal'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,123131221200,10.1151,0.318314,242.562,324.699,82.1367,232.447,81.8184,123131221200,7596,...,"[[[85.078125, 27.91676664124907], [85.078125, ...",Polygon,0.032133,0.16456,"Trisuli Bazar, Nepal","POLYGON ((85.078125 27.91676664124907, 85.1660...",1250,16.456029,10.433414,3.213257


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,7596,16.456029,324.699,3.213257


'Tacloban, Philippines'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,132312223332,84.9569,6.22306,199.199,257.339,58.1398,114.242,51.9167,132312223332,29573,...,"[[[124.98046875, 11.178401873711792], [124.980...",Polygon,0.354318,0.710682,"Tacloban, Philippines","POLYGON ((124.98046875 11.17840187371179, 125....",21017,71.068204,91.17996,35.431847


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,29573,71.068204,257.339,35.431847


'Kenema, Sierra Leone'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,33330200220,51.8717,4.15538,184.462,322.26,137.798,132.59,133.643,33330200220,16469,...,"[[[-11.25, 7.7980785313553085], [-11.25, 7.885...",Polygon,0.173857,0.032789,"Kenema, Sierra Leone","POLYGON ((-11.25 7.798078531355308, -11.162109...",540,3.278888,56.02708,17.385676


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,16469,3.278888,322.26,17.385676


'Monrovia, Liberia'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,33330222101,54.9428,1.09467,127.642,174.068,46.4256,72.6993,45.331,33330222101,19193,...,"[[[-10.810546875, 6.227933930268673], [-10.810...",Polygon,0.321929,0.055697,"Monrovia, Liberia","POLYGON ((-10.810546875 6.227933930268673, -10...",1069,5.569739,56.03747,32.192861


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,19193,5.569739,174.068,32.192861


'Kathmandu, Nepal'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,123131221232,343.699,36.842,784.772,1038.24,253.467,441.072,216.625,123131221232,84873,...,"[[[85.25390625, 27.683528083787767], [85.25390...",Polygon,0.366525,0.066605,"Kathmandu, Nepal","POLYGON ((85.25390625 27.68352808378777, 85.34...",5653,6.66054,380.541,36.652508


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,84873,6.66054,1038.24,36.652508


'Heidelberg, Germany'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,120203320232,299.818,35.5168,647.057,818.667,171.609,347.24,136.093,120203320232,17772,...,"[[[8.61328125, 49.38237278700956], [8.61328125...",Polygon,0.409611,0.291582,"Heidelberg, Germany","POLYGON ((8.61328125 49.38237278700956, 8.7011...",5182,29.158226,335.3348,40.961075


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,17772,29.158226,818.667,40.961075


'London, UK'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,31313131103,490.036,46.8263,616.1,766.442,150.341,126.065,103.515,31313131103,18234,...,"[[[-0.263671875, 51.50874245880334], [-0.26367...",Polygon,0.70046,0.361797,"London, UK","POLYGON ((-0.263671875 51.50874245880334, -0.1...",6597,36.179664,536.8623,70.046044


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,18234,36.179664,766.442,70.046044


'Manhattan, NY'

Unnamed: 0,quadkey,named_edited_km,named_new_km,total_edited_km,total_km,total_new_km,unnamed_edited_km,unnamed_new_km,quadkey.1,total_buildings,...,coordinates,type,named_road_ratio,more_building_ratio,name,geometry,total_more_buildings,more_buildings_percent,total_named_km,named_km_percent
0,32010110132,740.823,129.041,865.668,1098.95,233.285,124.845,104.244,32010110132,50080,...,"[[[-74.00390625, 40.71395582628604], [-74.0039...",Polygon,0.791541,0.028654,"Manhattan, NY","POLYGON ((-74.00390625 40.71395582628604, -73....",1435,2.865415,869.864,79.154102


Unnamed: 0,total_buildings,more_buildings_percent,total_km,named_km_percent
0,50080,2.865415,1098.95,79.154102
