In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import graphviz
import seaborn as sns
import pandas as pd
pd.options.display.max_columns = None
import cPickle as pickle
from IPython.display import Image
import folium
import branca
import json
from util import *

## Build grids

### Preprocess buildings

In [14]:
oos_buildings = pd.read_csv('../data/oos_vis.csv')

In [9]:
len(oos_buildings)

148401

In [16]:
oos_buildings.head(3)

Unnamed: 0,building_id,score,bottom_left,top_right,addr,cluster,num,incident_ids
0,0,0.265381,"[42.255267050999997, -83.161064993999986]","[42.255811, -83.16050500000001]",26585 OUTER DRIVE,0,2,"[53453.0, 446027.0]"
1,1,0.184337,"[42.256026999999996, -83.1607722]","[42.256177, -83.1606222]",3808 S Bassett St,1,1,[446640.0]
2,2,0.184337,"[42.256281200000004, -83.160299]","[42.25643120000001, -83.160149]",3795 S. Bassett,2,1,[446574.0]


In [17]:
from ast import literal_eval
oos_buildings['bottom_left'] = oos_buildings['bottom_left'].map(literal_eval)
oos_buildings['top_right'] = oos_buildings['top_right'].map(literal_eval)

In [35]:
oos_buildings['center_lat'] = oos_buildings.apply(lambda x: (x['bottom_left'][0] + x['top_right'][0]) / 2, axis=1)
oos_buildings['center_lon'] = oos_buildings.apply(lambda x: (x['bottom_left'][1] + x['top_right'][1]) / 2, axis=1)

### Setup grids

In [19]:
lats = oos_buildings['bottom_left'].map(lambda x: x[0])
lons = oos_buildings['bottom_left'].map(lambda x: x[1])

In [30]:
lat_range = (min(lats), max(lats))
lon_range = (min(lons), max(lons))
print lat_range, lon_range

(42.255267050999997, 42.449925) (-83.289774999999992, -82.910375000000002)


In [83]:
grids_lat_ranges = [0]
for i in range(10):
    grids_lat_ranges.append(grids_lat_ranges[i] + 1)
grids_lat_ranges

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [92]:
import itertools

def get_grids_ranges(shape, lat_range, lon_range):
    ngrid_lat, ngrid_lon = shape
    min_lat, max_lat = lat_range
    min_lon, max_lon = lon_range
    lat_gap = (max_lat - min_lat) / ngrid_lat
    lon_gap = (max_lon - min_lon) / ngrid_lon
    # x - lat
    # y - lon
    x = [min_lat]
    for i in range(ngrid_lat):
        x.append(x[i] + lat_gap)
    y = [min_lon]
    for i in range(ngrid_lon):
        y.append(y[i] + lon_gap)
        
    # 0 - bottom_left
    # 1 - bottom_right
    # 2 - top_right
    # 3 - top_left
    grid_array = []
    for i in range(len(x)-1):
        for j in range(len(y)-1):
            grid = [ [x[i], y[j]], [x[i], y[j+1]], [x[i+1], y[j+1]], [x[i+1], y[j]]  ]
            grid_array.append(grid)
    return grid_array

In [113]:
grid_array = get_grids_ranges((30, 50), lat_range, lon_range)

### Map buildings to grids

In [114]:
def get_containing_grid(grid_array, lat, lon):
    candidates = grid_array[:]
    candidates = filter(lambda x: x[0][0] < lat, candidates)
    candidates = filter(lambda x: x[2][0] > lat, candidates)
    candidates = filter(lambda x: x[0][1] < lon, candidates)
    candidates = filter(lambda x: x[2][1] > lon, candidates)
    if not candidates:
        return -1
    else:
        return grid_array.index(candidates[0])

In [65]:
oos_buildings.head(2)

Unnamed: 0,building_id,score,bottom_left,top_right,addr,cluster,num,incident_ids,center_lat,center_lon
0,0,0.265381,"[42.255267051, -83.161064994]","[42.255811, -83.160505]",26585 OUTER DRIVE,0,2,"[53453.0, 446027.0]",42.255539,-83.160785
1,1,0.184337,"[42.256027, -83.1607722]","[42.256177, -83.1606222]",3808 S Bassett St,1,1,[446640.0],42.256102,-83.160697


In [119]:
num_grid = 30 * 50
grids_df = pd.DataFrame()
grids_df['building_count'] = np.zeros(num_grid)
grids_df['total_risk'] = np.zeros(num_grid)
grids_df['grid_id'] = range(len(grid_array))
grids_df['grid_bounds'] = grid_array

In [115]:
get_containing_grid(grid_array, 42.255539, -83.160785)

16

In [116]:
def map_to_grids(grids_df, buildings, grid_array):
    for index, row in buildings.iterrows():
        lat = row['center_lat']
        lon = row['center_lon']
        grid_id = get_containing_grid(grid_array, lat, lon)
        if grid_id == -1:
            continue
        grid_indexs = grids_df[grids_df['grid_id'] == grid_id].index.tolist()
        if not grid_indexs:
            continue
        grid_index = grid_indexs[0]
        grids_df.loc[grid_index, 'building_count'] += 1
        grids_df.loc[grid_index, 'total_risk'] += row['score']

In [121]:
map_to_grids(grids_df, oos_buildings, grid_array)

In [123]:
len(grids_df[grids_df['building_count'] > 0])

957

In [125]:
grids_df['building_count'].sum()

148392.0

In [127]:
grids_df_ = grids_df[grids_df['building_count'] > 0]

In [128]:
grids_df_['avg_risk'] = grids_df_.apply(lambda x: x['total_risk'] / float(x['building_count']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [129]:
grids_df_.head(3)

Unnamed: 0,building_count,total_risk,grid_id,grid_bounds,avg_risk
16,34,8.304994,16,"[[42.255267051, -83.168367], [42.255267051, -8...",0.244265
17,21,2.781184,17,"[[42.255267051, -83.160779], [42.255267051, -8...",0.132437
65,4,0.5399,65,"[[42.2617556493, -83.175955], [42.2617556493, ...",0.134975


In [130]:
grids_df_.to_csv('../data/grids.csv', index=False)

### Export a GeoJson file

In [16]:
from ast import literal_eval

In [22]:
def export_geo_json(grids):
    objs = []
    for index, row in grids_df_.iterrows():
        grid_id = row['grid_id']
        grid_bounds = row['grid_bounds']
        avg_risk = row['avg_risk']
        bounds = literal_eval(grid_bounds)
        bounds = [[x[1], x[0]] for x in bounds]
        obj = {
            'type': 'Feature',
            'id': grid_id,
            'geometry': {
                'type': 'Polygon',
                'coordinates': [bounds]
            }
        }
        objs.append(obj)
        
    data = {
        'type': 'FeatureCollection',
        'features': objs
    }
    with open('./grids.json', 'w') as f:
        json.dump(data, f)

In [23]:
export_geo_json(grids_df_)

In [45]:
with open('./grids.json', 'r') as f:
    data_ = json.loads(f.readline())

In [46]:
data_.keys()

[u'type', u'features']

In [47]:
data_['features'][0]

{u'geometry': {u'coordinates': [[[42.255267051, -83.16836700000002],
    [42.255267051, -83.16077900000002],
    [42.2617556493, -83.16077900000002],
    [42.2617556493, -83.16836700000002]]],
  u'type': u'Polygon'},
 u'id': 16,
 u'type': u'Feature'}

In [13]:
len(data_['features'])

957

## Make a density (average risk by grid) map

In [68]:
grids_vs_df = pd.read_csv('../data/grids.csv')

In [90]:
dmap = folium.Map(location=[42.383572,-83.0705027], tiles='Stamen Terrain', zoom_start=11)

dmap.geo_json(
    geo_path='./grids.json', data=grids_vs_df,
    columns=['grid_id', 'avg_risk'],
    key_on='feature.id',
    fill_color='BuPu', fill_opacity=0.4, line_opacity=0.2,
    legend_name='Avg. Blight Risk'
)

dmap.save('./dmap.html')
dmap

In [69]:
# colormap = branca.colormap.linear.PuBu.scale(
#     grids_vs_df.avg_risk.min(),
#     grids_vs_df.avg_risk.max()
# )
# color_dict = grids_df_.set_index('grid_id')['avg_risk']

In [85]:
# with open('./grids.json', 'r') as f:
#     geo_json_data = json.load(f)

# dmap = folium.Map(location=[42.383572,-83.0705027], tiles='Stamen Terrain', zoom_start=11)

# folium.GeoJson(
#     geo_json_data,
#     style_function=lambda feature: {
#         'fillColor': colormap(color_dict[feature['id']]),
#         'color' : 'black',
#         'weight' : 1
#         }
#     ).add_to(dmap)

# dmap