# Maps

I explored using **gmaps** to plot interactive heatmaps for the different tags in df_tag.

# Datasets
- boston permits: https://data.boston.gov/dataset/approved-building-permits 
- df_tag:   https://github.com/datakind/msvdd_Boston_University/blob/master/analysis_permit_comments.ipynb
    (Thanks to jelenalor for the tagged df! <3)


# Conclusion:
- Interactive maps allow identification of hotspots within ZIP-code regions. 
- Some maps can be fairly slow to load. A commercial Google Maps API key would help. 

In [1]:
import os

import gmaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shapefile as shp
# Read the Google Maps API key from the environment instead of hardcoding it:
# a key stored in the notebook is exposed to everyone who can read the file.
# Set GOOGLE_MAPS_API_KEY before starting Jupyter; a missing variable raises
# KeyError here rather than failing later when a map is rendered.
gmaps.configure(api_key=os.environ['GOOGLE_MAPS_API_KEY'])
# To install gmaps properly for Jupyter, please read:
# https://jupyter-gmaps.readthedocs.io/en/latest/install.html
%matplotlib inline


#sns.set(style="whitegrid", palette="pastel", color_codes=True)
#sns.mpl.rc("figure", figsize=(10,6))

In [2]:
# Load the Boston approved-building-permits table from the local CSV copy
# and preview its first two rows.
# Commented-out alternative: read the table from a Dropbox URL instead.
#permits = pd.read_csv('https://www.dropbox.com/s/zokocwjn8203e7v/boston_permits.csv?dl=1')
permits= pd.read_csv('Data/Boston Approved Building Permits.csv')
permits.head(2)

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,occupancytype,sq_feet,address,city,state,zip,property_id,parcel_id,lat,long
0,A100071,COB,Amendment to a Long Form,City of Boston,Change connector link layout from attached enc...,Renee Santeusanio,40000.0,429.0,2011-11-04 11:04:58,2012-05-04 00:00:00,...,Comm,170.0,175 W Boundary RD,West Roxbury,MA,2132,17268,2012032000,42.26075,-71.14961
1,A100137,EXTREN,Amendment to a Long Form,Renovations - Exterior,Landscaping/stonework - amending permit #2801/...,,15000.0,206.0,2013-01-03 14:13:09,2013-07-03 00:00:00,...,1-2FAM,0.0,14 William Jackson AVE,Brighton,MA,2135,149852,2204944000,42.3446,-71.15405


In [3]:
import joblib
# Load the pre-tagged permit table and preview its first two rows.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load Data/df_tag.pkl when it comes from a trusted source.
df_tag = joblib.load('Data/df_tag.pkl')

df_tag.head(2)

Unnamed: 0,permitnumber,parcel_id,zip,worktype,year,month,tokens,tokens_filt,attic,basement,...,seal,sola,solar,stove,strip,volt,wall,water,window,tag
0,A100137,2204944000,2135,EXTREN,2013,1,amend permit per,none,0,0,...,0,0,0,0,0,0,0,0,0,other
1,A100764,502363000,2108,INTEXT,2012,1,interior exterior work indic draw amend permit,none,0,0,...,0,0,0,0,0,0,0,0,0,other


In [4]:
# Open the ZIP-code boundary shapefile with pyshp and show how many shapes
# it contains.
shp_path = "Data/ZIP_Codes/ZIP_Codes.shp"
sf = shp.Reader(shp_path)
len(sf.shapes())

43

In [5]:
def read_shapefile(sf):
    """
    Convert a pyshp reader into a Pandas dataframe.

    Each record of the shapefile becomes one row whose columns are the
    shapefile's attribute fields; an extra 'coords' column holds the list
    of points of the corresponding shape.

    ref: https://towardsdatascience.com/mapping-geograph-data-in-python-610a963d2d7f
    """
    # The first entry of sf.fields is dropped so that the column names line
    # up with the values returned by sf.records().
    column_names = [field[0] for field in sf.fields[1:]]
    rows = sf.records()
    geometry = [shape.points for shape in sf.shapes()]
    return pd.DataFrame(data=rows, columns=column_names).assign(coords=geometry)

In [6]:
# Convert the shapefile reader into a DataFrame and preview its first two rows.
sfdf = read_shapefile(sf)
sfdf.head(2)

Unnamed: 0,OBJECTID,ZIP5,ShapeSTAre,ShapeSTLen,coords
0,1,2134,37219360.0,40794.182396,"[(-71.12340461235522, 42.36420867214283), (-71..."
1,2,2125,64760520.0,62224.52144,"[(-71.04541458491363, 42.323806667152326), (-7..."


In [7]:
# Find the ZIP codes that occur in both datasets.
tag_zip_set = set(df_tag['zip'].unique())
shape_zip_set = set(sfdf['ZIP5'])
zips = list(tag_zip_set.intersection(shape_zip_set))
print('No. of unique zip codes common to both df_tag and sfdf: ' + str(len(zips)))
# Keep only the rows of df_tag & sfdf whose ZIP code is in the intersection.
df_tag_zip = df_tag[df_tag['zip'].isin(zips)]
sfdf_zip = sfdf[sfdf['ZIP5'].isin(zips)]

No. of unique zip codes common to both df_tag and sfdf: 31


In [8]:
# Map center: mean of the distinct non-null latitude / longitude values
# found in the permits table.
centerlat = np.mean(permits['lat'].dropna().unique())
centerlong = np.mean(permits['long'].dropna().unique())

# The shapefile stores points as (long, lat); reverse each one to (lat, long).
zip_coords = sfdf_zip['coords'].apply(lambda points: [point[::-1] for point in points])
# Mean vertex of each ZIP-code outline, used as the position of its label marker.
zip_coords_centers = [tuple(np.asarray(outline).mean(axis=0)) for outline in zip_coords]


In [9]:
def newbasemap(zip_coords,zip_coords_centers,centerlat,centerlong,zip_labels=None):
    """
    Build a gmaps figure showing the ZIP-code regions and one marker per region.

    Parameters
    ----------
    zip_coords : iterable of point lists
        One outline per ZIP-code region, each a list of (lat, long) points.
    zip_coords_centers : list of (lat, long)
        Marker position for each region, in the same order as zip_coords.
    centerlat, centerlong : float
        Coordinates the map is centered on.
    zip_labels : list, optional
        Info-box content for each marker, in the same order as
        zip_coords_centers. When omitted, the labels are taken from the
        notebook-level ``sfdf_zip.ZIP5`` column, as before.

    Returns
    -------
    The gmaps figure with the polygon and marker layers added.
    """
    if zip_labels is None:
        # Backward-compatible fallback to the notebook-level frame.
        zip_labels = sfdf_zip.ZIP5.tolist()

    basefig = gmaps.figure(center=(centerlat,centerlong), zoom_level=11)

    # Draw every ZIP-code outline in a progressively lighter grey. The shade
    # is clamped so a long list of regions cannot exceed the 0-255 RGB range.
    polygons = []
    for n, outline in enumerate(zip_coords):
        shade = min(n*5, 255)
        grey = (shade, shade, shade)
        polygons.append(gmaps.Polygon(outline, stroke_color=grey, fill_color=grey))
    # A single drawing layer holds all polygons instead of one layer per region.
    basefig.add_layer(gmaps.drawing_layer(features=polygons, show_controls=False))

    # Label centers with zipcodes. Info box shows up when clicked.
    zip_maker = gmaps.marker_layer(zip_coords_centers,info_box_content=zip_labels)
    # NOTE(review): the marker layer is also passed as a feature of a second
    # drawing layer and then added again on its own. This looks redundant but
    # is kept unchanged -- confirm against the gmaps docs before removing.
    drawing = gmaps.drawing_layer(features=[zip_maker],show_controls=False)

    basefig.add_layer(drawing)
    basefig.add_layer(zip_maker)
    return basefig

In [10]:
# Merge back onto permits to get lat & long for each tagged permit.
# pd.merge defaults to an inner join, so only permit numbers present in both
# tables are kept.
df_tag_map = pd.merge(df_tag,permits[['permitnumber','lat','long']], on='permitnumber')

In [11]:
# Render the base map on its own: ZIP-code polygons plus one clickable
# marker per ZIP code.
basefig =newbasemap(zip_coords,zip_coords_centers,centerlat,centerlong)
basefig

In [12]:
# Heatmap of the permits tagged 'solar', drawn on top of a fresh base map.
fig1 = newbasemap(zip_coords, zip_coords_centers, centerlat, centerlong)
map_tag = df_tag_map.loc[df_tag_map['tag'] == 'solar']
# Rows without coordinates cannot be placed on the map, so drop them.
tag_coor = map_tag.loc[:, ['lat', 'long']].dropna()
tag_layer = gmaps.heatmap_layer(tag_coor)
fig1.add_layer(tag_layer)

fig1

In [13]:
from ipywidgets.embed import embed_minimal_html
def heatmap(tag,df_tag_map,zip_coords,zip_coords_centers,centerlat,centerlong):
    '''
    Create the heatmap for a single tag and save it as '<tag>_heatmap.html'.

    tag: value of the 'tag' column whose permits are plotted.
    df_tag_map: DataFrame with at least the columns 'tag', 'lat' and 'long'.
    zip_coords, zip_coords_centers, centerlat, centerlong: passed straight
        through to newbasemap to build the underlying ZIP-code map.

    The HTML file is written relative to the current working directory.
    Returns the figure so that the caller can also display it in the notebook.
    '''
    # Initiate a new base map for this tag.
    fig1 = newbasemap(zip_coords,zip_coords_centers,centerlat,centerlong)
    # Keep the coordinates of permits with this tag; rows without lat/long
    # cannot be placed on the map.
    tag_coor = df_tag_map.loc[df_tag_map.tag == tag, ['lat','long']].dropna()
    fig1.add_layer(gmaps.heatmap_layer(tag_coor))
    # Save as a standalone HTML page.
    embed_minimal_html('%s_heatmap.html' % tag, views=[fig1])
    return fig1


In [14]:
# Number of permits per tag, smallest group first.
df_tag.groupby('tag')['permitnumber'].count().sort_values()

tag
heat pump          203
furnace           1853
hvac              2213
boiler            7335
solar             8637
water heater     11321
window           15082
insulation       15112
gas              25430
other           193840
Name: permitnumber, dtype: int64

In [35]:
# Save one heatmap HTML file per tag (every tag except 'other').
tags_to_map = ['heat pump', 'furnace', 'hvac', 'boiler', 'solar',
               'water heater', 'window', 'insulation', 'gas']
tag_locations = df_tag_map[['tag','lat','long']]
for tag_name in tags_to_map:
    heatmap(tag_name, tag_locations, zip_coords, zip_coords_centers, centerlat, centerlong)