# Walkability

In [None]:
!pip install --upgrade pip
!pip install gmaps
!pip install geopy
!pip install git+git://github.com/geopandas/geopandas.git
!pip install descartes

In [None]:
# Enable the notebook extension shipped with the gmaps Python package.
!jupyter nbextension enable --py gmaps

import zipfile, os

def unzipFile(zipFilePath, unzipFolder):
    """Extract a zip archive into a folder, then delete the archive.

    Parameters
    ----------
    zipFilePath: Path of the zip archive to extract
    unzipFolder: Folder that receives the extracted members

    Returns
    -------
    None
    """
    archive = zipfile.ZipFile(zipFilePath, mode="r")
    try:
        message = "Extracting {0} ".format(zipFilePath)
        print(message)
        archive.extractall(path=unzipFolder)
    finally:
        archive.close()
    # The archive is removed only after it has been extracted and closed.
    os.unlink(zipFilePath)

# unzipFile deletes the archive once it has been extracted, so the call is
# guarded: re-running this cell no longer fails with FileNotFoundError.
if os.path.exists('POIS/gis.osm_pois_a_free_1.zip'):
    unzipFile('POIS/gis.osm_pois_a_free_1.zip', 'POIS')

In [1]:
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import LineString
import pandas as pd
import ipywidgets as widgets
import geopy
import gmaps
from IPython.display import display
from sklearn.neighbors import KDTree
import numpy as np

import zipfile, os

def unzipFile(zipFilePath, unzipFolder):
    """Extract a zip archive into a folder, then delete the archive.

    Parameters
    ----------
    zipFilePath: Path of the zip archive to extract
    unzipFolder: Folder that receives the extracted members

    Returns
    -------
    None
    """
    archive = zipfile.ZipFile(zipFilePath, mode="r")
    try:
        message = "Extracting {0} ".format(zipFilePath)
        print(message)
        archive.extractall(path=unzipFolder)
    finally:
        archive.close()
    # The archive is removed only after it has been extracted and closed.
    os.unlink(zipFilePath)
    
def plotstuff(stuff, a = 15, b = 15, cmap = 'Accent'):
    """Plot an object through its own ``plot`` method.

    Parameters
    ----------
    stuff: Object exposing ``plot(cmap=..., figsize=...)``
    a: First entry of the ``figsize`` tuple (Default value is 15)
    b: Second entry of the ``figsize`` tuple (Default value is 15)
    cmap: Colormap name passed through unchanged (Default value is 'Accent')

    Returns
    -------
    None
    """
    figure_size = (a, b)
    stuff.plot(figsize=figure_size, cmap=cmap)
    
    
# unzipFile deletes the archive once it has been extracted, so the call is
# guarded: re-running this cell no longer fails with FileNotFoundError.
if os.path.exists('POIS/Test_Roads.zip'):
    unzipFile('POIS/Test_Roads.zip', 'POIS')

# Load the point-of-interest table; the code below reads its `lon` and `lat` columns.
df = pd.read_csv('poi.csv')
# NOTE(review): unless this is the last line of its cell, the preview is computed and discarded.
df.tail()

# One shapely Point per row built from (lon, lat), i.e. x = longitude, y = latitude.
geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
crs = {'init': 'epsg:4326'}
gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)

# Reproject to EPSG:3395 and keep one dict per row.
# NOTE(review): `poi_points_pr` and `pois` are assigned again further down from
# the OSM shapefiles, which replaces the values computed here.
poi_points_pr = gdf.to_crs({'init': 'epsg:3395'}) 
pois = poi_points_pr.to_dict('records') 

In [2]:
%matplotlib inline
# Read the two OSM POI shapefiles: point features and area (polygon) features.
poi_points = gpd.read_file('POIS/gis.osm_pois_free_1.shp')
poi_area = gpd.read_file('POIS/gis.osm_pois_a_free_1.shp')

# Select the following categories only: 
### 'supermarket', 'greengrocer', 'park', 'dog_park', 'garden'

In [3]:
# fclass values that count as a grocery destination.
grocer_fclass = ['supermarket', 'greengrocer']
# fclass values that count as a park destination.
park_fclass = ['park', 'dog_park', 'garden']
# Every fclass kept when the POI layers are filtered: groceries first, then parks.
selected_fclass = list(grocer_fclass)
selected_fclass.extend(park_fclass)
selected_fclass

['supermarket', 'greengrocer', 'park', 'dog_park', 'garden']

In [4]:
# Keep only the point POIs whose fclass is one of the selected amenity classes.
poi_points = poi_points.loc[poi_points["fclass"].isin(selected_fclass)]
poi_points.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
3,26496361,2501,supermarket,Shopper's Corner,POINT (-122.0160744 36.9765357)
26,54653612,2501,supermarket,,POINT (-116.4848801 33.7954537)
99,87421131,2204,park,Mendota Wildlife Management Area,POINT (-120.3410298 36.7327048)
157,150943454,2501,supermarket,California Oranges,POINT (-118.8280675 36.1012355)
164,150959039,2204,park,Fort Tejon State Historic Park,POINT (-118.8942648 34.8735862)


In [5]:
# Keep only the area POIs whose fclass is one of the selected amenity classes.
poi_area = poi_area.loc[poi_area["fclass"].isin(selected_fclass)]

In [6]:
# Preview the first rows of the area POIs.
poi_area.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,4263495,2204,park,Richard Alatorre Park,"POLYGON ((-118.1848653 34.1437172, -118.184820..."
1,4354434,2204,park,San Pascual Park,"POLYGON ((-118.1701499 34.1197141, -118.168846..."
2,4384742,2204,park,Elysian Park,"POLYGON ((-118.2516056 34.0802554, -118.250215..."
3,4584647,2204,park,Crystal Street Bike Park,"POLYGON ((-118.2560348 34.1074083, -118.255741..."
7,4786596,2204,park,Klein Park,"POLYGON ((-122.1045774 37.4004166, -122.104573..."


In [7]:
# Spot-check one area POI by its OSM id (osm_id is compared against a string here).
poi_area.loc[poi_area["osm_id"] == '239729391']

Unnamed: 0,osm_id,code,fclass,name,geometry
46035,239729391,2501,supermarket,Safeway 1990,"POLYGON ((-121.9482439 37.3945104, -121.948201..."


In [8]:
# Read the OSM cycleway shapefile (line features) and preview it.
poi_paths = gpd.read_file('POIS/gis.osm_cycleway_free_1.shp')
poi_paths.head()

Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,geometry
0,4341140,5152,cycleway,,,B,0,0.0,F,F,"LINESTRING (-118.4455769 33.9700496, -118.4446..."
1,4341259,5152,cycleway,Venice Beach Bike Path,,B,0,0.0,F,F,"LINESTRING (-118.4824233 33.9965151, -118.4822..."
2,4341264,5152,cycleway,,,B,0,0.0,F,F,"LINESTRING (-118.5232954 34.0301085, -118.5228..."
3,4341268,5152,cycleway,,,B,0,0.0,F,F,"LINESTRING (-118.4538663 33.9621672, -118.4540..."
4,4417523,5152,cycleway,,,B,0,0.0,F,F,"LINESTRING (-121.765883 38.5438224, -121.76628..."


In [9]:
# Reproject both POI layers from their source CRS to EPSG:3395 (a projected
# CRS in metres) before doing any geometry maths.
poi_points_pr = poi_points.to_crs({'init': 'epsg:3395'})
poi_area_pr = poi_area.to_crs({'init': 'epsg:3395'})
# Collapse each area POI to its centroid *after* projecting: centroids computed
# on geographic (lat/lon) coordinates are not reliable, and assigning on the
# freshly created projected frame avoids writing into a filtered slice of
# `poi_area` (which now keeps its original polygons).
poi_area_pr["geometry"] = poi_area_pr.centroid


In [10]:
# Reproject the cycleway layer to EPSG:3395, then keep one dict per feature.
poi_paths_pr = poi_paths.to_crs({'init': 'epsg:3395'})
pois_paths = list(poi_paths_pr.to_dict('records'))

In [11]:
# One dict per projected POI: point features first, area features separately.
pois = list(poi_points_pr.to_dict('records'))
pois_area = list(poi_area_pr.to_dict('records'))

In [16]:
# Inspect the projected (x, y) vertices of the second-to-last cycleway record.
list(zip(*pois_paths[-2]["geometry"].xy))

[(-13342062.215658948, 4060526.8347115284),
 (-13342020.893863965, 4060530.3136624102),
 (-13342011.598686485, 4060530.3673913907)]

In [17]:
# Flatten every cycleway into its vertices. `path_osmids[i]` is the osm_id of
# the path that vertex `path_xylist[i]` belongs to, so a KD-tree hit on a
# vertex can be mapped back to its path.
path_xylist = []
path_osmids = []
for path in pois_paths:
    xs, ys = path["geometry"].xy
    path_xy = list(zip(xs, ys))
    path_xylist += path_xy
    path_osmids += [path["osm_id"]] * len(path_xy)
pathtree = KDTree(path_xylist, leaf_size=20)

In [18]:
# Combine point and area POIs into one list. The list is rebuilt from its two
# sources instead of extending `pois` in place, so re-running this cell does
# not append the area POIs a second time.
pois = poi_points_pr.to_dict('records') + pois_area

In [19]:
# KD-tree over the projected (x, y) of every POI; entry i of the tree is built from pois[i].
tree = KDTree([(poi["geometry"].x, poi["geometry"].y) for poi in pois], leaf_size=20)  

import zipfile, os

def unzipFile(zipFilePath, unzipFolder):
    """Extract a zip archive into a folder, then delete the archive.

    Parameters
    ----------
    zipFilePath: Path of the zip archive to extract
    unzipFolder: Folder that receives the extracted members

    Returns
    -------
    None
    """
    archive = zipfile.ZipFile(zipFilePath, mode="r")
    try:
        message = "Extracting {0} ".format(zipFilePath)
        print(message)
        archive.extractall(path=unzipFolder)
    finally:
        archive.close()
    # The archive is removed only after it has been extracted and closed.
    os.unlink(zipFilePath)

# unzipFile deletes the archive once it has been extracted, so the call is
# guarded: re-running this cell no longer fails with FileNotFoundError.
if os.path.exists('gis.osm_pois_a_free_1.zip'):
    unzipFile('gis.osm_pois_a_free_1.zip', 'POIS')

In [20]:


import os

# The Google Maps API key is read from the environment instead of being
# committed with the notebook. A key that was previously checked in should be
# revoked and replaced.
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')
if not API_KEY:
    raise RuntimeError("Set the GOOGLE_MAPS_API_KEY environment variable before running this cell")

gmaps.configure(api_key=API_KEY)
# Layout handed to gmaps.figure() when the WalkScorer widget is built.
figure_layout = {'height': '700px'}

In [21]:
# Load the active listings table; the following cells read its `lat` and `lon` columns.
df = pd.read_csv('Cal_Active_listings.csv')

In [22]:
# Bounding-box filter on the listings: 37.2 < lat < 37.5 and -122 < lon < -121.7
# (all four bounds are exclusive).
a = df['lat'].lt(37.5)
b = df['lat'].gt(37.2)
c = df['lon'].gt(-122)
d = df['lon'].lt(-121.7)
df2 = df.loc[a & b & c & d]

In [23]:
# At most the first 100 (lat, lon) tuples of the filtered listings, drawn as red symbols.
locations = df2[['lat', 'lon']].to_records(index=False).tolist()[:100]
symbols = gmaps.symbol_layer(locations, fill_color='red', stroke_color='red')


In [27]:
from collections import Counter

def _get_projected_geoPandasSeries(selected_pois):
    """Build a GeoSeries of points from POI records and tag it as EPSG:3395.

    Parameters
    ----------
    selected_pois: Iterable of records whose "geometry" entry exposes ``x`` and ``y``

    Returns
    -------
    GeoSeries with one Point per record and its crs set to EPSG:3395
    (the coordinates are labelled, not reprojected)
    """
    points = []
    for poi in selected_pois:
        geom = poi["geometry"]
        points.append(Point(geom.x, geom.y))
    series = gpd.GeoSeries(points)
    series.crs = {'init' :'epsg:3395'}
    return series
    
def _project_location(location):
    """Project one location from EPSG:4326 to EPSG:3395.

    Parameters
    ----------
    location: Sequence whose item 0 is used as the y coordinate and item 1 as
        the x coordinate of the point (i.e. a (lat, lon) pair)

    Returns
    -------
    GeoSeries holding the single projected Point
    """
    lon = location[1]
    lat = location[0]
    wgs84_point = gpd.GeoSeries([Point(lon, lat)])
    wgs84_point.crs = {'init' :'epsg:4326'}
    return wgs84_point.to_crs({'init': 'epsg:3395'})
    
class WalkScorer(object):
    """Interactive map widget that reports amenities around a dropped marker.

    Dropping a marker on the map queries the POI and cycleway KD-trees around
    that point, draws the matches on a second drawing layer and lists the
    amenity names below the map.

    The class reads these notebook-level names, which must exist before it is
    used: ``figure_layout``, ``symbols``, ``tree``, ``pois``, ``pathtree``,
    ``pois_paths``, ``path_osmids``, ``_project_location`` and
    ``_get_projected_geoPandasSeries``.
    """

    # Radius handed to KDTree.query_radius, in EPSG:3395 units. The summary
    # text describes this distance as a 10 minute walk.
    _SEARCH_RADIUS = 805

    def __init__(self):
        """Build the map figure, its layers and the surrounding widgets."""
        self._figure = gmaps.figure(layout=figure_layout)
        # Layer the user drops markers on; every new marker triggers a search.
        self._drawing = gmaps.drawing_layer()
        # Read-only layer used to draw the search results.
        self._drawing2 = gmaps.drawing_layer(show_controls=False)
        self._drawing.on_new_feature(self._new_feature_callback)
        self._figure.add_layer(symbols)
        self._figure.add_layer(self._drawing)
        self._figure.add_layer(self._drawing2)
        self._title = widgets.HTML(
            value="<h1>Walkability</h1>"
        )
        # NOTE(review): the placeholder mentions 5 minutes while the result
        # text says 10 mins; confirm which wording is intended.
        self._address_box = widgets.HTML(
            value="",
            placeholder='Amenities walkable w/ 5 minutes'
        )

        # Reverse geocoding is disabled. Assign a geopy geocoder here, e.g.
        # geopy.geocoders.GoogleV3(api_key=API_KEY), to enable
        # _get_location_details.
        self._geocoder = None
        self._container = widgets.VBox([self._title, self._figure, self._address_box])

    def _get_location_details(self, location):
        """Reverse-geocode ``location`` with the configured geocoder.

        Raises
        ------
        RuntimeError: If no geocoder has been configured on the instance
        """
        geocoder = getattr(self, "_geocoder", None)
        if geocoder is None:
            raise RuntimeError("No geocoder configured; set self._geocoder to enable reverse geocoding")
        return geocoder.reverse(location, exactly_one=True)

    def _clear_address_box(self):
        """Blank the text shown below the map."""
        self._address_box.value = ''

    @staticmethod
    def _poi_label(poi):
        """Return the display label of a POI record.

        The label is the stripped name, or the fclass when the name is
        missing (None/NaN) or blank.
        """
        name = poi["name"]
        name = name.strip() if isinstance(name, str) else ""
        return name or poi["fclass"]

    @staticmethod
    def _summary_html(names):
        """Return the HTML sentence listing ``names`` with matching grammar."""
        names = [str(name) for name in names]
        if len(names) == 1:
            end_String = " is within 10 mins of walk"
        elif len(names) == 2:
            end_String = " are within 10 mins walk"
        else:
            end_String = " are all within 10 mins walk"
        return "<h3>" + ", ".join(names) + end_String + "</h3>"

    def _trail_features(self, path_indices):
        """Return gmaps.Line segments for the cycleways hit by a radius query.

        ``path_indices`` are indices into ``path_osmids`` (one per path vertex).
        """
        if len(path_indices) == 0:
            return []
        selected_path_osmids = set(path_osmids[i] for i in path_indices)
        sel_paths = [p for p in pois_paths if p["osm_id"] in selected_path_osmids]
        sel_path_gpdSeries = gpd.GeoSeries([LineString(list(zip(*p["geometry"].xy))) for p in sel_paths])
        sel_path_gpdSeries.crs = {'init' :'epsg:3395'}
        # gmaps expects (lat, lon), so go back to EPSG:4326 and swap x/y below.
        sel_path_gpdSeries_pr = sel_path_gpdSeries.to_crs({'init': 'epsg:4326'})

        trails = []
        for line in sel_path_gpdSeries_pr:
            coords = list(line.coords)
            # One gmaps.Line per pair of consecutive vertices.
            for start, end in zip(coords, coords[1:]):
                trails.append(gmaps.Line(start=(start[1], start[0]), end=(end[1], end[0]), stroke_weight=4.0, stroke_color=(0, 100, 0)))
        return trails

    def _amenity_features(self, location, poi_indices):
        """Return (unique labels, map features) for the POIs hit by a radius query.

        POIs sharing a label are shown once (first occurrence). The features
        are the markers followed by the lines joining ``location`` to them.
        """
        selected_pois = [pois[ind] for ind in poi_indices]
        labels = [self._poi_label(poi) for poi in selected_pois]
        # np.unique returns the sorted distinct labels and, for each, the
        # index of its first occurrence in `labels`.
        unique_labels, first_indices = np.unique(np.array(labels), return_index=True)
        unique_pois = [selected_pois[i] for i in first_indices]

        projected = _get_projected_geoPandasSeries(unique_pois)
        # gmaps expects (lat, lon), so go back to EPSG:4326 and swap x/y below.
        lat_lon = projected.to_crs({'init': 'epsg:4326'})

        features = [gmaps.Marker((pt.y, pt.x), info_box_content=self._poi_label(poi)) for pt, poi in zip(lat_lon, unique_pois)]
        features.extend(gmaps.Line(start=location, end=(pt.y, pt.x), stroke_weight=2.5, stroke_color=(100, 0, 0)) for pt in lat_lon)
        return [str(label) for label in unique_labels], features

    def _show_address(self, location):
        """Search around ``location`` and update the map and the text box.

        Parameters
        ----------
        location: (lat, lon) of the marker that was dropped
        """
        pts_pr = _project_location(location)
        query_point = [[pts_pr[0].x, pts_pr[0].y]]
        indices = tree.query_radius(query_point, r=self._SEARCH_RADIUS)
        path_indices = pathtree.query_radius(query_point, r=self._SEARCH_RADIUS)

        amenities = self._trail_features(path_indices[0])
        names = []
        if len(indices[0]) > 0:
            names, poi_features = self._amenity_features(location, indices[0])
            amenities.extend(poi_features)

        # Always assigned, so results of a previous search are cleared even
        # when nothing is found this time.
        self._drawing2.features = amenities
        if names:
            self._address_box.value = self._summary_html(names)
        else:
            # Also reached when only cycleways were found: they are drawn but
            # there is no amenity name to list.
            self._address_box.value = 'No amenities found'

    def _new_feature_callback(self, feature):
        """Handle a feature added on the drawing layer; only markers are processed."""
        try:
            location = feature.location
        except AttributeError:
            return # Not a marker

        # Clear address box to signify to the user that something is happening
        self._clear_address_box()

        # Remove all markers other than the one that has just been added.
        self._drawing.features = [feature]

        # Search around the marker and display the result
        self._show_address(location)

    def render(self):
        """Return the top-level widget to display in the notebook."""
        return self._container

# Build the widget and return its container as the cell's displayed value.
WalkScorer().render()

In [24]:
# Persist the search structures with IPython's %store magic so that another
# session can restore them with `%store -r`.
%store tree
%store pathtree
%store pois
%store pois_paths
%store path_osmids

Stored 'tree' (KDTree)
Stored 'pathtree' (KDTree)
Stored 'pois' (list)
Stored 'pois_paths' (list)
Stored 'path_osmids' (list)


#### To do: 
* Process polygon, linear data efficiently
* Naming
* Dedup process


In [None]:

def process_location(item):
    """Count the grocery and park POIs around one listing.

    Parameters
    ----------
    item: Pair ``(al, dist)`` where ``al`` is a mapping with 'lat' and 'lon'
        entries and ``dist`` is the radius passed to the KD-tree query

    Returns
    -------
    The same ``al`` object, with "groceries" and "parks" set to the number of
    POIs within ``dist`` whose fclass is in ``grocer_fclass`` / ``park_fclass``
    """
    al, dist = item
    projected = _project_location((al['lat'], al['lon']))
    hits = tree.query_radius([[projected[0].x, projected[0].y]], r = dist)[0]
    nearby_fclasses = [pois[ind]["fclass"] for ind in hits]
    al["groceries"] = sum(1 for fclass in nearby_fclasses if fclass in grocer_fclass)
    al["parks"] = sum(1 for fclass in nearby_fclasses if fclass in park_fclass)
    return al
    

In [None]:
from multiprocessing import Pool
from itertools import repeat
# Default number of worker processes used by parallelize_operation.
num_cores = 8
def parallelize_operation(lt, func, dist, processes=None):
    """Apply ``func`` to every ``(item, dist)`` pair using a process pool.

    Parameters
    ----------
    lt: Iterable of items to process (any iterable, not only sized sequences)
    func: Picklable callable taking one ``(item, dist)`` tuple
    dist: Value paired with every item
    processes: Number of worker processes (Default value is ``num_cores``)

    Returns
    -------
    List of ``func`` results, in the order of ``lt``
    """
    items = list(lt)
    if not items:
        # Nothing to do: avoid starting worker processes for an empty input.
        return []
    workers = num_cores if processes is None else processes
    with Pool(processes=workers) as pool:
        # map() blocks until every result is available, and leaving the
        # `with` block shuts the pool down, so no extra close()/join() is needed.
        return pool.map(func, zip(items, repeat(dist)))

In [None]:
# Count nearby groceries/parks for every listing at two search radii
# (805 for the "10mins" results, 402.5 for the "5mins" results).
# NOTE(review): `cal_al` is not defined in any visible cell; confirm where it comes from.
results_10mins = parallelize_operation(cal_al, process_location, 805)
results_5mins = parallelize_operation(cal_al, process_location, 402.5)

In [None]:
def _count_shares(results):
    """Return the share of listings (in %) per number of nearby amenities.

    Parameters
    ----------
    results: List of records with integer "groceries" and "parks" entries

    Returns
    -------
    Three Counters mapping an amenity count to the percentage of records
    with that count: (groceries, parks, groceries + parks)
    """
    groceries, parks, combined = Counter(), Counter(), Counter()
    if not results:
        return groceries, parks, combined
    # Each record contributes an equal share; the denominator follows the
    # data instead of a hard-coded listing count.
    share = 100.0 / len(results)
    for item in results:
        groceries[item["groceries"]] += share
        parks[item["parks"]] += share
        combined[item["groceries"] + item["parks"]] += share
    return groceries, parks, combined

# 5 minute radius: groceries, parks, groceries + parks.
g, p, gop = _count_shares(results_5mins)

# 10 minute radius: groceries, parks, groceries + parks.
l, m, lom = _count_shares(results_10mins)

In [None]:
# Drop the bucket of listings with zero parks, then chart the remaining shares.
p.pop(0, None)
df = pd.Series(p)
df.plot.bar(title="% of Active Listings with Parks w/in 5 mins walk")


In [None]:
# Drop the bucket of listings with zero groceries, then chart the remaining shares.
g.pop(0, None)
df = pd.Series(g)
df.plot.bar(title="% of Active Listings with Groceries w/in 5 mins walk")


In [None]:
# Drop the bucket of listings with neither parks nor groceries, then chart the remaining shares.
gop.pop(0, None)
df = pd.Series(gop)
df.plot.bar(title="% of Active Listings with Parks or Groceries w/in 5 mins walk")


In [None]:
# Drop the bucket of listings with zero groceries, then chart the remaining shares.
l.pop(0, None)
df = pd.Series(l)
df.plot.bar(title="% of Active Listings with Groceries w/in 10 mins walk")


In [None]:
# Drop the bucket of listings with zero parks, then chart the remaining shares.
m.pop(0, None)
df = pd.Series(m)
df.plot.bar(title="% of Active Listings with Parks w/in 10 mins walk")


In [None]:
# Drop the bucket of listings with neither groceries nor parks, then chart the remaining shares.
lom.pop(0, None)
df = pd.Series(lom)
df.plot.bar(title="% of Active Listings with Groceries or Parks w/in 10 mins walk")


In [25]:
import pickle
def pickle_dump(pyObject, outfile, protocol = 3):
    """Serialize a Python object to a pickle file and report where it went

    Parameters
    ----------
    pyObject: Object to serialize
    outfile: Path of the pickle file to write (opened in binary write mode)
    protocol: Pickle protocol number. Use 2 when the file must be readable from Python 2.x (Default value is 3)

    Returns
    -------
    None
    """
    handle = open(outfile, 'wb')
    try:
        pickle.dump(pyObject, handle, protocol)
    finally:
        handle.close()

    report = "Python Object dumped to {}".format(outfile)
    print(report)

In [26]:
# Write every search structure to its own pickle file, in this order.
for _obj, _outfile in (
    (tree, 'tree.pickle'),
    (pathtree, 'pathtree.pickle'),
    (pois, 'pois.pickle'),
    (pois_paths, 'pois_paths.pickle'),
    (path_osmids, 'path_osmids.pickle'),
):
    pickle_dump(_obj, _outfile)

Python Object dumped to tree.pickle
Python Object dumped to pathtree.pickle
Python Object dumped to pois.pickle
Python Object dumped to pois_paths.pickle
Python Object dumped to path_osmids.pickle
