In [1]:
import geopandas as gpd 
import pandas as pd
from shapely import ops
import os
import shapely
import numpy as np
import psycopg2
import pandana as pdna
import networkx as nx
import multiprocessing as mp

from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *

from shapely.geometry import *
shapely.speedups.enable()

%matplotlib inline 

In [2]:
def create_graph(gdf, precision=3, simplify=1):
    '''Create a networkx DiGraph given a GeoDataFrame of lines. Every line will
    correspond to two directional graph edges, one forward, one reverse. The
    original line row and direction will be stored in each edge. Every node
    will be where endpoints meet (determined by being very close together) and
    will store a clockwise ordering of incoming edges.
    '''
    # The geometries sometimes have tiny end parts - get rid of those!
    gdf.geometry = gdf.geom.simplify(simplify)

    #G = nx.DiGraph()
    G = nx.Graph()

    # TODO: converting to string is probably unnecessary - keeping float may be
    # faster
    def make_node(coord, precision):
        return tuple(np.round(coord, precision))

    # Edges are stored as (from, to, data), where from and to are nodes.
    # az1 is the azimuth of the first segment of the geometry (point into the
    # geometry), az2 is for the last segment (pointing out of the geometry)
    def add_edges(row, G):
        geom = row.geom
        coords = list(geom.coords)
        geom_r = LineString(coords[::-1])
        coords_r = geom_r.coords
        start = make_node(coords[0], precision)
        end = make_node(coords[-1], precision)
        # Add forward edge
        fwd_attr ={}
        for k,v in row.items():
            fwd_attr[k]=v
        fwd_attr['forward']= 1
        fwd_attr['geometry']=  geom
        fwd_attr['length']=  geom.length
#         fwd_attr['az1']=  azimuth_cartesian(coords[0], coords[1])
#         fwd_attr['az2']=  azimuth_cartesian(coords[-2], coords[-1])
        fwd_attr['visited']= 0

        G.add_edge(start, end, **fwd_attr)

    gdf.apply(add_edges, axis=1, args=[G])

    return G

### Load the Dataset

Here we will be using the created "pedestrian network" dataset created by the DAV team at the City of Toronto. We'll then create a network using the Networkx library and the "create_graph" function defined in this notebook. 



In [3]:
con = psycopg2.connect(database="walkability", user="postgres", password=1234,
    host="172.19.118.205",port="5432")
pednet= gpd.read_postgis('SELECT * FROM public.pednet100m',con,crs={'init': 'epsg:2019'})
pednet['distance'] = pednet.geom.length
pednet['distance']

0         43.067738
1         91.194929
2         99.314161
3         99.314161
4         72.428198
5         84.412503
6         84.412503
7         32.553727
8         65.396366
9         65.396366
10        61.214948
11        61.214948
12        36.167851
13        79.338196
14        79.338196
15        79.338196
16        58.950245
17        44.613937
18        83.494567
19        31.449426
20        39.110753
21        65.109386
22        45.975187
23        81.833140
24        81.833140
25        92.386100
26        62.449471
27        62.449471
28        70.467203
29        52.874396
            ...    
125232    77.852332
125233    77.852332
125234    77.852332
125235    77.852332
125236    63.155556
125237    97.119663
125238    97.119663
125239    64.353867
125240    64.353867
125241    78.843937
125242    78.843937
125243    96.105674
125244    65.268374
125245    65.268374
125246    50.874493
125247    50.874493
125248    82.732373
125249    70.904821
125250    92.661304


In [4]:
G = create_graph(pednet)
#from G to urbanaccess network model - one node idx,x,y one edge from to weight
#G.edges(data=True)

In [6]:
G.edges

EdgeView([((305109.329, 4832680.58), (305133.627, 4832645.021)), ((305109.329, 4832680.58), (305140.756, 4832705.618)), ((305109.329, 4832680.58), (305081.029, 4832726.042)), ((305133.627, 4832645.021), (305141.349, 4832632.537)), ((305133.627, 4832645.021), (305219.375, 4832670.445)), ((305133.627, 4832645.021), (305051.249, 4832620.041)), ((312795.974, 4834764.008), (312819.343, 4834675.858)), ((312795.974, 4834764.008), (312858.59, 4834781.976)), ((312795.974, 4834764.008), (312727.824, 4834744.597)), ((312819.343, 4834675.858), (312897.217, 4834707.089)), ((312819.343, 4834675.858), (312777.225, 4834658.967)), ((312819.343, 4834675.858), (312834.938, 4834617.033)), ((317881.523, 4837714.668), (317977.432, 4837740.453)), ((317881.523, 4837714.668), (317895.724, 4837667.225)), ((317881.523, 4837714.668), (317865.856, 4837710.456)), ((317881.523, 4837714.668), (317866.584, 4837764.583)), ((317977.432, 4837740.453), (318073.341, 4837766.237)), ((318073.341, 4837766.237), (318085.005, 4

### Take the graph and setup dataframe to load into Urban Access object.

In [5]:
edges = nx.to_pandas_edgelist(G,'from','to')
to = edges['to'].tolist()
fr = edges['from'].tolist()
fr = list(set(fr))
to = list(set(to))
to.extend(fr)
nodes = list(set(to))
nodes = pd.DataFrame(nodes)
nodes.columns=['x', 'y']
nodes['xy'] = nodes.apply(lambda z: (z.x,z.y),axis=1)

In [7]:
edges

Unnamed: 0,from,to,road_type,linear_n_2,gcc_sdwl_1,to_interse,start_node,last_chang,address_r,obs_pts,...,end_node,topo_sdwlk,sdwlk_desc,last_cha_1,edge_id,feature_co,data_autho,length,qa_sdwlk_c,next_left_
0,"(305109.329, 4832680.58)","(305133.627, 4832645.021)",Local,Ballacaine Drive,,13468797.0,44791.0,2019/02/13 13:26:27.760,104-106,,...,4758.0,,Sidewalk on east side only,postgres,52546.0,201500.0,gepure,43.067738,,63165.0
1,"(305109.329, 4832680.58)","(305140.756, 4832705.618)",Local,Minden Crescent,,13468737.0,44791.0,,1-2,,...,44792.0,,No sidewalk on either side,,52544.0,201500.0,gepure,40.180655,,-52544.0
2,"(305109.329, 4832680.58)","(305081.029, 4832726.042)",Local,Ballacaine Drive,,13468760.0,44793.0,2019/02/13 13:26:27.760,92-102,,...,44791.0,,Sidewalk on east side only,postgres,52545.0,201500.0,gepure,53.550327,,52544.0
3,"(305133.627, 4832645.021)","(305141.349, 4832632.537)",Collector,Berry Road,,13468797.0,4758.0,2019/02/11 17:56:37.543,,,...,4759.0,,Sidewalk on both sides,postgres,3494.0,201400.0,gepure,14.679602,,54266.0
4,"(305133.627, 4832645.021)","(305219.375, 4832670.445)",Collector,Berry Road,,13468767.0,4758.0,2019/02/11 17:56:37.543,180-190,,...,4760.0,,Sidewalk on both sides,postgres,63165.0,201400.0,gepure,89.437586,,63166.0
5,"(305133.627, 4832645.021)","(305051.249, 4832620.041)",Collector,Berry Road,,13468853.0,50217.0,,208-220,,...,4758.0,,Sidewalk on both sides,,60905.0,201400.0,gepure,86.082295,,-52546.0
6,"(312795.974, 4834764.008)","(312819.343, 4834675.858)",Local,Kensington Avenue,,13466019.0,27107.0,2019/02/13 13:26:27.760,61-79,,...,27145.0,,Sidewalk on both sides,postgres,27858.0,201500.0,gepure,91.194929,,27962.0
7,"(312795.974, 4834764.008)","(312858.59, 4834781.976)",Local,Baldwin Street,,30017426.0,27107.0,2019/02/13 13:26:27.760,138-146,,...,27156.0,,Sidewalk on both sides,postgres,27874.0,201500.0,gepure,65.143347,,-64861.0
8,"(312795.974, 4834764.008)","(312727.824, 4834744.597)",Local,Baldwin Street,,13466041.0,27084.0,2019/02/13 13:26:27.760,176-198,,...,27107.0,,Sidewalk on both sides,postgres,27798.0,201500.0,gepure,70.860067,,27874.0
9,"(312819.343, 4834675.858)","(312897.217, 4834707.089)",Local,St Andrew Street,,13466122.0,27145.0,2019/02/13 13:26:27.760,10-36,,...,27208.0,,Sidewalk on both sides,postgres,27962.0,201500.0,gepure,83.902629,,81692.0


In [None]:
for ix, node in nodes.iterrows():
    indicies = edges[edges.to == node.xy].index
    edges.loc[indicies,'to'] = ix
    indicies = edges[edges['from'] == node.xy].index
    edges.loc[indicies,'from'] = ix

In [None]:
#pd.DataFrame([edges.length]).T
#edges["to"]
nodes.to_csv("./nodes.csv")

### Create the pandana network with the dataframe - using integer id's for nodes to/from

In [None]:
#Initialize the Pandana network 
#https://github.com/gboeing/urban-data-science/blob/master/20-Accessibility-Walkability/pandana-accessibility-demo-simple.ipynb
import pandana as pdna
from pandana import Network
transit_ped_net = pdna.Network(nodes["x"],
                               nodes["y"],
                               edges["from"],
                               edges["to"],                         
                               pd.DataFrame([edges.length]).T,
                               twoway=True)


In [None]:
import pandana as pdna
from pandana import Network
transit_ped_net1 = pdna.Network(nodes["x"],
                               nodes["y"],
                               edges["from"],
                               edges["to"],                          
                               pd.DataFrame([edges.distance]).T,
                               twoway=True)


### Save the network to load again later

In [None]:
#transit_ped_net.save_hdf5('/media/DATADRIVE/walkability/ped_net_final_.hd5')

transit_ped_net = pdna.Network.from_hdf5('/media/DATADRIVE/walkability/ped_net_final_0213.hd5')


FOLLOW THIS TUTORIAL
--Load building centroids

http://udst.github.io/pandana/tutorial.html

In [None]:
transit_ped_net.precompute(10000)

In [None]:
con = psycopg2.connect(database="walkability", user="postgres", password=1234,
    host="172.19.118.205",port="5432")
schools = gpd.read_postgis('SELECT * FROM compliance.school',con,crs={'init': 'epsg:2019'})
#schools_wgs = schools.to_crs(epsg=4326)
buildings = gpd.read_postgis('SELECT * FROM public.topo_building',con,crs={'init': 'epsg:2019'})
hospitals = gpd.read_postgis('SELECT * FROM compliance.hospital',con,crs={'init': 'epsg:2019'})
librarys = gpd.read_postgis('SELECT * FROM compliance.library',con,crs={'init': 'epsg:2019'})
ttc_stops = gpd.read_postgis('SELECT * FROM compliance.ttc_stop',con,crs={'init': 'epsg:2019'})

#buildings_wgs = buildings.to_crs(epsg=4326)

### Get node_ids of categories

In [None]:


# tmp = [-79.639273,43.580253,-79.113219,43.855442]

# # Reordered for Pandana functions
# bbox = [tmp[1], tmp[0], tmp[3], tmp[2]]

x, y = schools.x, schools.y
schools["node_ids"] = transit_ped_net.get_node_ids(x, y)
#transit_ped_net.init_pois(num_categories=1, max_dist=5000, max_pois=10)
transit_ped_net.set(schools["node_ids"], name="school")

x, y = hospitals.x, hospitals.y
hospitals["node_ids"] = transit_ped_net.get_node_ids(x, y)
#transit_ped_net.init_pois(num_categories=1, max_dist=5000, max_pois=10)
transit_ped_net.set(hospitals["node_ids"], name="hospital")

x, y = librarys.x, librarys.y
librarys["node_ids"] = transit_ped_net.get_node_ids(x, y)
#transit_ped_net.init_pois(num_categories=1, max_dist=5000, max_pois=10)
transit_ped_net.set(librarys["node_ids"], name="library")

x, y = ttc_stops.x, ttc_stops.y
ttc_stops["node_ids"] = transit_ped_net.get_node_ids(x, y)
#transit_ped_net.init_pois(num_categories=1, max_dist=5000, max_pois=10)
transit_ped_net.set(ttc_stops["node_ids"], name="ttc_stop")

# x, y = schools_wgs.x, schools_wgs.y
# schools_wgs["node_ids"] = transit_ped_net1.get_node_ids(x, y)
# #transit_ped_net1.init_pois(num_categories=1, max_dist=5000, max_pois=10)
# transit_ped_net1.set(schools_wgs["node_ids"], name="school")



x, y = buildings.geom.centroid.x, buildings.geom.centroid.y
buildings["node_ids"] = transit_ped_net.get_node_ids(x, y)

# x, y = buildings_wgs.geom.centroid.x, buildings_wgs.geom.centroid.y
# buildings_wgs["node_ids"] = transit_ped_net1.get_node_ids(x, y)



In [None]:
transit_ped_net.poi_category_names

In [None]:
transit_ped_net.set_pois("school",5000, 10, schools.x, schools.y)
school_walk_distances = transit_ped_net.nearest_pois(5000, "school", num_pois=10)

transit_ped_net.set_pois("hospital",5000, 10, hospitals.x, hospitals.y)
hospital_walk_distances = transit_ped_net.nearest_pois(5000, "hospital", num_pois=10)

transit_ped_net.set_pois("library",5000, 10, librarys.x, librarys.y)
library_walk_distances = transit_ped_net.nearest_pois(5000, "library", num_pois=10)

transit_ped_net.set_pois("ttc_stop",5000, 10, ttc_stops.x, ttc_stops.y)
ttc_stop_walk_distances = transit_ped_net.nearest_pois(5000, "ttc_stop", num_pois=10)

In [None]:
school_walk_distances.columns = ['d_sc_'+str(i) for i in range(1,11,1)]
hospital_walk_distances.columns = ['d_hp_'+str(i) for i in range(1,11,1)]
library_walk_distances.columns = ['d_lb_'+str(i) for i in range(1,11,1)]
ttc_stop_walk_distances.columns = ['d_ttcst_'+str(i) for i in range(1,11,1)]

In [None]:
library_walk_distances

In [None]:
#school_walk_distances = ((transit_ped_net.nearest_pois(5000, "school", num_pois=10)).groupby(schools.school_type_desc))


In [None]:
buildings['distance_schools_median'] = np.nan
buildings['minutes_schools_median'] = np.nan
buildings['distance_hospitals_median'] = np.nan
buildings['minutes_hospitals_median'] = np.nan
buildings['distance_librarys_median'] = np.nan
buildings['minutes_librarys_median'] = np.nan
buildings['distance_ttc_stops_median'] = np.nan
buildings['minutes_ttc_stops_median'] = np.nan
c = 0
l = len(buildings.node_ids.unique())
for ix,group in buildings.groupby('node_ids'):
    group
    c +=1
    buildings.loc[group.index,'distance_schools_median'] = school_walk_distances.loc[ix].median(axis=0)
    buildings.loc[group.index,'minutes_schools_median'] = buildings.loc[ix,'minutes_schools_median']/(1.2*60)
    buildings.loc[group.index,'distance_hospitals_median'] = hospital_walk_distances.loc[ix].median(axis=0)
    buildings.loc[group.index,'minutes_hospitals_median'] = buildings.loc[ix,'minutes_hospitals_median']/(1.2*60)
    buildings.loc[group.index,'distance_librarys_median'] = library_walk_distances.loc[ix].median(axis=0)
    buildings.loc[group.index,'minutes_librarys_median'] = buildings.loc[ix,'minutes_librarys_median']/(1.2*60)
    buildings.loc[group.index,'distance_ttc_stops_median'] = ttc_stop_walk_distances.loc[ix].median(axis=0)
    buildings.loc[group.index,'minutes_ttc_stops_median'] = buildings.loc[ix,'minutes_ttc_stops_median']/(1.2*60)

    for i in range(1,11,1):
        buildings.loc[group.index,'d_sc_{}'.format(i)] = school_walk_distances.loc[ix]['d_sc_{}'.format(i)]
        buildings.loc[group.index,'m_sc_{}'.format(i)] = buildings.loc[ix,'d_sc_{}'.format(i)]/(1.2*60)
        buildings.loc[group.index,'d_hp_{}'.format(i)] = hospital_walk_distances.loc[ix]['d_hp_{}'.format(i)]
        buildings.loc[group.index,'m_hp_{}'.format(i)] = buildings.loc[ix,'d_hp_{}'.format(i)]/(1.2*60)
        buildings.loc[group.index,'d_lb_{}'.format(i)] = library_walk_distances.loc[ix]['d_lb_{}'.format(i)]
        buildings.loc[group.index,'m_lb_{}'.format(i)] = buildings.loc[ix,'d_lb_{}'.format(i)]/(1.2*60)
        buildings.loc[group.index,'d_ttcst_{}'.format(i)] = ttc_stop_walk_distances.loc[ix]['d_ttcst_{}'.format(i)]
        buildings.loc[group.index,'m_ttcst_{}'.format(i)] = buildings.loc[ix,'d_ttcst_{}'.format(i)]/(1.2*60)
   

In [None]:
# Creating SQLAlchemy's engine to use
engine = create_engine('postgresql://postgres:1234@172.19.118.205:5432/walkability')

buildings['geometry'] = buildings['geom'].apply(lambda x: WKTElement(x.wkt, srid=2019))

#drop the geometry column as it is now duplicative
blds= buildings.drop('geom', 1, inplace=False)

# Use 'dtype' to specify column's type
# For the geom column, we will use GeoAlchemy's type 'Geometry'
blds.to_sql("walkability_buildings_all0213", engine, if_exists='replace', index=False, 
                         dtype={'geometry': Geometry('MULTIPOLYGON', srid= 2019)})


In [None]:
# buildings['distance_schools_median'] = np.nan
# buildings['minutes_schools_median'] = np.nan

# c = 0
# l = len(buildings.node_ids.unique())
# for ix,group in buildings.groupby('node_ids'):
#     group
#     c +=1
#     buildings.loc[group.index,'distance_schools_median'] = school_walk_distances.loc[ix].median(axis=0)
#     buildings.loc[group.index,'minutes_schools_median'] = buildings.loc[ix,'distance_schools_median']/(1.2*60)
#     for i in range(1,11,1):
#         buildings.loc[group.index,'d_sc_{}'.format(i)] = school_walk_distances.loc[ix]['d_sc_{}'.format(i)]
#         buildings.loc[group.index,'m_sc_{}'.format(i)] = buildings.loc[ix,'d_sc_{}'.format(i)]/(1.2*60)


In [None]:
def create_graph(gdf, precision=3, simplify=1):
    '''Create a networkx DiGraph given a GeoDataFrame of lines. Every line will
    correspond to two directional graph edges, one forward, one reverse. The
    original line row and direction will be stored in each edge. Every node
    will be where endpoints meet (determined by being very close together) and
    will store a clockwise ordering of incoming edges.
    '''
    # The geometries sometimes have tiny end parts - get rid of those!
    gdf.geometry = gdf.geom.simplify(simplify)

    #G = nx.DiGraph()
    G = nx.Graph()

    # TODO: converting to string is probably unnecessary - keeping float may be
    # faster
    def make_node(coord, precision):
        return tuple(np.round(coord, precision))

    # Edges are stored as (from, to, data), where from and to are nodes.
    # az1 is the azimuth of the first segment of the geometry (point into the
    # geometry), az2 is for the last segment (pointing out of the geometry)
    def add_edges(row, G):
        geom = row.geom
        coords = list(geom.coords)
        geom_r = LineString(coords[::-1])
        coords_r = geom_r.coords
        start = make_node(coords[0], precision)
        end = make_node(coords[-1], precision)
        # Add forward edge
        fwd_attr ={}
        for k,v in row.items():
            fwd_attr[k]=v
        fwd_attr['forward']= 1
        fwd_attr['geometry']=  geom
        fwd_attr['length']=  geom.length
#         fwd_attr['az1']=  azimuth_cartesian(coords[0], coords[1])
#         fwd_attr['az2']=  azimuth_cartesian(coords[-2], coords[-1])
        fwd_attr['visited']= 0

        G.add_edge(start, end, **fwd_attr)

    gdf.apply(add_edges, axis=1, args=[G])

    return G

In [16]:
for ix, node in nodes.iterrows():
    indicies = edges[edges.to == node.xy].index
    edges.loc[indicies,'to'] = ix
    indicies = edges[edges['from'] == node.xy].index
    edges.loc[indicies,'from'] = ix

KeyboardInterrupt: 

In [17]:
indicies = edges[edges['from'] == node.xy].index
edges.loc[indicies,'from'] = ix
edges.loc[indicies]

Unnamed: 0,from,to,road_type,linear_n_2,gcc_sdwl_1,to_interse,start_node,last_chang,address_r,obs_pts,...,end_node,topo_sdwlk,sdwlk_desc,last_cha_1,edge_id,feature_co,data_autho,length,qa_sdwlk_c,next_left_
57401,295,"(327950.191, 4851917.697)",Other,75 Alford Crescent,,13442534.0,13572.0,,,,...,12614.0,,Sidewalk on west side only,,9763.0,201600.0,gepure,17.965104,,37535.0
57402,295,"(327890.268, 4851840.543)",Other,75 Alford Crescent,,13442534.0,33697.0,,,,...,13572.0,,Sidewalk on west side only,,38109.0,201600.0,gepure,95.219506,,-71839.0


In [None]:
import pandas as pd
import numpy as np
from multiprocessing import Pool
num_partitions = 16 #number of partitions to split dataframe
num_cores = 16 #number of cores on your machine



def parallelize_dataframe(df, func):
    df_split = np.array_split(df, num_partitions)
    pool = Pool(num_cores)
    df = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    return df

def set_node_id(edges):
    nodes = pd.read_csv('./nodes.csv')
    for ix, node in nodes.iterrows():
        indicies = edges[edges.to == node.xy].index
        edges.loc[indicies,'to'] = ix
        indicies = edges[edges['from'] == node.xy].index
        edges.loc[indicies,'from'] = ix
    return edges
    
    
edges = parallelize_dataframe(edges, set_node_id)

In [None]:
from multiprocessing import Pool
import time
import itertools

import matplotlib.pyplot as plt
import networkx as nx


def chunks(l, n):
    """Divide a list of nodes `l` in `n` chunks"""
    l_c = iter(l)
    while 1:
        x = tuple(itertools.islice(l_c, n))
        if not x:
            return
        yield x


def _betmap(G_normalized_weight_sources_tuple):
    """Pool for multiprocess only accepts functions with one argument.
    This function uses a tuple as its only argument. We use a named tuple for
    python 3 compatibility, and then unpack it when we send it to
    `betweenness_centrality_source`
    """
    return nx.betweenness_centrality_source(*G_normalized_weight_sources_tuple)


def betweenness_centrality_parallel(G, processes=11):
    """Parallel betweenness centrality  function"""
    p = Pool(processes=processes)
    node_divisor = len(p._pool) * 4
    node_chunks = list(chunks(G.nodes(), int(G.order() / node_divisor)))
    num_chunks = len(node_chunks)
    bt_sc = p.map(_betmap,
                  zip([G] * num_chunks,
                      [True] * num_chunks,
                      [None] * num_chunks,
                      node_chunks))

    # Reduce the partial solutions
    bt_c = bt_sc[0]
    for bt in bt_sc[1:]:
        for n in bt:
            bt_c[n] += bt[n]
    return bt_c


if __name__ == "__main__":
    print("")
    print("Computing betweenness centrality for:")
    print(nx.info(G))
    print("\tParallel version")
    start = time.time()
    bt = betweenness_centrality_parallel(G)
    print("\t\tTime: %.4F" % (time.time() - start))
    print("\t\tBetweenness centrality for node 0: %.5f" % (bt[0]))
#     print("\tNon-Parallel version")
#     start = time.time()
#     bt = nx.betweenness_centrality(G)
#     print("\t\tTime: %.4F seconds" % (time.time() - start))
#     print("\t\tBetweenness centrality for node 0: %.5f" % (bt[0]))


In [None]:
print("\t\tBetweenness centrality for node 0: %.5f" % (bt[0]))
