In [3]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
import pandas as pd
import geopandas as gpd
from IPython.display import IFrame
%matplotlib inline

# Configure OSMnx: use_cache=True turns response caching on and
# log_console=True turns on logging to your terminal window.
ox.config(log_console=True, use_cache=True)

# Last expression of the cell: displays the installed OSMnx version string.
ox.__version__

'1.0.1'

## Get network with OSMnx

OSMnx lets you download street network data and build topologically-corrected street networks, project and plot the networks, and save the street network as SVGs, GraphML files, GeoPackages, or shapefiles for later use. The street networks are directed and preserve one-way directionality.

You can download a street network by providing OSMnx any of the following:
  - a bounding box
  - a lat-long point plus a distance
  - an address plus a distance
  - a place name or list of place names (to automatically geocode and get the boundary of)
  - a polygon of the desired street network's boundaries
  - a .osm formatted xml file
  
You can also specify several different network types:
  - 'drive' - get drivable public streets (but not service roads)
  - 'drive_service' - get drivable streets, including service roads
  - 'walk' - get all streets and paths that pedestrians can use (this network type ignores one-way directionality)
  - 'bike' - get all streets and paths that cyclists can use
  - 'all' - download all non-private OSM streets and paths (this is the default network type unless you specify a different one)
  - 'all_private' - download all OSM streets and paths, including private-access ones

In the following cell, we download the road network for the cities of Mérida and Kanasín, Yucatán, or, if it has already been saved locally, load it as a NetworkX graph object.

In [83]:
import pathlib

# Local cache of the road network; it is both the file we try to load and the
# file we write after a fresh download.
filepath = pathlib.Path('./networks/merida-kanasin-road.graphml')

if filepath.exists():
    # Reuse the previously saved network instead of downloading it again.
    G = ox.load_graphml(filepath)
else:
    # Get the drivable public streets network, aka road network, without service roads,
    # e.g. private roads, parking lots, etc.
    # Use retain_all if you want to keep all disconnected subgraphs
    # (e.g. when your places aren't adjacent).
    # TODO: is there a way to know which version of the graph we are using, and to
    # update the graph if needed?
    # TODO: I would like to visualize different network options: service roads or not,
    # retain_all or not.
    # TODO: It would be nice to set up a polygon for the city and its surrounding areas,
    # to be sure of the exact location.
    places = [
        {'city': 'Merida', 'state': 'Yucatan', 'country': 'Mexico'},
        {'city': 'Kanasin', 'state': 'Yucatan', 'country': 'Mexico'},
    ]
    G = ox.graph_from_place(places, network_type='drive')
    # Save to the same path that the exists() check above looks for. The previous
    # code passed the undefined name `file`, which raises NameError right after the
    # download, so the cache file was never written.
    ox.save_graphml(G, filepath=filepath, gephi=False)

TODO: Discuss the graph objects, nodes and edges, and what attributes they possess. Print an example of each.

Some general graph properties.

In [86]:
# Save a second copy of the graph under a separate "-gephi" file name, this time
# with gephi=True (presumably a Gephi-compatible export; confirm in the OSMnx docs).
ox.save_graphml(G, filepath='./networks/merida-kanasin-road-gephi.graphml', gephi=True)

In [84]:
# Show the class of the graph object bound to G.
type(G)

networkx.classes.multidigraph.MultiDiGraph

In [85]:
# Report the size of the network: total number of edges and of nodes in G.
print(f"The road network has {G.number_of_edges()} edges and {G.number_of_nodes()} nodes")

The road network has 80160 edges and 29324 nodes


Nodes are street intersections, with y and x coordinates and the number of streets intersecting the node, and other potential attributes. 

Nodes are indexed by an integer value.

In [53]:
# Look up the attribute dict of a single node by its integer id.
G.nodes[287026838]

{'y': 21.0118318, 'x': -89.6135586, 'street_count': 3}

We now list all attributes present in the graph nodes:

In [33]:
from itertools import chain

# Union of every attribute name found on any node of G.
{key for _node, attrs in G.nodes(data=True) for key in attrs}

{'highway', 'street_count', 'x', 'y'}

We have a highway attribute, how many nodes have this attribute?

In [37]:
# Count the nodes whose attribute dict contains a 'highway' key.
h_count = sum(1 for _node, attr in G.nodes.items() if 'highway' in attr)
h_count

600

What are its possible values?

In [48]:
# Distinct values of the 'highway' attribute over the nodes that carry it.
{attrs['highway'] for _node, attrs in G.nodes(data=True) if 'highway' in attrs}

{'crossing',
 'mini_roundabout',
 'passing_place',
 'traffic_signals',
 'turning_circle',
 'turning_loop'}

Edges are roads and are indexed by their starting and ending nodes, plus a zero (TODO: what is that zero?). They are tuple objects.

OK, the zero is the edge key: since a multigraph can have more than one edge between a pair of nodes, each edge is identified by a key.

TODO: Determine if this is indeed a multigraph. Is there more than one edge for any pair of nodes? Use the key.

In [71]:
# Look for parallel edges: ordered node pairs (u, neighbor) joined by more than one
# edge. The question being answered is "is there more than one edge for any pair of
# nodes?", so the threshold is > 1; the previous > 2 silently skipped pairs joined
# by exactly two parallel edges.
for u in G.nodes():
    for neighbor in G.neighbors(u):
        if G.number_of_edges(u, neighbor) > 1:
            print(u, neighbor)

2028847780 2028847782
2028847782 2028847780


In [76]:
# Attribute dict of the edge from node 2028847780 to node 2028847782 with edge key 0.
G.edges[2028847780, 2028847782, 0]

{'osmid': 443843678,
 'name': 'Calle 32',
 'highway': 'residential',
 'oneway': False,
 'length': 53.545}

In [78]:
# Attribute dict of the edge from node 2028847782 to node 2028847780 with edge key 1.
G.edges[2028847782, 2028847780, 1]

{'osmid': [369316954, 369316955, 369316951],
 'name': ['Calle 39-A', 'Calle 30', 'Calle 39B-1'],
 'highway': 'residential',
 'oneway': False,
 'length': 188.21699999999998,
 'geometry': <shapely.geometry.linestring.LineString at 0x7f64680b56a0>}

We now list all available attributes.

In [50]:
# Union of every attribute name found on any edge of G.
{key for *_endpoints, attrs in G.edges(data=True) for key in attrs}

{'access',
 'bridge',
 'geometry',
 'highway',
 'junction',
 'lanes',
 'length',
 'maxspeed',
 'name',
 'oneway',
 'osmid',
 'ref',
 'tunnel',
 'width'}

TODO: Explore what each of these is, their possible values and units, the projection in the case of the geometry, and how many edges possess each attribute (or the fraction of all edges) to give an idea of how useful they are.

TODO: Visualize the graph with all attributes either printed or as an overlay.

In [96]:
# Build a folium map of the graph and keep it in m1 (it is saved to HTML in a later cell).
# popup_attribute='name' picks the edge attribute used for popups; weight and color
# presumably style the drawn street lines (confirm in the OSMnx docs).
m1 = ox.plot_graph_folium(G, popup_attribute='name', weight=2, color='#8b0000')

In [99]:
# Import folium in this cell: read top to bottom, the notebook's other
# `import folium` sits in a later cell, so on a fresh kernel (Restart & Run All)
# the name would not be defined yet and this cell would raise NameError.
import folium

# Add a layer control to the street map and write the map to a standalone HTML file.
folium.LayerControl().add_to(m1)
m1.save('osmnx-folium.html')

In [95]:
import folium
from folium.plugins import FastMarkerCluster  # NOTE(review): not used in this cell

# Empty base map centred on a fixed location (presumably [lat, lon] near Mérida,
# since the values match the y/x range of the graph nodes; confirm).
m = folium.Map(location=[20.9674, -89.5926], zoom_start=11.5)

# Last expression of the cell: renders the map inline.
m

## Importing shapefiles containing polygons of Colonias de Mexico using GeoPandas (http://datamx.io/dataset/colonias-mexico)

In order to perform some analysis of how data metrics change with scale, we import the neighborhoods of the city as shapefiles to create a subgraph for each neighborhood.

In [5]:
# Load the colonias dataset from the Colonias folder; geopandas reads it much as pandas would.
colonias = gpd.read_file('input_data/coloniasmexico/Colonias/')

# Row mask shared by both municipality filters: colonias located in Yucatan state.
in_yucatan = colonias['ST_NAME'].eq('YUCATAN')

# Colonias of the Merida municipality of Yucatan state.
colonias_yucatan_merida = colonias.loc[in_yucatan & colonias['MUN_NAME'].eq('MÉRIDA')]

# Colonias of the Kanasin municipality of Yucatan state.
colonias_yucatan_kanasin = colonias.loc[in_yucatan & colonias['MUN_NAME'].eq('KANASÍN')]

# Stack both selections into one frame (Merida rows first, then Kanasin rows).
colonias_yucatan_merida_kanasin = pd.concat(
    [colonias_yucatan_merida, colonias_yucatan_kanasin]
)

colonias_yucatan_merida_kanasin.head(3)

Unnamed: 0,OBJECTID,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
56515,56516,97000,YUCATAN,MÉRIDA,BARRIO SAN SEBASTIAN,FRACCIONAMIENTO,724836804.0,0.02866,3.8e-05,"POLYGON Z ((-89.63250 20.96072 0.00000, -89.63..."
56516,56517,97000,YUCATAN,MÉRIDA,BARRIO SANTA ANA,FRACCIONAMIENTO,724836804.0,0.031112,5.1e-05,"POLYGON Z ((-89.62424 20.97568 0.00000, -89.62..."
56517,56518,97000,YUCATAN,MÉRIDA,BARRIO SANTIAGO,FRACCIONAMIENTO,724836804.0,0.023338,3.4e-05,"POLYGON Z ((-89.62914 20.96978 0.00000, -89.62..."


The reference system used in the neighborhoods shapefile is WGS 84 (TODO: explain a little about this). It does not seem too important here, as the nodes are geolocated, but it may become important when we define our own polygons to query subregions.

In [41]:
# View info about the Coordinate Reference System (CRS) attached to the colonias GeoDataFrame.
colonias_yucatan_merida_kanasin.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

TODO: Display a visualization of the colonias and the road network.

TODO: Create or obtain polygons for the main zones of the city, as in http://isla.merida.gob.mx/serviciosinternet/ordenamientoterritorial/docs/estrategico/E_01_ZONAS__PRIMARIAS.jpg.
Or for the 5 main zones: north, south, east, west and center.

Also consider: http://isla.merida.gob.mx/serviciosinternet/ordenamientoterritorial/paginas/pmdu.phpx
Check what kind of data can we obtain from there.

## Creating vector of measures for each Colonia as subgraph

In [36]:
# Extract the graph nodes as a GeoDataFrame; edges=False leaves the edges out,
# so a single frame (nodes only) is returned.
nodes = ox.graph_to_gdfs(G, edges=False)

In [37]:
# Preview the first rows of the nodes GeoDataFrame.
nodes.head()

Unnamed: 0_level_0,y,x,street_count,highway,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
286974477,20.900216,-89.509268,4,,POINT (-89.50927 20.90022)
286974740,20.905018,-89.526717,3,,POINT (-89.52672 20.90502)
286974743,20.924688,-89.56015,4,,POINT (-89.56015 20.92469)
287020194,20.896659,-89.647519,3,,POINT (-89.64752 20.89666)
287020195,20.908412,-89.625401,3,,POINT (-89.62540 20.90841)


In [42]:
# Coordinate Reference System of the nodes GeoDataFrame.
nodes.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [38]:
# Extract the graph edges as a GeoDataFrame; nodes=False leaves the nodes out,
# so a single frame (edges only) is returned.
edges = ox.graph_to_gdfs(G, nodes=False)

In [39]:
# Preview the first rows of the edges GeoDataFrame.
edges.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,ref,highway,maxspeed,length,geometry,name,lanes,bridge,junction,tunnel,width,access
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
286974477,5443055966,0,42683673,True,MEX 188,primary,90.0,173.53,"LINESTRING (-89.50927 20.90022, -89.50960 20.8...",,,,,,,
286974477,2198242153,0,209633188,False,,residential,,829.226,"LINESTRING (-89.50927 20.90022, -89.50946 20.9...",,,,,,,
286974740,2198242162,0,27528695,False,,tertiary,,518.353,"LINESTRING (-89.52672 20.90502, -89.52500 20.9...",,,,,,,
286974740,5011841588,0,"[786072812, 42683694]",False,,tertiary,,1679.496,"LINESTRING (-89.52672 20.90502, -89.52771 20.9...",Calle 21,,,,,,
286974740,1889020903,0,42683694,False,,tertiary,,973.604,"LINESTRING (-89.52672 20.90502, -89.52636 20.9...",,,,,,,


In [43]:
# Coordinate Reference System of the edges GeoDataFrame.
edges.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Measures

#### Basic stats:

<ul>
    <li>n = number of nodes in the graph</li>
    <li>m = number of edges in the graph </li>
    <li>k_avg = average node degree of the graph </li>
    <li> intersection_count = number of intersections in graph, that is,
    nodes with >1 physical street connected to them </li>
    <li>streets_per_node_avg = how many physical streets (edges in the
    undirected representation of the graph) connect to each node (ie, intersection or dead-end) on average (mean) </li>
    <li>streets_per_node_counts = dict with keys of number of physical
    streets connecting to a node, and values of number of nodes with this count </li>
    <li>streets_per_node_proportion = dict, same as previous, but as a
    proportion of the total, rather than counts </li>
    <li>edge_length_total = sum of all edge lengths in graph, in meters </li>
    <li>edge_length_avg = mean edge length in the graph, in meters </li>
    <li>street_length_total = sum of all edges in the undirected
    representation of the graph </li>
    <li>street_length_avg = mean edge length in the undirected
    representation of the graph, in meters </li>
    <li>street_segments_count = number of edges in the undirected
    representation of the graph</li>
    <li>node_density_km = n divided by area in square kilometers</li>
    <li>intersection_density_km = intersection_count divided by area in
    square kilometers</li>
    <li>edge_density_km = edge_length_total divided by area in square
    kilometers</li>
    <li>street_density_km = street_length_total divided by area in square
    kilometers</li>
    <li>circuity_avg = edge_length_total divided by the sum of the great
    circle distances between the nodes of each edge</li>
    <li>self_loop_proportion = proportion of edges that have a single node
    as its endpoints (ie, the edge links nodes u and v, and u==v)</li>
    <li>clean_intersection_count = number of intersections in street
    network, merging complex ones into single points</li>
    <li>clean_intersection_density_km = clean_intersection_count divided
    by area in square kilometers</li>
</ul>

#### Extended stats:

<ul>
    <li>avg_neighbor_degree = Mean degree of nodes in the neighborhood of each node</li>
    <li>avg_neighbor_degree_avg = Mean of all average neighborhood degrees in network</li>
    <li>avg_weighted_neighbor_degree = Mean degree of nodes in the neighborhood of each node, weighted by edge length</li>
    <li>avg_weighted_neighbor_degree_avg = Mean of all weighted average neighborhood degrees in network</li>
    <li>degree_centrality = Fraction of nodes that each node is connected to</li>
    <li>degree_centrality_avg = Mean of all degree centralities in network</li>
    <li>clustering_coefficient = Extent to which node's neighborhood forms a complete graph</li>
    <li>clustering_coefficient_avg = Mean of clustering coefficients of all nodes in network</li>
    <li>clustering_coefficient_weighted = Extent to which node's neighborhood forms a complete graph, weighted by edge length</li>
    <li>clustering_coefficient_weighted_avg = Mean of weighted clustering coefficients of all nodes in network</li>
    <li>pagerank = Ranking of nodes based on structure of incoming edges</li>
    <li>pagerank_max_node = Node with the maximum PageRank</li>
    <li>pagerank_max = Highest PageRank value of any node in the graph</li>
    <li>pagerank_min_node = Node with the minimum PageRank</li>
    <li>pagerank_min = Lowest PageRank value of any node in the graph</li>
    <li>node_connectivity = Minimum number of nodes that must be removed to disconnect network</li>
    <li>node_connectivity_avg = Expected number of nodes that must be removed to disconnect randomly selected pair of non-adjacent nodes</li>
    <li>edge_connectivity = Minimum number of edges that must be removed to disconnect network</li>
    <li>eccentricity = For each node, the maximum distance from it to all other nodes, weighted by length</li>
    <li>diameter = Maximum eccentricity of any node in network</li>
    <li>radius = Minimum eccentricity of any node in network</li>
    <li>center = Set of all nodes whose eccentricity equals the radius</li>
    <li>periphery = Set of all nodes whose eccentricity equals the diameter</li>
    <li>closeness_centrality = For each node, the reciprocal of the sum of the distance from the node to all other nodes in the graph, weighted by length</li>
    <li>closeness_centrality_avg = Mean of all the closeness centralities of all the nodes in network</li>
    <li>betweenness_centrality = For each node, the fraction of all shortest paths that pass through the node</li>
    <li>betweenness_centrality_avg = Mean of all the betweenness centralities of all the nodes in network</li>
</ul>

In [40]:
# For each colonia polygon, find the graph nodes that intersect it, induce the
# corresponding subgraph of G and collect its OSMnx statistics.
max_colonias = 10  # only the first 10 colonias; just for testing

subgraphs_colonias = []  # induced subgraph of each colonia that produced stats
stats = []  # one list of non-dict stat values per colonia, aligned with subgraphs_colonias
vec = []  # non-dict stat values (basic + extended) of the most recently processed colonia
skipped_colonias = []  # (OBJECTID, reason) for each colonia that produced no stats

# Select rows by position. The previous loop iterated over range(DataFrame.size),
# which is rows * columns rather than the number of rows, and its `j == 10` break
# stopped after 9 colonias instead of the intended 10.
colonias_sample = colonias_yucatan_merida_kanasin.iloc[:max_colonias]

for ob_id, polygon in zip(colonias_sample['OBJECTID'], colonias_sample.geometry):
    # `polygon` must come from the current row: it used to be read without ever being
    # assigned (the assignment was commented out), so every iteration raised a
    # NameError that the bare `except: pass` hid, and no stats were collected.
    intersecting_nodes = nodes[nodes.intersects(polygon)].index
    if len(intersecting_nodes) == 0:
        # No graph node falls inside this colonia, so there is nothing to measure.
        skipped_colonias.append((ob_id, 'no intersecting nodes'))
        continue

    G_sub = G.subgraph(intersecting_nodes)
    try:
        stat = ox.basic_stats(
            G_sub,
            area=None,  # land area of the study site, in square meters; must be > 0. If None, all density-based metrics are skipped.
            clean_intersects=False,  # if True, calculate consolidated intersections count (and density, if area is provided) via consolidate_intersections
            circuity_dist='gc',  # 'gc' or 'euclidean': how to calculate straight-line distances for circuity; use the former for lat-lng networks, the latter for projected ones
        )
        extended_stat = ox.extended_stats(
            G_sub,
            connectivity=False,  # if True, calculate node and edge connectivity
            anc=False,  # if True, calculate average node connectivity
            ecc=True,  # calculate shortest paths, eccentricity, and topological metrics that use eccentricity
            bc=True,  # calculate node betweenness centrality
            cc=True,  # calculate node closeness centrality
        )
    except Exception as exc:
        # Best effort: one colonia whose subgraph cannot be measured must not stop the
        # loop, but the failure is recorded and reported instead of silently dropped.
        # Catching Exception (not a bare except) lets KeyboardInterrupt through.
        skipped_colonias.append((ob_id, repr(exc)))
        print(f"Skipping colonia {ob_id}: {exc!r}")
        continue

    # Append only after both stat calls succeeded so that subgraphs_colonias[k]
    # and stats[k] always describe the same colonia.
    subgraphs_colonias.append(G_sub)
    stat.update(extended_stat)  # insert the extended stats values
    vec = [value for value in stat.values() if not isinstance(value, dict)]  # keep only the non-dict values
    stats.append(vec)