# Construct Street Network

In [1]:
%matplotlib inline

import osmnx as ox
import matplotlib.pyplot as plt
import numpy as np
import geopandas  as gpd
import pandas as pd
ox.config(use_cache=True, log_console=False)
%matplotlib inline
import h3 as h3

# import aws libraries
import boto3
# import awswrangler

import warnings
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
os.getcwd()

path =  Path(os.getcwd())
root = path.parent.absolute()

h3_level = 8
root



PosixPath('/home/ubuntu')

If you cannot import AWS Wrangler due to the error `AttributeError: module 'multiprocessing' has no attribute 'connection'` then try downgrading some of the dependencies by using: `pip install fsspec==0.6.3 PyAthena==1.10.2 s3fs==0.4.0`

In [2]:
# pip install fsspec==0.6.3 PyAthena==1.10.2 s3fs==0.4.0

## Overview
This notebook is computationally expensive because a street network of edges and nodes is being constructed and is meant to be ran on an EC2 Instance. Most cells can be ran on a local machine but the linux commands for writing from EC2 to S3 will only work when connected to EC2.

## 1. Generate network nodes
Here we create the driving network within the city of Los Angeles and it will construct the street network.

### 1.1 Construct graph of Los Angeles County

In [3]:
# Downlaod the street network of Los Angeles County
G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')

Next, we consolidate intersections and rebuild graph topology which reconnects edge geometries to the new consolidated nodes

In [4]:
# get a GeoSeries of consolidated intersections
Gc = ox.consolidate_intersections(ox.project_graph(G))

Next, convert the MultiDiGraph to a GeoPandas dataframe.

In [5]:
nodes = ox.graph_to_gdfs(Gc, edges=False)
nodes[['x', 'y', 'lat', 'lon']]

Unnamed: 0_level_0,x,y,lat,lon
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,372767.760379,3.758563e+06,,
1,372927.760839,3.758561e+06,,
2,372762.708764,3.758162e+06,33.956275,-118.377049
3,370663.102590,3.761865e+06,33.989402,-118.400314
4,370529.299423,3.762078e+06,33.991310,-118.401794
...,...,...,...,...
128559,352199.027842,3.810134e+06,34.422121,-118.608404
128560,352169.884566,3.810068e+06,34.421515,-118.608710
128561,352185.418027,3.810103e+06,,
128562,357448.978488,3.809165e+06,34.414119,-118.551130


Let's view some of the nodes and the unique highways.

In [6]:
display(nodes.head())

Unnamed: 0_level_0,osmid_original,x,y,street_count,highway,lon,lat,ref,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,"[653656, 581237354]",372767.760379,3758563.0,0,,,,,POINT (372767.760 3758563.017)
1,"[123189012, 581239002]",372927.760839,3758561.0,0,,,,,POINT (372927.761 3758561.436)
2,1718756337,372762.708764,3758162.0,3,traffic_signals,-118.377049,33.956275,,POINT (372762.709 3758162.031)
3,653681,370663.10259,3761865.0,3,,-118.400314,33.989402,,POINT (370663.103 3761864.529)
4,26427612,370529.299423,3762078.0,3,,-118.401794,33.99131,,POINT (370529.299 3762077.999)


In [7]:
nodes.shape

(128564, 9)

### 1.2 Create shapefile from network of nodes

In [11]:
nodes.to_file(root / 'X.data' / 'nodes_and_edges' / 'la_county_nodes' / 'la_county_nodes.shp')

  nodes.to_file(root / 'X.data' / 'nodes_and_edges' / 'la_county_nodes' / 'la_county_nodes.shp')


### 1.3 Copy all node shapefiles from EC2 to S3 bucket
AWS CLI has to be installed on Ubuntu in order for this to work and can be installed via `sudo apt-get install awscli` in the terminal.

In [14]:
!aws s3 sync /home/ubuntu/X.data/nodes_and_edges/la_county_nodes s3://traffic-data-bucket/nodes_and_edges/la_county_nodes

upload: ../X.data/nodes_and_edges/la_county_nodes/la_county_nodes.cpg to s3://traffic-data-bucket/nodes_and_edges/la_county_nodes/la_county_nodes.cpg
upload: ../X.data/nodes_and_edges/la_county_nodes/la_county_nodes.prj to s3://traffic-data-bucket/nodes_and_edges/la_county_nodes/la_county_nodes.prj
upload: ../X.data/nodes_and_edges/la_county_nodes/la_county_nodes.shx to s3://traffic-data-bucket/nodes_and_edges/la_county_nodes/la_county_nodes.shx
upload: ../X.data/nodes_and_edges/la_county_nodes/la_county_nodes.shp to s3://traffic-data-bucket/nodes_and_edges/la_county_nodes/la_county_nodes.shp
upload: ../X.data/nodes_and_edges/la_county_nodes/la_county_nodes.dbf to s3://traffic-data-bucket/nodes_and_edges/la_county_nodes/la_county_nodes.dbf


### 1.4 Apply the hexagon id based on the latitude and longitude for each point

In [15]:
def lat_lng_to_h3(row):
    return h3.geo_to_h3(row.lat, row.lon, h3_level)


nodes['hex_id'] = nodes.apply(lat_lng_to_h3, axis=1)

In [16]:
nodes.sample(2)

Unnamed: 0_level_0,osmid_original,x,y,street_count,highway,lon,lat,ref,geometry,hex_id
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
45779,122893558,365437.239771,3788949.0,3,,-118.461071,34.232934,,POINT (365437.240 3788949.051),8829a188a1fffff
110958,1109831795,415921.290271,3776085.0,3,,-117.91175,34.12224,,POINT (415921.290 3776085.084),8829a1d82bfffff


In [17]:
nodes.highway.value_counts()

traffic_signals             6967
stop                        5037
motorway_junction           1513
turning_circle                51
mini_roundabout               22
crossing                      13
turning_loop                   5
give_way                       3
traffic_signals;crossing       2
milestone                      1
trailhead                      1
Name: highway, dtype: int64

### 1.5 Get counts of the number of highways and streets within the network

In [18]:
highway_cnts = nodes.groupby(['hex_id', 'highway']).hex_id.agg('count').to_frame('count').reset_index()
highway_cnts.sample(3)

Unnamed: 0,hex_id,highway,count
2569,8829a1d1b9fffff,stop,1
2080,8829a1c207fffff,traffic_signals,4
330,8829a1120dfffff,stop,2


In [19]:
street_count_cnts = nodes.groupby(['hex_id', 'street_count']).hex_id.agg('count').to_frame('count').reset_index()
street_count_cnts.sample(3)

Unnamed: 0,hex_id,street_count,count
1781,8829a11547fffff,4,2
74,8829a03605fffff,3,13
34,8829a02593fffff,3,1


### 1.6 Write highway and street counts data to CSV and upload to S3 bucket

In [28]:
highway_cnts.to_csv('s3://traffic-data-bucket/nodes_and_edges/nodes_highway_cnts.csv')

In [30]:
street_count_cnts.to_csv('s3://traffic-data-bucket/nodes_and_edges/nodes_street_count_cnts.csv')

In [31]:
del nodes

## 2. Generate network edges
### 2.1 Construct graph of Los Angeles County and add edge speeds

In [2]:
#G = ox.graph_from_place('Long Beach, CA, USA', network_type='drive')
G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')
G = ox.add_edge_speeds(G)

In [3]:
import warnings
warnings.filterwarnings('ignore')
Gc = ox.consolidate_intersections(ox.project_graph(G))

### 2.2 Convert MultiDiGraph to a GeoDataframe of nodes

In [4]:
df_nodes = ox.graph_to_gdfs(Gc, edges=False)

In [5]:
print(df_nodes.shape)
len(df_nodes.osmid_original.unique())

(128563, 9)


128563

### 2.3 Convert MultiDiGraph to a GeoDataframe of edges

In [6]:
df_edges = ox.graph_to_gdfs(Gc, nodes=False)

### 2.4 Get counts for highways and bridges

In [7]:
df_nodes.highway.value_counts()

traffic_signals             6966
stop                        5037
motorway_junction           1513
turning_circle                51
mini_roundabout               22
crossing                      13
turning_loop                   5
give_way                       3
traffic_signals;crossing       2
milestone                      1
trailhead                      1
Name: highway, dtype: int64

In [8]:
df_edges.bridge.value_counts()

yes               5215
viaduct             12
[yes, viaduct]       9
aqueduct             2
Name: bridge, dtype: int64

In [9]:
df_edges.shape

(372407, 19)

In [10]:
#len(df_edges['osmid'].unique())

## 3. Feature engineering
### 3.1 Create functions to create...?

In [11]:
import re

def get_max(var):
    max_var = int(0)
    if var is np.nan:
        max_var = var
    
    elif isinstance(var, list):
        for a_string in var:
            a_string = str(a_string)
            numbers = [int(word) for word in a_string.split() if word.isdigit()]
            var_int = int(numbers[0])
            if var_int > max_var:
                max_var = var_int       

    else:
        var = str(var)
        numbers = [int(word) for word in var.split() if word.isdigit()]
        max_var = int(numbers[0])

    return(max_var)

def get_max_float(var):
    max_var = float(0)
    if var is np.nan:
        max_var = var
    
    elif isinstance(var, list):
        for a_string in var:
            a_string = str(a_string)
            a_string = re.findall(r'\d*\.?\d+', a_string)
            var_float = float(a_string[0])
            if var_float > max_var:
                max_var = var_float       
    else:
        var = re.findall(r'\d*\.?\d+', var)
        max_var = float(var[0])
    return(max_var)


def get_first(var):
    
    if var is np.nan:
        first_var = var
    
    elif isinstance(var, list):
        first_var = str(var[0])
    else:
        first_var = str(var)
    return(first_var)


In [12]:
df_edges.sample(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,name,highway,oneway,reversed,length,speed_kph,lanes,geometry,u_original,v_original,ref,maxspeed,bridge,access,junction,tunnel,service,width
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
93219,93272,0,884834591,95th Street E,residential,False,False,115.499,40.6,,"LINESTRING (411931.463 3831666.645, 411930.833...",8228398803,123321056,,,,,,,,
128036,127296,0,13306303,Big Horn Walk,residential,False,False,38.156,40.6,,"LINESTRING (357623.994 3810795.256, 357662.228...",2452005151,2452005154,,,,,,,,
32568,2949,0,159001982,Cedartree Road,residential,False,False,108.781,40.6,,"LINESTRING (397878.493 3757859.168, 397910.886...",2273989628,1711114262,,,,,,,,


In [13]:
cols_to_keep = ['osmid', 'speed_kph', 'ref', 'name', 'highway','oneway','length','lanes','maxspeed','bridge','access','junction','tunnel', 'geometry', ]

df_edges.reset_index(inplace = True, drop = True)
df_edges = df_edges[cols_to_keep]

display(df_edges.sample(2))

Unnamed: 0,osmid,speed_kph,ref,name,highway,oneway,length,lanes,maxspeed,bridge,access,junction,tunnel,geometry
297621,13303529,40.6,,Florinda Avenue,residential,False,246.677,,,,,,,"LINESTRING (405185.006 3773791.190, 405198.002..."
86565,13359036,40.6,,South Orchard Avenue,residential,False,130.311,,,,,,,"LINESTRING (380882.298 3749914.253, 380884.545..."


In [14]:
df_edges.dtypes

osmid          object
speed_kph     float64
ref            object
name           object
highway        object
oneway           bool
length        float64
lanes          object
maxspeed       object
bridge         object
access         object
junction       object
tunnel         object
geometry     geometry
dtype: object

### 3.2 Add traffic and road features to edges dataset

In [16]:
df_edges['maxspeed'] = df_edges.apply(lambda x: get_max(x.maxspeed), axis=1)

In [17]:
#df_edges['width'] = df_edges.apply(lambda x: get_max(x.width), axis=1)

In [18]:
df_edges['lanes'] = df_edges.apply(lambda x: get_max(x.lanes), axis=1)

In [19]:
df_edges['bridge'] = df_edges.apply(lambda x: get_first(x.bridge), axis=1)

In [20]:
df_edges['speed_kph'] = df_edges.apply(lambda x: get_first(x.speed_kph), axis=1)

In [21]:
df_edges['name'] = df_edges.apply(lambda x: get_first(x['name']), axis=1)

In [22]:
df_edges['osmid'] = df_edges.apply(lambda x: get_first(x.osmid), axis=1)

In [23]:
df_edges['ref'] = df_edges.apply(lambda x: get_first(x.ref), axis=1)

In [24]:
df_edges['highway'] = df_edges.apply(lambda x: get_first(x.highway), axis=1)

In [25]:
df_edges['oneway'] = df_edges.apply(lambda x: get_first(x.oneway), axis=1)

In [26]:
df_edges['access'] = df_edges.apply(lambda x: get_first(x.access), axis=1)

In [27]:
df_edges['junction'] = df_edges.apply(lambda x: get_first(x.junction), axis=1)

In [28]:
df_edges['tunnel'] = df_edges.apply(lambda x: get_first(x.tunnel), axis=1)

In [29]:
df_edges.sample(3)

Unnamed: 0,osmid,speed_kph,ref,name,highway,oneway,length,lanes,maxspeed,bridge,access,junction,tunnel,geometry
226644,13454485,40.6,,North Sycamore Avenue,residential,False,202.45,2.0,,,,,,"LINESTRING (376158.847 3773631.399, 376158.876..."
78053,13395387,40.6,,Louise Avenue,residential,False,253.157,2.0,,,,,,"LINESTRING (390752.000 3754140.464, 390762.881..."
210838,13351103,40.6,,East 32nd Street,residential,False,339.703,,,,,,,"LINESTRING (382890.635 3764967.435, 382722.288..."


### 3.3 Create shapefile from network of edges

In [30]:
df_edges.to_file(root / 'X.data' /  'nodes_and_edges' / 'la_county_edges' / 'la_county_edges.shp', index=False)

### 3.4 Copy all edge shapefiles from EC2 to S3 bucket

In [31]:
!aws s3 sync /home/ubuntu/X.data/nodes_and_edges/la_county_edges s3://traffic-data-bucket/nodes_and_edges/la_county_edges

upload: ../X.data/nodes_and_edges/la_county_edges/la_county_edges.cpg to s3://traffic-data-bucket/nodes_and_edges/la_county_edges/la_county_edges.cpg
upload: ../X.data/nodes_and_edges/la_county_edges/la_county_edges.prj to s3://traffic-data-bucket/nodes_and_edges/la_county_edges/la_county_edges.prj
upload: ../X.data/nodes_and_edges/la_county_edges/la_county_edges.shx to s3://traffic-data-bucket/nodes_and_edges/la_county_edges/la_county_edges.shx
upload: ../X.data/nodes_and_edges/la_county_edges/la_county_edges.shp to s3://traffic-data-bucket/nodes_and_edges/la_county_edges/la_county_edges.shp
upload: ../X.data/nodes_and_edges/la_county_edges/la_county_edges.dbf to s3://traffic-data-bucket/nodes_and_edges/la_county_edges/la_county_edges.dbf


In [32]:
df_edges.shape

(372407, 14)