In [1]:
import pandas as pd
import osmnx as ox
from collections import Counter

In [2]:
# Relative path (resolved against the kernel's working directory) to the
# gzip-compressed pickle of the unified dataset.
FILEPATH = "../../pipeline_data/2019_thru_2021/unified_dataset.pkl.gz"

In [3]:
# NOTE: unpickling can execute arbitrary code, so FILEPATH must point at a
# trusted file.
unified_data = pd.read_pickle(FILEPATH)

In [4]:
# Preview the first rows of the loaded dataset.
unified_data.head()

Unnamed: 0,NODE_ID,NODE_LATITUDE,NODE_LONGITUDE,NUMBER OF PERSONS INJURED,NUMBER OF PERSONS KILLED,NUMBER OF PEDESTRIANS INJURED,NUMBER OF PEDESTRIANS KILLED,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED,NUMBER OF MOTORIST INJURED,...,WT09,WT10,WT11,WT13,WT14,WT15,WT16,WT18,WT19,WT22
0,42431470,40.732029,-73.982061,0.0,0.0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3834459540,40.732079,-73.98218,0.0,0.0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,42431467,40.732699,-73.983646,0.0,0.0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4376122113,40.733,-73.984363,0.0,0.0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4376122112,40.732318,-73.984872,0.0,0.0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# List every column name, including the NODE_ID key used for the join further down.
unified_data.columns

Index(['NODE_ID', 'NODE_LATITUDE', 'NODE_LONGITUDE',
       'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
       'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
       'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
       'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
       'EVENT_DIST_FROM_NODE', 'EVENT_DATE', 'EVENT_TIME', 'IS_CRASH', 'AWND',
       'PGTM', 'PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN', 'TOBS', 'WT01', 'WT02',
       'WT03', 'WT04', 'WT05', 'WT06', 'WT08', 'WT09', 'WT10', 'WT11', 'WT13',
       'WT14', 'WT15', 'WT16', 'WT18', 'WT19', 'WT22'],
      dtype='object')

In [6]:
# Relative path to the GraphML file holding the street-network graph, loaded with OSMnx.
GRAPH_FILEPATH = "../../pipeline_data/2019_thru_2021/target_map.graphml"
G = ox.io.load_graphml(GRAPH_FILEPATH)

In [7]:
def get_node_crash_attributes(crashes_df: pd.DataFrame, G):
    """
    Attach road-type counts to the per-node crash attributes

    Args:
        crashes_df (pd.DataFrame): Crash records, forwarded to get_num_node_crashes
        G: Graph forwarded to get_num_node_crashes and count_road_types

    Returns:
        The mapping returned by get_num_node_crashes, where every truthy node
        key has gained one entry per road type, and the key 0 has been removed
    """
    crashes_by_node = get_num_node_crashes(G, crashes_df)

    for node_id, node_attrs in crashes_by_node.items():
        # Falsy keys (such as the 0 entry dropped below) get no road lookup
        if not node_id:
            continue

        road_counts = count_road_types(G, node_id)
        for road_type in road_counts:
            node_attrs[road_type] = road_counts[road_type]

    # Discard the entry keyed 0, if there is one
    if 0 in crashes_by_node:
        crashes_by_node.pop(0)

    return crashes_by_node


def count_road_types(_graph, node: int):
    """
    Count types of roads at a node

    Args:
        _graph: Graph passed through to get_roads_at_node
        node (int): The node ID

    Returns:
        Dict[str, int]: Dictionary of road types and counts, keyed in the
        order each road type is first encountered
    """
    road_counts = Counter()

    for entry in get_roads_at_node(_graph, node):
        # Some returns from OSMnx are a list of multiple road types for one edge;
        # count each member. isinstance (rather than an exact type comparison)
        # also flattens list subclasses, which would otherwise be used as an
        # unhashable Counter key.
        if isinstance(entry, list):
            road_counts.update(entry)
        else:
            road_counts[entry] += 1

    return dict(road_counts)


def get_roads_at_node(_graph, node: int):
    """
    Collect the road types of every edge that touches a node

    Args:
        _graph: Graph passed to get_node_edges and get_edge_road
        node (int): The node ID

    Returns:
        list: The items returned by get_edge_road for each edge from
        get_node_edges, concatenated in edge order
    """
    return [
        road
        for edge in get_node_edges(_graph, node)
        for road in get_edge_road(_graph, edge)
    ]


def get_edge_road(_graph, edge: tuple) -> list:
    """
    Get the road type(s) of a given edge

    Args:
        _graph: Graph passed to get_edge_attributes
        edge (tuple): The tuple that defines an edge

    Returns:
        list: The "highway" value of each attribute dict returned by
        get_edge_attributes, with "N/A" standing in where the key is absent.
        Empty when get_edge_attributes finds nothing for the edge.
    """
    edge_attrs = get_edge_attributes(_graph, edge)

    # Nothing stored for this edge (lookup returned None or an empty dict):
    # report no roads instead of failing on .values()
    if not edge_attrs:
        return []

    return [attrs.get("highway", "N/A") for attrs in edge_attrs.values()]


def get_edge_attributes(_graph, edge: tuple) -> dict:
    """
    Look up the attribute data stored for an edge of the graph

    Args:
        _graph: Graph object exposing get_edge_data(u, v)
        edge (tuple): The tuple that defines an edge; only its first two
            items (the endpoints) are used

    Returns:
        dict: Whatever _graph.get_edge_data returns for the two endpoints.
        Example is:
        {0: {
                'osmid': [421853954, 421853949],
                'oneway': True,
                'lanes': '5',
                'name': '1st Avenue',
                'highway': 'primary',
                'maxspeed': '25 mph',
                'length': 81.28,
                'geometry': <shapely.geometry.linestring.LineString at 0x7fcd2b43a670>
        }}
    """
    start_node, end_node = edge[0], edge[1]
    return _graph.get_edge_data(start_node, end_node)


def get_node_edges(_graph, node: int):
    """
    Get all edges that meet at a node

    Each (u, v) pair is returned once, even when the graph reports it several
    times.

    Args:
        _graph: Graph exposing in_edges(node) and out_edges(node)
        node (int): The ID of a given node in the graph

    Returns:
        List[tuple]: Unique (u, v) tuples for the edges that meet at the node,
        incoming edges first, in first-seen order
    """
    in_edges = list(_graph.in_edges(node))
    out_edges = list(_graph.out_edges(node))

    # Repeats do occur: a self-loop is both an in-edge and an out-edge, and a
    # multigraph yields the same (u, v) pair once per parallel edge. Edge
    # attributes are looked up by (u, v) pair, which already covers every
    # parallel edge, so a repeated pair would be counted multiple times.
    # dict.fromkeys drops the repeats while preserving order.
    return list(dict.fromkeys(in_edges + out_edges))

In [8]:
# Spot-check the road-type counts for a single node ID.
count_road_types(G, 42421828)

{'residential': 5}

In [9]:
# Number of nodes in the loaded graph.
len(G.nodes)

2586

In [10]:
node_road_types = {}

for i in G.nodes:
    node_road_types[i] = count_road_types(G, i)

In [11]:
node_road_types = pd.DataFrame.from_dict(node_road_types, orient="index").fillna(0)

In [12]:
# Preview the per-node road-type counts (index is the node ID).
node_road_types.head()

Unnamed: 0,residential,service,secondary,primary,cycleway,unclassified,trunk,pedestrian,tertiary,path,secondary_link,living_street,primary_link
42421828,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42421837,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42421877,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42421889,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42421941,1.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Row count of the unified dataset ahead of the join.
len(unified_data)

22794974

In [14]:
# Left join on NODE_ID keeps every unified_data row; rows whose NODE_ID has no
# entry in node_road_types get NaN in the road-type columns.
unified_with_roads = unified_data.set_index("NODE_ID").join(node_road_types, how="left")

In [None]:
# Move NODE_ID back from the index into a regular column. Guarded so that
# re-running this cell does not call reset_index() a second time, which would
# add a stray "index" column to the frame.
if "NODE_ID" not in unified_with_roads.columns:
    unified_with_roads = unified_with_roads.reset_index()

In [None]:
# Preview the joined result.
unified_with_roads.head()

In [None]:
# Write the joined dataset to a Parquet file (path is relative to the kernel's working directory).
unified_with_roads.to_parquet("../../data/2019_thru_2021_unified_dataset_wroads.parquet")