In [5]:
# data processing
import numpy as np
import pandas as pd

# GIS calculation
import geopandas as gpd

# network model
import networkx as nx

In [6]:
# Load the LION street edges. The file is a subset of the raw LION dataset
# that was extracted within the boundary of Manhattan.
# NOTE(review): the original note says a sample is used "due to submit" --
# presumably a submission size limit; confirm before relying on full coverage.
gdf_lion_edges = gpd.read_file(
    '../../data/raw/lion/edges_manhattan_sample.geojson'
)

In [7]:
# Drop self-loops (edges whose start and end node are the same).
# .copy() makes the filtered frame independent, so adding the 'route' column
# below does not write into a slice of the loaded data (which can emit
# SettingWithCopyWarning on pandas versions without copy-on-write).
gdf_lion_edges = gdf_lion_edges.loc[
    gdf_lion_edges['NodeIDFrom'] != gdf_lion_edges['NodeIDTo']
].copy()

# Drop duplicated edges (several edges between the same pair of nodes).
# 'route' is a direction-independent key: the two node IDs sorted and joined
# with '~', so A->B and B->A map to the same value. Built with a plain
# comprehension instead of a row-wise DataFrame.apply, which is much slower.
gdf_lion_edges['route'] = [
    '~'.join(sorted(node_pair))
    for node_pair in zip(gdf_lion_edges['NodeIDFrom'], gdf_lion_edges['NodeIDTo'])
]
gdf_lion_edges = gdf_lion_edges.drop_duplicates(subset='route')

In [8]:
# sanity check: (rows, columns) of the edge table at this point in the notebook
gdf_lion_edges.shape

(4787, 131)

In [9]:
# Neighborhood boundaries, downloaded from
# https://data.cityofnewyork.us/City-Government/Neighborhoods-Boundries/j2bc-fus8
# Only the rows whose 'boroname' is 'Manhattan' are kept.
gdf_neighborhood = (
    gpd.read_file('../../data/raw/boundary/Neighborhoods Boundries.geojson')
    .loc[lambda gdf: gdf['boroname'] == 'Manhattan']
)

In [10]:
def calculate_neighborhood_centrality(neighborhood):
    """Compute edge betweenness centrality for the streets of one neighborhood.

    The street edges that intersect the boundary of ``neighborhood`` are
    turned into an undirected simple graph and the edge betweenness
    centrality, weighted by ``SHAPE_Length``, is computed on that
    sub-network only.

    Parameters
    ----------
    neighborhood : str
        Value of the ``ntaname`` column that identifies the neighborhood.

    Returns
    -------
    pandas.DataFrame
        One row per graph edge with the columns ``OBJECTID`` and
        ``betweeness``. When no street intersects the neighborhood an empty
        frame with the same two columns is returned (previously this case
        raised ``KeyError``).

    Notes
    -----
    Reads the notebook-level ``gdf_lion_edges`` and ``gdf_neighborhood``.
    """
    # crop the street edges by the boundary of the requested neighborhood
    boundary = gdf_neighborhood.loc[gdf_neighborhood['ntaname'] == neighborhood]
    # NOTE(review): assumes the street edges are stored in EPSG:2263 so both
    # layers share a CRS for the spatial join -- confirm against the data.
    boundary = boundary.to_crs(2263)
    streets = gpd.sjoin(
        gdf_lion_edges,
        boundary.loc[:, ['ntaname', 'geometry']],
        how='inner',
        predicate='intersects',
    ).drop(['index_right', 'ntaname'], axis=1)

    if streets.empty:
        # Nothing to build a graph from. Return a typed, empty result so the
        # caller can still concatenate it with the other neighborhoods.
        return pd.DataFrame({
            'OBJECTID': streets['OBJECTID'].to_numpy(),
            'betweeness': pd.Series(dtype='float64'),
        })

    # The street network is modelled as an undirected simple graph, so when
    # several streets connect the same two nodes only one of them survives.
    street_graph = nx.from_pandas_edgelist(
        streets,
        'NodeIDFrom',
        'NodeIDTo',
        edge_attr=['OBJECTID', 'SHAPE_Length'],
        create_using=nx.Graph(),
    )

    # https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.edge_betweenness_centrality.html#networkx.algorithms.centrality.edge_betweenness_centrality
    centrality_by_edge = nx.edge_betweenness_centrality(street_graph, weight='SHAPE_Length')

    # Pair each score with the OBJECTID stored on its edge. The result keys
    # are (node, node) tuples, which index the undirected graph either way.
    records = [
        {'OBJECTID': street_graph[node_a][node_b]['OBJECTID'], 'betweeness': score}
        for (node_a, node_b), score in centrality_by_edge.items()
    ]

    return pd.DataFrame(records, columns=['OBJECTID', 'betweeness'])
    

In [11]:
# One centrality table per neighborhood, in the row order of gdf_neighborhood.
list_neighborhood_betweeness = [
    calculate_neighborhood_centrality(nta_name)
    for nta_name in gdf_neighborhood['ntaname']
]

In [12]:
# Stack the per-neighborhood results into a single table.
df_centrality = pd.concat(list_neighborhood_betweeness, ignore_index=True)

# The per-neighborhood crop uses an 'intersects' spatial join, so a street
# that touches more than one boundary is scored once per neighborhood.
# Keep a single row per OBJECTID so that merging on OBJECTID cannot
# duplicate street segments.
# NOTE(review): the highest score is kept here; confirm that max (rather than
# mean or first) is the intended way to resolve boundary streets.
df_centrality = df_centrality.groupby('OBJECTID', as_index=False)['betweeness'].max()

In [13]:
# Attach the centrality score to the street edges. The inner join drops
# streets that received no score.
# This cell overwrites its own input, so any 'betweeness' column left by an
# earlier run is removed first; otherwise re-running the cell would create
# 'betweeness_x' / 'betweeness_y' columns.
gdf_lion_edges = (
    gdf_lion_edges
    .drop(columns='betweeness', errors='ignore')
    .merge(df_centrality, on='OBJECTID', how='inner')
)

In [14]:
#gdf_lion_edges.drop('route', axis=1).to_file('../../data/processed/street_network.geojson', driver='GeoJSON')