In [1]:
import osmnx as ox, networkx as nx, time, pandas as pd, numpy as np, matplotlib.cm as cm, matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Polygon
ox.config(log_console=True, use_cache=True)
%matplotlib inline

In [2]:
wgs84 = {'init':'epsg:4326'}
gdf = gpd.read_file('input_data/ZillowNeighborhoods-CA/')

In [3]:
sf = gdf[gdf['CITY']=='San Francisco']
sf.head()

Unnamed: 0,CITY,COUNTY,NAME,REGIONID,STATE,geometry
789,San Francisco,San Francisco,Bayview,272885.0,CA,"POLYGON ((-122.380496615061 37.7507156475919, ..."
790,San Francisco,San Francisco,Bernal Heights,268020.0,CA,"POLYGON ((-122.403862539662 37.7494769720709, ..."
791,San Francisco,San Francisco,Castro-Upper Market,276241.0,CA,"POLYGON ((-122.426029676707 37.7697778521009, ..."
792,San Francisco,San Francisco,Chinatown,114291.0,CA,"POLYGON ((-122.41020215338 37.7974876723953, -..."
793,San Francisco,San Francisco,Crocker Amazon,273404.0,CA,"POLYGON ((-122.454085201694 37.7082065558492, ..."


In [4]:
# get a color for each node
def get_color_list(n, color_map='plasma', start=0, end=1):
    return [cm.get_cmap(color_map)(x) for x in np.linspace(start, end, n)]

def get_node_colors_by_stat(G, data, start=0, end=1):
    df = pd.DataFrame(data=pd.Series(data).sort_values(), columns=['value'])
    df['colors'] = get_color_list(len(df), start=start, end=end)
    df = df.reindex(G.nodes())
    return df['colors'].tolist()

In [5]:
start_time = time.time()

In [6]:
df = pd.DataFrame()
all_stats = {}
for label, row in sf.iterrows():
    
    name = row['NAME']
    geometry = row['geometry']
    geometry_utm, crs_utm = ox.project_geometry(geometry, crs=wgs84)
    area = geometry_utm.area
    
    # get the driving network and project it
    G = ox.graph_from_polygon(geometry, network_type='drive_service')
    G_proj = ox.project_graph(G)
    
    # plot and save the figure
    filename = '{}_network'.format(name)
    fig, ax = ox.plot_graph(G_proj, fig_height=6,
                            node_color='#336699', node_size=15, node_zorder=2,
                            save=True, filename=filename, show=False)
    
    stats = ox.basic_stats(G, area=area)
    extended_stats = ox.extended_stats(G, connectivity=True, ecc=True, bc=True, cc=True, anc=True)
    for key in extended_stats:
        stats[key] = extended_stats[key]
    stats['area_km'] = area / 1e6 #sq m to sq km
    #stats['node_connectivity_avg_undir'] = nx.average_node_connectivity(G.to_undirected())
    
    filename = '{}_betweenness_centrality'.format(name)
    nc = get_node_colors_by_stat(G_proj, stats['betweenness_centrality'])
    fig, ax = ox.plot_graph(G_proj, fig_height=6,
                            node_color=nc, node_size=15, node_zorder=2,
                            save=True, filename=filename, show=False)
        
    all_stats[name] = stats
    df = df.append(pd.Series(data=stats, name=name))

In [7]:
cols = [col for col in df.columns if not isinstance(df.reset_index().loc[0, col], dict)]
df_display = pd.DataFrame(df[cols])

df_display['pagerank_max_node'] = df_display['pagerank_max_node'].astype(int)
df_display['pagerank_min_node'] = df_display['pagerank_min_node'].astype(int)

def clean_display(value):
    if isinstance(value, float):
        value = round(value, 3)
    return value

df_display = df_display.applymap(clean_display)
df_display.head()

Unnamed: 0,area_km,avg_neighbor_degree_avg,avg_weighted_neighbor_degree_avg,betweenness_centrality_avg,center,circuity_avg,closeness_centrality_avg,clustering_coefficient_avg,clustering_coefficient_weighted_avg,count_intersections,...,pagerank_min,pagerank_min_node,periphery,radius,self_loop_proportion,street_density_km,street_length_avg,street_length_total,street_segments_count,streets_per_node_avg
Bayview,12.859,2.908,0.04,0.021,[2932970389],1.053,0.0,0.043,0.002,709.0,...,0.0,259408082,[662020555],3238.145,0.001,10962.418,111.344,140961.63,1266.0,3.019
Bernal Heights,2.992,2.768,0.049,0.029,[65343556],1.028,0.001,0.052,0.004,389.0,...,0.0,-2147483648,[4066142106],1954.814,0.002,18915.237,87.735,56588.86,645.0,2.969
Castro-Upper Market,2.276,2.846,0.041,0.035,[65296327],1.023,0.001,0.045,0.01,251.0,...,0.001,-2147483648,[581074075],1384.393,0.001,18119.416,97.029,41237.345,425.0,3.264
Chinatown,0.356,1.976,0.044,0.063,[65307376],1.0,0.002,0.017,0.003,89.0,...,0.001,65290756,[65316880],790.503,0.0,24335.618,60.13,8658.701,144.0,2.955
Crocker Amazon,1.196,2.865,0.044,0.064,[65343941],1.023,0.001,0.061,0.012,113.0,...,0.002,318783030,[65350943],1179.792,0.0,14106.236,92.726,16876.099,182.0,3.069


In [8]:
df_display.to_csv('data/sf-nhoods.csv', index=True, encoding='utf-8')

In [9]:
# what is the max betwenness centrality in each network?
for key in all_stats:
    print(key, max(all_stats[key]['betweenness_centrality'].values()))

West Of Twin Peaks 0.22804534767033224
Downtown 0.28510294931764796
Ocean View 0.28637625431999425
Bernal Heights 0.20054839486226358
South Of Market 0.2734685251203388
Castro-Upper Market 0.24514768161994727
Lakeshore 0.29104886078750847
Bayview 0.1734578348548852
Outer Mission 0.25355522910317746
Seacliff 0.37805712805712804
Pacific Heights 0.15855462971414372
Western Addition 0.20136534172063386
Noe Valley 0.2225486858326788
Crocker Amazon 0.35789203350182996
Nob Hill 0.1573008828292541
Inner Richmond 0.18665945968296302
Financial District 0.18025780361067387
Haight-Ashbury 0.2912869464751467
Outer Sunset 0.11016169410011367
Potrero Hill 0.25871049200500323
North Beach 0.30858572231463854
Mission 0.14560638968199155
Parkside 0.208318668231522
Excelsior 0.21785734930764852
Marina 0.17605946566844718
Presidio Heights 0.19862340480573926
Glen Park 0.22847366370102756
Inner Sunset 0.28085289768265476
Visitacion Valley 0.302584259657977
Outer Richmond 0.20955017238591706
Twin Peaks 0.462

In [10]:
time.time() - start_time

5692.390351772308