In [1]:
import osmnx as ox, networkx as nx, pandas as pd, geopandas as gpd, time, matplotlib.pyplot as plt, math
import re
import statsmodels.api as sm
from geopy.distance import great_circle
from shapely.geometry import Polygon
%matplotlib inline
# configure osmnx once for the whole notebook: turn on the cache, log to both a file and
# the console, and set the log file name plus the data and cache folders
# NOTE(review): the two folder paths are absolute and specific to one machine
ox.config(
    use_cache=True,
    log_file=True,
    log_console=True,
    log_filename='calc_stats_every_us_urban_area',
    data_folder='G:/Geoff/osmnx/urbanized-areas-usa',
    cache_folder='G:/Geoff/osmnx/cache',
)

In [2]:
# read the tl_2016_us_uac10 input data into a GeoDataFrame
gdf = gpd.read_file('input_data/tl_2016_us_uac10')

# order the rows from largest to smallest ALAND10 value
gdf = gdf.sort_values(by='ALAND10', ascending=False)

# show how many rows were loaded
len(gdf)

3601

In [3]:
# drop every row whose NAMELSAD10 mentions 'Urban Cluster' (the tiny census "urban
# clusters"), so that only the "urban areas" are retained
gdf = gdf.loc[~gdf['NAMELSAD10'].str.contains('Urban Cluster')]

# show how many rows remain after filtering
len(gdf)

497

In [4]:
# preview the first rows, restricted to the columns used later in this notebook
gdf[[
    'NAMELSAD10',
    'ALAND10',
    'AWATER10',
    'GEOID10',
    'geometry',
]].head()

Unnamed: 0,NAMELSAD10,ALAND10,AWATER10,GEOID10,geometry
2321,"New York--Newark, NY--NJ--CT Urbanized Area",8937429045,532939320,63217,"(POLYGON ((-74.896562 40.561084, -74.896255 40..."
3002,"Atlanta, GA Urbanized Area",6850045152,94712176,3817,"(POLYGON ((-85.04216699999999 33.714332, -85.0..."
3250,"Chicago, IL--IN Urbanized Area",6325255332,106765178,16264,"(POLYGON ((-88.471932 42.120298, -88.472899 42..."
2546,"Philadelphia, PA--NJ--DE--MD Urbanized Area",5132095000,127546905,69076,"(POLYGON ((-76.02113299999999 39.981897, -76.0..."
1593,"Boston, MA--NH--RI Urbanized Area",4852285339,202265876,9271,"(POLYGON ((-71.64691599999999 42.644126, -71.6..."


In [5]:
def load_graph_get_stats(row):
    """
    Load the saved street network for one urban area and compute its basic stats.

    The graph file name is built from the row's name and geoid, with commas and
    spaces replaced by underscores, plus a '.graphml' extension. The stats returned
    by ox.basic_stats are extended with the area's name, geoid, land area, and the
    elapsed wall-clock time in seconds. The per-k dicts of street counts and
    proportions per intersection are also flattened into one key per k.

    Parameters
    ----------
    row : mapping (e.g. a row of the urban areas GeoDataFrame)
        must provide the 'NAMELSAD10', 'GEOID10', and 'ALAND10' fields

    Returns
    -------
    pandas.Series or None
        the stats for this urban area, or None if loading the graph or
        calculating the stats raised an exception (the failure is printed)
    """
    started_at = time.time()
    name = row['NAMELSAD10']
    geoid = row['GEOID10']
    land_area = row['ALAND10']

    # file stem is "<name>_<geoid>" with every comma and space turned into an underscore
    name_geoid = '{}_{}'.format(name, geoid).replace(',', '_').replace(' ', '_')

    # which per-k dict feeds which family of flattened keys (counts first, then proportions)
    unpack_plan = (
        ('counts_streets_per_intersection', 'int_{}_streets_count'),
        ('proportion_streets_per_intersection', 'int_{}_streets_prop'),
    )

    try:
        graph = ox.load_graphml('{}.graphml'.format(name_geoid))
        stats = ox.basic_stats(graph, area=land_area)

        # identify the urban area and record how long loading + calculating took
        stats['name'] = name
        stats['geoid'] = geoid
        stats['area'] = land_area
        stats['time'] = time.time() - started_at

        # unpack the k-counts and k-proportions dicts into individual keys:values
        for source_key, key_template in unpack_plan:
            for k, value in stats[source_key].items():
                stats[key_template.format(k)] = value

        return pd.Series(stats)

    except Exception as e:
        # best effort: report the failure and carry on with the remaining urban areas
        print('{} failed: {}'.format(name, e))
        return None

In [6]:
# call load_graph_get_stats once per row of gdf (axis=1 passes each row to the function)
# and collect the results in stats; the function returns None for any urban area
# whose graph could not be loaded or analyzed
stats = gdf.apply(load_graph_get_stats, axis=1)
# preview the first rows of the calculated stats
stats.head()

Unnamed: 0,area,avg_circuity,avg_edge_length,avg_street_length,avg_streets_per_intersection,count_street_segments,counts_streets_per_intersection,edge_density_km,geoid,int_0_streets_count,...,k_avg,m,n,name,node_density_km,proportion_streets_per_intersection,self_loop_proportion,time,total_edge_length,total_street_length
2321,8937429045,1.066086,138.196978,140.77646,2.830807,592255,"{0: 1, 1: 75508, 2: 2090, 3: 259280, 4: 79117,...",16817.534433,63217,1,...,5.209273,1087618,417570,"New York--Newark, NY--NJ--CT Urbanized Area",46.72149,"{0: 2.394808056134301e-06, 1: 0.18082716670258...",0.007695,166.890025,150305500.0,83375560.0
3002,6850045152,1.102635,163.024668,165.005023,2.46127,270491,"{0: 0, 1: 69105, 2: 818, 3: 129036, 4: 19836, ...",12111.9628,3817,0,...,4.641304,508926,219303,"Atlanta, GA Urbanized Area",32.014825,"{0: 0.0, 1: 0.3151119683725257, 2: 0.003729999...",0.010398,82.578143,82967490.0,44632370.0
3250,6325255332,1.068456,131.493808,132.673476,2.951925,443845,"{0: 0, 1: 45433, 2: 1474, 3: 175863, 4: 75968,...",17215.929666,16264,0,...,5.527209,828139,299659,"Chicago, IL--IN Urbanized Area",47.375005,"{0: 0.0, 1: 0.15161566981135224, 2: 0.00491892...",0.009053,116.152454,108895200.0,58886460.0
2546,5132095000,1.085334,142.455398,141.996745,2.84299,287604,"{0: 0, 1: 34937, 2: 1152, 3: 127939, 4: 37311,...",14489.100116,69076,0,...,5.161158,521984,202274,"Philadelphia, PA--NJ--DE--MD Urbanized Area",39.413534,"{0: 0.0, 1: 0.1727211604061817, 2: 0.005695245...",0.009928,77.639563,74359440.0,40838830.0
1593,4852285339,1.099111,132.870811,133.734702,2.681577,261615,"{0: 0, 1: 41111, 2: 871, 3: 129895, 4: 20100, ...",13228.795154,9271,0,...,5.012711,483100,192750,"Boston, MA--NH--RI Urbanized Area",39.72355,"{0: 0.0, 1: 0.21328664072632944, 2: 0.00451880...",0.021174,82.099271,64189890.0,34987000.0


In [7]:
# save the stats table to disk as UTF-8 encoded CSV, leaving out the index
stats.to_csv(
    'usa_stats.csv',
    index=False,
    encoding='utf-8',
)

# total of the per-area 'time' values, i.e. seconds spent loading graphs and calculating stats
stats['time'].sum()

4028.2600576877594