In [1]:
import osmnx as ox, pandas as pd, networkx as nx, time, geopandas as gpd, os, json
%matplotlib inline
# Global OSMnx settings for this notebook: turn on caching and logging (to a
# named log file and to the console) and set the data and cache folders.
ox.config(
    use_cache=True,
    log_file=True,
    log_console=True,
    log_filename='calculate_stats_every_us_city',
    data_folder='G:/Geoff/osmnx/cities-usa',
    cache_folder='G:/Geoff/osmnx/cache/cities-usa'
)

In [2]:
# root folder that is scanned for one sub-folder per state, each holding that state's city .graphml files
data_folder = 'G:/Geoff/osmnx/cities-usa'
# folder holding the per-state place shapefiles (tl_2016_<state fips>_place) used to look up land area
shp_folder = 'input_data/places'

## Make a DataFrame of all the cities that have .graphml files saved in the folder

In [3]:
# Walk <data_folder>/<state_folder>/<city_file> and record one dict per saved graph.
# State folders are named '<state fips>_<state abbreviation>' (e.g. '01_AL') and
# graph files are named '<geoid>_<city name>.graphml' (e.g. '0100124_Abbeville.graphml').
graphml_ext = '.graphml'
places = []

# sorted() makes the row order reproducible: os.listdir() order is platform-dependent
for state_folder in sorted(os.listdir(data_folder)):
    state_path = '{}/{}'.format(data_folder, state_folder)
    if not os.path.isdir(state_path):
        # a stray file next to the state folders would make os.listdir() raise
        continue

    state_parts = state_folder.split('_')
    for city_file in sorted(os.listdir(state_path)):
        # test the extension itself, not a substring anywhere in the name
        if not city_file.endswith(graphml_ext):
            continue

        # Split on the FIRST underscore only: geoid before it, city name after it.
        # (str.strip() removes a *set of characters* from both ends, not a prefix,
        # so it would also eat leading digits/underscores of the city name.)
        geoid, sep, city_part = city_file.partition('_')
        city = city_part[:-len(graphml_ext)].replace('_', ' ')

        places.append({'state_folder': state_folder,
                       'state_fips': state_parts[0],
                       'state': state_parts[1],
                       'city_file': city_file,
                       'geoid': geoid,
                       'city': city})

df = pd.DataFrame(places)

## Get land area data from shapefiles and merge into DataFrame

In [4]:
# load each state shapefile and get the geoid and aland for each city row
state_frames = []
for state_fips in df['state_fips'].unique():
    path = '{}/tl_2016_{}_place'.format(shp_folder, state_fips)
    state_frames.append(gpd.read_file(path)[['GEOID', 'ALAND']])

# concatenate once instead of appending inside the loop: repeated append copies
# the accumulated frame every iteration, and DataFrame.append no longer exists
# in pandas >= 2.0
gdf = pd.concat(state_frames, ignore_index=True)

# merge aland values into dataframe, on geoid
gdf = gdf.rename(columns=str.lower)
# drop any 'aland' left over from a previous run of this cell, so that re-running
# it does not produce 'aland_x'/'aland_y' columns instead of a single 'aland'
df = pd.merge(df.drop('aland', axis=1, errors='ignore'), gdf, how='left', on='geoid')

In [5]:
# sanity check: number of city rows after the merge, then a preview of the first rows
print(len(df))
df.head()

19655


Unnamed: 0,city,city_file,geoid,state,state_fips,state_folder,aland
0,Abbeville,0100124_Abbeville.graphml,100124,AL,1,01_AL,40255362
1,Adamsville,0100460_Adamsville.graphml,100460,AL,1,01_AL,65064935
2,Addison,0100484_Addison.graphml,100484,AL,1,01_AL,9753292
3,Akron,0100676_Akron.graphml,100676,AL,1,01_AL,1776164
4,Alabaster,0100820_Alabaster.graphml,100820,AL,1,01_AL,65079075


## Load graph and calculate stats for each city

In [6]:
def load_graph_get_stats(row):
    """
    Load one city's saved graph and calculate its street network stats.

    The graph file is looked up under the module-level `data_folder`, in the
    sub-folder named by row['state_folder'].

    Parameters
    ----------
    row : mapping (a DataFrame row when called through df.apply(..., axis=1))
        must provide the keys 'state_folder', 'city_file', 'aland', 'city',
        'state' and 'geoid'

    Returns
    -------
    pandas.Series
        On success: everything returned by ox.basic_stats, the
        streets-per-node dicts unpacked into 'int_{k}_streets_count' and
        'int_{k}_streets_prop' entries, the ox.extended_stats entries that are
        not per-node, plus 'area_km', 'city', 'state', 'geoid', 'area' and
        'time' (elapsed seconds for this city).
        On failure: only 'city', 'state' and 'geoid', so the failed row stays
        identifiable in the output; the error message is printed.
    """
    # extended stats that hold values per node; these are left out of the result
    per_node_stats = ('avg_neighbor_degree', 'avg_weighted_neighbor_degree', 'clustering_coefficient',
                      'clustering_coefficient_weighted', 'degree_centrality', 'pagerank')

    try:
        start_time = time.time()
        folder = '{}/{}'.format(data_folder, row['state_folder'])
        G = ox.load_graphml(filename=row['city_file'], folder=folder)

        stats = ox.basic_stats(G, area=row['aland'])

        # unpack k-counts and k-proportion dicts into individual keys:values
        for k, count in stats['streets_per_node_counts'].items():
            stats['int_{}_streets_count'.format(k)] = count
        for k, proportion in stats['streets_per_node_proportion'].items():
            stats['int_{}_streets_prop'.format(k)] = proportion

        # keep the extended stats except the per-node ones; filtering by key
        # (rather than dropping by label) tolerates a stat that is not present
        for key, value in ox.extended_stats(G).items():
            if key not in per_node_stats:
                stats[key] = value

        stats['area_km'] = row['aland'] / 1e6
        stats['city'] = row['city']
        stats['state'] = row['state']
        stats['geoid'] = row['geoid']
        stats['area'] = row['aland']
        stats['time'] = time.time() - start_time

        return pd.Series(stats)

    except Exception as e:
        # deliberate best effort: one bad city must not abort the whole run, but
        # keep its identifiers so the row is not written out completely blank
        print('{}, {} failed: {}'.format(row['city'], row['state'], e))
        return pd.Series({'city': row['city'], 'state': row['state'], 'geoid': row['geoid']})

In [7]:
# optional trial run on roughly 100 evenly spaced rows instead of every city:
#sample = list(range(0, len(df), int(len(df)/100)))
#stats = df.iloc[sample].apply(load_graph_get_stats, axis=1)
# full run: one result row per city; a city that fails prints a message and gets a row without stats
stats = df.apply(load_graph_get_stats, axis=1)



Bassett, KS failed: float division by zero




Lake Aluma, OK failed: float division by zero




Ophir, UT failed: float division by zero




In [8]:
# save the per-city stats (without the index), then show the row count and the
# sum of the per-city 'time' values (elapsed seconds recorded for each city)
stats.to_csv('stats_every_city.csv', encoding='utf-8', index=False)
print(len(stats))
stats['time'].sum()

19655


8030.371987581253