In [1]:
import osmnx as ox, pandas as pd, time, geopandas as gpd, os
%matplotlib inline
# Configure OSMnx for this run: caching is switched on, logging goes to both a
# log file (named 'calculate_stats_neighborhoods') and the console, and the data
# and cache folders point at machine-specific absolute paths. The data folder is
# the same path that the next cell assigns to `data_folder`.
ox.config(use_cache=True, log_file=True, log_console=True, log_filename='calculate_stats_neighborhoods',
          data_folder='G:/Geoff/osmnx/neighborhoods', cache_folder='G:/Geoff/osmnx/cache/neighborhoods')

In [2]:
# Root folder of the saved neighborhood graphs. The cell below lists it three
# levels deep: <state folder>/<city folder>/<neighborhood>.graphml
data_folder = 'G:/Geoff/osmnx/neighborhoods'

## Make a DataFrame of all the neighborhoods that have .graphml files saved in the data folder

In [3]:
# Walk the <state>/<city>/<neighborhood>.graphml folder tree under data_folder
# and collect one record per saved neighborhood graph.
#
# Folder names are expected to look like '<state_fips>_<state>' and
# '<geoid>_<city>'; file names look like '<Neighborhood-Name>.graphml'.
# Listings are sorted so the row order of `df` is reproducible (os.listdir
# returns entries in arbitrary order), and anything that is not a directory at
# the state or city level is skipped instead of crashing os.listdir.
graphml_ext = '.graphml'

places = []
for state_folder in sorted(os.listdir(data_folder)):
    state_path = '{}/{}'.format(data_folder, state_folder)
    if not os.path.isdir(state_path):
        continue  # stray file sitting next to the state folders

    for city_folder in sorted(os.listdir(state_path)):
        city_path = '{}/{}'.format(state_path, city_folder)
        if not os.path.isdir(city_path):
            continue  # stray file sitting next to the city folders

        for nhood_file in sorted(os.listdir(city_path)):
            # match on the extension only, so e.g. 'x.graphml.bak' is ignored
            if not nhood_file.endswith(graphml_ext):
                continue

            data = {}
            data['state_fips'] = state_folder.split('_')[0]
            data['state'] = state_folder.split('_')[1]
            data['geoid'] = city_folder.split('_')[0]
            # strip only the leading '<geoid>_' prefix, never a later repeat of it
            data['city'] = city_folder.replace('{}_'.format(data['geoid']), '', 1)
            # file names use '-' where the neighborhood name has a space
            data['nhood'] = nhood_file[:-len(graphml_ext)].replace('-', ' ')
            data['path'] = city_path
            data['file'] = nhood_file
            places.append(data)

df = pd.DataFrame(places)
df.head()

Unnamed: 0,city,file,geoid,nhood,path,state,state_fips
0,Mobile,Airmont.graphml,150000,Airmont,G:/Geoff/osmnx/neighborhoods/01_AL/0150000_Mobile,AL,1
1,Mobile,Alderbrook.graphml,150000,Alderbrook,G:/Geoff/osmnx/neighborhoods/01_AL/0150000_Mobile,AL,1
2,Mobile,Alligator-Bayou.graphml,150000,Alligator Bayou,G:/Geoff/osmnx/neighborhoods/01_AL/0150000_Mobile,AL,1
3,Mobile,Argyle.graphml,150000,Argyle,G:/Geoff/osmnx/neighborhoods/01_AL/0150000_Mobile,AL,1
4,Mobile,Arlington.graphml,150000,Arlington,G:/Geoff/osmnx/neighborhoods/01_AL/0150000_Mobile,AL,1


## Load graph and calculate stats for each neighborhood

In [4]:
def load_graph_get_stats(row):
    """
    Load one neighborhood's saved graph and compute its summary statistics.

    Parameters
    ----------
    row : mapping (e.g. one row of the neighborhoods DataFrame)
        Must provide 'file' and 'path' (the .graphml file name and its folder)
        and the identifiers 'nhood', 'city', 'state' and 'geoid'.

    Returns
    -------
    pandas.Series
        On success: everything returned by ox.basic_stats, the extended stats
        that are not per-node collections, the four identifiers, 'area' (the
        graph's 'nhood_area_m' attribute as a float), 'area_km' (that value
        divided by 1e6) and 'time' (elapsed seconds for this neighborhood).
        On failure: only the four identifiers, so the failed neighborhood is
        still identifiable in the combined table while every stat is missing.

    Any exception raised while loading or measuring the graph is reported with
    print() and swallowed, so one bad neighborhood does not abort the whole run.
    """
    # extended stats that hold one value per node; only summary values are kept
    per_node_stats = {'avg_neighbor_degree', 'avg_weighted_neighbor_degree', 'clustering_coefficient',
                      'clustering_coefficient_weighted', 'degree_centrality', 'pagerank'}

    # copied into the result whether or not the calculation succeeds
    identifiers = {key: row[key] for key in ('nhood', 'city', 'state', 'geoid')}

    try:
        start_time = time.time()
        G = ox.load_graphml(filename=row['file'], folder=row['path'])
        # the attribute is converted explicitly because it may be stored as text
        nhood_area_m = float(G.graph['nhood_area_m'])

        stats = ox.basic_stats(G, area=nhood_area_m)
        stats.update(identifiers)

        # keep every extended stat except the per-node ones; filtering the dict
        # (rather than dropping labels) tolerates a stat that was not computed
        extended_stats = ox.extended_stats(G)
        stats.update({key: value for key, value in extended_stats.items()
                      if key not in per_node_stats})

        stats['area_km'] = nhood_area_m / 1e6
        stats['area'] = nhood_area_m
        stats['time'] = time.time() - start_time
        return pd.Series(stats)

    except Exception as e:
        print('{}, {}, {} failed: {}'.format(row['nhood'], row['city'], row['state'], e))
        return pd.Series(identifiers)

In [5]:
# Optional quick check: uncomment the two lines below (and comment out the full
# run) to process roughly every (len(df)/100)-th row instead of all of them.
# Note the step is int(len(df)/100), which is 0 when df has fewer than 100 rows.
#sample = list(range(0, len(df), int(len(df)/100)))
#stats = df.iloc[sample].apply(load_graph_get_stats, axis=1)
# Full run: call load_graph_get_stats once per row of df (axis=1).
stats = df.apply(load_graph_get_stats, axis=1)



Woods, Davis, CA failed: float division by zero




Cedar Ridge, Fort-Wayne, IN failed: float division by zero
Centaur Acres, Fort-Wayne, IN failed: float division by zero
Foxchase, Fort-Wayne, IN failed: float division by zero




Churchill Heights, San-Antonio, TX failed: float division by zero
Tierra Del Sol, San-Antonio, TX failed: float division by zero
Wynnwood, San-Antonio, TX failed: float division by zero




In [6]:
# Write the per-neighborhood stats table to disk without the row index.
stats.to_csv('stats_every_nhood.csv', index=False, encoding='utf-8')
# Number of rows in the stats table.
print(stats.shape[0])
# Cell output: sum of the per-neighborhood 'time' column.
stats['time'].sum()

6857


1709.3586766719818