In [1]:
import geopandas as gpd
import json
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
import scipy.stats as stats

# presumably turns on OSMnx console logging for the long-running loop below -- confirm against the installed OSMnx version
ox.config(log_console=True)

# number of histogram bins used for both the edge-length and the orientation entropy
num_bins = 36
# edges must be strictly longer than this to be included in the bearings analysis
threshold = 10 # meters

# header-less CSV whose first column lists the graph files to process
input_path = 'data/sampled_graph_filepaths.csv'
# NOTE(review): absolute, machine-specific path; not referenced by the cells visible here
graphs_folder = 'G:\\Geoff\\osmnx\\data\\tracts\\graphml'
# NOTE(review): not referenced by the cells visible here
bearings_folder = 'data/bearings'
# CSV file the final indicators table is written to
output_path = 'data/tracts_indicators.csv'
# NOTE(review): not referenced by the cells visible here
cities_path = 'data/tracts_shapefile'

In [2]:
# parse data/states_by_fips.json into fips_to_state
# presumably a mapping from FIPS code to state -- verify against the file's contents
# NOTE(review): fips_to_state is not used by the cells visible here
with open('data/states_by_fips.json') as f:
    fips_to_state = json.load(f)

In [3]:
# first column of the header-less input CSV, sorted by value: one graph file path per row
filepaths = pd.read_csv(input_path, header=None)[0].sort_values()
# display how many graph files will be processed
len(filepaths)

72663

## Define helper functions

In [4]:
def reverse_bearing(x):
    """
    Return the bearing that points the opposite way from x.

    Values below 180 are shifted up by 180; every other value is shifted
    down by 180.
    """
    if x < 180:
        return x + 180
    return x - 180

def get_unweighted_bearings(G, threshold):
    """
    Collect the bearings of G's edges in both directions.

    Parameters
    ----------
    G : graph
        iterated with G.edges(keys=True, data=True); every edge's data dict
        must provide 'bearing' and 'length'
    threshold : number
        lets you discard short streets from the bearings analysis: only edges
        whose 'length' is strictly greater than threshold are kept

    Returns
    -------
    pandas.Series
        the kept bearings followed by their reversed counterparts, with a
        fresh 0..k-1 index
    """
    # explicit float dtype so that a graph with no qualifying edges yields an
    # empty float Series instead of an object-dtype one
    bearings = pd.Series([d['bearing'] for u, v, k, d in G.edges(keys=True, data=True)
                          if d['length'] > threshold], dtype=float)

    # drop takes a boolean; the string 'True' only worked because it is truthy
    return pd.concat([bearings, bearings.map(reverse_bearing)]).reset_index(drop=True)

In [5]:
def count_and_merge(n, bearings):
    """
    Histogram bearings (degrees, 0 to 360) into n bins centered on 0, 360/n, ...

    Twice as many bins as requested are counted first and then merged in
    pairs, which prevents bin-edge effects around common values like 0 and 90.

    Returns an array with the n merged counts.
    """
    fine_bins = 2 * n
    edges = np.arange(fine_bins + 1) * 360 / fine_bins
    fine_counts = np.histogram(bearings, bins=edges)[0]

    # rotate the last fine bin to the front, so eg 0.01 and 359.99 share a merged bin
    shifted = np.roll(fine_counts, 1)

    # add each even-positioned fine bin to the odd-positioned one that follows it
    return shifted[0::2] + shifted[1::2]

In [6]:
def calculate_orientation_entropy(data, n):
    """
    Entropy of the bearings in data after binning them into n merged bins.

    The bin counts come from count_and_merge and are handed to stats.entropy
    unchanged.
    """
    return stats.entropy(count_and_merge(n, data))

In [7]:
def circuity(G, edge_length_total):
    """
    Average circuity of G: total edge length divided by the summed
    great-circle distance between the two end nodes of every edge.

    Parameters
    ----------
    G : graph
        iterated with G.edges(keys=True); every node must carry 'y' and 'x'
        attributes, which are passed to ox.great_circle_vec as latitude and
        longitude respectively
    edge_length_total : number
        sum of the edge lengths of G

    Returns
    -------
    float
        edge_length_total divided by the summed great-circle distances, or
        NaN when G has no edges
    """
    coords = [(G.nodes[u]['y'], G.nodes[u]['x'], G.nodes[v]['y'], G.nodes[v]['x'])
              for u, v, _ in G.edges(keys=True)]

    # without edges there are no distances to compare, and the 4-column frame
    # below cannot be built from an empty 1-d array
    if not coords:
        return np.nan

    df_coords = pd.DataFrame(np.array(coords), columns=['u_y', 'u_x', 'v_y', 'v_x'])

    gc_distances = ox.great_circle_vec(lat1=df_coords['u_y'],
                                       lng1=df_coords['u_x'],
                                       lat2=df_coords['v_y'],
                                       lng2=df_coords['v_x'])

    # missing distances count as zero in the total
    gc_distances = gc_distances.fillna(value=0)
    circuity_avg = edge_length_total / gc_distances.sum()
    return circuity_avg

## Calculate length entropy and other stats

In [None]:
%%time
# indicators for every sampled graph, keyed by geoid (the file name without '.graphml')
results = {}

for filepath in filepaths:

    # os.path.split honors every separator valid on the running platform (both
    # backslash and '/' on Windows), so geoid never picks up directory parts
    folder, filename = os.path.split(filepath)
    geoid = filename.replace('.graphml', '')

    Gu = ox.get_undirected(ox.load_graphml(filename=filename, folder=folder))
    lengths = pd.Series(nx.get_edge_attributes(Gu, 'length'))

    # node and edge counts, computed once and reused below
    n = len(Gu.nodes())
    m = len(Gu.edges())
    k_avg = 2 * m / n
    length_median = lengths.median()
    length_mean = lengths.mean()

    # proportion of 4-way ints, dead-ends, and avg circuity
    streets_per_node = list(Gu.graph['streets_per_node'].values())
    prop_4way = streets_per_node.count(4) / n
    prop_deadend = streets_per_node.count(1) / n
    circuity_avg = circuity(Gu, lengths.sum())

    # calculate length entropy, on the raw lengths and on their logarithm
    count, _ = np.histogram(lengths, num_bins)
    length_entropy = stats.entropy(count)
    # the 0.01 offset presumably keeps the log finite for zero-length edges
    count_log, _ = np.histogram(np.log(lengths+0.01), num_bins)
    length_entropy_log = stats.entropy(count_log)

    # calculate orientation entropy from the bearings of edges longer than threshold
    bearings = get_unweighted_bearings(ox.add_edge_bearings(Gu), threshold)
    orientation_entropy = calculate_orientation_entropy(bearings.dropna(), num_bins)

    results[geoid] = {'k_avg'              : k_avg,
                      'n'                  : n,
                      'm'                  : m,
                      'prop_4way'          : prop_4way,
                      'prop_deadend'       : prop_deadend,
                      'circuity_avg'       : circuity_avg,
                      'length_median'      : length_median,
                      'length_mean'        : length_mean,
                      'length_entropy'     : length_entropy,
                      'length_entropy_log' : length_entropy_log,
                      'orientation_entropy': orientation_entropy}

In [None]:
# results is keyed by geoid, so transpose to get one row per geoid and one column per indicator
df = pd.DataFrame(results).T

## Calculate orientation-order

In [None]:
# natural log of the bin count; used as the default max_ent of orientation_order
max_entropy = np.log(num_bins)
max_entropy

In [None]:
# reference distribution: equal counts in min_bins of the num_bins bins and zero in all others
min_bins = 4 # perfect grid
perfect_grid = [1] * min_bins + [0] * (num_bins - min_bins)
# entropy of that reference; used as the default min_ent of orientation_order
perfect_grid_entropy = stats.entropy(perfect_grid)
perfect_grid_entropy

In [None]:
def orientation_order(eta, max_ent=max_entropy, min_ent=perfect_grid_entropy):
    """
    Turn an orientation entropy eta into an orientation-order value.

    eta is first rescaled relative to the span from min_ent to max_ent
    (perfect_grid_entropy and max_entropy by default). The rescaled value is
    then squared, to approximately linearize orientation-order's relationship
    with the share of total bins with equal non-zero probabilities, and
    subtracted from 1.
    """
    normalized = (eta - min_ent) / (max_ent - min_ent)
    return 1 - normalized ** 2

# derive each row's orientation-order from its orientation entropy, using the default entropy bounds
df['orientation_order'] = df['orientation_entropy'].map(orientation_order)

## Merge and save to disk

In [None]:
# m and n hold the edge and node counts collected in the loop; store them as integers
df['m'] = df['m'].astype(int)
df['n'] = df['n'].astype(int)

In [None]:
# column order of the saved table
cols=['geoid', 'orientation_order', 'orientation_entropy', 
      'circuity_avg', 'k_avg', 'prop_deadend', 'prop_4way', 'm', 'n',
      'length_median', 'length_mean', 'length_entropy', 'length_entropy_log']

# turn the index into a regular column named 'geoid', then keep exactly the columns in cols, in that order
df = df.reset_index().rename(columns={'index':'geoid'}).reindex(columns=cols)
# display the number of rows
len(df)

In [None]:
# write the table to output_path as UTF-8 CSV without the row index, then preview the first rows
df.to_csv(output_path, index=False, encoding='utf-8')
df.head()