Takes the graph G, finds the minimum spanning tree, clusters the MST.  
Each cluster is then searched for the node with the highest 'closeness centrality" to indicate the best location to station first responders.  
  
TODO:
- show that the center node is really the best location

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install igraph

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting igraph
  Downloading igraph-0.10.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting texttable>=1.6.2 (from igraph)
  Downloading texttable-1.6.7-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph
Successfully installed igraph-0.10.4 texttable-1.6.7


In [4]:
import json
import numpy as np
import igraph as ig

In [5]:
def create_uber_graph(file_geo: str, file_time: str) -> ig.Graph:
    """Returns an giant connected component of the uber movement data

    Parameters
    ----------
    file_geo : str
        path to the uber .json geo-json file
    file_time : str
        path to the uber uber drive times file .csv time file

    Returns
    -------
    g : ig.Graph
        giant connected component from the uber movement data
    """
    if not file_geo.endswith('.json'):
        raise ValueError('file_geo must be a .json file')
    if not file_time.endswith('.csv'):
        raise ValueError('file_time must be a .csv file')

    # load geography data as nodes 
    with open(file_geo) as f:
        census_tracts = json.loads(f.readline())

    display_names = dict()
    coordinates = dict()
    
    for area in census_tracts['features']:
        id = int(area['properties']['MOVEMENT_ID'])
        display_names[id] = area['properties']['DISPLAY_NAME''']
        a = area['geometry']['coordinates'][0]
        coordinates[id] = np.array(a if type(a[0][0]) == float else a[0]).mean(axis=0)[::-1]

    g = ig.Graph(directed=False)
    g.add_vertices(len(display_names))
    g.vs['display_name'] = list(display_names.values())  # index = id - 1
    g.vs['coordinates'] = list(coordinates.values())


    # Load drive time data as edges
    december_month = 12  # for monthly aggregate data of 4th quarter, we can filter data based off of only December

    edges = []
    weights = []

    with open(file_time) as f:
        f.readline()  # skip the first line

        for line in f:
            vals = line.strip().split(',')
            
            # read edge info
            src, dest, month, dist = int(vals[0]), int(vals[1]), int(vals[2]), float(vals[3])

            if month == december_month:
                edges.append((src - 1, dest - 1))
                weights.append(dist)

    g.add_edges(edges)
    g.es['weight'] = weights

    # keep only the giant connected component
    gcc = max(g.components(), key=len)
    g = g.subgraph(gcc)

    # remove duplicate edges
    return g.simplify(combine_edges=dict(weight='mean'))


file_geo = "/content/drive/MyDrive/Colab Notebooks/232/Project 4/los_angeles_censustracts.json"
file_time = "/content/drive/MyDrive/Colab Notebooks/232/Project 4/los_angeles-censustracts-2019-4-All-MonthlyAggregate.csv"
g = create_uber_graph(file_geo, file_time)
print(f'Number of nodes: {len(g.vs)}', f'Number of edges: {len(g.es)}', sep='\n')

Number of nodes: 2649
Number of edges: 1003858


In [6]:
# cluster graph into n clusters
n_clusters = 50
mst = g.spanning_tree(weights=g.es['weight'])
clusters = mst.community_fastgreedy(weights='weight').as_clustering(n=n_clusters)
g.vs['cluster'] = clusters.membership

# Find the nodes in each cluster that are closest to all other nodes in the cluster
# ie have the highest closeness centrality
cluster_membership = np.array(g.vs['cluster'])
cluster_mst_root_nodes = []
for i in range(n_clusters):
    cluster_nodes = np.where(cluster_membership == i)[0]
    subgraph = g.subgraph(cluster_nodes)
    min_node = None
    min_dist = float('inf')
    for node in subgraph.vs:
        dist_sum = 0
        for neighbor in subgraph.neighbors(node):
            dist_sum += subgraph.es[subgraph.get_eid(node, neighbor)]['weight']
            if dist_sum > min_dist:
                break
        if dist_sum < min_dist:
            min_dist = dist_sum
            min_node = node
    cluster_mst_root_nodes.append(min_node)

In [7]:
import plotly.graph_objects as go

vertices = g.vs
coordinates = np.array([v['coordinates'] for v in vertices])
colors = np.array([v['cluster'] for v in vertices])

# plot
fig = go.Figure(go.Scattermapbox(
    mode = "markers+text",
    lat = coordinates[:, 0],
    lon = coordinates[:, 1],
    marker = {'size': 7, 'color': colors},
    text = [f'{v.index}: {v["cluster"]}' for v in vertices],
    textposition = "bottom right",
    textfont = {'color': 'black'},
))

# plot cluster_mst-root nodes
colors = np.array([v['cluster'] for v in cluster_mst_root_nodes])
coordinates = np.array([v['coordinates'] for v in cluster_mst_root_nodes])
fig.add_trace(go.Scattermapbox(
    mode = "markers+text",
    lat = coordinates[:, 0],
    lon = coordinates[:, 1],
    marker = {'size': 20, 'color': colors},
    text = [f'{v.index}: {v["cluster"]}' for v in cluster_mst_root_nodes],
    textposition = "bottom right",
    textfont = {'color': 'black'},
))

fig.update_layout(
    mapbox = {
        'style': "carto-positron",
        'zoom': 9,
        'center': {'lon': -118.2437, 'lat': 34.0522},
    },
    margin = {'l': 0, 'r': 0, 'b': 0, 't': 0},
    height = 600,
)

fig.show()

In [12]:
for node in cluster_mst_root_nodes:
  print(node['cluster'], node.index, np.round(node['coordinates'][0],5), np.round(node['coordinates'][1],5))

0 62 34.12566 -118.16622
1 9 34.0117 -118.17831
2 26 34.1461 -117.96759
3 58 34.27333 -118.28809
4 20 34.11424 -118.09069
5 70 34.05394 -118.07046
6 61 33.87916 -118.0934
7 28 33.84589 -117.98227
8 45 34.0861 -117.93975
9 5 34.08468 -117.9119
10 5 33.97165 -118.17006
11 21 33.96759 -118.20725
12 75 34.10934 -118.41587
13 8 34.06728 -118.43652
14 34 34.19502 -118.4054
15 4 34.20219 -118.33567
16 43 34.15337 -118.38368
17 1 34.12171 -118.65489
18 55 33.80458 -118.26249
19 25 33.86754 -118.29929
20 16 33.92858 -117.98197
21 8 34.02517 -118.13506
22 1 33.98262 -118.16636
23 63 33.92434 -118.31281
24 54 33.90502 -118.19969
25 44 34.08924 -117.83421
26 29 34.01376 -118.42449
27 34 33.96815 -118.37018
28 17 33.95697 -117.8256
29 40 33.84561 -118.39749
30 40 33.88734 -118.33655
31 32 34.08929 -118.26641
32 21 33.99637 -118.01374
33 28 33.9802 -118.30925
34 30 34.24297 -118.06648
35 25 33.90553 -117.912
36 34 33.77833 -118.29508
37 19 33.78005 -118.18895
38 1 33.79697 -118.1568
39 23 34.28616 -