# Network analysis with NetworkX

In [1]:
import os, sys
import pandas as pd
import networkx as nx

from globes import taxi_dir, days_dir, getFullDf, zoneIdToBorough

In [2]:
# Trip-table columns that the rest of this notebook keeps.
ZONE_COLS = [
    "pickup_day",
    "pickup_hour",
    "pickup_zone_taxi",
    "dropoff_zone_taxi",
    "pickup_borough",
    "dropoff_borough",
]

# Zone ids 1..263 with ids 57, 104 and 105 left out.
# NOTE(review): the reason for skipping those three ids is not shown here - confirm.
zone_nodes = [i for i in range(1, 264) if i not in (57, 104, 105)]

# Load the full trip table and keep only the columns listed above.
df = getFullDf()[ZONE_COLS]

using 28 cores


In [3]:
## functions for loading graphs
def loadNodes(G):
    """Add every id in ``zone_nodes`` to ``G`` as a node tagged with its borough.

    The borough for each zone comes from ``zoneIdToBorough()``; zones with no
    (or an empty) borough entry are tagged with the string "None".

    Returns the same graph object ``G``.
    """
    borough_of = zoneIdToBorough()

    # Bucket the zone ids by borough label.
    zones_by_borough = {}
    for zone in zone_nodes:
        label = borough_of.get(zone) or "None"
        zones_by_borough.setdefault(label, []).append(zone)

    # One call per borough so all of its zones share the same ``borough`` attribute.
    for label, members in zones_by_borough.items():
        G.add_nodes_from(members, borough=label)

    return G
        
def loadDirectedEdges(G, trips=None, zones=None):
    """Add one weighted edge per observed (pickup zone -> dropoff zone) pair.

    The weight of each edge is the number of rows in the trip table with that
    pickup/dropoff combination.

    Parameters
    ----------
    G : graph object exposing ``add_weighted_edges_from``.
    trips : DataFrame with "pickup_zone_taxi" and "dropoff_zone_taxi" columns.
        Defaults to the notebook-level ``df``.
    zones : iterable of pickup zone ids to include.
        Defaults to the notebook-level ``zone_nodes``.

    Returns
    -------
    The same graph object ``G``.
    """
    if trips is None:
        trips = df
    if zones is None:
        zones = zone_nodes

    # Only trips that start in a requested zone are counted; dropoff zones are
    # not filtered.
    from_known = trips.loc[trips["pickup_zone_taxi"].isin(list(zones))]

    # A single two-key groupby replaces one full scan of the table per zone.
    pair_counts = from_known.groupby(["pickup_zone_taxi", "dropoff_zone_taxi"]).size()

    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    edges = [
        (pickup_zone, dropoff_zone, count)
        for (pickup_zone, dropoff_zone), count in pair_counts.items()
    ]

    G.add_weighted_edges_from(edges)
    return G

def loadUndirectedEdges(G, trips=None, zones=None):
    """Add one weighted edge per unordered zone pair, summing flow in both directions.

    Trips A -> B and B -> A are folded onto the single key
    ``(min(A, B), max(A, B))`` and their row counts are added together.
    Trips that start and end in the same zone become a self-loop.

    Parameters
    ----------
    G : graph object exposing ``add_weighted_edges_from``.
    trips : DataFrame with "pickup_zone_taxi" and "dropoff_zone_taxi" columns.
        Defaults to the notebook-level ``df``.
    zones : iterable of pickup zone ids to include.
        Defaults to the notebook-level ``zone_nodes``.

    Returns
    -------
    The same graph object ``G``.
    """
    if trips is None:
        trips = df
    if zones is None:
        zones = zone_nodes

    # Only trips that start in a requested zone are counted; dropoff zones are
    # not filtered.
    from_known = trips.loc[trips["pickup_zone_taxi"].isin(list(zones))]

    # A single two-key groupby replaces one full scan of the table per zone.
    pair_counts = from_known.groupby(["pickup_zone_taxi", "dropoff_zone_taxi"]).size()

    # edgeDict: (smaller node, bigger node) => aggregated flow.
    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    edgeDict = {}
    for (pickup_zone, dropoff_zone), count in pair_counts.items():
        edge = (min(dropoff_zone, pickup_zone), max(dropoff_zone, pickup_zone))
        edgeDict[edge] = edgeDict.get(edge, 0) + count

    edges = [(e1, e2, total) for (e1, e2), total in edgeDict.items()]
    G.add_weighted_edges_from(edges)
    return G


In [4]:
## load digraph: one weighted edge per (pickup zone -> dropoff zone) pair
# NOTE(review): the loaders appear to add each zone pair only once, so a plain
# DiGraph / Graph may be sufficient here - confirm before changing, since the
# centrality results below were recorded with the multigraph types.
G = nx.MultiDiGraph()
G = loadNodes(G)
G = loadDirectedEdges(G)

## undirected graph: edge weight is the sum of flow between two zones in both directions
G_aggflow = nx.MultiGraph()
G_aggflow = loadNodes(G_aggflow)
G_aggflow = loadUndirectedEdges(G_aggflow)

# Sanity check on node counts. Adding edges also creates any endpoint that is
# not yet a node, so a count above len(zone_nodes) would mean the trip table
# references zones that loadNodes did not add.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(len(G.nodes()))
print(len(G_aggflow.nodes()))

260
260


## Measures of centrality

In [5]:
def boroughAgg(G, centrality_data):
    """Average a per-zone centrality score within each borough.

    Parameters
    ----------
    G : graph whose nodes carry a "borough" attribute.
    centrality_data : dict mapping zone id -> numeric score.

    Returns
    -------
    dict mapping borough -> mean score of the zones in that borough.
    Boroughs with no zone in ``centrality_data`` are absent from the result.

    Raises
    ------
    KeyError if a zone in ``centrality_data`` is not a node of ``G`` or has no
    "borough" attribute.
    """
    # ``G.node`` was removed in NetworkX 2.4, while in NetworkX 1.x ``G.nodes``
    # is a method and cannot be subscripted. Use whichever mapping exists.
    node_attrs = getattr(G, "node", None)
    if node_attrs is None:
        node_attrs = G.nodes

    scores_by_borough = {}
    for zoneid, score in centrality_data.items():
        borough = node_attrs[zoneid]["borough"]
        scores_by_borough.setdefault(borough, []).append(score)

    # float() keeps this a true division under Python 2 even for integer scores.
    return dict(
        (borough, float(sum(scores)) / len(scores))
        for borough, scores in scores_by_borough.items()
    )


In [6]:
# Degree centrality of every zone in the directed graph, averaged per borough.
# NetworkX normalizes degree by n-1; in a directed graph the degree counts both
# in- and out-edges (and self-loops are possible here), so means above 1 such as
# those in the recorded output can occur.
deg_centrality = nx.degree_centrality(G)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(boroughAgg(G, deg_centrality))

{u'Staten Island': 0.45598455598455595, u'Brooklyn': 1.4281916576998546, u'Bronx': 1.1753614079195474, u'EWR': 0.8918918918918919, u'Manhattan': 1.6707773871952976, u'Queens': 1.2869634340222578}


In [7]:
# Closeness centrality on the directed graph, averaged per borough.
# NOTE(review): the "weight" edge attribute is a trip count (see
# loadDirectedEdges), so passing it as the distance treats busier zone pairs as
# farther apart - confirm this is intended.
closeness_centrality = nx.closeness_centrality(G, distance="weight")
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(boroughAgg(G, closeness_centrality))

{u'Staten Island': 0.3790726128126737, u'Brooklyn': 0.4761784180680855, u'Bronx': 0.4857381445962359, u'EWR': 0.40859119396480287, u'Manhattan': 0.37056875737437855, u'Queens': 0.4702500054474006}


In [7]:
# Betweenness centrality on the directed graph, averaged per borough.
# NOTE(review): "weight" is a trip count (see loadDirectedEdges); shortest-path
# measures read it as a cost, so high-flow links are penalized - confirm this
# is the intended interpretation.
betweenness_centrality = nx.betweenness_centrality(G, weight="weight")
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(boroughAgg(G, betweenness_centrality))

{u'Staten Island': 0.0004682492370728995, u'Brooklyn': 0.0013879368041661217, u'Bronx': 0.000454884920034402, u'EWR': 0.00023518068684266527, u'Manhattan': 0.0022433698782293667, u'Queens': 0.0010449833194460766}


In [45]:
# The same three measures on the undirected aggregate-flow graph.
# These assignments rebind the names computed for the directed graph G in the
# cells above, so after this cell they refer to G_aggflow results.
# NOTE(review): no weight/distance argument is passed here, unlike the
# directed-graph cells - confirm the unweighted variants are intended.
deg_centrality = nx.degree_centrality(G_aggflow)
closeness_centrality = nx.closeness_centrality(G_aggflow)
betweenness_centrality = nx.betweenness_centrality(G_aggflow)

# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(boroughAgg(G_aggflow, deg_centrality))
print(boroughAgg(G_aggflow, closeness_centrality))
print(boroughAgg(G_aggflow, betweenness_centrality))

{u'Staten Island': 0.38687258687258697, u'Brooklyn': 0.8141654535097158, u'Bronx': 0.6866301517464307, u'EWR': 0.7490347490347491, u'Manhattan': 0.9298104074223474, u'Queens': 0.7621508062684536}
{u'Staten Island': 0.5952008448493189, u'Brooklyn': 0.8454540651787196, u'Bronx': 0.7621253250993933, u'EWR': 0.7932217932217932, u'Manhattan': 0.9380565641375687, u'Queens': 0.8152363495637065}
{u'Staten Island': 0.00014428834231732475, u'Brooklyn': 0.0008336645781396832, u'Bronx': 0.0002710839050134295, u'EWR': 0.00037929385337888546, u'Manhattan': 0.0017771252999865773, u'Queens': 0.0005991439028553201}
