[NYC MTA Dataset](http://www.mta.info/developers/download.html)

In [None]:
from pandas import read_csv
# Load the five MTA tables, in the same order as the paths listed below
calendar, routes, trips, times, stops = map(read_csv, [
    'datasets/NYC-MTA-Calendar.csv',
    'datasets/NYC-MTA-Routes.csv',
    'datasets/NYC-MTA-Trips.csv',
    'datasets/NYC-MTA-StopTimes.csv',
    'datasets/NYC-MTA-Stops.csv',
])

In [None]:
# Show the first calendar row; .iloc is the positional replacement for the
# deprecated (and later removed) .ix indexer
calendar.iloc[0]

In [None]:
# Collect the service IDs that end with 'WKD'.
# A list comprehension yields a real list on both Python 2 and Python 3,
# whereas filter() returns a lazy, single-use iterator on Python 3.
weekdayServiceIDs = [
    serviceID for serviceID in calendar.service_id
    if serviceID.endswith('WKD')]
weekdayServiceIDs

In [None]:
# Show the first routes row; .iloc is the positional replacement for the
# deprecated (and later removed) .ix indexer
routes.iloc[0]

In [None]:
# Map each route_id to its route_long_name, then look up route '3'
routeNameByID = dict(zip(routes['route_id'], routes['route_long_name']))
routeNameByID['3']

In [None]:
# Show the first trips row; .iloc is the positional replacement for the
# deprecated (and later removed) .ix indexer
trips.iloc[0]

In [None]:
# Keep only the trips whose service_id is one of the weekday service IDs
isWeekdayTrip = trips['service_id'].isin(weekdayServiceIDs)
weekdayTrips = trips[isWeekdayTrip]
# Number of weekday trips
weekdayTrips.shape[0]

In [None]:
# Show the first stop-times row; .iloc is the positional replacement for the
# deprecated (and later removed) .ix indexer
times.iloc[0]

In [None]:
# Keep only the stop times that belong to one of the weekday trips
weekdayTripIDs = weekdayTrips['trip_id'].unique()
weekdayTimes = times[times['trip_id'].isin(weekdayTripIDs)]
# Number of weekday stop times
weekdayTimes.shape[0]

In [None]:
# Show the first stops row; .iloc is the positional replacement for the
# deprecated (and later removed) .ix indexer
stops.iloc[0]

In [None]:
# List weekday stops on the 5 line
routeID = '5'
# Parenthesized form prints the same text on Python 2 and is valid on Python 3
print(routeNameByID[routeID])
route5WeekdayTrips = weekdayTrips[weekdayTrips.route_id == routeID]
# merge() without `on` joins on the columns the two tables have in common
stopIDs = route5WeekdayTrips.merge(weekdayTimes).stop_id.unique()
stops[stops.stop_id.isin(stopIDs)].stop_name.unique()

In [None]:
# List weekday stops on the 5 line by departure time
# Pick one sample trip by position; .iloc[224] selects the same row as the
# deprecated .ix lookup of the 225th index label
route5Trip = route5WeekdayTrips.iloc[224]
route5TripTimes = weekdayTimes[weekdayTimes.trip_id == route5Trip['trip_id']]
# merge() without `on` joins on the columns the two tables have in common
route5Stops = route5TripTimes.merge(stops)[['departure_time', 'stop_id', 'stop_name']]
route5Stops.sort_values('departure_time')

In [None]:
# Build a network connecting stops on the 5 line and 6 line
import datetime
import networkx

def get_tripStops(tripID):
    """Return the stops of one trip ordered by departure time.

    Selects the rows of the module-level ``times`` table whose ``trip_id``
    equals ``tripID``, merges them with the module-level ``stops`` table and
    keeps only the ``departure_time`` and ``stop_name`` columns, sorted by
    ``departure_time``.
    """
    isThisTrip = times.trip_id == tripID
    stopTimes = times[isThisTrip].merge(stops)
    schedule = stopTimes[['departure_time', 'stop_name']]
    return schedule.sort_values('departure_time')

def parse_departure_time(x):
    """Parse the ``departure_time`` of a stop record into a datetime.

    ``x`` is any mapping-like row with a ``'departure_time'`` entry formatted
    as ``H:MM:SS`` / ``HH:MM:SS``.  The result is anchored on 1900-01-01,
    which is the date ``strptime`` uses when only a time is parsed, so values
    below 24 hours are identical to ``strptime(..., '%H:%M:%S')``.

    GTFS feeds express times after midnight of the service day with hours of
    24 or more (e.g. ``25:10:00``); ``strptime`` rejects those, so they are
    handled here by rolling over onto the following day.

    Raises ``ValueError`` when the text is not three colon-separated integers
    or when minutes/seconds fall outside 0-59.
    """
    text = x['departure_time']
    fields = text.split(':')
    if len(fields) != 3:
        raise ValueError('invalid departure_time: %r' % (text,))
    hours, minutes, seconds = [int(field) for field in fields]
    if hours < 0 or not 0 <= minutes < 60 or not 0 <= seconds < 60:
        raise ValueError('invalid departure_time: %r' % (text,))
    # Adding a timedelta lets hours >= 24 spill over into the next day
    return datetime.datetime(1900, 1, 1) + datetime.timedelta(
        hours=hours, minutes=minutes, seconds=seconds)

def make_graph(tripIDs):
    """Build a directed multigraph linking consecutive stops of each trip.

    For every trip ID in ``tripIDs`` the stops returned by ``get_tripStops``
    are walked in order, and an edge is added from each stop name to the next
    one.  Each edge is keyed by the ID of the trip that produced it and
    weighted by the number of minutes between the two departure times.

    Trips with no stops contribute nothing to the graph.

    Returns the resulting ``networkx.MultiDiGraph``.
    """
    graph = networkx.MultiDiGraph()
    for tripID in tripIDs:
        tripStops = get_tripStops(tripID)
        stopGenerator = tripStops.iterrows()
        # next() with a default works on Python 2 and 3 and avoids a
        # StopIteration when the trip has no stops at all
        firstRow = next(stopGenerator, None)
        if firstRow is None:
            continue
        lastStop = firstRow[1]
        for _, thisStop in stopGenerator:
            elapsed = parse_departure_time(thisStop) - parse_departure_time(lastStop)
            minutes = elapsed.total_seconds() / 60.
            # Key by the trip being processed so that parallel edges from
            # different trips are kept apart instead of overwriting each other
            graph.add_edge(lastStop['stop_name'], thisStop['stop_name'],
                key=tripID,
                weight=minutes)
            lastStop = thisStop
    return graph
    
# Build the graph from two hand-picked trip IDs
exampleTripIDs = [
    'A20120610WKD_101200_5..N04R',
    'A20120610WKD_104200_6..N03R',
]
graph = make_graph(exampleTripIDs)

In [None]:
# Shortest path between the two stations, using the edge weights
networkx.dijkstra_path(
    graph,
    source='Brooklyn Bridge - City Hall',
    target='110 St')

In [None]:
# Total weight (minutes) of the shortest path between the two stations
networkx.dijkstra_path_length(
    graph,
    source='Brooklyn Bridge - City Hall',
    target='110 St')

In [None]:
# Make a graph of the NYC MTA subway system
# from one trip per route: the first weekday trip listed in each route group
tripIDs = [
    routeTrips.trip_id.values[0]
    for routeID, routeTrips in weekdayTrips.groupby('route_id')]
graph = make_graph(tripIDs)

In [None]:
# Which ten stations are the most connected?
from pandas import Series
# Rank the stations by PageRank score and show the ten highest
stationRanks = Series(networkx.pagerank_numpy(graph))
stationRanks.sort_values(ascending=False).head(10)