# MTA Data - Metis 01 Project Benson

In [1]:
%matplotlib inline
from __future__ import division
import csv
import calendar
import datetime
import matplotlib.pyplot as plt
from collections import Counter

## Objective
I was curious about where people are going at off-peak times. The goal for this project is to find the areas of New York with the most active nightlife. As a baseline, we will compare weekends to weeknights.

First, read in the data. Then create a dictionary in which the keys are turnstile identifiers (the C/A, UNIT, SCP and STATION columns) and the values are the remaining columns of every reading for that turnstile.

In [2]:
#!curl -O http://web.mta.info/developers/data/nyct/turnstile/turnstile_150404.txt

In [2]:
def read_file(filename):
    """Read a comma-separated text file into a list of rows.

    Each row is a list of cell strings with leading and trailing whitespace
    removed.  No row is treated specially: the file's first line is returned
    as the first row.
    """
    cleaned = []
    with open(filename) as handle:
        for record in csv.reader(handle):
            cleaned.append([field.strip() for field in record])
    return cleaned
    
# Load thirteen weekly turnstile files.  rows1 comes from the file with the
# largest date stamp in its name (150627) and rows13 from the smallest
# (150404) -- presumably YYMMDD stamps; confirm against the data source.
# Each list still contains the file's first row at index 0.
rows1 = read_file('turnstile_150627.txt')
rows2 = read_file('turnstile_150620.txt')
rows3 = read_file('turnstile_150613.txt')
rows4 = read_file('turnstile_150606.txt')
rows5 = read_file('turnstile_150530.txt')
rows6 = read_file('turnstile_150523.txt')
rows7 = read_file('turnstile_150516.txt')
rows8 = read_file('turnstile_150509.txt')
rows9 = read_file('turnstile_150502.txt')
rows10 = read_file('turnstile_150425.txt')
rows11 = read_file('turnstile_150418.txt')
rows12 = read_file('turnstile_150411.txt')
rows13 = read_file('turnstile_150404.txt')


In [3]:
# Remove the first row (the column header) from every weekly list.
# NOTE(review): list.pop(0) mutates the lists in place, so this cell is not
# idempotent -- running it a second time without reloading the files drops a
# data row from each list.
# The cell's displayed output is the value returned by the final pop.
rows1.pop(0)
rows2.pop(0)
rows3.pop(0)
rows4.pop(0)
rows5.pop(0)
rows6.pop(0)
rows7.pop(0)
rows8.pop(0)
rows9.pop(0)
rows10.pop(0)
rows11.pop(0)
rows12.pop(0)
rows13.pop(0)

['C/A',
 'UNIT',
 'SCP',
 'STATION',
 'LINENAME',
 'DIVISION',
 'DATE',
 'TIME',
 'DESC',
 'ENTRIES',
 'EXITS']

In [4]:
def concatenate_rows(*row_files):
    """Concatenate any number of row lists into one new flat list.

    Parameters
    ----------
    *row_files : iterables of rows
        The rows of each argument are appended in argument order.

    Returns
    -------
    list
        A new list; none of the inputs is modified.  Called with no
        arguments it returns an empty list.
    """
    # sum(row_files, []) builds a fresh copy of the accumulated list for every
    # argument, which is quadratic in the total row count.  A single flattening
    # pass gives the same result in linear time.
    return [row for rows in row_files for row in rows]
    
rows = concatenate_rows(rows13, rows12, rows11, rows10, rows9, rows8, rows7, rows6, rows5, rows4, rows3, rows2, rows1)

In [5]:
len(rows)

2495851

In [6]:
len(rows2)

193257

In [7]:
def read_rows(raw_rows):
    """Group rows by their first four columns.

    Returns a dict that maps ``tuple(row[:4])`` to the list of
    ``tuple(row[4:])`` values seen for that key, in input order.
    """
    grouped = {}
    for record in raw_rows:
        key = tuple(record[:4])
        payload = tuple(record[4:])
        if key in grouped:
            grouped[key].append(payload)
        else:
            grouped[key] = [payload]
    return grouped

raw_readings = read_rows(rows)

In [8]:
len(raw_readings.items())

4583

In [9]:
# raw_readings.items()[0]

## Time Series
Extract time information and count numbers from dictionary values for turnstile exits.

Filter out values that are negative or seem unreasonably large.

In [9]:
def accum_by_datetime(dct):
    """Turn raw reading tuples into (timestamp, cumulative exit count) pairs.

    Parameters
    ----------
    dct : dict
        Maps a turnstile key to a list of 7-tuples.  Only positions 2 (date,
        ``MM/DD/YYYY``), 3 (time, ``HH:MM:SS``) and 6 (cumulative counter, as
        a string of digits) are used; the others are ignored.

    Returns
    -------
    dict
        Same keys; each value is a list of ``(datetime.datetime, int)``
        tuples in the input order.

    Raises
    ------
    ValueError
        If a date/time does not match the expected format or the counter is
        not an integer string.
    """
    # Spell the time format out instead of using %X: %X means "the locale's
    # time representation", so the original parse could break as soon as a
    # non-C LC_TIME locale was active.
    timestamp_format = '%m/%d/%Y %H:%M:%S'
    parsed = {}
    for turnstile, rows in dct.items():
        parsed[turnstile] = [
            (datetime.datetime.strptime(date + ' ' + time, timestamp_format),
             int(out_cumulative))
            for _, _, date, time, _, _, out_cumulative in rows]
    return parsed

datetime_cumulative = accum_by_datetime(raw_readings)

In [10]:
#datetime_cumulative.items()[0]

In [11]:
def count_by_datetime(dct):
    """Convert cumulative readings into per-interval differences.

    For every pair of consecutive readings of a turnstile this emits a
    three-element list: the first reading's timestamp, the change in the
    counter between the two readings, and the time elapsed between them.
    A turnstile with fewer than two readings maps to an empty list.
    """
    deltas = {}
    for turnstile, readings in dct.items():
        intervals = []
        for current, following in zip(readings, readings[1:]):
            intervals.append([current[0],
                              following[1] - current[1],
                              following[0] - current[0]])
        deltas[turnstile] = intervals
    return deltas

datetime_count_times = count_by_datetime(datetime_cumulative)

In [12]:
print len(datetime_count_times.items())
datetime_count_times.items()[0]

4583


(('A030', 'R083', '01-06-00', '23 ST-5 AVE'),
 [[datetime.datetime(2015, 3, 28, 0, 0), 12, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 28, 4, 0), 32, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 28, 8, 0), 169, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 28, 12, 0), 225, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 28, 16, 0), 237, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 28, 20, 0), 79, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 0, 0), 23, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 4, 0), 12, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 8, 0), 90, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 12, 0), 203, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 16, 0), 147, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 29, 20, 0), 32, datetime.timedelta(0, 14400)],
  [datetime.datetime(2015, 3, 3

In [13]:
def sort_by_value(dct):
    """Return the dict's (key, value) pairs as a list, largest value first."""
    def by_value(pair):
        # Each item is a (key, value) pair; order on the value.
        return pair[-1]

    pairs = list(dct.items())
    pairs.sort(key=by_value, reverse=True)
    return pairs

In [14]:
all_counts = [count for rows in datetime_count_times.values() for _, count, _ in rows]
all_counts.sort()

print all_counts[-200:]

[3678, 3680, 3680, 3683, 3690, 3695, 3695, 3697, 3699, 3708, 3714, 3715, 3723, 3725, 3728, 3730, 3735, 3751, 3751, 3752, 3753, 3754, 3755, 3761, 3764, 3775, 3780, 3782, 3787, 3804, 3806, 3825, 3832, 3833, 3834, 3838, 3840, 3840, 3841, 3856, 3864, 3888, 3893, 3910, 3912, 3914, 3927, 3939, 3942, 3953, 3964, 3964, 3966, 3968, 3993, 3994, 4000, 4004, 4006, 4017, 4021, 4026, 4027, 4029, 4031, 4033, 4036, 4044, 4044, 4056, 4062, 4064, 4084, 4090, 4104, 4110, 4111, 4118, 4122, 4123, 4128, 4140, 4147, 4148, 4150, 4150, 4154, 4154, 4166, 4169, 4182, 4182, 4204, 4206, 4206, 4207, 4209, 4217, 4239, 4263, 4281, 4292, 4299, 4319, 4333, 4336, 4363, 4386, 4426, 4438, 4477, 4504, 4515, 4519, 4530, 4540, 4550, 4634, 4776, 4791, 4802, 4840, 5219, 5391, 5439, 5497, 5918, 6156, 6696, 6814, 7332, 8059, 9431, 9574, 9949, 10189, 11763, 15051, 39544, 46022, 65261, 75636, 79906, 80097, 80123, 80229, 80396, 83050, 95375, 95456, 137504, 137512, 137627, 195187, 195187, 312439, 322327, 323893, 472911, 806802, 1041

In [15]:
# for key, rows in datetime_count_times.items():
#     for _, count, _ in rows:
#         if count >= 39544:
#             print count, key

In [16]:
print all_counts[:50]

[-2080392237, -1376238516, -1056964595, -985959134, -250683223, -83930712, -33568330, -16754739, -16754517, -14090237, -11113480, -8172201, -8126408, -7645885, -7231960, -7136820, -6188433, -6119245, -5748960, -5713849, -5465194, -5179240, -5008747, -4331551, -4202565, -4018906, -3967127, -3549171, -3544654, -3536673, -3003243, -2962971, -2698086, -2533024, -2377006, -2376873, -2131420, -2112369, -1669794, -1617237, -1540092, -1532280, -1473817, -1393821, -1334500, -1212364, -1103455, -1102179, -1089287, -1078401]


In [17]:
all_times = [duration.total_seconds() / 60 / 60
             for rows in datetime_count_times.values()
             for _, _, duration in rows]
print Counter(all_times).most_common(10)

[(4.0, 2299978), (4.2, 140012), (8.0, 2780), (4.433333333333334, 2217), (0.02222222222222222, 1391), (0.022500000000000003, 719), (0.23333333333333334, 264), (0.02277777777777778, 231), (4.199722222222222, 153), (3.999722222222222, 131)]


In [18]:
def filter_outliers(dct, min_count=0, max_count=7000):
    """Drop implausible interval counts and the duration column.

    Parameters
    ----------
    dct : dict
        Maps a turnstile key to a list of ``(time, count, duration)``
        triples.
    min_count, max_count : numbers, optional
        Inclusive bounds on ``count``.  The defaults (0 and 7000) are the
        thresholds this analysis originally hard-coded.

    Returns
    -------
    dict
        Same keys; each value is the list of ``(time, count)`` tuples whose
        count lies within the bounds, in the original order.
    """
    return {turnstile: [(time, count)
                        for (time, count, _) in rows
                        if min_count <= count <= max_count]
            for turnstile, rows in dct.items()}
    
datetime_counts = filter_outliers(datetime_count_times)

In [19]:
#datetime_counts.items()[0]

In [20]:
all_good_counts = [count for rows in datetime_counts.values() for _, count in rows]
print len(all_good_counts) / len(all_counts)

0.995574944165


In [21]:
all_good_counts.sort()
print all_good_counts[-5:]

[5497, 5918, 6156, 6696, 6814]


In [22]:
print all_good_counts[:5]

[0, 0, 0, 0, 0]


## Separate by Weeknights and Weekends
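Instead of daily entries, we want nighttime/late-night counts (8pm-4am) for weeknights (Mon-Wed) and weekends (Fri-Sat). Note that each count is labelled with the start time of its reporting interval and the filter below keeps labels with an hour of 20 or later, or 4 or earlier, so for turnstiles that report at 00:00, 04:00, ... the interval that begins at 4am is included as well.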

We questioned whether Thursday should be considered weeknight or weekend. Many New Yorkers go out on Thursday nights and treat it like a weekend, but we would still be capturing commuters, as well, so this number feels like it could go either way. We decided to exclude it for this comparison.

We excluded Sunday because we felt it would throw off the baseline.

In [23]:
# datetime_counts.items()[0]

In [24]:
# experimenting with datetime objects

# Scratch check of the late-night date shift: a 3am timestamp on May 1
# should move back to the previous calendar day.
time = datetime.datetime(2015, 5, 1, 3, 0)
# Bare expression; it is not the last line of the cell, so nothing is shown.
time.year, time.month, time.day, time.hour, time.minute

if 0 <= time.hour <=4:
    time = time - datetime.timedelta(days=1)
    
time

datetime.datetime(2015, 4, 30, 3, 0)

In [25]:
def filter_for_night(dct):
    """Keep only the readings whose timestamp falls in the night hours.

    A ``(time, count)`` pair is kept when ``time.hour`` is 20 or later, or
    4 or earlier.  Both bounds are inclusive, so a reading stamped 04:xx is
    kept.
    """
    night = {}
    for turnstile, rows in dct.items():
        kept = []
        for time, count in rows:
            # Daytime is every hour strictly between 4 and 20.
            if not (4 < time.hour < 20):
                kept.append((time, count))
        night[turnstile] = kept
    return night

nighttime_counts = filter_for_night(datetime_counts)

In [26]:
# nighttime_counts.items()[0]

In [27]:
def filter_weeknight(dct):
    """Keep the readings that belong to Monday, Tuesday or Wednesday night.

    A ``(time, count)`` pair is kept when its timestamp is on Monday at hour
    20 or later, at any hour on Tuesday or Wednesday, or on Thursday at hour
    4 or earlier.  No hour test is applied to Tuesday and Wednesday here.
    """
    def is_weeknight(stamp):
        weekday = stamp.weekday()  # Monday is 0
        if weekday == 0:
            return stamp.hour >= 20
        if weekday in (1, 2):
            return True
        if weekday == 3:
            return stamp.hour <= 4
        return False

    selected = {}
    for turnstile, rows in dct.items():
        selected[turnstile] = [(time, count)
                               for (time, count) in rows
                               if is_weeknight(time)]
    return selected

weeknight_counts = filter_weeknight(nighttime_counts)

In [28]:
# weeknight_counts.items()[0]

In [29]:
def filter_weekend(dct):
    """Keep the readings that belong to Friday or Saturday night.

    A ``(time, count)`` pair is kept when its timestamp is on Friday at hour
    20 or later, at any hour on Saturday, or on Sunday at hour 4 or earlier.
    No hour test is applied to Saturday here.
    """
    def is_weekend_night(stamp):
        weekday = stamp.weekday()  # Friday is 4, Saturday 5, Sunday 6
        if weekday == 5:
            return True
        if weekday == 4:
            return stamp.hour >= 20
        if weekday == 6:
            return stamp.hour <= 4
        return False

    selected = {}
    for turnstile, rows in dct.items():
        selected[turnstile] = [(time, count)
                               for (time, count) in rows
                               if is_weekend_night(time)]
    return selected

weekend_counts = filter_weekend(nighttime_counts)

In [30]:
# weekend_counts.items()[0]

In [31]:
def reassign_latenight_days(dct):
    """Shift early-morning readings back to the previous calendar day.

    Any ``(time, count)`` pair whose ``time.hour`` is 4 or earlier has one
    day subtracted from its timestamp; the clock time is unchanged.  All
    other pairs pass through as they are.
    """
    one_day = datetime.timedelta(days=1)
    return {turnstile: [((time - one_day) if time.hour <= 4 else time, count)
                        for time, count in rows]
            for turnstile, rows in dct.items()}

new_weekend_counts = reassign_latenight_days(weekend_counts)
new_weeknight_counts = reassign_latenight_days(weeknight_counts)

In [32]:
# new_weeknight_counts.items()[0]

In [33]:
# new_weekend_counts.items()[0]

## Daily Exits
Accumulate exit counts for each day.

In [99]:
def count_by_day(dct):
    """Total the counts of each key per calendar date.

    Every ``(time, count)`` pair contributes ``count`` to the date given by
    ``time.date()``.  Each key maps to a list of ``(date, total)`` tuples
    sorted by date.
    """
    totals = {}
    for turnstile in dct:
        per_day = {}
        for time, count in dct[turnstile]:
            date = time.date()
            running = per_day[date] if date in per_day else 0
            per_day[date] = running + count
        ordered = list(per_day.items())
        ordered.sort()
        totals[turnstile] = ordered
    return totals
        
daily_weekend_counts = count_by_day(new_weekend_counts)
daily_weeknight_counts = count_by_day(new_weeknight_counts)

In [100]:
daily_weekend_counts.items()[0]

(('A030', 'R083', '01-06-00', '23 ST-5 AVE'),
 [(datetime.date(2015, 3, 27), 44),
  (datetime.date(2015, 3, 28), 114),
  (datetime.date(2015, 4, 3), 122),
  (datetime.date(2015, 4, 4), 98),
  (datetime.date(2015, 4, 10), 110),
  (datetime.date(2015, 4, 11), 105),
  (datetime.date(2015, 4, 17), 157),
  (datetime.date(2015, 4, 18), 142),
  (datetime.date(2015, 4, 24), 131),
  (datetime.date(2015, 4, 25), 118),
  (datetime.date(2015, 5, 1), 124),
  (datetime.date(2015, 5, 2), 134),
  (datetime.date(2015, 5, 8), 365),
  (datetime.date(2015, 5, 9), 292),
  (datetime.date(2015, 5, 15), 134),
  (datetime.date(2015, 5, 16), 127),
  (datetime.date(2015, 5, 22), 126),
  (datetime.date(2015, 5, 23), 101),
  (datetime.date(2015, 5, 29), 123),
  (datetime.date(2015, 5, 30), 132),
  (datetime.date(2015, 6, 5), 130),
  (datetime.date(2015, 6, 6), 106),
  (datetime.date(2015, 6, 12), 156),
  (datetime.date(2015, 6, 13), 146),
  (datetime.date(2015, 6, 19), 169),
  (datetime.date(2015, 6, 20), 67)])

In [101]:
# daily_weeknight_counts.items()[200]

## Accumulate Counts by Station Area

So far we've been operating at the level of a single turnstile. Next we'll combine turnstiles in the same ControlArea/Unit/Station combo. Some ControlArea/Unit/Station groups have a single turnstile, but most have multiple turnstiles: the same values in the C/A, UNIT and STATION columns, but different values in the SCP column.

We will combine the numbers together for each ControlArea/UNIT/STATION combo, for each day, to get a count by station area.

In [102]:
def combine_by_stationarea(dct):
    """Pool the daily counts of all turnstiles that share a station area.

    Parameters
    ----------
    dct : dict
        Maps a 4-tuple turnstile key to a list of ``(day, count)`` pairs.
        The station-area key is built from elements 0, 1 and 3 of the
        turnstile key (element 2 is dropped).

    Returns
    -------
    dict
        Maps each station-area key to a sorted list of ``(day, count)``
        tuples.  Counts are summed per day within one turnstile only, so a
        day appears once per contributing turnstile; they are not added up
        across turnstiles here.
    """
    combined = {}
    for turnstile, rows in dct.items():
        station = turnstile[:2] + (turnstile[3],)
        by_day = {}
        for day, count in rows:
            by_day[day] = by_day.get(day, 0) + count
        combined.setdefault(station, []).extend(by_day.items())
    # Sort each station's list once at the end; re-sorting (and copying) the
    # whole list after every turnstile gives the same result for more work.
    for entries in combined.values():
        entries.sort()
    return combined

weekend_by_stationarea = combine_by_stationarea(daily_weekend_counts)
weeknight_by_stationarea = combine_by_stationarea(daily_weeknight_counts)

In [114]:
def accumulate_by_day(dct):
    """Add up the counts that share a day, separately for each key.

    Each value of ``dct`` is a list of ``(day, count)`` pairs that may
    repeat a day.  The result maps every key to a list of ``(day, total)``
    tuples with one entry per day, sorted by day.
    """
    summed = {}
    for key in dct:
        totals = {}
        for day, count in dct[key]:
            running = totals[day] if day in totals else 0
            totals[day] = running + count
        ordered = list(totals.items())
        ordered.sort()
        summed[key] = ordered
    return summed
        
weekend_stationarea_counts = accumulate_by_day(weekend_by_stationarea)
weeknight_stationarea_counts = accumulate_by_day(weeknight_by_stationarea)

In [115]:
weekend_stationarea_counts[('A030', 'R083', '23 ST-5 AVE')]

[(datetime.date(2015, 3, 27), 329),
 (datetime.date(2015, 3, 28), 598),
 (datetime.date(2015, 4, 3), 581),
 (datetime.date(2015, 4, 4), 546),
 (datetime.date(2015, 4, 10), 656),
 (datetime.date(2015, 4, 11), 581),
 (datetime.date(2015, 4, 17), 782),
 (datetime.date(2015, 4, 18), 725),
 (datetime.date(2015, 4, 24), 759),
 (datetime.date(2015, 4, 25), 669),
 (datetime.date(2015, 5, 1), 753),
 (datetime.date(2015, 5, 2), 708),
 (datetime.date(2015, 5, 8), 929),
 (datetime.date(2015, 5, 9), 744),
 (datetime.date(2015, 5, 15), 694),
 (datetime.date(2015, 5, 16), 606),
 (datetime.date(2015, 5, 22), 661),
 (datetime.date(2015, 5, 23), 581),
 (datetime.date(2015, 5, 29), 737),
 (datetime.date(2015, 5, 30), 682),
 (datetime.date(2015, 6, 5), 754),
 (datetime.date(2015, 6, 6), 651),
 (datetime.date(2015, 6, 12), 792),
 (datetime.date(2015, 6, 13), 768),
 (datetime.date(2015, 6, 19), 804),
 (datetime.date(2015, 6, 20), 560)]

In [116]:
for key, val in weekend_by_stationarea.items():
    if key[2] == 'BOYD-88 ST':
        print sorted(val)
print len(weekend_by_stationarea.values()[0])

[(datetime.date(2015, 3, 27), 1), (datetime.date(2015, 3, 27), 6), (datetime.date(2015, 3, 27), 10), (datetime.date(2015, 3, 28), 7), (datetime.date(2015, 3, 28), 13), (datetime.date(2015, 3, 28), 17), (datetime.date(2015, 4, 3), 16), (datetime.date(2015, 4, 3), 51), (datetime.date(2015, 4, 3), 174), (datetime.date(2015, 4, 4), 16), (datetime.date(2015, 4, 4), 35), (datetime.date(2015, 4, 4), 141), (datetime.date(2015, 4, 10), 10), (datetime.date(2015, 4, 10), 51), (datetime.date(2015, 4, 10), 202), (datetime.date(2015, 4, 11), 22), (datetime.date(2015, 4, 11), 47), (datetime.date(2015, 4, 11), 182), (datetime.date(2015, 4, 17), 6), (datetime.date(2015, 4, 17), 41), (datetime.date(2015, 4, 17), 96), (datetime.date(2015, 4, 18), 5), (datetime.date(2015, 4, 18), 7), (datetime.date(2015, 4, 18), 15), (datetime.date(2015, 4, 24), 15), (datetime.date(2015, 4, 24), 46), (datetime.date(2015, 4, 24), 135), (datetime.date(2015, 4, 25), 7), (datetime.date(2015, 4, 25), 19), (datetime.date(2015, 

## Counts by Station

Combine everything in each station, and come up with a time series for each STATION, by adding up all the turnstiles in a station.

In [106]:
len(weekend_stationarea_counts.items()), len(weeknight_stationarea_counts.items())

(729, 729)

In [117]:
def combine_by_station(dct):
    """Pool the daily counts of all station areas that share a station name.

    Parameters
    ----------
    dct : dict
        Maps a station-area key (a tuple whose last element is the station
        name) to a list of ``(day, count)`` pairs.

    Returns
    -------
    dict
        Maps the 1-tuple ``(station_name,)`` to a sorted list of
        ``(day, count)`` tuples.  A day appears once per contributing
        station area; counts are not added up across areas here.

    Notes
    -----
    NOTE(review): the key is the station name alone, so any two physical
    stations that happen to share a name would be merged -- confirm this is
    intended for this data set.
    """
    combined = {}
    for stationarea, rows in dct.items():
        station = (stationarea[-1],)
        by_day = {}
        for day, count in rows:
            by_day[day] = by_day.get(day, 0) + count
        combined.setdefault(station, []).extend(by_day.items())
    # Sort each station's list once at the end; re-sorting (and copying) the
    # whole list after every station area gives the same result for more work.
    for entries in combined.values():
        entries.sort()
    return combined
            
weekend_by_station = combine_by_station(weekend_stationarea_counts)
weeknight_by_station = combine_by_station(weeknight_stationarea_counts)

In [118]:
weekend_by_station[('23 ST-5 AVE',)]

[(datetime.date(2015, 3, 27), 313),
 (datetime.date(2015, 3, 27), 329),
 (datetime.date(2015, 3, 28), 547),
 (datetime.date(2015, 3, 28), 598),
 (datetime.date(2015, 4, 3), 581),
 (datetime.date(2015, 4, 3), 1126),
 (datetime.date(2015, 4, 4), 478),
 (datetime.date(2015, 4, 4), 546),
 (datetime.date(2015, 4, 10), 656),
 (datetime.date(2015, 4, 10), 738),
 (datetime.date(2015, 4, 11), 581),
 (datetime.date(2015, 4, 11), 628),
 (datetime.date(2015, 4, 17), 742),
 (datetime.date(2015, 4, 17), 782),
 (datetime.date(2015, 4, 18), 725),
 (datetime.date(2015, 4, 18), 740),
 (datetime.date(2015, 4, 24), 759),
 (datetime.date(2015, 4, 24), 776),
 (datetime.date(2015, 4, 25), 669),
 (datetime.date(2015, 4, 25), 1072),
 (datetime.date(2015, 5, 1), 727),
 (datetime.date(2015, 5, 1), 753),
 (datetime.date(2015, 5, 2), 655),
 (datetime.date(2015, 5, 2), 708),
 (datetime.date(2015, 5, 8), 667),
 (datetime.date(2015, 5, 8), 929),
 (datetime.date(2015, 5, 9), 540),
 (datetime.date(2015, 5, 9), 744),
 (

In [119]:
weekend_station_counts = accumulate_by_day(weekend_by_station)
weeknight_station_counts = accumulate_by_day(weeknight_by_station)

In [120]:
weekend_station_counts[('23 ST-5 AVE',)]

[(datetime.date(2015, 3, 27), 642),
 (datetime.date(2015, 3, 28), 1145),
 (datetime.date(2015, 4, 3), 1707),
 (datetime.date(2015, 4, 4), 1024),
 (datetime.date(2015, 4, 10), 1394),
 (datetime.date(2015, 4, 11), 1209),
 (datetime.date(2015, 4, 17), 1524),
 (datetime.date(2015, 4, 18), 1465),
 (datetime.date(2015, 4, 24), 1535),
 (datetime.date(2015, 4, 25), 1741),
 (datetime.date(2015, 5, 1), 1480),
 (datetime.date(2015, 5, 2), 1363),
 (datetime.date(2015, 5, 8), 1596),
 (datetime.date(2015, 5, 9), 1284),
 (datetime.date(2015, 5, 15), 1383),
 (datetime.date(2015, 5, 16), 1305),
 (datetime.date(2015, 5, 22), 1349),
 (datetime.date(2015, 5, 23), 1238),
 (datetime.date(2015, 5, 29), 1431),
 (datetime.date(2015, 5, 30), 1287),
 (datetime.date(2015, 6, 5), 1479),
 (datetime.date(2015, 6, 6), 1366),
 (datetime.date(2015, 6, 12), 1518),
 (datetime.date(2015, 6, 13), 1399),
 (datetime.date(2015, 6, 19), 1598),
 (datetime.date(2015, 6, 20), 1197)]

## Means by Day of the Week

Accumulate the counts by each day of the week and find their mean.

In [101]:
### pop off leading and trailing values that don't match up first



In [170]:
def means_dayofweek(dct):
    """Total and average each station's daily counts per day of the week.

    ``dct`` maps a station key (a tuple) to a list of ``(day, count)``
    pairs.  Returns a 2-tuple:

    * a dict mapping each station key to a list of
      ``(weekday, total, mean)`` tuples ordered by weekday, where ``mean``
      is ``total`` divided by the number of rows that fell on that weekday;
    * a ``Counter`` keyed by ``station + (weekday,)`` holding those row
      counts.
    """
    occurrences = Counter()
    summary = {}
    for station, rows in dct.items():
        totals = {}
        for day, count in rows:
            weekday = day.weekday()
            totals[weekday] = totals.get(weekday, 0) + count
            occurrences[station + (weekday,)] += 1
        summary[station] = [
            (weekday, total, total / occurrences[station + (weekday,)])
            for weekday, total in sorted(totals.items())]
    return summary, occurrences

# means_dayofweek returns a pair: the per-station weekday means and a counter
# of how many rows fell on each (station, weekday).
# NOTE(review): both calls bind `counter`, so after this cell it holds only
# the counts from the second (weeknight) call; the weekend counter is lost.
# NOTE(review): the second result is named `weekday_...` although it is built
# from the weeknight station counts.
weekend_dayofweek_means, counter = means_dayofweek(weekend_station_counts)
weekday_dayofweek_means, counter = means_dayofweek(weeknight_station_counts)

In [171]:
print weekend_dayofweek_means[('23 ST-5 AVE',)]
print weekday_dayofweek_means[('23 ST-5 AVE',)]

[(4, 18636, 1433.5384615384614), (5, 17023, 1309.4615384615386)]
[(0, 29358, 2258.3076923076924), (1, 29973, 2305.6153846153848), (2, 30505, 2346.5384615384614)]


In [172]:
sort_by_value(weekend_dayofweek_means)[:10]

[(('34 ST-PENN STA',),
  [(4, 244160, 18781.53846153846), (5, 208895, 16068.846153846154)]),
 (('34 ST-HERALD SQ',),
  [(4, 205413, 15801.0), (5, 174454, 13419.538461538461)]),
 (('86 ST',),
  [(4, 198337, 15256.692307692309), (5, 178354, 13719.538461538461)]),
 (('42 ST-TIMES SQ',),
  [(4, 193250, 14865.384615384615), (5, 197879, 15221.461538461539)]),
 (('MAIN ST',), [(4, 171717, 13209.0), (5, 130181, 10013.923076923076)]),
 (('42 ST-PA BUS TE',),
  [(4, 168604, 12969.538461538461), (5, 150910, 11608.461538461539)]),
 (('125 ST',), [(4, 158405, 12185.0), (5, 155961, 11997.0)]),
 (('42 ST-GRD CNTRL',),
  [(4, 157107, 12085.153846153846), (5, 142128, 10932.923076923076)]),
 (('ROOSEVELT AVE',),
  [(4, 156770, 12059.23076923077), (5, 146556, 11273.538461538461)]),
 (('145 ST',),
  [(4, 126540, 9733.846153846154), (5, 124203, 9554.076923076924)])]

## Means for Weekend and Weeknight

In [184]:
def weekpart_station_counts(dct):
    """Average each station's per-weekday means into one value.

    Parameters
    ----------
    dct : dict
        Maps a station key to a list of ``(weekday, total, day_mean)``
        tuples; only ``day_mean`` is used.

    Returns
    -------
    dict
        Maps each station key to the unweighted mean of its ``day_mean``
        values.  A station with an empty list has no defined mean and is
        left out of the result (it previously raised ZeroDivisionError).
    """
    d = {}
    for station, rows in dct.items():
        if not rows:
            # Nothing to average for this station; skip it rather than
            # dividing by zero.
            continue
        total = 0
        for _, _, day_mean in rows:
            total += day_mean
        d[station] = total / len(rows)
    return d
        
weekend_means = weekpart_station_counts(weekend_dayofweek_means)
weeknight_means = weekpart_station_counts(weekday_dayofweek_means)

In [185]:
print 'WEEKNIGHT:', sort_by_value(weeknight_means)[:10]
print '\nWEEKEND:', sort_by_value(weekend_means)[:10]

WEEKNIGHT: [(('34 ST-PENN STA',), 21205.564102564105), (('34 ST-HERALD SQ',), 19221.358974358973), (('86 ST',), 18716.82051282051), (('42 ST-GRD CNTRL',), 16989.69230769231), (('42 ST-TIMES SQ',), 16443.564102564105), (('MAIN ST',), 14396.02564102564), (('125 ST',), 13025.256410256408), (('42 ST-PA BUS TE',), 12096.82051282051), (('59 ST',), 11812.79487179487), (('ROOSEVELT AVE',), 11731.564102564103)]

WEEKEND: [(('34 ST-PENN STA',), 17425.19230769231), (('42 ST-TIMES SQ',), 15043.423076923078), (('34 ST-HERALD SQ',), 14610.26923076923), (('86 ST',), 14488.115384615385), (('42 ST-PA BUS TE',), 12289.0), (('125 ST',), 12091.0), (('ROOSEVELT AVE',), 11666.384615384615), (('MAIN ST',), 11611.461538461539), (('42 ST-GRD CNTRL',), 11509.038461538461), (('145 ST',), 9643.961538461539)]


## Make Comparison
In order to make a comparison between stations, we looked at the ratio of weekend to weeknight traffic. To control for low-traffic stations, we only looked at stations with a mean of more than 500 exits on weekends.

In [182]:
def make_comparison(dct1, dct2, min_mean=500):
    """Compute the ratio ``dct1[station] / dct2[station]`` per station.

    Parameters
    ----------
    dct1 : dict
        Numerator values by station (the weekend means in this notebook).
    dct2 : dict
        Denominator values by station (the weeknight means in this
        notebook).
    min_mean : number, optional
        A station is included only if its ``dct1`` value is strictly greater
        than this.  Defaults to 500, the threshold originally hard-coded.

    Returns
    -------
    dict
        Maps station to the ratio, for stations whose ``dct2`` value is
        positive and whose ``dct1`` value exists and exceeds ``min_mean``.
    """
    d = {}
    for station2, means2 in dct2.items():
        if means2 > 0:
            means1 = dct1.get(station2)
            # A station missing from dct1 yields None.  Comparing None with a
            # number only "worked" through Python 2's arbitrary ordering and
            # raises TypeError on Python 3, so test for it explicitly.
            if means1 is not None and means1 > min_mean:
                d[station2] = means1 / means2
    return d

proportions = make_comparison(weekend_means, weeknight_means)

In [183]:
sort_by_value(proportions)[:20]

[(('METS-WILLETS PT',), 2.088525415198792),
 (('BROADWAY/LAFAY',), 1.6945567469385363),
 (('ASTOR PLACE',), 1.6671098536419333),
 (('W 4 ST-WASH SQ',), 1.6629219178936858),
 (('14 ST-UNION SQ',), 1.6301470886726837),
 (('ESSEX ST',), 1.609049858889934),
 (('CITY / BUS',), 1.5590961516822326),
 (('QUEENS PLAZA',), 1.441714114474078),
 (('DELANCEY ST',), 1.4395981775828997),
 (('NEW UTRECHT AVE',), 1.4108612385756003),
 (('HOUSTON ST',), 1.4057795466343659),
 (('BROADWAY',), 1.3864628820960698),
 (('36 ST',), 1.3860922623744147),
 (('INWOOD-207 ST',), 1.3837110395528285),
 (('8 AVE',), 1.3555091842618272),
 (('7 AVE-53 ST',), 1.3387600189767537),
 (('WHITEHALL ST',), 1.3342195024077046),
 (('28 ST',), 1.330513032685147),
 (('MARCY AVE',), 1.3226495302040087),
 (('E 180 ST',), 1.3126671044608096)]