### Benson Team Awesome

C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS


|Code      | explanation                                                                                     |
|:-------- |:----------------------------------------------------------------------------------------------- | 
| C/A      | Control Area (A002)                                                                          | 
| UNIT     | Remote Unit for a station (R051)                                                             | 
| SCP      | Subunit Channel Position represents an specific address for a device (02-00-00)              | 
| STATION  | Represents the station name the device is located at                                         | 
| LINENAME | Represents all train lines that can be boarded at this station                               | 
|          |   Normally lines are represented by one character.  LINENAME 456NQR repersents train server for | 
|          |   4, 5, 6, N, Q, and R trains.                                                                  | 
| DIVISION | Represents the Line originally the station belonged to BMT, IRT, or IND                      | 
| DATE     | Represents the date (MM-DD-YY)                                                               |   
| TIME     | Represents the time (hh:mm:ss) for a scheduled audit event                                   | 
| DESc     | Represent the "REGULAR" scheduled audit event (Normally occurs every 4 hours)                | 
|          |   1. Audits may occur more that 4 hours due to planning, or troubleshooting activities.         | 
|          |   2. Additionally, there may be a "RECOVR AUD" entry: This refers to a missed audit that was recovered. |
|ENTRIES   | The comulative entry register value for a device|
|EXIST     | The cumulative exit register value for a device|


In [2]:
import csv
import urllib
import io
import pandas as pd
import numpy as np
import multiprocessing as mlp
data = pd.read_csv('http://web.mta.info/developers/data/nyct/turnstile/turnstile_160326.txt')

In [3]:
data.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
0,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,00:00:00,REGULAR,5590801,1889027
1,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,04:00:00,REGULAR,5590828,1889032
2,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,08:00:00,REGULAR,5590852,1889059
3,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,12:00:00,REGULAR,5590958,1889162
4,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,16:00:00,REGULAR,5591207,1889248


In [4]:
data.columns

Index([u'C/A', u'UNIT', u'SCP', u'STATION', u'LINENAME', u'DIVISION', u'DATE',
       u'TIME', u'DESC', u'ENTRIES',
       u'EXITS                                                               '],
      dtype='object')

In [5]:
data.rename(columns={data.columns[-1]: data.columns[-1].replace(' ','')}, inplace=True)
data.columns

Index([u'C/A', u'UNIT', u'SCP', u'STATION', u'LINENAME', u'DIVISION', u'DATE',
       u'TIME', u'DESC', u'ENTRIES', u'EXITS'],
      dtype='object')

In [6]:
#data[mask, 'Delta'] = abs((data[mask,'ENTRIES'] - data[mask,'ENTRIES'].shift(-1)) + (data[mask,'EXITS'] - data[mask,'EXITS'].shift(-1)))
data.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
0,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,00:00:00,REGULAR,5590801,1889027
1,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,04:00:00,REGULAR,5590828,1889032
2,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,08:00:00,REGULAR,5590852,1889059
3,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,12:00:00,REGULAR,5590958,1889162
4,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,16:00:00,REGULAR,5591207,1889248


In [10]:
#Create the keys that is used to make a mask

data['id'] = data.iloc[:, 0]
for j in range(1,4):
        data.ix[:,'id'] += ',' + data.iloc[:,j]
        
#drop the keys that is 
mask_key = data['id'].drop_duplicates()
print(data.head())

    C/A  UNIT       SCP STATION LINENAME DIVISION        DATE      TIME  \
0  A002  R051  02-00-00   59 ST   NQR456      BMT  03/19/2016  00:00:00   
1  A002  R051  02-00-00   59 ST   NQR456      BMT  03/19/2016  04:00:00   
2  A002  R051  02-00-00   59 ST   NQR456      BMT  03/19/2016  08:00:00   
3  A002  R051  02-00-00   59 ST   NQR456      BMT  03/19/2016  12:00:00   
4  A002  R051  02-00-00   59 ST   NQR456      BMT  03/19/2016  16:00:00   

      DESC  ENTRIES    EXITS                        id  
0  REGULAR  5590801  1889027  A002,R051,02-00-00,59 ST  
1  REGULAR  5590828  1889032  A002,R051,02-00-00,59 ST  
2  REGULAR  5590852  1889059  A002,R051,02-00-00,59 ST  
3  REGULAR  5590958  1889162  A002,R051,02-00-00,59 ST  
4  REGULAR  5591207  1889248  A002,R051,02-00-00,59 ST  


In [11]:
#Initialize the Delta columns as zeros
data['Delta'] = np.zeros(data.shape[0])

#temporary function to fill in the deltas by selections
def fill_in(mask):
    data.loc[mask,'Delta'] = abs((data.loc[mask, 'ENTRIES'] - data.loc[mask, 'ENTRIES'].shift(-1)) + (data.loc[mask, 'EXITS'] - data.loc[mask, 'EXITS'].shift(-1)))

In [12]:
for s in mask_key:
    mask = data.ix[:,'id'] == s
    fill_in(mask)

In [14]:
#print(data.head(20))
data.head(50)

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS,id,Delta
0,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,00:00:00,REGULAR,5590801,1889027,"A002,R051,02-00-00,59 ST",32.0
1,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,04:00:00,REGULAR,5590828,1889032,"A002,R051,02-00-00,59 ST",51.0
2,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,08:00:00,REGULAR,5590852,1889059,"A002,R051,02-00-00,59 ST",209.0
3,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,12:00:00,REGULAR,5590958,1889162,"A002,R051,02-00-00,59 ST",335.0
4,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,16:00:00,REGULAR,5591207,1889248,"A002,R051,02-00-00,59 ST",406.0
5,A002,R051,02-00-00,59 ST,NQR456,BMT,03/19/2016,20:00:00,REGULAR,5591533,1889328,"A002,R051,02-00-00,59 ST",200.0
6,A002,R051,02-00-00,59 ST,NQR456,BMT,03/20/2016,00:00:00,REGULAR,5591699,1889362,"A002,R051,02-00-00,59 ST",30.0
7,A002,R051,02-00-00,59 ST,NQR456,BMT,03/20/2016,04:00:00,REGULAR,5591721,1889370,"A002,R051,02-00-00,59 ST",27.0
8,A002,R051,02-00-00,59 ST,NQR456,BMT,03/20/2016,08:00:00,REGULAR,5591731,1889387,"A002,R051,02-00-00,59 ST",132.0
9,A002,R051,02-00-00,59 ST,NQR456,BMT,03/20/2016,12:00:00,REGULAR,5591799,1889451,"A002,R051,02-00-00,59 ST",258.0


In [26]:
# df_night = data[[data['TIME'] == '00:00:00' & (data['Delta'] >= 0 | data['Delta'] <= 5000)]]
df_night = data[data['Delta'] >= 0 | data['Delta'] <= 5000]

TypeError: cannot compare a dtyped [float64] array with a scalar of type [bool]

In [None]:
df_night.head()

In [40]:
import gmaps

In [44]:
data = gmaps.datasets.load_dataset('taxi_rides')
map = gmaps.heatmap(data)
gmaps.display(map)