In [4]:
from __future__ import print_function, division

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
%matplotlib inline

In [6]:
# Source: http://web.mta.info/developers/turnstile.html
def get_data(week_nums):
    url = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_{}.txt"
    dfs = []
    for week_num in week_nums:
        file_url = url.format(week_num)
        dfs.append(pd.read_csv(file_url))
    return pd.concat(dfs)
        
week_nums = [190907, 190914, 190921, 190928, 191005, 191012, 191019, 191026, 191102, 191109, 191116, 191123, 191130]
turnstiles_df = get_data(week_nums)

In [7]:
turnstiles_df.columns = [column.strip() for column in turnstiles_df.columns]

In [8]:
turnstiles_df["DATE_TIME"] = pd.to_datetime(turnstiles_df.DATE + " " + turnstiles_df.TIME, 
                                            format="%m/%d/%Y %H:%M:%S")

In [9]:
# Get rid of the duplicate entries
turnstiles_df.sort_values(["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"], 
                          inplace=True, ascending=False)
turnstiles_df.drop_duplicates(subset=["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"], inplace=True)

In [10]:
# Sanity Check to verify that "C/A", "UNIT", "SCP", "STATION", "DATE_TIME" is unique
(turnstiles_df
 .groupby(["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"])
 .ENTRIES.count()
 .reset_index()
 .sort_values("ENTRIES", ascending=False)).head(5)

Unnamed: 0,C/A,UNIT,SCP,STATION,DATE_TIME,ENTRIES
0,A002,R051,02-00-00,59 ST,2019-08-31 00:00:00,1
1784129,R138,R293,00-02-05,34 ST-PENN STA,2019-09-11 06:00:00,1
1784111,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 06:00:00,1
1784112,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 10:00:00,1
1784113,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 14:00:00,1


In [11]:
# Drop Exits and Desc Column.  To prevent errors in multiple run of cell, errors on drop is ignored
turnstiles_df = turnstiles_df.drop(["EXITS", "DESC"], axis=1, errors="ignore")

In [12]:
turnstiles_daily = (turnstiles_df
                        .groupby(["C/A", "UNIT", "SCP", "STATION", "LINENAME", "DATE"],as_index=False)
                        .ENTRIES.first())

In [13]:
turnstiles_daily.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES
0,A002,R051,02-00-00,59 ST,NQR456W,08/31/2019,7183842
1,A002,R051,02-00-00,59 ST,NQR456W,09/01/2019,7184559
2,A002,R051,02-00-00,59 ST,NQR456W,09/02/2019,7185132
3,A002,R051,02-00-00,59 ST,NQR456W,09/03/2019,7186355
4,A002,R051,02-00-00,59 ST,NQR456W,09/04/2019,7187672


In [14]:
turnstiles_daily[["PREV_DATE", "PREV_ENTRIES"]] = (turnstiles_daily
                                                       .groupby(["C/A", "UNIT", "SCP", "STATION", "LINENAME"])["DATE", "ENTRIES"]
                                                       .apply(lambda grp: grp.shift(1)))

  


In [15]:
turnstiles_daily.dropna(subset=["PREV_DATE"], axis=0, inplace=True)

In [16]:
turnstiles_daily[turnstiles_daily["ENTRIES"] < turnstiles_daily["PREV_ENTRIES"]].head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES,PREV_DATE,PREV_ENTRIES
3530,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/01/2019,885822693,08/31/2019,885823072.0
3531,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/02/2019,885822365,09/01/2019,885822693.0
3532,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/03/2019,885821627,09/02/2019,885822365.0
3533,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/04/2019,885820824,09/03/2019,885821627.0
3534,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/05/2019,885819992,09/04/2019,885820824.0


In [17]:
def get_daily_counts(row, max_counter):
    counter = row["ENTRIES"] - row["PREV_ENTRIES"]
    if counter < 0:
        counter = -counter
    if counter > max_counter:
        print(row["ENTRIES"], row["PREV_ENTRIES"])
        return 0
    return counter

# If counter is > 1Million, then the counter might have been reset.  
# Just set it to zero as different counters have different cycle limits
_ = turnstiles_daily.apply(get_daily_counts, axis=1, max_counter=1000000)

12 6170034.0
263 3846402.0
29 3367836.0
26 1083967.0
864 2152170.0
111 1599317.0
460501 6804909.0
73 6697765.0
875479599 775219651.0
590597 9156558.0
88 2728627.0
94 4283110.0
1041 2210879.0
98 1478936.0
116 6197971.0
66 1121336148.0
82 2025847506.0
1451 4711879.0
973 6844908.0
126 6053205.0
196612 50345125.0
589824 50331648.0
5421 10392294.0
2157 6026406.0
458752 117440512.0
107 1068192.0
2567 152966689.0
2204 19783118.0
458752 117440512.0
24 2781046.0
3728 1842366.0
2069 4443445.0
178 6625781.0
4982302 877264.0
1421 2254274.0
721441460 2880424.0
516 3275592.0
610 2460400.0
14 4584512.0
26 153536031.0
18 118621565.0
67122955 103584.0
218 67122955.0
458846 2297063.0
672 5491989.0
235398290 9934.0
83886083 117440614.0
10 5679392.0
1145 4459356.0
6817688 3603909.0
401 3071806.0
123785342 117604971.0
636 2209128.0
31260681 48404.0
1507856 4609.0
2053 3512072.0
483 4777140.0
102459350 210849.0
248 3782823.0
248 9852706.0
1 134218317.0
2319 22554564.0
190 1449873.0
3480 5391681.0
845 172975

In [18]:
def get_daily_counts(row, max_counter):
    counter = row["ENTRIES"] - row["PREV_ENTRIES"]
    if counter < 0:
        # Maybe counter is reversed?
        counter = -counter
    if counter > max_counter:
        # Maybe counter was reset to 0? 
        print(row["ENTRIES"], row["PREV_ENTRIES"])
        counter = min(row["ENTRIES"], row["PREV_ENTRIES"])
    if counter > max_counter:
        # Check it again to make sure we're not still giving a counter that's too big
        return 0
    return counter

# If counter is > 1Million, then the counter might have been reset.  
# Just set it to zero as different counters have different cycle limits
# It'd probably be a good idea to use a number even significantly smaller than 1 million as the limit!
turnstiles_daily["DAILY_ENTRIES"] = turnstiles_daily.apply(get_daily_counts, axis=1, max_counter=1000000)

12 6170034.0
263 3846402.0
29 3367836.0
26 1083967.0
864 2152170.0
111 1599317.0
460501 6804909.0
73 6697765.0
875479599 775219651.0
590597 9156558.0
88 2728627.0
94 4283110.0
1041 2210879.0
98 1478936.0
116 6197971.0
66 1121336148.0
82 2025847506.0
1451 4711879.0
973 6844908.0
126 6053205.0
196612 50345125.0
589824 50331648.0
5421 10392294.0
2157 6026406.0
458752 117440512.0
107 1068192.0
2567 152966689.0
2204 19783118.0
458752 117440512.0
24 2781046.0
3728 1842366.0
2069 4443445.0
178 6625781.0
4982302 877264.0
1421 2254274.0
721441460 2880424.0
516 3275592.0
610 2460400.0
14 4584512.0
26 153536031.0
18 118621565.0
67122955 103584.0
218 67122955.0
458846 2297063.0
672 5491989.0
235398290 9934.0
83886083 117440614.0
10 5679392.0
1145 4459356.0
6817688 3603909.0
401 3071806.0
123785342 117604971.0
636 2209128.0
31260681 48404.0
1507856 4609.0
2053 3512072.0
483 4777140.0
102459350 210849.0
248 3782823.0
248 9852706.0
1 134218317.0
2319 22554564.0
190 1449873.0
3480 5391681.0
845 172975

In [19]:
turnstiles_daily

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES,PREV_DATE,PREV_ENTRIES,DAILY_ENTRIES
1,A002,R051,02-00-00,59 ST,NQR456W,09/01/2019,7184559,08/31/2019,7183842.0,717.0
2,A002,R051,02-00-00,59 ST,NQR456W,09/02/2019,7185132,09/01/2019,7184559.0,573.0
3,A002,R051,02-00-00,59 ST,NQR456W,09/03/2019,7186355,09/02/2019,7185132.0,1223.0
4,A002,R051,02-00-00,59 ST,NQR456W,09/04/2019,7187672,09/03/2019,7186355.0,1317.0
5,A002,R051,02-00-00,59 ST,NQR456W,09/05/2019,7189025,09/04/2019,7187672.0,1353.0
...,...,...,...,...,...,...,...,...,...,...
444786,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/25/2019,5554,11/24/2019,5554.0,0.0
444787,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/26/2019,5554,11/25/2019,5554.0,0.0
444788,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/27/2019,5554,11/26/2019,5554.0,0.0
444789,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/28/2019,5554,11/27/2019,5554.0,0.0


In [20]:
stations_total = (turnstiles_daily
                        .groupby(["STATION","LINENAME"],as_index=False).agg("sum")).sort_values("DAILY_ENTRIES", ascending = False).reset_index()
                        #.DAILY_ENTRIES.sum())

In [21]:
stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES
0,311,GRD CNTRL-42 ST,4567S,226865326108,2.268583e+11,12458229.0
1,89,34 ST-HERALD SQ,BDFMNQRW,416207405402,4.161986e+11,9979861.0
2,102,42 ST-PORT AUTH,ACENQRS1237W,667469074088,6.674638e+11,7175885.0
3,403,PATH NEW WTC,1,414635697,4.136185e+08,6878013.0
4,93,34 ST-PENN STA,ACE,150330038840,1.503246e+11,6548407.0
...,...,...,...,...,...,...
472,449,TOMPKINSVILLE,1,216784555,2.167347e+08,49882.0
473,203,BEACH 98 ST,AS,452779551,4.527428e+08,36755.0
474,220,BROAD CHANNEL,AS,130029958,1.300058e+08,24181.0
475,196,BEACH 105 ST,AS,10704291634,1.070427e+10,18738.0


In [63]:
stations_zips = pd.read_csv("sub_st_zip.csv")

In [64]:
stations_zips.rename(columns={'Stop Name':'STATION', "Zip Code":"ZipCode"}, inplace = True)

In [65]:
stations_zips.STATION = stations_zips.STATION.str.upper()

In [66]:
manh_stations=stations_zips.STATION.to_list()

In [67]:
len(manh_stations)

153

In [68]:
manh_stations_total = stations_total[stations_total["STATION"].isin(manh_stations)]

In [69]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES
8,301,FULTON ST,2345ACJZ,141641492745,1.416370e+11,5303970.0
14,234,CANAL ST,JNQRZ6W,275657025992,2.756537e+11,3935409.0
15,138,72 ST,123,68243510585,6.824090e+10,3607329.0
16,156,86 ST,456,119545892760,1.195424e+11,3518649.0
18,121,59 ST,456NQRW,151409177842,1.521094e+11,3328368.0
...,...,...,...,...,...,...
406,30,145 ST,3,1333200594,1.332932e+09,268241.0
414,114,50 ST,D,1145424833,1.145171e+09,253611.0
425,36,155 ST,C,1479960900,1.479730e+09,230640.0
438,158,86 ST,N,451371746,4.511787e+08,193073.0


In [70]:
zip_dict  = pd.Series(stations_zips.ZipCode.values,index=stations_zips.STATION).to_dict()

In [71]:
manh_stations_total["ZipCode"] = manh_stations_total["STATION"].map(zip_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [72]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode
8,301,FULTON ST,2345ACJZ,141641492745,1.416370e+11,5303970.0,10038
14,234,CANAL ST,JNQRZ6W,275657025992,2.756537e+11,3935409.0,10013
15,138,72 ST,123,68243510585,6.824090e+10,3607329.0,10021
16,156,86 ST,456,119545892760,1.195424e+11,3518649.0,10028
18,121,59 ST,456NQRW,151409177842,1.521094e+11,3328368.0,10022
...,...,...,...,...,...,...,...
406,30,145 ST,3,1333200594,1.332932e+09,268241.0,10031
414,114,50 ST,D,1145424833,1.145171e+09,253611.0,10019
425,36,155 ST,C,1479960900,1.479730e+09,230640.0,10018
438,158,86 ST,N,451371746,4.511787e+08,193073.0,10028


In [30]:
stations_elev = pd.read_csv("Elevator_yes_no.csv")

In [73]:
manh_stations_total["STATIONLINE"] = manh_stations_total.STATION+manh_stations_total.LINENAME

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [77]:
pd.set_option('display.max_rows', 200)

In [80]:
manh_stations_total.sort_values("STATIONLINE")

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,STATIONLINE
117,0,1 AV,L,108210509971,108209200000.0,1298171.0,10009,1 AVL
171,1,103 ST,1,5901511168,5900530000.0,981372.0,10025,103 ST1
160,2,103 ST,6,4361693836,4360641000.0,1052643.0,10025,103 ST6
347,3,103 ST,BC,2747657700,2747271000.0,386482.0,10025,103 STBC
192,7,110 ST,6,3525627318,3524766000.0,861425.0,10029,110 ST6
127,11,116 ST,23,2694787170,2694275000.0,1223427.0,10035,116 ST23
82,12,116 ST,6,2748222208,2747402000.0,1545564.0,10035,116 ST6
252,13,116 ST,BC,32702585905,32701990000.0,600657.0,10035,116 STBC
237,16,125 ST,1,59404208111,59403560000.0,648191.0,10027,125 ST1
129,17,125 ST,23,226436618298,226435700000.0,1217065.0,10027,125 ST23


In [75]:
stations_elev.STATION = stations_elev.STATION.str.upper()

In [57]:
stations_elev["STATIONLINE"] = stations_elev.STATION+stations_elev.LINENAME

In [81]:
stations_elev.sort_values("STATIONLINE")

Unnamed: 0,STATION,LINENAME,Elevator,STATIONLINE
0,1 AV,L,0,1 AVL
2,103 ST,1,0,103 ST1
3,103 ST,6,0,103 ST6
1,103 ST,BC,0,103 STBC
4,110 ST,6,0,110 ST6
8,116 ST - COLUMBIA UNIVERSITY,1,0,116 ST - COLUMBIA UNIVERSITY1
7,116 ST,23,0,116 ST23
6,116 ST,6,0,116 ST6
5,116 ST,BC,0,116 STBC
11,125 ST,1,0,125 ST1


In [51]:
elev_dict  = pd.Series(stations_elev.Elevator.values,index=[stations_elev.STATIONLINE]).to_dict()

In [52]:
elev_dict

{('1 AVL',): 0,
 ('103 STBC',): 0,
 ('103 ST1',): 0,
 ('103 ST6',): 0,
 ('110 ST6',): 0,
 ('116 STBC',): 0,
 ('116 ST6',): 0,
 ('116 ST23',): 0,
 ('116 ST - COLUMBIA UNIVERSITY1',): 0,
 ('125 ST4/5/2006',): 1,
 ('125 STA/C/B/D',): 1,
 ('125 ST1',): 0,
 ('125 ST23',): 0,
 ('135 ST3',): 1,
 ('135 STBC',): 0,
 ('137 ST - CITY COLLEGE1',): 0,
 ('14 STFM',): 0,
 ('14 ST123',): 0,
 ('14 ST - UNION SQ456',): 0,
 ('14 ST-UNION SQL/N/Q/R/W',): 1,
 ('14 ST/8 AVA/C/E/L',): 1,
 ('145 STAC',): 0,
 ('145 STBD',): 0,
 ('145 ST1',): 0,
 ('145 ST3',): 0,
 ('155 STC',): 0,
 ('155 STBD',): 0,
 ('157 ST1',): 0,
 ('163 ST - AMSTERDAM AVC',): 0,
 ('168 STA/C only  1 is not accessible',): 1,
 ('168 ST - WASHINGTON HTS1',): 0,
 ('175 STA',): 1,
 ('18 ST1',): 0,
 ('181 STA',): 0,
 ('181 ST1',): 0,
 ('190 STA',): 0,
 ('191 ST1',): 0,
 ('2 AVF',): 0,
 ('207 ST1',): 0,
 ('215 ST1',): 0,
 ('23 ST6',): 1,
 ('23 STRW',): 0,
 ('23 STCE',): 0,
 ('23 STFM',): 0,
 ('23 ST1',): 0,
 ('28 ST6 downtown only',): 1,
 ('28 STR

In [53]:
manh_stations_total["Elev"] = manh_stations_total["STATIONLINE"].map(elev_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [55]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,STATIONLINE,Elev
8,301,FULTON ST,2345ACJZ,141641492745,1.416370e+11,5303970.0,10038,FULTON ST2345ACJZ,
14,234,CANAL ST,JNQRZ6W,275657025992,2.756537e+11,3935409.0,10013,CANAL STJNQRZ6W,
15,138,72 ST,123,68243510585,6.824090e+10,3607329.0,10021,72 ST123,
16,156,86 ST,456,119545892760,1.195424e+11,3518649.0,10028,86 ST456,
18,121,59 ST,456NQRW,151409177842,1.521094e+11,3328368.0,11234,59 ST456NQRW,
...,...,...,...,...,...,...,...,...,...
406,30,145 ST,3,1333200594,1.332932e+09,268241.0,10031,145 ST3,
414,114,50 ST,D,1145424833,1.145171e+09,253611.0,10019,50 STD,
425,36,155 ST,C,1479960900,1.479730e+09,230640.0,10018,155 STC,
438,158,86 ST,N,451371746,4.511787e+08,193073.0,10028,86 STN,


In [47]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev
8,301,FULTON ST,2345ACJZ,141641492745,1.416370e+11,5303970.0,10038,1.0
14,234,CANAL ST,JNQRZ6W,275657025992,2.756537e+11,3935409.0,10013,1.0
15,138,72 ST,123,68243510585,6.824090e+10,3607329.0,10021,1.0
16,156,86 ST,456,119545892760,1.195424e+11,3518649.0,10028,1.0
18,121,59 ST,456NQRW,151409177842,1.521094e+11,3328368.0,11234,
...,...,...,...,...,...,...,...,...
406,30,145 ST,3,1333200594,1.332932e+09,268241.0,10031,
414,114,50 ST,D,1145424833,1.145171e+09,253611.0,10019,1.0
425,36,155 ST,C,1479960900,1.479730e+09,230640.0,10018,
438,158,86 ST,N,451371746,4.511787e+08,193073.0,10028,1.0


In [None]:
#Map disability and age #s 

In [48]:
age_disability = pd.read_csv("zip_code_age_disability_df.csv")

In [49]:
age_disability

Unnamed: 0,ZipCode,aged,disabled
0,10001,5297.0,3475.0
1,10002,23825.0,24260.0
2,10003,11020.0,5624.0
3,10004,540.0,169.0
4,10005,669.0,154.0
5,10006,377.0,0.0
6,10007,1231.0,170.0
7,10009,13667.0,13695.0
8,10010,7523.0,4206.0
9,10011,13260.0,7709.0


In [50]:
aged_dict  = pd.Series(age_disability.aged.values,index=age_disability.ZipCode).to_dict()

In [51]:
manh_stations_total["aged"] = manh_stations_total["ZipCode"].map(aged_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [52]:
disabled_dict = pd.Series(age_disability.disabled.values,index=age_disability.ZipCode).to_dict()

In [55]:
manh_stations_total["disabled"] = manh_stations_total["ZipCode"].map(disabled_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [56]:
manh_stations_total["aged_scaled"] = manh_stations_total["aged"]/max(manh_stations_total.aged)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [57]:
manh_stations_total["disabled_scaled"] = manh_stations_total["disabled"]/max(manh_stations_total.disabled)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [58]:
manh_stations_total["entries_scaled"] = manh_stations_total["DAILY_ENTRIES"]/max(manh_stations_total.DAILY_ENTRIES)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [59]:
manh_stations_total["Neediness"] = (manh_stations_total.aged_scaled + manh_stations_total.disabled_scaled + manh_stations_total.entries_scaled)/3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [60]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
8,301,FULTON ST,2345ACJZ,141641492745,1.416370e+11,5303970.0,10038,1.0,6151.0,4893.0,0.258174,0.201690,1.000000,0.486621
14,234,CANAL ST,JNQRZ6W,275657025992,2.756537e+11,3935409.0,10013,1.0,8071.0,5060.0,0.338762,0.208574,0.741974,0.429770
15,138,72 ST,123,68243510585,6.824090e+10,3607329.0,10021,1.0,15250.0,5634.0,0.640084,0.232234,0.680119,0.517479
16,156,86 ST,456,119545892760,1.195424e+11,3518649.0,10028,1.0,13422.0,4568.0,0.563358,0.188293,0.663399,0.471683
18,121,59 ST,456NQRW,151409177842,1.521094e+11,3328368.0,11234,,,,,,0.627524,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
406,30,145 ST,3,1333200594,1.332932e+09,268241.0,10031,,13199.0,12668.0,0.553998,0.522176,0.050574,0.375583
414,114,50 ST,D,1145424833,1.145171e+09,253611.0,10019,1.0,10479.0,7301.0,0.439832,0.300948,0.047815,0.262865
425,36,155 ST,C,1479960900,1.479730e+09,230640.0,10018,,1339.0,921.0,0.056201,0.037964,0.043484,0.045883
438,158,86 ST,N,451371746,4.511787e+08,193073.0,10028,1.0,13422.0,4568.0,0.563358,0.188293,0.036402,0.262684


In [61]:
#Sort by neediest stations

manh_stations_total.sort_values("Neediness", ascending = False)

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
22,164,96 ST,123,14946269851,1.494333e+10,2944699.0,10029,1.0,18966.0,24241.0,0.796055,0.999217,0.555188,0.783486
150,277,EAST BROADWAY,F,13818349792,1.381725e+10,1103432.0,10002,,23825.0,24260.0,1.000000,1.000000,0.208039,0.736013
353,216,BOWERY,JZ,1541058079,1.540687e+09,370739.0,10002,,23825.0,24260.0,1.000000,1.000000,0.069898,0.689966
108,165,96 ST,6,66002402621,6.600103e+10,1370791.0,10029,1.0,18966.0,24241.0,0.796055,0.999217,0.258446,0.684573
192,7,110 ST,6,3525627318,3.524766e+09,861425.0,10029,,18966.0,24241.0,0.796055,0.999217,0.162411,0.652561
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,1,103 ST,1,5901511168,5.900530e+09,981372.0,11368,,,,,,0.185026,
180,123,59 ST,NRW,1350710638,1.349770e+09,940981.0,11234,,,,,,0.177411,
331,83,3 AV,L,2334083753,2.333667e+09,416449.0,11372,,,,,,0.078516,
334,126,6 AV,FLM123,110323108488,1.103231e+11,413003.0,11372,,,,,,0.077867,


In [62]:
#Neediest stations that do not already have an elevator:

manh_stations_total[manh_stations_total.Elev != 1].sort_values("Neediness", ascending = False)

Unnamed: 0,index,STATION,LINENAME,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
150,277,EAST BROADWAY,F,13818349792,1.381725e+10,1103432.0,10002,,23825.0,24260.0,1.000000,1.000000,0.208039,0.736013
353,216,BOWERY,JZ,1541058079,1.540687e+09,370739.0,10002,,23825.0,24260.0,1.000000,1.000000,0.069898,0.689966
192,7,110 ST,6,3525627318,3.524766e+09,861425.0,10029,,18966.0,24241.0,0.796055,0.999217,0.162411,0.652561
55,31,145 ST,ABCD,10532589724,1.053063e+10,1962699.0,10031,,13199.0,12668.0,0.553998,0.522176,0.370043,0.482073
185,55,181 ST,1,3345058744,3.344148e+09,910775.0,10033,,14882.0,15222.0,0.624638,0.627453,0.171716,0.474602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,1,103 ST,1,5901511168,5.900530e+09,981372.0,11368,,,,,,0.185026,
180,123,59 ST,NRW,1350710638,1.349770e+09,940981.0,11234,,,,,,0.177411,
331,83,3 AV,L,2334083753,2.333667e+09,416449.0,11372,,,,,,0.078516,
334,126,6 AV,FLM123,110323108488,1.103231e+11,413003.0,11372,,,,,,0.077867,
