In [1]:
from __future__ import print_function, division

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
%matplotlib inline

In [3]:
# Source: http://web.mta.info/developers/turnstile.html
def get_data(week_nums):
    url = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_{}.txt"
    dfs = []
    for week_num in week_nums:
        file_url = url.format(week_num)
        dfs.append(pd.read_csv(file_url))
    return pd.concat(dfs)
        
week_nums = [190907, 190914, 190921, 190928, 191005, 191012, 191019, 191026, 191102, 191109, 191116, 191123, 191130]
turnstiles_df = get_data(week_nums)

In [4]:
turnstiles_df.columns = [column.strip() for column in turnstiles_df.columns]

In [5]:
turnstiles_df["DATE_TIME"] = pd.to_datetime(turnstiles_df.DATE + " " + turnstiles_df.TIME, 
                                            format="%m/%d/%Y %H:%M:%S")

In [6]:
# Get rid of the duplicate entries
turnstiles_df.sort_values(["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"], 
                          inplace=True, ascending=False)
turnstiles_df.drop_duplicates(subset=["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"], inplace=True)

In [7]:
# Sanity Check to verify that "C/A", "UNIT", "SCP", "STATION", "DATE_TIME" is unique
(turnstiles_df
 .groupby(["C/A", "UNIT", "SCP", "STATION", "DATE_TIME"])
 .ENTRIES.count()
 .reset_index()
 .sort_values("ENTRIES", ascending=False)).head(5)

Unnamed: 0,C/A,UNIT,SCP,STATION,DATE_TIME,ENTRIES
0,A002,R051,02-00-00,59 ST,2019-08-31 00:00:00,1
1784129,R138,R293,00-02-05,34 ST-PENN STA,2019-09-11 06:00:00,1
1784111,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 06:00:00,1
1784112,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 10:00:00,1
1784113,R138,R293,00-02-05,34 ST-PENN STA,2019-09-08 14:00:00,1


In [8]:
# Drop Exits and Desc Column.  To prevent errors in multiple run of cell, errors on drop is ignored
turnstiles_df = turnstiles_df.drop(["EXITS", "DESC"], axis=1, errors="ignore")

In [9]:
turnstiles_daily = (turnstiles_df
                        .groupby(["C/A", "UNIT", "SCP", "STATION", "LINENAME", "DATE"],as_index=False)
                        .ENTRIES.first())

In [10]:
turnstiles_daily.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES
0,A002,R051,02-00-00,59 ST,NQR456W,08/31/2019,7183842
1,A002,R051,02-00-00,59 ST,NQR456W,09/01/2019,7184559
2,A002,R051,02-00-00,59 ST,NQR456W,09/02/2019,7185132
3,A002,R051,02-00-00,59 ST,NQR456W,09/03/2019,7186355
4,A002,R051,02-00-00,59 ST,NQR456W,09/04/2019,7187672


In [11]:
turnstiles_daily[["PREV_DATE", "PREV_ENTRIES"]] = (turnstiles_daily
                                                       .groupby(["C/A", "UNIT", "SCP", "STATION", "LINENAME"])["DATE", "ENTRIES"]
                                                       .apply(lambda grp: grp.shift(1)))

  


In [12]:
turnstiles_daily.dropna(subset=["PREV_DATE"], axis=0, inplace=True)

In [13]:
turnstiles_daily[turnstiles_daily["ENTRIES"] < turnstiles_daily["PREV_ENTRIES"]].head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES,PREV_DATE,PREV_ENTRIES
3530,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/01/2019,885822693,08/31/2019,885823072.0
3531,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/02/2019,885822365,09/01/2019,885822693.0
3532,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/03/2019,885821627,09/02/2019,885822365.0
3533,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/04/2019,885820824,09/03/2019,885821627.0
3534,A011,R080,01-03-00,57 ST-7 AV,NQRW,09/05/2019,885819992,09/04/2019,885820824.0


In [14]:
def get_daily_counts(row, max_counter):
    counter = row["ENTRIES"] - row["PREV_ENTRIES"]
    if counter < 0:
        counter = -counter
    if counter > max_counter:
        print(row["ENTRIES"], row["PREV_ENTRIES"])
        return 0
    return counter

# If counter is > 1Million, then the counter might have been reset.  
# Just set it to zero as different counters have different cycle limits
_ = turnstiles_daily.apply(get_daily_counts, axis=1, max_counter=1000000)

12 6170034.0
263 3846402.0
29 3367836.0
26 1083967.0
864 2152170.0
111 1599317.0
460501 6804909.0
73 6697765.0
875479599 775219651.0
590597 9156558.0
88 2728627.0
94 4283110.0
1041 2210879.0
98 1478936.0
116 6197971.0
66 1121336148.0
82 2025847506.0
1451 4711879.0
973 6844908.0
126 6053205.0
196612 50345125.0
589824 50331648.0
5421 10392294.0
2157 6026406.0
458752 117440512.0
107 1068192.0
2567 152966689.0
2204 19783118.0
458752 117440512.0
24 2781046.0
3728 1842366.0
2069 4443445.0
178 6625781.0
4982302 877264.0
1421 2254274.0
721441460 2880424.0
516 3275592.0
610 2460400.0
14 4584512.0
26 153536031.0
18 118621565.0
67122955 103584.0
218 67122955.0
458846 2297063.0
672 5491989.0
235398290 9934.0
83886083 117440614.0
10 5679392.0
1145 4459356.0
6817688 3603909.0
401 3071806.0
123785342 117604971.0
636 2209128.0
31260681 48404.0
1507856 4609.0
2053 3512072.0
483 4777140.0
102459350 210849.0
248 3782823.0
248 9852706.0
1 134218317.0
2319 22554564.0
190 1449873.0
3480 5391681.0
845 172975

In [15]:
def get_daily_counts(row, max_counter):
    counter = row["ENTRIES"] - row["PREV_ENTRIES"]
    if counter < 0:
        # Maybe counter is reversed?
        counter = -counter
    if counter > max_counter:
        # Maybe counter was reset to 0? 
        print(row["ENTRIES"], row["PREV_ENTRIES"])
        counter = min(row["ENTRIES"], row["PREV_ENTRIES"])
    if counter > max_counter:
        # Check it again to make sure we're not still giving a counter that's too big
        return 0
    return counter

# If counter is > 1Million, then the counter might have been reset.  
# Just set it to zero as different counters have different cycle limits
# It'd probably be a good idea to use a number even significantly smaller than 1 million as the limit!
turnstiles_daily["DAILY_ENTRIES"] = turnstiles_daily.apply(get_daily_counts, axis=1, max_counter=1000000)

12 6170034.0
263 3846402.0
29 3367836.0
26 1083967.0
864 2152170.0
111 1599317.0
460501 6804909.0
73 6697765.0
875479599 775219651.0
590597 9156558.0
88 2728627.0
94 4283110.0
1041 2210879.0
98 1478936.0
116 6197971.0
66 1121336148.0
82 2025847506.0
1451 4711879.0
973 6844908.0
126 6053205.0
196612 50345125.0
589824 50331648.0
5421 10392294.0
2157 6026406.0
458752 117440512.0
107 1068192.0
2567 152966689.0
2204 19783118.0
458752 117440512.0
24 2781046.0
3728 1842366.0
2069 4443445.0
178 6625781.0
4982302 877264.0
1421 2254274.0
721441460 2880424.0
516 3275592.0
610 2460400.0
14 4584512.0
26 153536031.0
18 118621565.0
67122955 103584.0
218 67122955.0
458846 2297063.0
672 5491989.0
235398290 9934.0
83886083 117440614.0
10 5679392.0
1145 4459356.0
6817688 3603909.0
401 3071806.0
123785342 117604971.0
636 2209128.0
31260681 48404.0
1507856 4609.0
2053 3512072.0
483 4777140.0
102459350 210849.0
248 3782823.0
248 9852706.0
1 134218317.0
2319 22554564.0
190 1449873.0
3480 5391681.0
845 172975

In [16]:
turnstiles_daily

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,ENTRIES,PREV_DATE,PREV_ENTRIES,DAILY_ENTRIES
1,A002,R051,02-00-00,59 ST,NQR456W,09/01/2019,7184559,08/31/2019,7183842.0,717.0
2,A002,R051,02-00-00,59 ST,NQR456W,09/02/2019,7185132,09/01/2019,7184559.0,573.0
3,A002,R051,02-00-00,59 ST,NQR456W,09/03/2019,7186355,09/02/2019,7185132.0,1223.0
4,A002,R051,02-00-00,59 ST,NQR456W,09/04/2019,7187672,09/03/2019,7186355.0,1317.0
5,A002,R051,02-00-00,59 ST,NQR456W,09/05/2019,7189025,09/04/2019,7187672.0,1353.0
...,...,...,...,...,...,...,...,...,...,...
444786,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/25/2019,5554,11/24/2019,5554.0,0.0
444787,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/26/2019,5554,11/25/2019,5554.0,0.0
444788,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/27/2019,5554,11/26/2019,5554.0,0.0
444789,TRAM2,R469,00-05-01,RIT-ROOSEVELT,R,11/28/2019,5554,11/27/2019,5554.0,0.0


In [17]:
turnstiles_total = (turnstiles_daily
                        .groupby(["C/A","STATION","UNIT","SCP","DATE"],as_index=False).agg("sum")).sort_values("DAILY_ENTRIES", ascending = False).reset_index()
                        #.DAILY_ENTRIES.sum())

In [20]:
#challenge6
stations_total = (turnstiles_daily
                        .groupby(["STATION","LINENAME","DATE"],as_index=False).agg("sum")).sort_values("DAILY_ENTRIES", ascending = False).reset_index()
                        #.DAILY_ENTRIES.sum())

In [21]:
stations_total

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES
0,21971,CHAMBERS ST,JZ456,09/30/2019,26082003,3.148684e+07,961783.0
1,899,111 ST,A,11/29/2019,61372844,5.726669e+07,878380.0
2,19744,BRIGHTON BEACH,BQ,10/23/2019,78307418,7.915694e+07,874281.0
3,11261,59 ST COLUMBUS,ABCD1,09/28/2019,2130660013,2.230504e+09,832597.0
4,10361,51 ST,6,09/12/2019,674205092,1.056411e+08,801226.0
...,...,...,...,...,...,...,...
42840,7789,30 AV,NQW,10/20/2019,30285899,3.028590e+07,0.0
42841,35624,ORCHARD BEACH,6,09/03/2019,494942,4.949420e+05,0.0
42842,35625,ORCHARD BEACH,6,09/04/2019,494942,4.949420e+05,0.0
42843,35626,ORCHARD BEACH,6,09/05/2019,494942,4.949420e+05,0.0


In [22]:
stations_zips = pd.read_csv("sub_st_zip.csv")

In [24]:
stations_zips.rename(columns={'Stop Name':'STATION', "Zip Code":"ZipCode"}, inplace = True)

In [25]:
stations_zips.STATION = stations_zips.STATION.str.upper()

In [26]:
manh_stations=stations_zips.STATION.to_list()

In [27]:
stations_zips

Unnamed: 0,STATION,Borough,GTFS Latitude,GTFS Longitude,ZipCode
0,LEXINGTON AV/59 ST,M,40.762660,-73.967258,10065
1,5 AV/59 ST,M,40.764811,-73.973347,10065
2,57 ST - 7 AV,M,40.764664,-73.980658,10106
3,49 ST,M,40.759901,-73.984139,10019
4,TIMES SQ - 42 ST,M,40.754672,-73.986754,10018
...,...,...,...,...,...
148,GRAND CENTRAL - 42 ST,M,40.752769,-73.979189,10017
149,34 ST - 11 AV,M,40.755882,-74.001910,10001
150,96 ST,M,40.784318,-73.947152,10029
151,86 ST,M,40.777891,-73.951787,10028


In [28]:
type(manh_stations)

list

In [29]:
zip_dict  = pd.Series(stations_zips.ZipCode.values,index=stations_zips.STATION).to_dict()

In [30]:
manh_stations_total = stations_total[stations_total["STATION"].isin(manh_stations)]

In [31]:
manh_turnstiles_total = turnstiles_total[turnstiles_total["STATION"].isin(manh_stations)]

In [32]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES
0,21971,CHAMBERS ST,JZ456,09/30/2019,26082003,3.148684e+07,961783.0
4,10361,51 ST,6,09/12/2019,674205092,1.056411e+08,801226.0
9,20933,CANAL ST,1,11/11/2019,13188954,1.376620e+07,584736.0
10,19528,BOWLING GREEN,45,09/17/2019,2368999214,2.368442e+09,556916.0
12,38959,SOUTH FERRY,1RW,11/14/2019,162973461,1.624356e+08,537899.0
...,...,...,...,...,...,...,...
42752,24386,DYCKMAN ST,1,09/15/2019,7934940,7.934940e+06,0.0
42780,4964,181 ST,1,09/15/2019,36848195,3.684820e+07,0.0
42790,2624,145 ST,1,09/15/2019,822969866,8.229699e+08,0.0
42831,5414,191 ST,1,09/15/2019,6286044,6.286044e+06,0.0


In [36]:
stations_elev = pd.read_csv("Station_Elevator.csv")

In [38]:
stations_elev.STATION = stations_elev.STATION.str.upper()

In [39]:
stations_elev

Unnamed: 0,STATION,LINENAME,Elevator
0,14 ST/8 AV,A/C/E/L,1
1,14 ST-UNION SQ,L/N/Q/R/W,1
2,23 ST,6,1
3,28 ST,6 downtown only,1
4,34 ST-HERALD SQ,B/D/F/M/N/Q/R/W,1
5,34 ST-PENN STATION,1/2/3/A/C/E,1
6,34 ST-HUDSON YARDS,7,1
7,42 ST-PORT AUTHORITY BUS TERMINAL,A/C/E,1
8,47-50 STS-ROCKEFELLER CTR,B/D/F/M,1
9,49 ST,N/R/W uptown only,1


In [40]:
elev_dict  = pd.Series(stations_elev.Elevator.values,index=stations_elev.STATION).to_dict()

In [41]:
elev_dict

{'14 ST/8 AV': 1,
 '14 ST-UNION SQ': 1,
 '23 ST': 1,
 '28 ST': 1,
 '34 ST-HERALD SQ': 1,
 '34 ST-PENN STATION': 1,
 '34 ST-HUDSON YARDS': 1,
 '42 ST-PORT AUTHORITY BUS TERMINAL': 1,
 '47-50 STS-ROCKEFELLER CTR': 1,
 '49 ST': 1,
 '50 ST': 1,
 '51 ST': 1,
 '59 ST-COLUMBUS CIRCLE': 1,
 '66 ST-LINCOLN CENTER': 1,
 '72 ST': 1,
 '86 ST': 1,
 '96 ST': 1,
 '125 ST': 1,
 '135 ST': 1,
 '168 ST': 1,
 '175 ST': 1,
 'BLEECKER ST': 1,
 'BOWLING GREEN': 1,
 'BROADWAY-LAFAYETTE': 1,
 'BROOKLYN BRIDGE-CITY HALL': 1,
 'CANAL ST': 1,
 'CHAMBERS ST': 1,
 'CORTLANDT ST': 1,
 'DYCKMAN ST': 1,
 'FULTON ST': 1,
 'GRAND CENTRAL-42 ST': 1,
 'INWOOD-207 ST': 1,
 'LEXINGTON AV/53 ST': 1,
 'LEXINGTON AV/63 ST': 1,
 'ROOSEVELT ISLAND': 1,
 'SOUTH FERRY': 1,
 'TIMES SQUARE-42 ST': 1,
 'WEST 4 ST-WASHINGTON SQ': 1,
 'WORLD TRADE CENTER': 1,
 'WTC CORTLANDT': 1}

In [42]:
manh_stations_total["ZipCode"] = manh_stations_total["STATION"].map(zip_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [43]:
manh_turnstiles_total["ZipCode"] = manh_turnstiles_total["STATION"].map(zip_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [44]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode
0,21971,CHAMBERS ST,JZ456,09/30/2019,26082003,3.148684e+07,961783.0,10013
4,10361,51 ST,6,09/12/2019,674205092,1.056411e+08,801226.0,10022
9,20933,CANAL ST,1,11/11/2019,13188954,1.376620e+07,584736.0,10013
10,19528,BOWLING GREEN,45,09/17/2019,2368999214,2.368442e+09,556916.0,10004
12,38959,SOUTH FERRY,1RW,11/14/2019,162973461,1.624356e+08,537899.0,10004
...,...,...,...,...,...,...,...,...
42752,24386,DYCKMAN ST,1,09/15/2019,7934940,7.934940e+06,0.0,10034
42780,4964,181 ST,1,09/15/2019,36848195,3.684820e+07,0.0,10033
42790,2624,145 ST,1,09/15/2019,822969866,8.229699e+08,0.0,10031
42831,5414,191 ST,1,09/15/2019,6286044,6.286044e+06,0.0,10040


In [45]:
manh_stations_total["Elev"] = manh_stations_total["STATION"].map(elev_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [46]:
manh_turnstiles_total["Elev"] = manh_turnstiles_total["STATION"].map(elev_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [47]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev
0,21971,CHAMBERS ST,JZ456,09/30/2019,26082003,3.148684e+07,961783.0,10013,1.0
4,10361,51 ST,6,09/12/2019,674205092,1.056411e+08,801226.0,10022,1.0
9,20933,CANAL ST,1,11/11/2019,13188954,1.376620e+07,584736.0,10013,1.0
10,19528,BOWLING GREEN,45,09/17/2019,2368999214,2.368442e+09,556916.0,10004,1.0
12,38959,SOUTH FERRY,1RW,11/14/2019,162973461,1.624356e+08,537899.0,10004,1.0
...,...,...,...,...,...,...,...,...,...
42752,24386,DYCKMAN ST,1,09/15/2019,7934940,7.934940e+06,0.0,10034,1.0
42780,4964,181 ST,1,09/15/2019,36848195,3.684820e+07,0.0,10033,
42790,2624,145 ST,1,09/15/2019,822969866,8.229699e+08,0.0,10031,
42831,5414,191 ST,1,09/15/2019,6286044,6.286044e+06,0.0,10040,


In [None]:
#Map disability and age #s 

In [55]:
age_disability = pd.read_csv("zip_code_age_disability_df.csv")

In [56]:
age_disability

Unnamed: 0,ZipCode,aged,disabled
0,10001,5297.0,3475.0
1,10002,23825.0,24260.0
2,10003,11020.0,5624.0
3,10004,540.0,169.0
4,10005,669.0,154.0
5,10006,377.0,0.0
6,10007,1231.0,170.0
7,10009,13667.0,13695.0
8,10010,7523.0,4206.0
9,10011,13260.0,7709.0


In [58]:
aged_dict  = pd.Series(age_disability.aged.values,index=age_disability.ZipCode).to_dict()

In [59]:
manh_stations_total["aged"] = manh_stations_total["ZipCode"].map(aged_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [61]:
disabled_dict = pd.Series(age_disability.disabled.values,index=age_disability.ZipCode).to_dict()

In [63]:
manh_stations_total["disabled"] = manh_stations_total["ZipCode"].map(disabled_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [None]:
#Use minmaxscaler for the scaling

In [66]:
from sklearn.preprocessing import MinMaxScaler

In [67]:
scaler = MinMaxScaler()

In [72]:
aged_scaled = scaler.fit_transform([manh_stations_total.aged])

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [75]:
manh_stations_total["aged_scaled"] = manh_stations_total["aged"]/max(manh_stations_total.aged)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [77]:
manh_stations_total["disabled_scaled"] = manh_stations_total["disabled"]/max(manh_stations_total.disabled)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [79]:
manh_stations_total["entries_scaled"] = manh_stations_total["DAILY_ENTRIES"]/max(manh_stations_total.DAILY_ENTRIES)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [82]:
manh_stations_total["Neediness"] = (manh_stations_total.aged_scaled + manh_stations_total.disabled_scaled + manh_stations_total.entries_scaled)/3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [83]:
manh_stations_total

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
0,21971,CHAMBERS ST,JZ456,09/30/2019,26082003,3.148684e+07,961783.0,10013,1.0,8071.0,5060.0,0.338762,0.208574,1.000000,0.515779
4,10361,51 ST,6,09/12/2019,674205092,1.056411e+08,801226.0,10022,1.0,12087.0,4602.0,0.507324,0.189695,0.833063,0.510027
9,20933,CANAL ST,1,11/11/2019,13188954,1.376620e+07,584736.0,10013,1.0,8071.0,5060.0,0.338762,0.208574,0.607971,0.385102
10,19528,BOWLING GREEN,45,09/17/2019,2368999214,2.368442e+09,556916.0,10004,1.0,540.0,169.0,0.022665,0.006966,0.579045,0.202892
12,38959,SOUTH FERRY,1RW,11/14/2019,162973461,1.624356e+08,537899.0,10004,1.0,540.0,169.0,0.022665,0.006966,0.559273,0.196301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42752,24386,DYCKMAN ST,1,09/15/2019,7934940,7.934940e+06,0.0,10034,1.0,10983.0,9624.0,0.460986,0.396702,0.000000,0.285896
42780,4964,181 ST,1,09/15/2019,36848195,3.684820e+07,0.0,10033,,14882.0,15222.0,0.624638,0.627453,0.000000,0.417364
42790,2624,145 ST,1,09/15/2019,822969866,8.229699e+08,0.0,10031,,13199.0,12668.0,0.553998,0.522176,0.000000,0.358725
42831,5414,191 ST,1,09/15/2019,6286044,6.286044e+06,0.0,10040,,12010.0,9692.0,0.504092,0.399505,0.000000,0.301199


In [88]:
#Sort by neediest stations

manh_stations_total.sort_values("Neediness", ascending = False)

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
8990,24979,EAST BROADWAY,F,11/07/2019,153824190,1.538090e+08,15183.0,10002,,23825.0,24260.0,1.0,1.0,0.015786,0.671929
9179,24980,EAST BROADWAY,F,11/08/2019,153839161,1.538242e+08,14971.0,10002,,23825.0,24260.0,1.0,1.0,0.015566,0.671855
9221,24937,EAST BROADWAY,F,09/26/2019,153302639,1.532877e+08,14922.0,10002,,23825.0,24260.0,1.0,1.0,0.015515,0.671838
9231,24993,EAST BROADWAY,F,11/21/2019,153990689,1.539758e+08,14911.0,10002,,23825.0,24260.0,1.0,1.0,0.015503,0.671834
9267,24923,EAST BROADWAY,F,09/12/2019,153121889,1.531070e+08,14862.0,10002,,23825.0,24260.0,1.0,1.0,0.015453,0.671818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40298,7483,3 AV,L,09/14/2019,25789572,2.578832e+07,1252.0,11372,,,,,,0.001302,
41471,11075,59 ST,NRW,09/06/2019,499079,4.983770e+05,702.0,11234,,,,,,0.000730,
42375,11015,59 ST,NQR456W,10/06/2019,56636886,5.663680e+07,90.0,11234,,,,,,0.000094,
42461,14,1 AV,L,09/15/2019,1415291788,1.415292e+09,26.0,11372,,,,,,0.000027,


In [96]:
manh_stations_total[manh_stations_total.Elev != 1].sort_values("Neediness", ascending = False)

Unnamed: 0,index,STATION,LINENAME,DATE,ENTRIES,PREV_ENTRIES,DAILY_ENTRIES,ZipCode,Elev,aged,disabled,aged_scaled,disabled_scaled,entries_scaled,Neediness
8990,24979,EAST BROADWAY,F,11/07/2019,153824190,1.538090e+08,15183.0,10002,,23825.0,24260.0,1.0,1.0,0.015786,0.671929
9179,24980,EAST BROADWAY,F,11/08/2019,153839161,1.538242e+08,14971.0,10002,,23825.0,24260.0,1.0,1.0,0.015566,0.671855
9221,24937,EAST BROADWAY,F,09/26/2019,153302639,1.532877e+08,14922.0,10002,,23825.0,24260.0,1.0,1.0,0.015515,0.671838
9231,24993,EAST BROADWAY,F,11/21/2019,153990689,1.539758e+08,14911.0,10002,,23825.0,24260.0,1.0,1.0,0.015503,0.671834
9267,24923,EAST BROADWAY,F,09/12/2019,153121889,1.531070e+08,14862.0,10002,,23825.0,24260.0,1.0,1.0,0.015453,0.671818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40298,7483,3 AV,L,09/14/2019,25789572,2.578832e+07,1252.0,11372,,,,,,0.001302,
41471,11075,59 ST,NRW,09/06/2019,499079,4.983770e+05,702.0,11234,,,,,,0.000730,
42375,11015,59 ST,NQR456W,10/06/2019,56636886,5.663680e+07,90.0,11234,,,,,,0.000094,
42461,14,1 AV,L,09/15/2019,1415291788,1.415292e+09,26.0,11372,,,,,,0.000027,


In [None]:
#Sort by total metric and elevation possession status