# MTA Turnstile Data Analysis


This notebook provides interactive maps of all NYC subway stations and of the 10 busiest stations.

---

**Note:** Plotly must be initialized for online plotting before this notebook is run; see the instructions [here](https://plot.ly/python/getting-started/#initialization-for-online-plotting).

### Importing libraries

In [11]:
import os
import pickle
import warnings

import pandas as pd

warnings.filterwarnings('ignore')

import plotly
import plotly.graph_objs as go
import plotly.plotly as py

In [2]:
# Read the station table from the data folder and preview the first rows
stations_csv = '../data/top_stations.csv'
df = pd.read_csv(stations_csv)
df.head()

Unnamed: 0.1,Unnamed: 0,index,name,zipcode,lat,long,Geography,Median income (dollars); Estimate; Households
0,215,408,WORLD TRADE CENTER,10007,40.712564,-74.009745,ZCTA5 10007,242644.0
1,217,147,METROPOLITAN AV STATION,11211,40.712774,-73.951424,ZCTA5 10007,242644.0
2,225,360,CHAMBERS STREET STATION,10007,40.714111,-74.008585,ZCTA5 10007,242644.0
3,224,71,METROPOLITAN AV STATION,11211,40.714072,-73.950248,ZCTA5 10007,242644.0
4,223,63,UNION TURNPIKE - KEW GARDENS STATION,11415,40.714035,-73.83037,ZCTA5 10007,242644.0


In [63]:
# List the column labels of the loaded station table
df.columns

Index(['Unnamed: 0', 'index', 'name', 'zipcode', 'lat', 'long', 'Geography',
       'Median income (dollars); Estimate; Households'],
      dtype='object')

### Quick Data Cleaning

In [3]:
# Give the long census income column a short name and label the
# station-name column 'Station'
column_renames = {
    'Median income (dollars); Estimate; Households': 'medianIncome',
    'name': 'Station',
}
df = df.rename(columns=column_renames)

In [4]:
# Pull latitude, longitude and station name out of the dataframe as plain
# Python lists; they are the inputs of the map trace built below
list_lat, list_long, list_StnName = (
    df[column].tolist() for column in ('lat', 'long', 'Station')
)

### Plotting all subway locations in NYC

In [7]:
# Setup for the plotly API.
# Credentials are read from environment variables instead of being typed
# into the notebook, so they never end up in a shared or committed copy:
#   PLOTLY_USERNAME, PLOTLY_API_KEY -> plotly account
#   MAPBOX_ACCESS_TOKEN             -> Mapbox access token
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)
# When the variables are not set nothing is written, so credentials saved by
# an earlier plotly initialization are not replaced with placeholder text.
mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN', 'YOUR TOKEN')

# Creating interactive map: one semi-transparent red marker per row of df,
# with the station name attached as the marker text
data = [
    go.Scattermapbox(
        lat=list_lat,
        lon=list_long,
        mode='markers',
        marker=dict(
            size=7,
            color='rgb(242, 0, 0)',
            opacity=0.5
        ),
        text=list_StnName,
    )
]

# Map layout: centred on the mean latitude/longitude of the plotted stations
layout = go.Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=df.lat.mean(),
            lon=df.long.mean()
        ),
        pitch=0,
        zoom=6
    ),
)

fig = dict(data=data, layout=layout)

# Upload the figure under the given filename and render it inline
py.iplot(fig, filename='New York Mapbox')

### Load the pickled 'clean' dataframe produced by **EDA_MTA.ipynb**

In [12]:
# Restore the 'clean' dataframe that EDA_MTA.ipynb saved as a pickle
with open('cleanDf.pickle', mode='rb') as pickled_df:
    dfnew = pickle.load(pickled_df)

In [9]:
# Total traffic per station, busiest first
dfStation = (
    dfnew
    .groupby('Station')[['traffic']]
    .sum()
    .sort_values(by='traffic', ascending=False)
)

# Names of the 10 stations with the highest total traffic
list_top10 = dfStation.index[:10].tolist()

# Keep only the rows of the clean dataframe that belong to those stations
is_top10 = dfnew['Station'].isin(list_top10)
df_top10 = dfnew[is_top10]

In [13]:
# Restore the pickled dataframe that holds the station geocodes
with open('cleanDfsubsetZip.pickle', mode='rb') as pickled_geo:
    dfgeo = pickle.load(pickled_geo)

In [16]:
# Restrict the geocode table to the stations named in list_top10 and show it
in_top10 = dfgeo['name'].isin(list_top10)
dfgeo = dfgeo[in_top10]
dfgeo

Unnamed: 0,name,lat,long
41,86 ST,40.622687,-74.028398
146,FULTON ST,40.687119,-73.975375
261,14 ST-UNION SQ,40.734673,-73.989951
269,23 ST,40.739864,-73.986599
299,34 ST-HERALD SQ,40.749645,-73.987937
305,GRD CNTRL-42 ST,40.751431,-73.976041
309,34 ST-PENN STA,40.752287,-73.993391
316,TIMES SQ-42 ST,40.754612,-73.986768
326,42 ST-PORT AUTH,40.757308,-73.989735
385,125 ST,40.815581,-73.958372


### Plotting top 10 subway locations in NYC

In [18]:
# Setup for the plotly API.
# Credentials are read from environment variables instead of being typed
# into the notebook, so they never end up in a shared or committed copy:
#   PLOTLY_USERNAME, PLOTLY_API_KEY -> plotly account
#   MAPBOX_ACCESS_TOKEN             -> Mapbox access token
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)
# When the variables are not set nothing is written, so credentials saved by
# an earlier plotly initialization are not replaced with placeholder text.
mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN', 'YOUR TOKEN')


# Creating interactive map: one semi-transparent red marker per row of dfgeo,
# with the station name attached as the marker text
data = [
    go.Scattermapbox(
        lat=dfgeo.lat,
        lon=dfgeo.long,
        mode='markers',
        marker=dict(
            size=7,
            color='rgb(242, 0, 0)',
            opacity=0.5
        ),
        text=dfgeo.name,
    )
]

# Map layout: centred on the mean latitude/longitude of the plotted stations
layout = go.Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=dfgeo.lat.mean(),
            lon=dfgeo.long.mean()
        ),
        pitch=0,
        zoom=6
    ),
)

fig = dict(data=data, layout=layout)

# Use a filename of its own: the all-stations map is uploaded as
# 'New York Mapbox', and reusing that name would replace that chart.
py.iplot(fig, filename='New York Mapbox Top 10')



___