## What is the variation in traffic week by week during the summer months?

In [None]:
# This notebook reads the cleaned data file and renders a heatmap of
# foot traffic at the top MTA stations, broken down by week.

### 1. Import packages and libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

# render matplotlib figures inline, directly in the notebook output
%matplotlib inline

# have the inline backend emit figures in SVG format
%config InlineBackend.figure_format = 'svg'
# apply seaborn's styling with font elements scaled by a factor of 0.8
sns.set(font_scale=0.8)

### 2. Group and sort data

In [None]:
# load the cleaned dataset (DATETIME parsed as timestamps) and discard the
# 'Unnamed: 0' column in the same expression
df = (
    pd.read_csv('output_stage2.csv', parse_dates=['DATETIME'])
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# TOTAL combines the entry and exit deltas of each row;
# DATE is converted to a datetime dtype so the .dt accessor works later on
df = df.assign(
    TOTAL=df['ENTRY_DIFF'] + df['EXIT_DIFF'],
    DATE=pd.to_datetime(df['DATE']),
)

In [None]:
# collapse to one row per (station, date) by summing the numeric columns.
# numeric_only=True is passed explicitly: the frame still carries non-numeric
# columns (e.g. the parsed DATETIME column), and on pandas >= 2.0 a bare
# .sum() attempts to aggregate them and raises a TypeError. Older pandas
# dropped such columns silently, so this keeps the historical result.
df = (
    df.groupby(['STATION', 'DATE'])
    .sum(numeric_only=True)
    .reset_index()
)

In [None]:
# Stations to include in the heatmap.
# Hardcoded for now; ideally this list would be read in from elsewhere.
stations_list = [
    '34 ST-PENN STA',
    'GRD CNTRL-42 ST',
    '34 ST-HERALD SQ',
    '23 ST',
    '14 ST-UNION SQ',
    'TIMES SQ-42 ST',
    'FULTON ST',
    '86 ST',
    '42 ST-PORT AUTH',
    '59 ST COLUMBUS',
]

In [None]:
# keep only the rows whose STATION appears in stations_list;
# reset_index() renumbers the rows and keeps the old labels in an 'index' column
is_top_station = df['STATION'].isin(stations_list)
df2 = df[is_top_station].reset_index()

In [None]:
# convert each row's date to its ISO week number in the year.
# The week must come from df2's own DATE column: df2 was re-indexed after
# filtering, so reading DATE from the unfiltered df would align on index and
# attach the week of an unrelated row. isocalendar() replaces the deprecated
# Series.dt.week accessor (removed in pandas 2.0); its UInt32 result is cast
# to int64 to match what .dt.week used to return.
df2['WEEK_OF_YEAR'] = df2['DATE'].dt.isocalendar().week.astype('int64')

### 3. Visualize data

In [None]:
# average TOTAL for every (station, week) pair, expressed in thousands,
# returned as a flat frame with STATION, WEEK_OF_YEAR and TOTAL columns
df_viz = (
    df2.groupby(['STATION', 'WEEK_OF_YEAR'])['TOTAL']
    .mean()
    .div(1000)
    .reset_index()
)

In [None]:
# reshape to a station-by-week grid (rows: STATION, columns: WEEK_OF_YEAR,
# cells: TOTAL), which is the layout seaborn's heatmap expects
df_viz = df_viz.pivot_table(
    index='STATION',
    columns='WEEK_OF_YEAR',
    values='TOTAL',
)

In [None]:
# relabel the week-number columns with month names for easier reading.
# The list is hardcoded to 14 entries (1 April, 4 May, 5 June, 4 July), so the
# assignment below requires df_viz to have exactly 14 week columns.
x_labels = ['April'] + ['May'] * 4 + ['June'] * 5 + ['July'] * 4

df_viz.columns = x_labels

In [None]:
# draw the heatmap on an explicitly created figure/axes pair
fig, ax = plt.subplots(figsize=(16, 8))

heatmap_options = dict(
    fmt=".0f",
    cmap='inferno',
    annot=True,
    annot_kws={"size": 12},
    cbar_kws={'label': "Average Daily Traffic (in thousands)"},
)
sns.heatmap(df_viz, ax=ax, **heatmap_options)

# title and axis labels
ax.set_title('Traffic for Top Stations by Week', fontsize=20)
ax.title.set_position([.5, 1.05])
ax.set_xlabel('Week of Year', fontsize=16)
ax.set_ylabel('Station', fontsize=16)

# the last axes on the figure is the colorbar added by the heatmap;
# bring its label up to the same size as the other axis labels
fig.axes[-1].yaxis.label.set_size(16)

# optional layout/export steps, left disabled
# plt.tight_layout()
# plt.savefig('heatmap_fig_final.pdf', transparent=True)