In [20]:
import pandas as pd
import seaborn
import matplotlib.pyplot as plt

# Load the per-ZIP resident data.
ndb = pd.read_csv('zip_data.csv')

# Load the per-company data.
cdb = pd.read_csv('company_data.csv')

# Collapse the company rows to one row per ZIP code. numeric_only=True keeps
# any text columns out of the sum: pandas >= 2.0 would otherwise concatenate
# them, whereas older releases silently dropped them.
cdbz = cdb.groupby('ZIP').sum(numeric_only=True).reset_index()

# Outer join so that ZIP codes present in only one of the two sources are
# kept. merge() already returns a fresh 0..n-1 index, so no reset is needed.
wdf = pd.merge(cdbz, ndb, on='ZIP', how='outer')

# ZIP codes missing from one source come out of the outer join as NaN; treat
# them as zero. Assigning the result back (instead of calling
# fillna(inplace=True) on a column selection) is required for the fill to
# take effect under pandas Copy-on-Write.
wdf = wdf.fillna({'EMPLOYEES': 0, 'RESIDENTS': 0})

# Scale the employee count down to the estimated number of female employees
# and floor it to a whole number (the result stays a float column).
FEMALE_EMPLOYEE_SHARE = .272
wdf['EMPLOYEES'] = (wdf['EMPLOYEES'] * FEMALE_EMPLOYEE_SHARE) // 1

# Rank ZIP codes by employees + residents and keep the top 8.
wdf['TOTAL'] = wdf['EMPLOYEES'] + wdf['RESIDENTS']
wdf = wdf.sort_values('TOTAL', ascending=False).head(8)

# Load the stations located in the top ZIP codes and attach the per-ZIP
# totals computed above (inner join on ZIP, renumbered from 0).
sdf = pd.read_csv('top_zip_stations.csv')
tsdf = wdf.merge(sdf, on='ZIP').reset_index(drop=True)

# Read the MTA turnstile data and combine entries and exits into a single
# MOVEMENT figure per row.
turnstile = pd.read_csv('output_stage2.csv')
turnstile['MOVEMENT'] = turnstile['ENTRY_DIFF'] + turnstile['EXIT_DIFF']

# Keep only the grouping keys and the value being aggregated. The text
# identifier columns (UNIT, C/A, SCP) are not used below, and carrying them
# into sum()/mean() makes pandas >= 2.0 concatenate them and then raise a
# TypeError when averaging.
turnstile = turnstile[['STATION', 'DATE', 'MOVEMENT']]

# Total movement per station per day.
turnstile = turnstile.groupby(by=['STATION', 'DATE']).sum()

# Average daily movement per station, rounded to a whole number. STATION is
# left as the index of the result.
turnstile = turnstile.groupby(level='STATION').mean().round(0)

# Keep the 20 stations with the highest average daily movement.
turnstile = turnstile.sort_values('MOVEMENT', ascending=False).head(20)

# Inner join: keep only stations that are both in a top ZIP code and among
# the busiest stations overall.
station_data = tsdf.merge(turnstile, on='STATION')

# The distinct station names are the outreach targets.
stations = station_data['STATION'].unique()
print(stations)

['59 ST' '14 ST' '23 ST' '34 ST-HERALD SQ' '34 ST-PENN STA'
 '14 ST-UNION SQ' 'GRD CNTRL-42 ST' '42 ST-PORT AUTH' '47-50 STS ROCK'
 'TIMES SQ-42 ST']
