In [1]:
import os
import re
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date
import calendar

# plot modules
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
matplotlib.style.use("ggplot")

In [4]:
new_cols = ["C/A", "UNIT", "SCP", "STATION", "LINENAME", "DIVISION", "DATE", "TIME", "DESC", "ENTRIES", "EXITS"]
jan06_2018_path = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_180106.txt"
jan06_2018_mta_data = pd.read_csv(jan06_2018_path, sep=",", header=0, names=new_cols)


mta_datasets_2018 = [jan06_2018_mta_data]

In [5]:
####### SAMPLE DATASET
### looking at the most recent MTA data from June 29, 2019 in NY
path = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_190629.txt"
new_cols = ["C/A", "UNIT", "SCP", "STATION", "LINENAME", "DIVISION", "DATE", "TIME", "DESC", "ENTRIES", "EXITS"]
MTA_data = pd.read_csv(path, sep=",", header=0, names=new_cols)


In [6]:
### add turnstile passes column
MTA_data['TURNSTILE_PASSES'] = MTA_data['ENTRIES'] + MTA_data['EXITS']
MTA_data.sample(10)

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS,TURNSTILE_PASSES
64485,N116,R198,00-03-00,NOSTRAND AV,AC,IND,06/27/2019,08:00:00,REGULAR,5245539,2685736,7931275
170697,R294,R052,00-00-01,WOODLAWN,4,IRT,06/22/2019,08:00:00,REGULAR,7839318,2852119,10691437
67855,N135,R385,01-06-00,ROCKAWAY BLVD,A,IND,06/22/2019,01:00:00,REGULAR,2940565,618461,3559026
93070,N414A,R316,01-00-02,FLUSHING AV,G,IND,06/27/2019,17:00:00,REGULAR,1488768,679866,2168634
65158,N120,R153,00-05-01,UTICA AV,AC,IND,06/27/2019,13:00:00,REGULAR,2,231,233
65388,N120A,R153,01-05-01,UTICA AV,AC,IND,06/23/2019,21:00:00,REGULAR,0,224,224
190360,R532,R328,00-05-03,METS-WILLETS PT,7,IRT,06/28/2019,09:00:00,REGULAR,621783,32406,654189
109032,N601,R319,00-00-04,LEXINGTON AV/63,F,IND,06/23/2019,20:00:00,REGULAR,1540386,151336,1691722
43310,N009,R174,01-00-01,181 ST,A,IND,06/24/2019,01:00:00,REGULAR,7995493,6464117,14459610
116557,PTH03,R552,00-01-02,JOURNAL SQUARE,1,PTH,06/26/2019,13:42:20,REGULAR,25448,40858,66306


In [16]:
## add day of the week column
MTA_data["DATE"] = pd.to_datetime(MTA_data["DATE"])
days = [calendar.day_name[date.weekday()] for date in MTA_data["DATE"]]
MTA_data["DAY_OF_WEEK"] = days

In [19]:
### sort and group by date
MTA_data["DATE"] = MTA_data["DATE"]
date_grouped_MTA_data = MTA_data.groupby("DATE").count()
date_sorted_MTA_data = MTA_data.sort_values("DATE")

In [9]:
# rearrange columns
cols = MTA_data.columns.tolist()
len(cols)
c1, c2, c3 = list(cols[:7]), cols[-1:], list(cols[7:-1])
cols = c1 + c2 + c3
cols
MTA_data = MTA_data[cols]

In [11]:
# new DataFrame with relevant data
condensed_cols = [cols[3], cols[6], cols[7], cols[8], cols[10], cols[11], cols[-1]]
condensed_MTA_data = MTA_data[condensed_cols]
condensed_MTA_data

Unnamed: 0,STATION,DATE,DAY_OF_WEEK,TIME,ENTRIES,EXITS,TURNSTILE_PASSES
0,59 ST,2019-06-22,Saturday,00:00:00,7107725,2407457,9515182
1,59 ST,2019-06-22,Saturday,04:00:00,7107738,2407465,9515203
2,59 ST,2019-06-22,Saturday,08:00:00,7107761,2407491,9515252
3,59 ST,2019-06-22,Saturday,12:00:00,7107858,2407541,9515399
4,59 ST,2019-06-22,Saturday,16:00:00,7108075,2407581,9515656
5,59 ST,2019-06-22,Saturday,20:00:00,7108333,2407608,9515941
6,59 ST,2019-06-23,Sunday,00:00:00,7108482,2407628,9516110
7,59 ST,2019-06-23,Sunday,04:00:00,7108500,2407639,9516139
8,59 ST,2019-06-23,Sunday,08:00:00,7108512,2407655,9516167
9,59 ST,2019-06-23,Sunday,12:00:00,7108584,2407707,9516291


In [12]:
### 378 different subway stations
len(MTA_data.STATION.unique())

378

In [13]:
### group by columns and sort by highest activity
grouped_by_station = condensed_MTA_data.groupby('STATION').count().sort_values("DATE", ascending=False)
grouped_by_station
grouped_by_time = condensed_MTA_data.groupby('TIME').count()
grouped_by_time.sample(10)


Unnamed: 0_level_0,STATION,DATE,DAY_OF_WEEK,ENTRIES,EXITS,TURNSTILE_PASSES
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
04:23:17,1,1,1,1,1,1
19:02:12,1,1,1,1,1,1
14:06:13,1,1,1,1,1,1
12:38:13,1,1,1,1,1,1
17:41:07,1,1,1,1,1,1
06:17:55,1,1,1,1,1,1
21:49:00,1,1,1,1,1,1
11:13:19,1,1,1,1,1,1
02:35:34,1,1,1,1,1,1
11:44:37,1,1,1,1,1,1


In [81]:
test_station = MTA_data[MTA_data.STATION == '59 ST']
exits_since_previous = []
#print(test_station.EXITS.loc[503])
test_station.index.unique()
#for i in range(1, len(test_station)):
#    exits_since_previous.append(test_station.EXITS.iloc[i] - test_station.EXITS.iloc[i-1])
#exits_since_previous.insert(0,0)
test_station['EXITS_SINCE_PREVIOUS'] = test_station.EXITS.diff().fillna(0)
test_station
#len(exits_since_previous), len(test_station)
#exits_since_previous = pd.Series(exits_since_previous)
#test_station.loc['EXITS_SINCE_PREVIOUS'] = exits_since_previous


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,DAY_OF_WEEK,TIME,DESC,ENTRIES,EXITS,TURNSTILE_PASSES,EXITS_SINCE_PREVIOUS
0,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,00:00:00,REGULAR,7107725,2407457,9515182,0.0
1,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,04:00:00,REGULAR,7107738,2407465,9515203,8.0
2,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,08:00:00,REGULAR,7107761,2407491,9515252,26.0
3,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,12:00:00,REGULAR,7107858,2407541,9515399,50.0
4,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,16:00:00,REGULAR,7108075,2407581,9515656,40.0
5,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-22,Saturday,20:00:00,REGULAR,7108333,2407608,9515941,27.0
6,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-23,Sunday,00:00:00,REGULAR,7108482,2407628,9516110,20.0
7,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-23,Sunday,04:00:00,REGULAR,7108500,2407639,9516139,11.0
8,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-23,Sunday,08:00:00,REGULAR,7108512,2407655,9516167,16.0
9,A002,R051,02-00-00,59 ST,NQR456W,BMT,2019-06-23,Sunday,12:00:00,REGULAR,7108584,2407707,9516291,52.0


In [83]:
business_data = pd.read_csv('Legally_Operating_Businesses.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [87]:
business_data[business_data.Industry == 'Electronic & Appliance Service']

Unnamed: 0,DCA License Number,License Type,License Expiration Date,License Status,License Creation Date,Industry,Business Name,Business Name 2,Address Building,Address Street Name,...,Community Board,Council District,BIN,BBL,NTA,Census Tract,Detail,Longitude,Latitude,Location
35,2084014-DCA,Business,06/30/2020,Active,04/02/2019,Electronic & Appliance Service,THE WIRELESS CIRCLE INC.,UBREAKIFIX,251,W 23RD ST,...,104.0,3.0,1.07614e+06,1007730012,MN13,91.0,,-73.997080,40.744711,"(40.744711182934765, -73.99708042228067)"
43,2080727-DCA,Business,06/30/2020,Active,12/17/2018,Electronic & Appliance Service,ALEJANDRO ZACATENCO ROMANO,ROMANO'S MULTISERVICE,5920,5TH AVE,...,307.0,38.0,3.01629e+06,3008630047,BK32,74.0,,-74.015750,40.639901,"(40.63990115120479, -74.015749604107)"
188,2069924-DCA,Business,06/30/2020,Active,04/23/2018,Electronic & Appliance Service,MOBILE PLAZA NYC INC,,11026,LIBERTY AVE,...,410.0,28.0,4.20241e+06,4095330011,QN55,100.0,,-73.831665,40.684527,"(40.6845272327581, -73.83166453988532)"
485,2079235-DCA,Business,06/30/2020,Active,10/18/2018,Electronic & Appliance Service,RVIVE INC,Jack's Place,235,E 51ST ST,...,106.0,4.0,1.03847e+06,1013250019,MN19,98.0,,-73.969428,40.756087,"(40.756086886246656, -73.96942763051698)"
514,2059440-DCA,Business,06/30/2018,Inactive,10/16/2017,Electronic & Appliance Service,MOBILE PLAZA NYC INC,,10616,ROCKAWAY BLVD,...,410.0,32.0,4.24781e+06,4114810040,QN55,864.0,,-73.832812,40.678534,"(40.6785343107872, -73.83281173058518)"
519,2084523-DCA,Business,06/30/2020,Active,04/11/2019,Electronic & Appliance Service,QUICK FIX TECH 1 INC,,1530,MYRTLE AVE,...,304.0,37.0,3.07636e+06,3033360018,BK77,439.0,,-73.912548,40.699403,"(40.699403208305576, -73.91254791349502)"
653,2005137-DCA,Business,06/30/2020,Active,03/24/2014,Electronic & Appliance Service,"STAPLES THE OFFICE SUPERSTORE EAST, INC.",Staples the Office Superstore,2535,RICHMOND AVE,...,,,,,SI24,27702.0,,,,
682,1380146-DCA,Business,06/30/2014,Inactive,01/05/2011,Electronic & Appliance Service,"MOBILE CITY 170, INC.",SPRINT,1202,SAINT NICHOLAS AVE,...,112.0,10.0,1.06304e+06,1021270001,MN36,253.0,,-73.938733,40.842435,"(40.842434762124554, -73.93873252197251)"
720,2081399-DCA,Business,06/30/2020,Active,01/15/2019,Electronic & Appliance Service,BE IN TOUCH OF WHITE PLAINS ROAD LLC,,2096,WHITE PLAINS RD,...,211.0,13.0,2.04879e+06,2042870014,BX49,22404.0,,-73.867736,40.853198,"(40.8531975867418, -73.86773593417237)"
1021,2081770-DCA,Business,06/30/2020,Active,01/30/2019,Electronic & Appliance Service,ACEVE2 WIRELESS INC,,565,WILSON AVE,...,304.0,37.0,3.07886e+06,3034120002,BK77,437.0,,-73.908978,40.691188,"(40.69118809021159, -73.90897795291953)"


In [88]:
business_data.Industry.unique()

array(['Amusement Device Permanent', 'Garage',
       'Home Improvement Contractor', 'Electronics Store',
       'Garage and Parking Lot', 'Pedicab Driver',
       'Home Improvement Salesperson', 'Electronic Cigarette Dealer',
       'Tobacco Retail Dealer', 'General Vendor', 'Tow Truck Driver',
       'Stoop Line Stand', 'Locksmith', 'Electronic & Appliance Service',
       'Pedicab Business', 'Sightseeing Guide', 'Parking Lot',
       'Dealer In Products', 'Ticket Seller', 'Laundries',
       'Sidewalk Cafe', 'Scrap Metal Processor',
       'Secondhand Dealer - General', 'Debt Collection Agency',
       'Gaming Cafe', 'Tow Truck Company', 'Pawnbroker',
       'Secondhand Dealer - Auto', 'Amusement Device Portable',
       'Auctioneer', 'Storage Warehouse', 'Laundry', 'Special Sale',
       'Process Serving Agency', 'Cabaret', 'Employment Agency',
       'Amusement Device Temporary', 'Scale Dealer Repairer',
       'Process Server Individual', 'Newsstand', 'Horse Drawn Cab Owner',
   