In [28]:
import pandas as pd
import numpy as np

In [29]:
slu = pd.read_csv('data/slu-Paid_Parking_Occupancy__Last_30_Days_.csv')
# parking rate only contains null
slu = slu.drop(['PaidParkingRate'], axis = 1)

Add availability information

1. Calculate occupancy percentage: The date and time (minute) of the transaction as recorded
2. Aavailability (bool) : True if has available parking spaces. 
3. Available parking spaces (int): Number of availabel parking spaces. 0 if paid spaces are equal or more than the parking space count. A car might leave before the parking expire, thus paird occupancy could be higher than the parking space count. 

In [30]:
slu['OccupancyPercentage'] = slu['PaidOccupancy']/slu['ParkingSpaceCount']
slu['HasAvailability'] = slu.OccupancyPercentage < 1
slu['AvailableSpace'] = np.where(slu['ParkingSpaceCount'] - slu['PaidOccupancy'] > 0, slu['ParkingSpaceCount'] - slu['PaidOccupancy'], 0)

compute latitude and longitude

In [31]:
location = slu.Location.str[7:-1]
slu.Location.str.split(' ',expand=True)
slu[['Latitude','Longitude']]=location.str.split(' ',expand=True)
slu['Latitude'] = pd.to_numeric(slu['Latitude'])
slu['Longitude'] = pd.to_numeric(slu['Longitude'])

Compute datetime information. 

1. Convert a string of time into a datetime object
2. Check wheter a given date is a US holiday
3. Compute day of the week, hour of the day and minute of the day
5. sort by datetime

In [35]:
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import time
slu['OccupancyDateTime'] = pd.to_datetime(slu['OccupancyDateTime'],infer_datetime_format=True)
cal = calendar()

dr = pd.date_range(start=slu['OccupancyDateTime'].min(), end=slu['OccupancyDateTime'].max())
holidays = cal.holidays(start=dr.min(), end=dr.max())
slu['Holiday'] = slu['OccupancyDateTime'].isin(holidays)

slu['DayOfTheWeek'] = slu['OccupancyDateTime'].dt.day_of_week
slu['Year'] = slu['OccupancyDateTime'].dt.year
slu['Month'] = slu['OccupancyDateTime'].dt.month
slu['Day'] = slu['OccupancyDateTime'].dt.day
slu['Hour'] = slu['OccupancyDateTime'].dt.hour
slu['Minute'] = slu['OccupancyDateTime'].dt.minute
slu['Second'] = slu['OccupancyDateTime'].dt.second
slu['MinuteOfTheDay'] = slu['Hour'] * 60 * 60 + slu['Minute']  * 60

In [35]:
slu = slu.sort_values(by=['OccupancyDateTime'])

In [37]:
slu.to_csv('data/clean_data.csv', sep=',')

In [36]:
slu.head(30)

Unnamed: 0,OccupancyDateTime,PaidOccupancy,BlockfaceName,SideOfStreet,SourceElementKey,ParkingTimeLimitCategory,ParkingSpaceCount,PaidParkingArea,PaidParkingSubArea,ParkingCategory,Location,OccupancyPercentage,HasAvailability,AvailableSpace,Latitude,Longitude
3108464,03/15/2022 01:00:00 PM,3,TERRY AVE N BETWEEN THOMAS ST AND HARRISON ST,W,13109,120.0,9,South Lake Union,South,Paid Parking,POINT (-122.33727797 47.62144907),0.333333,True,6,-122.337278,47.621449
3399943,03/15/2022 01:00:00 PM,0,MERCER ST BETWEEN MINOR AVE N AND PONTIUS AVE N,S,79786,120.0,6,South Lake Union,South,Restricted Parking Zone,POINT (-122.33237992 47.62421002),0.0,True,6,-122.33238,47.62421
3293016,03/15/2022 01:00:00 PM,6,EASTLAKE AVE E BETWEEN PROSPECT ST AND E NELSO...,NW,120105,600.0,9,South Lake Union,North,Paid Parking,POINT (-122.32878566 47.629648),0.666667,True,3,-122.328786,47.629648
3109659,03/15/2022 01:00:00 PM,6,HARRISON ST BETWEEN BOREN AVE N AND FAIRVIEW A...,N,78705,600.0,8,South Lake Union,South,Paid Parking,POINT (-122.33513202 47.62209926),0.75,True,2,-122.335132,47.622099
3298106,03/15/2022 01:00:00 PM,4,THOMAS ST BETWEEN PONTIUS AVE N AND YALE AVE N,S,81134,120.0,8,South Lake Union,South,Paid Parking,POINT (-122.33105113 47.62075019),0.5,True,4,-122.331051,47.62075
3356537,03/15/2022 01:00:00 PM,6,8TH AVE N BETWEEN VALLEY ST AND ALOHA ST,W,8405,600.0,7,South Lake Union,North,Paid Parking,POINT (-122.34114567 47.6266943),0.857143,True,1,-122.341146,47.626694
3321130,03/15/2022 01:00:00 PM,8,EASTLAKE AVE E BETWEEN E NELSON PL AND FAIRVIE...,SE,55454,600.0,8,South Lake Union,North,Paid Parking,POINT (-122.32657593 47.63127461),1.0,False,0,-122.326576,47.631275
2866044,03/15/2022 01:00:00 PM,15,FAIRVIEW NR AVE N BETWEEN FAIRVIEW PL N AND AL...,SE,94602,120.0,28,South Lake Union,North,Paid Parking,POINT (-122.33364771 47.6267583),0.535714,True,13,-122.333648,47.626758
2930579,03/15/2022 01:00:00 PM,11,MINOR AVE N BETWEEN ROY ST AND VALLEY ST,W,57201,600.0,12,South Lake Union,North,Paid Parking,POINT (-122.33266709 47.62552645),0.916667,True,1,-122.332667,47.625526
3298057,03/15/2022 01:00:00 PM,7,VALLEY ST BETWEEN DEXTER AVE N AND 8TH AVE N,N,35881,600.0,15,South Lake Union,North,Paid Parking,POINT (-122.34168272 47.62632754),0.466667,True,8,-122.341683,47.626328


In [38]:
slu.columns

Index(['OccupancyDateTime', 'PaidOccupancy', 'BlockfaceName', 'SideOfStreet',
       'SourceElementKey', 'ParkingTimeLimitCategory', 'ParkingSpaceCount',
       'PaidParkingArea', 'PaidParkingSubArea', 'ParkingCategory', 'Location',
       'OccupancyPercentage', 'HasAvailability', 'AvailableSpace', 'Latitude',
       'Longitude', 'Holiday', 'DayOfTheWeek', 'Year', 'Month', 'Day', 'Hour',
       'Minute', 'Second', 'MinuteOfTheDay'],
      dtype='object')

In [32]:
loc_df = slu[['Latitude','Longitude', 'BlockfaceName', 'SourceElementKey','SideOfStreet','ParkingSpaceCount']]
loc_df = loc_df.drop_duplicates(subset=['SourceElementKey'], keep='first')
loc_df = loc_df.set_index('SourceElementKey')

In [33]:
loc_df

Unnamed: 0_level_0,Latitude,Longitude,BlockfaceName,SideOfStreet,ParkingSpaceCount
SourceElementKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
130802,-122.336508,47.624738,MERCER NR ST BETWEEN TERRY AVE N AND BOREN AVE N,N,5
80554,-122.332345,47.623066,REPUBLICAN ST BETWEEN MINOR AVE N AND PONTIUS ...,S,6
10153,-122.329227,47.621376,EASTLAKE AVE E BETWEEN THOMAS ST AND HARRISON ST,W,10
79790,-122.329810,47.624233,MERCER ST BETWEEN YALE AVE N AND EASTLAKE AVE E,S,9
35889,-122.331680,47.626028,VALLEY ST BETWEEN MINOR AVE N AND YALE AVE N,N,1
...,...,...,...,...,...
35197,-122.336516,47.623268,REPUBLICAN ST BETWEEN TERRY AVE N AND BOREN AVE N,N,8
13138,-122.335108,47.620784,THOMAS ST BETWEEN BOREN AVE N AND FAIRVIEW AVE N,S,10
13142,-122.329784,47.620764,THOMAS ST BETWEEN YALE AVE N AND EASTLAKE AVE E,S,4
55637,-122.334445,47.621436,FAIRVIEW AVE N BETWEEN THOMAS ST AND HARRISON ST,W,16


In [34]:
loc_df.to_csv('data/paystub_location.csv') 

In [5]:

clean_df = pd.read_csv('data/clean_data.csv')
location_df = clean_df[['Latitude','Longitude', 'SourceElementKey','ParkingSpaceCount']]
location_df = location_df.drop_duplicates(subset=['SourceElementKey'], keep='first')
location_df = location_df[['Latitude','Longitude','ParkingSpaceCount']]

In [6]:
# location_df.to_csv('data/paystub_coordinates.csv', index=False) 