This notebook counts how many food, alcohol and leisure businesses are open in each taxi location for every hour of every day of the week.
The resulting counts are used as an input to the busyness ML model.

In [1]:
import ast

import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the OSM places together with their parsed opening hours and taxi location
df = pd.read_csv("../osm_taxi_location/osm_places_open_hours_with_locations.csv")

In [3]:
# Display the loaded DataFrame
df

Unnamed: 0,id,subtype,lat,lon,hours_parsed,location
0,349323821,fast_food,40.762515,-73.976690,"[{'day': 'monday', 'opens': '11:00', 'closes':...",161
1,357620442,restaurant,40.741558,-73.978463,"[{'day': 'monday', 'opens': '12:00', 'closes':...",137
2,368045855,community_centre,40.748038,-73.976099,"[{'day': 'monday', 'opens': '9:00', 'closes': ...",170
3,368047348,fast_food,40.762222,-73.868889,"[{'day': 'monday', 'opens': '5:00', 'closes': ...",70
4,368049383,library,40.773741,-73.982941,"[{'day': 'monday', 'opens': '8:30', 'closes': ...",142
...,...,...,...,...,...,...
7356,11726831143,,40.780623,-73.979910,"[{'day': 'tuesday', 'opens': '5:00', 'closes':...",239
7357,11739272854,,40.587964,-73.955871,"[{'day': 'monday', 'opens': '9:00', 'closes': ...",210
7358,11835593507,,40.713795,-73.991638,"[{'day': 'monday', 'opens': '6:00', 'closes': ...",232
7359,11851487120,,40.746885,-73.859908,"[{'day': 'monday', 'opens': '6:00', 'closes': ...",56


In [4]:
# Count places per raw subtype (value_counts leaves out missing values by default)
df['subtype'].value_counts()

subtype
restaurant          2911
fast_food           1783
cafe                1105
bar                  576
pub                  181
ice_cream            104
nightclub             21
community_centre      17
arts_centre           16
library               14
food_court            14
theatre                8
cinema                 4
biergarten             1
music_venue            1
planetarium            1
Name: count, dtype: int64

In [5]:
# Group the subtypes by food, alcohol, leisure.
# The mapping is called subtype_groups rather than `map` so that the built-in
# map() is not shadowed for the rest of the notebook.
subtype_groups = {
    "restaurant": "food",
    "fast_food": "food",
    "cafe": "food",
    "ice_cream": "food",
    "food_court": "food",
    "bar": "alcohol",
    "pub": "alcohol",
    "nightclub": "alcohol",
    "biergarten": "alcohol",
    "community_centre": "leisure",
    "arts_centre": "leisure",
    "library": "leisure",
    "theatre": "leisure",
    "cinema": "leisure",
    "planetarium": "leisure",
    "music_venue": "leisure",
}
# replace() (unlike Series.map) leaves values without a mapping entry untouched;
# assigning the result back avoids inplace mutation.
df['subtype'] = df['subtype'].replace(subtype_groups)


In [6]:
# Turn the stringified list of opening-hour dicts into real Python objects.
# ast.literal_eval only accepts Python literals, so unlike eval() it cannot
# execute code that might be embedded in the CSV.
df['hours_parsed'] = df['hours_parsed'].apply(ast.literal_eval)

In [7]:
# To make evaluating open time easier, create opening/closing hour/min keys in dict.
# Each dict is updated directly: the column stores references to these objects,
# so there is no need to look them up again through df[...][i][j] (chained indexing).
for opening_hours in df['hours_parsed']:
    for span in opening_hours:
        open_time = span['opens'].split(':')
        close_time = span['closes'].split(':')
        span['opens_hour'] = int(open_time[0])
        span['opens_min'] = int(open_time[1])
        span['closes_hour'] = int(close_time[0])
        span['closes_min'] = int(close_time[1])


In [8]:
# Keep only the rows that have a value in every column
df = df.dropna()

In [9]:
# Count places per grouped category; dropna=False would also list missing subtypes if any remained
df['subtype'].value_counts(dropna=False)

subtype
food       5917
alcohol     779
leisure      61
Name: count, dtype: int64

In [10]:
# Build every (day, hour, location) combination of the week
days = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
hours = list(range(24))
locations = df['location'].unique()
multi_index = pd.MultiIndex.from_product(
    [days, hours, locations],
    names=["day", "hour", "location"],
)

In [11]:
# One row per (day, hour, location), with every category counter starting at zero
times = (
    pd.DataFrame(index=multi_index)
    .reset_index()
    .assign(food=0, alcohol=0, leisure=0)
)

In [12]:
# Week order, used to find the day before a given day for spans that run past midnight.
_WEEKDAYS = ("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")


def is_open(opening_hours, day, hour):
    """Check whether a place is open at the start of a given hour on a given day.

    Parameters
    ----------
    opening_hours : iterable of dict
        One dict per opening span, each with the keys 'day', 'opens_hour',
        'opens_min', 'closes_hour' and 'closes_min'.
    day : str
        Day to check; compared against each span's 'day' value.
    hour : int
        Hour of the day to check.

    Returns
    -------
    bool
        True if any span covers ``hour``:00, i.e. opens <= hour:00 < closes.
        A place opening at 8:30 is therefore not open for hour 8, and a place
        closing at 22:00 is not open for hour 22.

    Notes
    -----
    A span whose closing time is earlier than its opening time (e.g.
    18:00-02:00) is treated as running past midnight: it is open until the end
    of its own day and during the early hours of the following day. The same
    carry-over applies to closing times written past 24:00.
    """
    check_time = (hour, 0)
    # Index -1 wraps around, so the day before monday is sunday.
    day_before = _WEEKDAYS[_WEEKDAYS.index(day) - 1] if day in _WEEKDAYS else None

    for span in opening_hours:
        same_day = span['day'] == day
        carried_over = day_before is not None and span['day'] == day_before
        if not (same_day or carried_over):
            continue

        opens = (span['opens_hour'], span['opens_min'])
        closes = (span['closes_hour'], span['closes_min'])
        if closes < opens:
            # Closing before opening means the span ends on the next day.
            closes = (closes[0] + 24, closes[1])

        # Span belongs to the requested day and covers the start of the hour.
        if same_day and opens <= check_time < closes:
            return True

        # Span started the day before and is still running at this hour.
        if carried_over and (hour + 24, 0) < closes:
            return True

    # No span covers the requested day and hour.
    return False

In [13]:
# Sanity check: a place open 11:00-22:00 every day, queried for Monday at 11
oh = [
    {'day': weekday, 'opens': '11:00', 'closes': '22:00',
     'opens_hour': 11, 'opens_min': 0, 'closes_hour': 22, 'closes_min': 0}
    for weekday in ('monday', 'tuesday', 'wednesday', 'thursday',
                    'friday', 'saturday', 'sunday')
]
print(is_open(oh, 'monday', 11))

True


In [14]:
# Count how many food, alcohol and leisure places are open per location at every time of the week

# Start from zero so that re-running this cell does not add to earlier counts.
times[['food', 'alcohol', 'leisure']] = 0

# Collect the (row label, day, hour) slots of each location once, instead of
# re-filtering `times` with a boolean mask for every single business.
slots_by_location = {
    loc: list(zip(group.index, group['day'], group['hour']))
    for loc, group in times.groupby('location')
}

for i, row in df.iterrows():
    open_hour = row['hours_parsed']
    col = row['subtype']
    # A location without slots in `times` simply contributes nothing.
    for index, day, hour in slots_by_location.get(row['location'], ()):
        if is_open(open_hour, day, hour):
            times.at[index, col] += 1

    if i%100 == 0:
        print("done row", i)

done row 0
done row 100
done row 200
done row 300
done row 400
done row 500
done row 600
done row 700
done row 800
done row 900
done row 1000
done row 1100
done row 1200
done row 1300
done row 1400
done row 1500
done row 1600
done row 1700
done row 1800
done row 1900
done row 2000
done row 2100
done row 2200
done row 2300
done row 2400
done row 2500
done row 2600
done row 2700
done row 2800
done row 2900
done row 3000
done row 3100
done row 3200
done row 3300
done row 3400
done row 3500
done row 3600
done row 3700
done row 3800
done row 3900
done row 4000
done row 4100
done row 4200
done row 4300
done row 4400
done row 4500
done row 4600
done row 4700
done row 4800
done row 4900
done row 5000
done row 5100
done row 5200
done row 5300
done row 5400
done row 5500
done row 5600
done row 5700
done row 5800
done row 5900
done row 6000
done row 6100
done row 6200
done row 6300
done row 6400
done row 6500
done row 6600
done row 6700


In [23]:
# Write the per-day/hour/location counts to CSV, leaving out the DataFrame index
times.to_csv("taxi_location_num_businesses_open.csv", index=False)

In [22]:
# Show the rows whose subtype is 'leisure'
df.loc[df['subtype']=='leisure']

Unnamed: 0,id,subtype,lat,lon,hours_parsed,location
2,368045855,leisure,40.748038,-73.976099,"[{'day': 'monday', 'opens': '9:00', 'closes': ...",170
4,368049383,leisure,40.773741,-73.982941,"[{'day': 'monday', 'opens': '8:30', 'closes': ...",142
5,368050625,leisure,40.719779,-73.979337,"[{'day': 'monday', 'opens': '11:00', 'closes':...",232
70,889115578,leisure,40.714742,-74.015169,"[{'day': 'monday', 'opens': '11:00', 'closes':...",13
427,2494208360,leisure,40.671024,-73.981631,"[{'day': 'monday', 'opens': '10:00', 'closes':...",181
...,...,...,...,...,...,...
5727,9797307321,leisure,40.703782,-73.985304,"[{'day': 'tuesday', 'opens': '11:00', 'closes'...",66
5790,9921710386,leisure,40.687303,-73.769821,"[{'day': 'monday', 'opens': '11:00', 'closes':...",205
5824,9989146511,leisure,40.771737,-73.985510,"[{'day': 'sunday', 'opens': '8:00', 'closes': ...",142
5847,10077762874,leisure,40.719292,-74.004823,"[{'day': 'wednesday', 'opens': '17:00', 'close...",231
