# Initialize

In [11]:
import pandas as pd
import json
from geopy.distance import distance

# Load file
<li>Load grab.csv, which contains the information for the restaurants listed on Grab</li>

In [12]:
# Read the raw Grab dataset from CSV into a DataFrame
grab_df = pd.read_csv(filepath_or_buffer="dataset/grab.csv")

In [13]:
# Keep only the rows whose location type is FOOD (i.e. restaurants)
is_food_location = grab_df['loc_type'].eq('FOOD')
restaurants_df = grab_df[is_food_location]

In [14]:
# Keep only the columns needed downstream.
# The explicit .copy() detaches the result from the filtered frame it was
# sliced from, so that new columns can be assigned on it afterwards without
# pandas raising SettingWithCopyWarning.
restaurants_df = restaurants_df[['name', 'address', 'lat', 'lon', 'opening_hours']].copy()
restaurants_df.head()

Unnamed: 0,name,address,lat,lon,opening_hours
0,Texas Chicken,Texas Chicken - VivoCity,1.264792,103.822103,"{""open"": true, ""displayedHours"": ""10:30-21:30""..."
1,Subway,Subway - Vivocity,1.264741,103.822072,"{""open"": true, ""displayedHours"": ""12:00-19:00""..."
2,LiHO Tea,LiHO Tea - Vivo City,1.264309,103.821369,"{""open"": true, ""displayedHours"": ""11:30-20:45""..."
3,Wingstop,Wingstop - VivoCity,1.263527,103.821712,"{""open"": true, ""displayedHours"": ""11:00-21:00""..."
4,Le Shrimp Ramen,Le Shrimp Ramen - Vivocity,1.26481,103.822554,"{""open"": true, ""displayedHours"": ""10:30-21:00""..."


# Process opening_hours
<li>We want to filter for restaurants that stay open past 12am (midnight) on both Friday and Saturday</li>
<li>We want to filter for restaurants in Singapore's downtown region only</li>

In [15]:
# Parse each opening_hours JSON string once (rather than once per extracted day)
opening_hours_parsed = restaurants_df['opening_hours'].apply(json.loads)

# Extract Friday and Saturday opening hours and remove the raw opening_hours column.
# assign() returns a new frame instead of writing columns into the existing one,
# which avoids SettingWithCopyWarning when restaurants_df is a filtered slice.
# A missing "fri"/"sat" key still raises KeyError, as before.
restaurants_df = restaurants_df.assign(
    fri_opening_hrs=opening_hours_parsed.apply(lambda hours: hours["fri"]),
    sat_opening_hrs=opening_hours_parsed.apply(lambda hours: hours["sat"]),
).drop('opening_hours', axis=1)

In [16]:
# Helper function to filter rows where opening hours extend past 00:00
def extends_past_midnight(opening_hrs):
    """Return True if any opening-hours segment runs past 00:00.

    Parameters
    ----------
    opening_hrs : str
        Either the literal 'Closed', or one or more whitespace-separated
        'HH:MM-HH:MM' segments (e.g. '00:00-01:00 10:00-23:59').
        Non-string values (None, NaN, ...) are treated as closed.

    Returns
    -------
    bool
        True when at least one segment closes earlier than it opens
        (i.e. wraps over midnight) or opens at exactly '00:00'.

    Raises
    ------
    IndexError
        If a segment contains no '-' separator.
    ValueError
        If a time does not match the '%H:%M' format.
    """
    # A missing entry has no hours to inspect; without this guard the
    # .split() call below would raise AttributeError on None / NaN.
    if not isinstance(opening_hrs, str):
        return False

    # Check if the restaurant is closed
    if opening_hrs == 'Closed':
        return False

    def past_midnight(segment):
        # Split the time range into opening and closing times
        times = segment.split('-')
        opening_time = times[0]
        closing_time = times[1]

        # Convert opening and closing times to datetime objects
        opening_datetime = pd.to_datetime(opening_time, format='%H:%M')
        closing_datetime = pd.to_datetime(closing_time, format='%H:%M')

        # A closing time earlier than the opening time means the segment wraps
        # past 00:00. A segment opening at exactly 00:00 is also counted --
        # presumably the tail of the previous night's opening; confirm against
        # the data source.
        return (closing_datetime < opening_datetime) or opening_time == "00:00"

    # `segment` (not `range`) so the builtin is not shadowed
    return any(past_midnight(segment) for segment in opening_hrs.split())

In [17]:
# Add one boolean flag per day telling whether the restaurant is open past 00:00.
# Maps each new flag column to the opening-hours column it is derived from.
past_midnight_columns = {
    'fri_past_00:00': 'fri_opening_hrs',
    'sat_past_00:00': 'sat_opening_hrs',
}
for flag_column, hours_column in past_midnight_columns.items():
    restaurants_df[flag_column] = restaurants_df[hours_column].apply(extends_past_midnight)

In [18]:
# Column to identify restaurants that are open past 00:00 on both Friday and Saturday.
# Both inputs are boolean flag columns, so an element-wise AND gives the same
# result as the row-by-row apply while staying vectorised (and well-defined on
# an empty frame).
restaurants_df['midnight_restaurant'] = restaurants_df['fri_past_00:00'] & restaurants_df['sat_past_00:00']

In [19]:
# Show every restaurant flagged as open past 00:00 on both Friday and Saturday
restaurants_df.loc[restaurants_df['midnight_restaurant']]

Unnamed: 0,name,address,lat,lon,fri_opening_hrs,sat_opening_hrs,fri_past_00:00,sat_past_00:00,midnight_restaurant
139,Jalan Tua Kong Ya Lim Mee Pok,Jalan Tua Kong Ya Lim Mee Pok - Telok Blangah ...,1.277626,103.819708,00:00-23:59,00:00-23:59,True,True,True
163,Ya,Ya-mahyuddin Al-jailani Restaurant - Kampong B...,1.276141,103.835411,00:00-23:59,00:00-23:59,True,True,True
171,Kimly Dim Sum,Kimly Dim Sum - Telok Blangah Crescent,1.277062,103.819199,00:00-23:59,00:00-23:59,True,True,True
174,Zhou Ji Ban Mian,Zhou Ji Ban Mian - Telok Blangah Crescent,1.277102,103.819195,00:00-01:00 10:00-23:59,00:00-01:00 10:00-23:59,True,True,True
181,Akbar 24 Hours Restaurant,Akbar 24 Hours Restaurant - Lim Teck Kim Road ...,1.273545,103.842756,00:00-23:59,00:00-23:59,True,True,True
...,...,...,...,...,...,...,...,...,...
16082,505 Sembawang Mushroom Minced Meat Noodle,505 Sembawang Mushroom Minced Meat Noodle - Ca...,1.450087,103.822191,00:00-23:59,00:00-23:59,True,True,True
16096,McDonald's,McDonald's - Sun Plaza,1.448357,103.819993,00:00-01:45 06:00-23:59,00:00-23:59,True,True,True
16105,Al-Azhar,Al-Azhar - Tampines Street 21,1.352028,103.953905,00:00-01:45 09:00-23:59,00:00-01:45 09:00-23:59,True,True,True
16110,McDonald's,McDonald's - Yishun,1.426700,103.826261,00:00-23:59,00:00-23:59,True,True,True


In [20]:
# Keep only the identifying columns plus the midnight flag
output_columns = ['name', 'address', 'lat', 'lon', 'midnight_restaurant']
restaurants_df = restaurants_df[output_columns]

In [24]:
# Write the resulting dataframe to CSV, leaving out the index
restaurants_df.to_csv(path_or_buf="dataset/midnight_restaurants.csv", index=False)