# Initialize

In [25]:
import pandas as pd
import json
from geopy.distance import distance

# Load file
<li> load grab.csv containing all information of restaurants on grab</li>

In [26]:
grab_df = pd.read_csv("dataset/grab.csv")

In [27]:
# Filter for restaurants only
restaurants_df = grab_df[grab_df['loc_type']=='FOOD']

In [28]:
# filter relevant columns
restaurants_df = restaurants_df[['name', 'address','lat','lon', 'opening_hours']]
restaurants_df.head()

Unnamed: 0,name,address,lat,lon,opening_hours
0,Texas Chicken,Texas Chicken - VivoCity,1.264792,103.822103,"{""open"": true, ""displayedHours"": ""10:30-21:30""..."
1,Subway,Subway - Vivocity,1.264741,103.822072,"{""open"": true, ""displayedHours"": ""12:00-19:00""..."
2,LiHO Tea,LiHO Tea - Vivo City,1.264309,103.821369,"{""open"": true, ""displayedHours"": ""11:30-20:45""..."
3,Wingstop,Wingstop - VivoCity,1.263527,103.821712,"{""open"": true, ""displayedHours"": ""11:00-21:00""..."
4,Le Shrimp Ramen,Le Shrimp Ramen - Vivocity,1.26481,103.822554,"{""open"": true, ""displayedHours"": ""10:30-21:00""..."


# Process opening_hours
<li>We want to filter for restaurants that are open on both Friday and Saturday past 12am</li>
<li>We want to filer for restaurants in Singapore's downtown region only</li>

In [29]:
# Extract friday and saturday opening hours
restaurants_df['fri_opening_hrs'] = restaurants_df['opening_hours'].apply(lambda x: json.loads(x)["fri"])
restaurants_df['sat_opening_hrs'] = restaurants_df['opening_hours'].apply(lambda x: json.loads(x)["sat"])

# Remove opening_hours column
restaurants_df = restaurants_df.drop('opening_hours', axis=1)

In [30]:
# Filter for restaurants that are open on both Friday and Saturday
df_filtered = restaurants_df[(restaurants_df['fri_opening_hrs'] != 'Closed') & (restaurants_df['sat_opening_hrs'] != 'Closed')]


In [31]:
# Helper function to filter rows where opening hours extend past 00:00
def extends_past_midnight(time_range):
    # Split the time range into opening and closing times
    times = time_range.split('-')
    opening_time = times[0]
    closing_time = times[1]

    # Convert opening and closing times to datetime objects
    opening_datetime = pd.to_datetime(opening_time, format='%H:%M')
    closing_datetime = pd.to_datetime(closing_time, format='%H:%M')

    # Check if closing time is earlier than opening time, indicating it extends past 00:00
    return (closing_datetime < opening_datetime) or opening_time =="00:00"

In [32]:
# Create a binary variable that determines if a restaurant is open past 00:00
df_filtered['fri_opening_hrs'] = df_filtered['fri_opening_hrs'].str.split()
df_filtered['fri_past_00:00'] = df_filtered['fri_opening_hrs'].apply(lambda x: any(extends_past_midnight(time_range) for time_range in x))

df_filtered['sat_opening_hrs'] = df_filtered['sat_opening_hrs'].str.split()
df_filtered['sat_past_00:00'] = df_filtered['sat_opening_hrs'].apply(lambda x: any(extends_past_midnight(time_range) for time_range in x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [33]:
# Filter for restaurants that open past 00:00
df_past_midnight = df_filtered[df_filtered['fri_past_00:00'] & df_filtered['sat_past_00:00']]
df_past_midnight = df_past_midnight.drop(columns=['fri_past_00:00', 'sat_past_00:00'])

In [34]:
# Filter for restaurants in Singapore's downtown region only
central_lat = 1.287953
central_lon = 103.851784

df_past_midnight['distance_from_central(km)'] = df_past_midnight.apply(lambda x: distance((central_lat, central_lon), (x['lat'],x['lon'])).km, axis=1)


In [35]:
df_final = df_past_midnight[df_past_midnight['distance_from_central(km)'] < 2]
df_final

Unnamed: 0,name,address,lat,lon,fri_opening_hrs,sat_opening_hrs,distance_from_central(km)
181,Akbar 24 Hours Restaurant,Akbar 24 Hours Restaurant - Lim Teck Kim Road ...,1.273545,103.842756,[00:00-23:59],[00:00-23:59],1.883471
196,Curry Village Restaurant,Curry Village Restaurant - Lim Teck Kim Road,1.273542,103.842581,[00:00-23:59],[00:00-23:59],1.894283
255,Good Eat’s Family Restaurant,Good Eat's Family Restaurant - Tanjong Pagar P...,1.275374,103.843289,"[00:00-01:59, 08:00-23:59]","[00:00-01:59, 08:00-23:59]",1.681850
393,2D1N Soju Bang,2D1N Soju Bang Korean Food - Tanjong Pagar Roa...,1.279348,103.843816,"[00:00-03:00, 11:00-23:59]","[00:00-03:00, 11:00-23:59]",1.300687
541,Don Dae Bak,Don Dae Bak - Kreta Ayer Road [Islandwide Deli...,1.280780,103.842268,"[00:00-03:00, 17:00-23:59]","[00:00-04:00, 17:00-23:59]",1.323151
...,...,...,...,...,...,...,...
11111,Al-Bismi Kitchen,Al-Bismi Kitchen - Dickson Road,1.305337,103.853851,[00:00-23:59],[00:00-23:59],1.935918
11455,La Jia Restaurant,La Jia Restaurant - Wilkie Edge,1.301839,103.849774,"[00:00-02:30, 11:30-23:59]","[00:00-02:30, 11:30-23:59]",1.551608
11472,Wanyoo Cyber Cafe,Wanyoo Cyber Cafe - North Bridge Road,1.298523,103.856003,[00:00-23:59],[00:00-23:59],1.259536
11642,Kampong Glam Cafe,Kampong Glam Cafe - Bussorah Street,1.301240,103.859996,"[00:00-02:00, 08:00-23:59]","[00:00-02:00, 08:00-23:59]",1.730284


In [36]:
# Save dataframe as CSV
df_final.to_csv("dataset/midnight_restaurants.csv")