In [1]:
import pandas as pd

# Relative locations of the raw CSV datasets used by this notebook.
airbnb_file = "data/singapore-airbnb/listings.csv"
mrt_file = "data/singapore-train-stations/mrt_lrt_data.csv"
michelin_one_file, michelin_two_file, michelin_three_file = (
    "data/michelin-restaurants/one-star-michelin-restaurants.csv",
    "data/michelin-restaurants/two-stars-michelin-restaurants.csv",
    "data/michelin-restaurants/three-stars-michelin-restaurants.csv",
)

# Load every CSV into its own pandas DataFrame (same read order as the paths
# listed in the tuple below).
o_df, m1_df, m2_df, m3_df, train_stat_df = map(
    pd.read_csv,
    (airbnb_file, michelin_one_file, michelin_two_file, michelin_three_file, mrt_file),
)

# Preprocessing
# Airbnb: replace every missing value with 0 so that listings without reviews
# report zero for their review statistics.
# NOTE(review): fillna(0) applies to all columns, so non-numeric fields such as
# last_review also become 0 for listings that have no reviews.
o_df = o_df.fillna(0)

# Michelin restaurants: keep only the records located in Singapore, drop the
# zip code column (NaN for these records, per the original note) and the region
# column, and add a "star" column recording how many stars the restaurant has won.
# The one- and two-star frames are then stacked and re-indexed. Note that no
# restaurant in Singapore has won a Michelin three-star award, so m3_df is not
# used in this step.
# drop(columns=...) and assign(...) return new frames, so the source frames
# m1_df / m2_df are left untouched.
sin1 = m1_df["region"] == "Singapore"
m1_df_sing = m1_df[sin1].drop(columns=["zipCode", "region"]).assign(star=1)

sin2 = m2_df["region"] == "Singapore"
m2_df_sing = m2_df[sin2].drop(columns=["zipCode", "region"]).assign(star=2)

# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# produces the same stacked frame with a fresh 0..n-1 index.
m_df_sing = pd.concat([m1_df_sing, m2_df_sing], ignore_index=True)

# Train stations: rename the columns so the coordinate columns match the
# latitude/longitude naming used by the other datasets.
# NOTE(review): assumes the CSV has exactly these four columns in this order.
columns = ["station_name", "type", "latitude", "longitude"]
train_stat_df.columns = columns


m_df_sing


Unnamed: 0,name,year,latitude,longitude,city,cuisine,price,url,star
0,Hill Street Tai Hwa Pork Noodle,2018,1.3052,103.8624,Singapore,Street Food,$,https://guide.michelin.com/sg/en/singapore-reg...,1
1,Putien (Kitchener Road),2018,1.30969,103.8573,Singapore,Fujian,$,https://guide.michelin.com/sg/en/singapore-reg...,1
2,Chef Kang's,2018,1.304735,103.84955,Singapore,Cantonese,$$$,https://guide.michelin.com/sg/en/singapore-reg...,1
3,Garibaldi,2018,1.296564,103.855,Singapore,Italian,$,https://guide.michelin.com/sg/en/singapore-reg...,1
4,Summer Pavilion,2018,1.291284,103.8603,Singapore,Cantonese,$$,https://guide.michelin.com/sg/en/singapore-reg...,1
5,Shinji (Bras Basah Road),2018,1.295903,103.8539,Singapore,Sushi,$$,https://guide.michelin.com/sg/en/singapore-reg...,1
6,The Song of India,2018,1.310767,103.8353,Singapore,Indian,$,https://guide.michelin.com/sg/en/singapore-reg...,1
7,Lei Garden,2018,1.295228,103.8521,Singapore,Cantonese,$,https://guide.michelin.com/sg/en/singapore-reg...,1
8,Whitegrass,2018,1.295452,103.8516,Singapore,Australian,$$,https://guide.michelin.com/sg/en/singapore-reg...,1
9,Jaan,2018,1.293184,103.8529,Singapore,French contemporary,$$$,https://guide.michelin.com/sg/en/singapore-reg...,1


In [13]:

# Bind `df` to the same DataFrame object as `o_df` (an alias, not a copy), so
# in-place changes made through either name are visible through both.
# NOTE(review): this cell's execution count (13) does not directly follow the
# loading cell's (1) -- confirm the notebook reproduces under Restart & Run All.
df = o_df
# Bare last expression: renders the frame as the cell output.
df

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,49091,COZICOMFORT LONG TERM STAY ROOM 2,266763,Francesca,North Region,Woodlands,1.44255,103.79580,Private room,83,180,1,2013-10-21,0.01,2,365
1,50646,Pleasant Room along Bukit Timah,227796,Sujatha,Central Region,Bukit Timah,1.33235,103.78521,Private room,81,90,18,2014-12-26,0.28,1,365
2,56334,COZICOMFORT,266763,Francesca,North Region,Woodlands,1.44246,103.79667,Private room,69,6,20,2015-10-01,0.20,2,365
3,71609,Ensuite Room (Room 1 & 2) near EXPO,367042,Belinda,East Region,Tampines,1.34541,103.95712,Private room,206,1,14,2019-08-11,0.15,9,353
4,71896,B&B Room 1 near Airport & EXPO,367042,Belinda,East Region,Tampines,1.34567,103.95963,Private room,94,1,22,2019-07-28,0.22,9,355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7902,38105126,Loft 2 pax near Haw Par / Pasir Panjang. Free ...,278109833,Belle,Central Region,Queenstown,1.27973,103.78751,Entire home/apt,100,3,0,0,0.00,31,61
7903,38108273,3bedroom luxury at Orchard,238891646,Neha,Central Region,Tanglin,1.29269,103.82623,Entire home/apt,550,6,0,0,0.00,34,365
7904,38109336,[ Farrer Park ] New City Fringe CBD Mins to MRT,281448565,Mindy,Central Region,Kallang,1.31286,103.85996,Private room,58,30,0,0,0.00,3,173
7905,38110493,Cheap Master Room in Central of Singapore,243835202,Huang,Central Region,River Valley,1.29543,103.83801,Private room,56,14,0,0,0.00,2,30
