## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Load the dataset

In [2]:
df = pd.read_csv('airbnb_v3.2.csv')
df.head()

Unnamed: 0,Name,Header,Price_Per_Night,Superhost,Rating,Review,Rooms_info,Link
0,"TTS Lake Villa @ Broga, Semenyih",Villa in Semenyih,"RM1,927",Superhost,4.86 (96),96 reviews,16+ guests 4 bedrooms 15 beds 3 baths,https://www.airbnb.com/rooms/52588088?adults=1...
1,"TTS Beach Village @ Broga, Semenyih",Villa in Semenyih,"RM3,138",Superhost,5.0 (15),15 reviews,16+ guests 5 bedrooms 22 beds 6 baths,https://www.airbnb.com/rooms/73086887394461725...
2,Mountain view villa with entertainment room,Villa in Semenyih,"RM1,716",,4.6 (15),15 reviews,16+ guests 6 bedrooms 6 beds 4 baths,https://www.airbnb.com/rooms/61284459437053711...
3,"Wabi Sabian Broga 侘寂の居, わび・さびのい organic farmstay",Cottage in Lenggeng,"RM1,145",,4.86 (22),22 reviews,8 guests 2 bedrooms 8 beds 5 baths,https://www.airbnb.com/rooms/60416724063512941...
4,"Forest House Broga 森の居シンのい sh-in-no-i, Semenyih",Cottage in Lenggeng,"RM1,019",,4.5 (50),50 reviews,8 guests 2 bedrooms 8 beds 4.5 baths,https://www.airbnb.com/rooms/45835146?adults=1...


In [3]:
#Check the summary info of the dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Name             270 non-null    object
 1   Header           270 non-null    object
 2   Price_Per_Night  270 non-null    object
 3   Superhost        125 non-null    object
 4   Rating           256 non-null    object
 5   Review           194 non-null    object
 6   Rooms_info       269 non-null    object
 7   Link             270 non-null    object
dtypes: object(8)
memory usage: 17.0+ KB


## Data Cleaning

In [4]:
# Check for null value
df.isnull().sum()

Name                 0
Header               0
Price_Per_Night      0
Superhost          145
Rating              14
Review              76
Rooms_info           1
Link                 0
dtype: int64

In [5]:
# Create a new column called area
df['Header'] = df['Header'].astype(str)
df['Area'] = df['Header'].astype(str)
df['Area'] = df['Header'].apply(lambda x: x.split(' in ')[-1])
df['Area'].unique()

array(['Semenyih', 'Lenggeng', 'Broga', 'Kuala Kubu Baru', 'Kajang',
       'Bangi', 'Nilai', 'Sungai Pelek', 'Seri Kembangan', 'Cyberjaya',
       'Sepang', 'Seremban', 'Hulu Langat', 'Genting Highlands',
       'Bandar Baru Bangi', 'Kuala Lumpur', 'Beranang', 'Bentong',
       'Tanjong Karang', 'Cheras', 'Port Dickson', 'Balakong', 'Mantin',
       'Petaling Jaya', 'Putrajaya', 'Ampang Jaya', 'Alor Gajah',
       'Telok Panglima Garang', 'Sekinchan', 'Dengkil', 'Janda Baik',
       'Batu Caves', 'Subang Jaya', 'Puchong', 'Shah Alam', 'Ampang',
       'Batang Berjuntai', 'Serendah', 'Pantai'], dtype=object)

In [6]:
# Create a new column called Home_type
df['Home_type'] = df['Header'].apply(lambda x :x.split(' in ')[0])
df['Home_type']

0             Villa
1             Villa
2             Villa
3           Cottage
4           Cottage
           ...     
265       Farm stay
266    Private room
267          Chalet
268       Apartment
269           Condo
Name: Home_type, Length: 270, dtype: object

In [7]:
df['Home_type'].unique()

array(['Villa', 'Cottage', 'Home', 'Apartment', 'Bungalow', 'Condo',
       'Guesthouse', 'Vacation home', 'Tiny home', 'Private room',
       'Place to stay', 'Loft', 'Farm stay', 'Chalet', 'Treehouse',
       'Hotel room', 'Guest suite', 'Shipping container', 'Resort',
       'Cabin'], dtype=object)

In [8]:
df['Price_Per_Night'].unique()

array(['RM1,927 ', 'RM3,138 ', 'RM1,716 ', 'RM1,145 ', 'RM1,019 ',
       'RM916 ', 'RM358 ', 'RM137 ', 'RM356 ', 'RM890 ', 'RM2,054 ',
       'RM134 ', 'RM197 ', 'RM898', 'RM228 ', 'RM98 ', 'RM525 ', 'RM156 ',
       'RM225 ', 'RM222 ', 'RM256 ', 'RM300 ', 'RM253 ', 'RM110',
       'RM499 ', 'RM297 ', 'RM555 ', 'RM119 ', 'RM344 ', 'RM368 ',
       'RM123 ', 'RM79 ', 'RM85 ', 'RM64 ', 'RM195 ', 'RM86 ', 'RM94 ',
       'RM104 ', 'RM115 ', 'RM217 ', 'RM1,255 ', 'RM686', 'RM157 ',
       'RM99 ', 'RM141 ', 'RM1,096', 'RM97 ', 'RM85', 'RM345 ', 'RM124 ',
       'RM3,266 ', 'RM470 ', 'RM800 ', 'RM872 ', 'RM335 ', 'RM153 ',
       'RM114 ', 'RM776 ', 'RM2,995 ', 'RM194 ', 'RM133 ', 'RM105',
       'RM160 ', 'RM142 ', 'RM93 ', 'RM101', 'RM2,951 ', 'RM442 ',
       'RM131 ', 'RM262 ', 'RM1,860 ', 'RM3,566 ', 'RM155', 'RM1,232 ',
       'RM99', 'RM126 ', 'RM1,186', 'RM117 ', 'RM118 ', 'RM971 ',
       'RM121 ', 'RM1,917', 'RM136', 'RM325 ', 'RM111', 'RM155 ',
       'RM122 ', 'RM161', 'RM136 '

In [9]:
# Strip the price of the "RM" as well as commas
df['Price_Per_Night'] = df['Price_Per_Night'].apply(lambda x : x.replace(',' ,'').strip('RM'))
df['Price_Per_Night'] = df['Price_Per_Night'].astype(float)


In [10]:
df['Superhost'].unique()

array(['Superhost', nan], dtype=object)

In [11]:
# Convert Superhost into 1 and nan into 0
df['Superhost'] = df['Superhost'].apply(lambda x : 1 if x == 'Superhost' else 0)

In [12]:
# Check for unique values
df['Rating'].unique()

array(['4.86 (96)', '5.0 (15)', '4.6 (15)', '4.86 (22)', '4.5 (50)',
       '4.82 (124)', '4.89 (28)', '4.74 (116)', '4.9 (40)', '4.94 (80)',
       '4.83 (12)', '4.56 (18)', '4.85 (27)', '4.88 (199)', '4.86 (14)',
       '4.64 (42)', '4.76 (200)', '4.63 (43)', '4.96 (55)', '4.83 (30)',
       '5.0 (6)', '4.85 (33)', '5.0 (19)', '4.97 (39)', '4.95 (38)', nan,
       '4.71 (7)', '5.0 (4)', '4.85 (54)', '4.8 (5)', '4.51 (59)',
       '4.89 (9)', 'New', '4.0 (3)', '4.88 (42)', '4.78 (139)',
       '4.95 (80)', '4.65 (21)', '4.81 (186)', '4.88 (121)', '4.67 (18)',
       '4.42 (38)', '4.96 (47)', '4.53 (30)', '5.0 (18)', '4.9 (137)',
       '4.84 (92)', '4.89 (65)', '4.8 (324)', '4.84 (106)', '4.6 (5)',
       '4.5 (4)', '5.0 (5)', '4.84 (19)', '4.57 (7)', '4.83 (117)',
       '4.65 (139)', '4.55 (75)', '4.84 (200)', '4.86 (63)', '4.92 (13)',
       '5.0 (25)', '4.78 (59)', '4.71 (93)', '4.88 (272)', '4.88 (78)',
       '4.33 (6)', '4.57 (14)', '4.87 (100)', '4.2 (5)', '4.65 (104)',
      

In [13]:
# Remove the review from Rating columns  
df['Rating'] = df['Rating'].astype(str)
df['Rating'] = df['Rating'].apply(lambda x: float(x.split(' ')[0]) if x != "New" else None)


In [14]:
df['Rating'].unique()

array([4.86, 5.  , 4.6 , 4.5 , 4.82, 4.89, 4.74, 4.9 , 4.94, 4.83, 4.56,
       4.85, 4.88, 4.64, 4.76, 4.63, 4.96, 4.97, 4.95,  nan, 4.71, 4.8 ,
       4.51, 4.  , 4.78, 4.65, 4.81, 4.67, 4.42, 4.53, 4.84, 4.57, 4.55,
       4.92, 4.33, 4.87, 4.2 , 4.59, 4.46, 4.66, 4.38, 4.69, 4.77, 4.75,
       4.47, 4.79, 4.52, 4.44, 4.93, 4.39, 4.13, 4.58, 3.67])

In [15]:
df['Rating'].isna().sum()

76

In [16]:
# Replacing all the missing values at Rating column
df['Rating'].fillna(0,inplace=True)

In [17]:
df['Rating'].value_counts()

0.00    76
5.00    35
4.86    10
4.84    10
4.89     8
4.80     8
4.88     8
4.95     7
4.65     6
4.33     5
4.82     5
4.78     5
4.38     4
4.71     4
4.79     4
4.83     4
4.94     4
4.00     3
4.50     3
4.60     3
4.93     3
4.85     3
4.56     3
4.51     3
4.76     3
4.90     3
4.74     3
4.66     3
4.52     2
4.77     2
4.75     2
4.46     2
4.81     2
4.96     2
4.57     2
4.67     2
4.55     2
4.92     1
4.58     1
4.13     1
4.39     1
4.64     1
4.44     1
4.63     1
4.97     1
4.47     1
4.69     1
4.42     1
4.53     1
4.59     1
4.20     1
4.87     1
3.67     1
Name: Rating, dtype: int64

In [18]:
df['Review'].isna().sum()

76

In [19]:
# Check for unique values for Review column
df['Review'].unique()

array(['96 reviews', '15 reviews', '22 reviews', '50 reviews',
       '124 reviews', '28 reviews', '116 reviews', '40 reviews',
       '80 reviews', '12 reviews', '18 reviews', '27 reviews',
       '199 reviews', '14 reviews', '42 reviews', '200 reviews',
       '43 reviews', '55 reviews', '30 reviews', '6 reviews',
       '33 reviews', '19 reviews', '39 reviews', '38 reviews', nan,
       '7 reviews', '4 reviews', '54 reviews', '5 reviews', '59 reviews',
       '9 reviews', '3 reviews', '139 reviews', '21 reviews',
       '186 reviews', '121 reviews', '47 reviews', '137 reviews',
       '92 reviews', '65 reviews', '324 reviews', '106 reviews',
       '117 reviews', '75 reviews', '63 reviews', '13 reviews',
       '25 reviews', '93 reviews', '272 reviews', '78 reviews',
       '100 reviews', '104 reviews', '82 reviews', '67 reviews',
       '173 reviews', '32 reviews', '154 reviews', '17 reviews',
       '8 reviews', '36 reviews', '84 reviews', '83 reviews',
       '10 reviews', '192 r

In [20]:
# Remove reviews and fill the nan value with 0
df['Review'] = df['Review'].astype(str)
df['Review'] = df['Review'].apply(lambda x : x.replace(' reviews',''))
df['Review'] = df['Review'].astype(float)
df['Review'].fillna(0,inplace=True)


In [21]:
df['Review'].unique()

array([ 96.,  15.,  22.,  50., 124.,  28., 116.,  40.,  80.,  12.,  18.,
        27., 199.,  14.,  42., 200.,  43.,  55.,  30.,   6.,  33.,  19.,
        39.,  38.,   0.,   7.,   4.,  54.,   5.,  59.,   9.,   3., 139.,
        21., 186., 121.,  47., 137.,  92.,  65., 324., 106., 117.,  75.,
        63.,  13.,  25.,  93., 272.,  78., 100., 104.,  82.,  67., 173.,
        32., 154.,  17.,   8.,  36.,  84.,  83.,  10., 192.,  71.,  37.,
       254.,  29.,  53., 135., 118., 157.,  79.,  34.,  72.,  20.,  16.,
       136., 112., 110.,  46., 255.,  87.,  68.,  57.,  56.,  45.,  88.])

In [22]:
# Create new columns for Guests
df = df[df['Rooms_info'].notna()]    # remove NaN values in the Rooms_info column 
df['Rooms_info'] = df['Rooms_info'].astype(str)
df['Guests'] = df['Rooms_info'].apply(lambda x: x.split(' ')[0].replace('+', '') if '+' not in x.split(' ')[0] else '16+')

# df['Guests'] = df['Guests'].apply(lambda x: 16 if x == '16+' else int(x))
df['Guests'] = df['Guests'].apply(lambda x: 16 if x == "16+" else int(x) if x != 'nan' else x)

# Convert Guests datatype from str to float
df['Guests'] = df['Guests'].astype(float)

# Fill the Nan values with 0
df['Guests'].fillna(0,inplace=True)



In [23]:
df['Rooms_info'].unique()

array(['16+ guests 4 bedrooms 15 beds 3 baths',
       '16+ guests 5 bedrooms 22 beds 6 baths',
       '16+ guests 6 bedrooms 6 beds 4 baths',
       '8 guests 2 bedrooms 8 beds 5 baths',
       '8 guests 2 bedrooms 8 beds 4.5 baths',
       '12 guests 4 bedrooms 5 beds 3 baths',
       '8 guests 3 bedrooms 4 beds 2 baths',
       '7 guests 3 bedrooms 8 beds 2.5 baths',
       '14 guests 5 bedrooms 8 beds 3 baths',
       '10 guests 4 bedrooms 4 beds 3 baths',
       '16+ guests 2 bedrooms 20 beds 5 baths',
       '6 guests 3 bedrooms 4 beds 2 baths',
       '10 guests 3 bedrooms 8 beds 3 baths',
       '4 guests 3 bedrooms 4 beds 2 baths',
       '2 guests 1 bedroom 1 bed 1 bath',
       '6 guests 2 bedrooms 4 beds 1 bath',
       '4 guests Studio 2 beds 1 bath',
       '5 guests 3 bedrooms 3 beds 2 baths',
       '7 guests 3 bedrooms 4 beds 2 baths',
       '4 guests 2 bedrooms 2 beds 1 bath',
       '6 guests 3 bedrooms 3 beds 2 baths',
       '7 guests Studio 3 beds 1 bath',
      

In [24]:
# Create new columns for Bedrooms
df['Bedrooms'] = df['Rooms_info'].apply(lambda x: 1 if len(x.split(' ')) < 3 else (1 if x.split(' ')[2] == 'Studio' else int(x.split(' ')[2])))



In [25]:
# Create new columns for Beds
# df['Beds'] = df['Rooms_info'].apply(lambda x : x.split(' ')[-4])
df['Beds'] = df['Rooms_info'].apply(lambda x: int(x.split(' ')[-4]) if (len(x.split(' ')) >= 5 and x.split(' ')[-4].isdigit()) else None)
df['Beds'].fillna(value=1, inplace=True)



In [26]:
# Create new columns for Bathroom
df['Rooms_info2'] = df['Rooms_info'].apply(lambda x: str(x.lower()))
df['Rooms_info2'] = df['Rooms_info'].apply(lambda x: x.replace('shared','').replace('share','').replace('private','').replace('Shared',''))
df['Bathroom'] = df['Rooms_info2'].apply(lambda x: float(x.split(' ')[-2]) if x.split(' ')[-2] != '' else 1)



In [27]:
# Check for duplicated row and drop the duplicate row
df.duplicated().sum()
df.drop_duplicates(inplace=True)

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 226 entries, 0 to 269
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             226 non-null    object 
 1   Header           226 non-null    object 
 2   Price_Per_Night  226 non-null    float64
 3   Superhost        226 non-null    int64  
 4   Rating           226 non-null    float64
 5   Review           226 non-null    float64
 6   Rooms_info       226 non-null    object 
 7   Link             226 non-null    object 
 8   Area             226 non-null    object 
 9   Home_type        226 non-null    object 
 10  Guests           226 non-null    float64
 11  Bedrooms         226 non-null    int64  
 12  Beds             226 non-null    float64
 13  Rooms_info2      226 non-null    object 
 14  Bathroom         226 non-null    float64
dtypes: float64(6), int64(2), object(7)
memory usage: 28.2+ KB


## Get the longtitude and latitude coordinate for each Area

In [33]:
#Add lat & long into the dataframe
df['Latitude'] =np.nan
df['Longitude'] =np.nan
df.head(5)



Unnamed: 0,Name,Header,Price_Per_Night,Superhost,Rating,Review,Rooms_info,Link,Area,Home_type,Guests,Bedrooms,Beds,Rooms_info2,Bathroom,Latitude,Longitude
0,"TTS Lake Villa @ Broga, Semenyih",Villa in Semenyih,1927.0,1,4.86,96.0,16+ guests 4 bedrooms 15 beds 3 baths,https://www.airbnb.com/rooms/52588088?adults=1...,Semenyih,Villa,16.0,4,15.0,16+ guests 4 bedrooms 15 beds 3 baths,3.0,,
1,"TTS Beach Village @ Broga, Semenyih",Villa in Semenyih,3138.0,1,5.0,15.0,16+ guests 5 bedrooms 22 beds 6 baths,https://www.airbnb.com/rooms/73086887394461725...,Semenyih,Villa,16.0,5,22.0,16+ guests 5 bedrooms 22 beds 6 baths,6.0,,
2,Mountain view villa with entertainment room,Villa in Semenyih,1716.0,0,4.6,15.0,16+ guests 6 bedrooms 6 beds 4 baths,https://www.airbnb.com/rooms/61284459437053711...,Semenyih,Villa,16.0,6,6.0,16+ guests 6 bedrooms 6 beds 4 baths,4.0,,
3,"Wabi Sabian Broga 侘寂の居, わび・さびのい organic farmstay",Cottage in Lenggeng,1145.0,0,4.86,22.0,8 guests 2 bedrooms 8 beds 5 baths,https://www.airbnb.com/rooms/60416724063512941...,Lenggeng,Cottage,8.0,2,8.0,8 guests 2 bedrooms 8 beds 5 baths,5.0,,
4,"Forest House Broga 森の居シンのい sh-in-no-i, Semenyih",Cottage in Lenggeng,1019.0,0,4.5,50.0,8 guests 2 bedrooms 8 beds 4.5 baths,https://www.airbnb.com/rooms/45835146?adults=1...,Lenggeng,Cottage,8.0,2,8.0,8 guests 2 bedrooms 8 beds 4.5 baths,4.5,,


In [35]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='Malaysia')
location = []
i=0
for d in df['Area']:
    address = d+',  MY'    
    location.append(geolocator.geocode(address))
    if location[i] is None:
        print('Coordinates of ', d, ' are missing')
    else :
        print('Coordinates of ', d, ' are:', location[i].latitude, location[i].longitude)
        df['Latitude'].iloc[i] = geolocator.geocode(address).latitude
        df['Longitude'].iloc[i] = geolocator.geocode(address).longitude
    i=i+1

Coordinates of  Semenyih  are: 2.9473911 101.8459911


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Latitude'].iloc[i] = geolocator.geocode(address).latitude
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Longitude'].iloc[i] = geolocator.geocode(address).longitude


Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Lenggeng  are: 2.861427 101.943388
Coordinates of  Lenggeng  are: 2.861427 101.943388
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Broga  are: 2.9365953 101.9111952
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Kuala Kubu Baru  are: 3.6371407 101.5730051
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Kuala Kubu Baru  are: 3.6371407 101.5730051
Coordinates of  Kajang  are: 2.9948437 101.7896595
Coordinates of  Semenyih  are: 2.9473911 101.8459911
Coordinates of  Semenyih  are: 2.9473911 

Coordinates of  Genting Highlands  are: 3.4237725 101.7935591
Coordinates of  Lenggeng  are: 2.861427 101.943388
Coordinates of  Cyberjaya  are: 2.9338908 101.6455587
Coordinates of  Kuala Lumpur  are: 3.1516964 101.6942371
Coordinates of  Genting Highlands  are: 3.4237725 101.7935591
Coordinates of  Port Dickson  are: 2.5228943 101.7945134
Coordinates of  Kuala Lumpur  are: 3.1516964 101.6942371
Coordinates of  Cyberjaya  are: 2.9338908 101.6455587
Coordinates of  Kajang  are: 2.9948437 101.7896595
Coordinates of  Kajang  are: 2.9948437 101.7896595
Coordinates of  Balakong  are: 3.0291306 101.7472119
Coordinates of  Alor Gajah  are: 2.3834528 102.2107805
Coordinates of  Kuala Lumpur  are: 3.1516964 101.6942371
Coordinates of  Dengkil  are: 2.8680243 101.6756275
Coordinates of  Bangi  are: 2.90398245 101.78602883608005
Coordinates of  Putrajaya  are: 2.9140567 101.6838531
Coordinates of  Dengkil  are: 2.8680243 101.6756275
Coordinates of  Bandar Baru Bangi  are: 2.9540793 101.781001797

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 226 entries, 0 to 269
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             226 non-null    object 
 1   Header           226 non-null    object 
 2   Price_Per_Night  226 non-null    float64
 3   Superhost        226 non-null    int64  
 4   Rating           226 non-null    float64
 5   Review           226 non-null    float64
 6   Rooms_info       226 non-null    object 
 7   Link             226 non-null    object 
 8   Area             226 non-null    object 
 9   Home_type        226 non-null    object 
 10  Guests           226 non-null    float64
 11  Bedrooms         226 non-null    int64  
 12  Beds             226 non-null    float64
 13  Rooms_info2      226 non-null    object 
 14  Bathroom         226 non-null    float64
 15  Latitude         226 non-null    float64
 16  Longitude        226 non-null    float64
dtypes: float64(8), i

In [36]:
df.tail()

Unnamed: 0,Name,Header,Price_Per_Night,Superhost,Rating,Review,Rooms_info,Link,Area,Home_type,Guests,Bedrooms,Beds,Rooms_info2,Bathroom,Latitude,Longitude
264,Aurora Home with Private Pool,Villa in Port Dickson,1690.0,1,4.93,28.0,16+ guests 4 bedrooms 6 beds 5 baths,https://www.airbnb.com/rooms/47280968?adults=1...,Port Dickson,Villa,16.0,4,6.0,16+ guests 4 bedrooms 6 beds 5 baths,5.0,2.522894,101.794513
265,Rumah Kebun @ Trigofarm Retreat,Farm stay in Mantin,1315.0,1,4.84,45.0,12 guests 3 bedrooms 6 beds 3 baths,https://www.airbnb.com/rooms/35548563?adults=1...,Mantin,Farm stay,12.0,3,6.0,12 guests 3 bedrooms 6 beds 3 baths,3.0,2.825719,101.893793
266,Jiaxin Homestay Semenyih Broga Kajang Selangor,Private room in Semenyih,207.0,0,0.0,0.0,2 guests 1 bedroom 1 bed 1.5 baths,https://www.airbnb.com/rooms/34833256?adults=1...,Semenyih,Private room,2.0,1,1.0,2 guests 1 bedroom 1 bed 1.5 baths,1.5,2.947391,101.845991
267,Tokido Executive Water Villa Private Unit,Chalet in Port Dickson,470.0,0,4.74,88.0,4 guests 1 bedroom 2 beds 1 bath,https://www.airbnb.com/rooms/37679839?adults=1...,Port Dickson,Chalet,4.0,1,2.0,4 guests 1 bedroom 2 beds 1 bath,1.0,2.522894,101.794513
269,Cozy Little Muji Home Kepong Nice View MRT E03A D,Condo in Kuala Lumpur,230.0,1,0.0,0.0,7 guests 3 bedrooms 4 beds 2 baths,https://www.airbnb.com/rooms/81712786780072672...,Kuala Lumpur,Condo,7.0,3,4.0,7 guests 3 bedrooms 4 beds 2 baths,2.0,3.151696,101.694237


In [39]:
#Save Preprocessed Data
df.to_csv('Airbnb_cleanv1.csv')