# Scraping Process

## Importing the Library

In [1]:
import pandas as pd 
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager

## Make the driver and Loading the addresss

In [2]:
driver = webdriver.Chrome()
url = 'https://www.travelio.com/search?searchType=monthly&destinationCategory=City&destinationUrlName=&destinationPlaceId=&destinationCountryId=ID&destinationId=544a05f190e50d6a3d000001&nights=30&flexible=1&destination=Jakarta&checkIn=01-04-2023&checkOut=01-05-2023&months=1&propTypeId=apartment&cbFlexible=on&unitType=3%2C2%2C1%2Cstudio&sellType=Unfurnished%2CFull%2BFurnished&bottomPrice=0&upperPrice=20000000'
driver.get(url)
# keep track of the scroll height
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # scroll to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    
    # wait for the page to load
    time.sleep(5)
    
    # calculate the new scroll height and compare to the previous scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    
    # if the new scroll height is the same as the previous scroll height, we've reached the end of the page
    if new_height == last_height:
        break
    
    # update the last scroll height
    last_height = new_height


## Scrap the url for each unit page

In [3]:
unit_selections = driver.find_elements(By.CSS_SELECTOR, '.property-box a')
unit_link = []
for i in unit_selections:
    url = i.get_attribute('href')
    unit_link.append({'url': url})

unit_link_df = pd.DataFrame(unit_link)
unit_link_df

Unnamed: 0,url
0,https://www.travelio.com/property/jakarta/comf...
1,https://www.travelio.com/property/jakarta/cozy...
2,https://www.travelio.com/property/jakarta/comf...
3,https://www.travelio.com/property/jakarta/comf...
4,https://www.travelio.com/property/jakarta/comf...
...,...
3139,https://www.travelio.com/property/jakarta/unfu...
3140,https://www.travelio.com/property/jakarta/1br-...
3141,https://www.travelio.com/property/jakarta/unfu...
3142,https://www.travelio.com/property/jakarta/unfu...


## Scrap the Data

In [75]:
def get_text_or_default(driver, xpath, default='N/A'):
    try:
        element = driver.find_element(By.XPATH, xpath)
        return element.text
    except NoSuchElementException:
        return default

#unit_df = []

for m in range(3021,len(unit_link_df)):
    driver.get(unit_link_df['url'][m])
    
    button_appeared = False
    while not button_appeared:
        try:
            # Wait for the close button to be clickable
            wait = WebDriverWait(driver, 15)
            close_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'close-button')))
            
            # Click the close button
            close_button.click()
            time.sleep(2)

            button_appeared = True
            
        except TimeoutException:
            # Reload the page if the button hasn't appeared within 30 seconds
            driver.refresh()
    # Get all element that we want        
    unit_name = get_text_or_default(driver, '//*[@id="hotel-name"]/div[1]/h2')
    furnish_status = get_text_or_default(driver, '//*[@id="hotel-name"]/div[2]/span[1]')
    num_rooms = get_text_or_default(driver, '//*[@id="hotel-name"]/div[2]/span[2]')
    num_bathroom = get_text_or_default(driver, '//*[@id="hotel-name"]/div[2]/span[3]')
    elec_power = get_text_or_default(driver, '//*[@id="hotel-name"]/div[2]/span[4]')
    location = get_text_or_default(driver, '//*[@id="hotel-address"]/span[1]')
    size = get_text_or_default(driver, '//*[@id="hotel-room-detail"]/div[2]/div[1]/div[2]')
    room_type = get_text_or_default(driver, '//*[@id="hotel-room-detail"]/div[2]/div[3]/div[2]')
    property_type = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[2]/div[1]/div[2]')
    building = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[2]/div[2]/div[1]/div[2]')
    tower =  get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[2]/div[2]/div[2]/div[2]')
    floor_num = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[2]/div[2]/div[3]/div[2]') 
    room_facility = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[3]/div/div[2]/div[2]')
    kitchen_facility = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[3]/div/div[2]/div[4]')
    building_facility = get_text_or_default(driver, '//*[@id="page-body-left"]/div[5]/div[4]/div/div[2]')
    clean_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[1]/div[3]')
    location_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[3]/div[3]')
    satis_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[5]/div[3]')
    check_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[2]/div[3]')
    econom_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[2]/div[3]')
    staff_rating = get_text_or_default(driver, '//*[@id="review-list-container"]/div[2]/div[4]/div[3]')
    total_rating = get_text_or_default(driver, '//*[@id="property-rate"]/b')
    longitude = driver.find_element(By.XPATH, '//*[@id="longitude"]')
    latitude = driver.find_element(By.XPATH, '//*[@id="latitude"]')
    longitude_value = longitude.get_attribute('value')
    latitude_value = latitude.get_attribute('value')
    monthly_price = get_text_or_default(driver, '//*[@id="price-calculation"]/div[1]/div[2]') 
    
    unit_df.append({'unit_name': unit_name,
                    'furnish_status': furnish_status,
                    'num_rooms': num_rooms,
                    'num_bathroom': num_bathroom,
                    'elec_power': elec_power,
                    'location': location,
                    'longitude': longitude_value,
                    'latitude': latitude_value,
                    'size': size,
                    'room_type': room_type,
                    'property_type': property_type,
                    'building': building,
                    'tower': tower,
                    'floor_num': floor_num,
                    'room_facility': room_facility,
                    'kitchen_facility': kitchen_facility,
                    'building_facility': building_facility,
                    'clean_rating': clean_rating,
                    'location_rating': location_rating,
                    'satisfaction_rating': satis_rating,
                    'checkin/out_rating': check_rating,
                    'economic_rating': econom_rating,
                    'staff_rating': staff_rating, 
                    'total_rating': total_rating,
                    'monthly_price': monthly_price})


# Data Cleaning process

## Convert to Data Frame

In [78]:
pd.options.display.max_columns = None  

In [177]:
df = pd.DataFrame(unit_df)

In [178]:
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1,3600 Watt,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.84415861964226,-6.263363578451597,51.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1,1300 Watt,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.0420121,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1,1300 Watt,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.89268112182617,-6.175350308134791,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1,1300 Watt,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1,1200 Watt,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.70166358351707,-6.146017734755382,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,3500 Watt,,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.72831267118454,-6.138466658464273,38,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,5500 Watt,,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.85455620288849,-6.148443176931242,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1300 Watt,,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.85046046972275,-6.256900672828798,35.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,3500 Watt,,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.84432625770569,-6.248592628891698,70,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [179]:
df.isnull().sum()

unit_name              0
furnish_status         0
num_rooms              0
num_bathroom           0
elec_power             0
location               0
longitude              0
latitude               0
size                   0
room_type              0
property_type          0
building               0
tower                  0
floor_num              0
room_facility          0
kitchen_facility       0
building_facility      0
clean_rating           0
location_rating        0
satisfaction_rating    0
checkin/out_rating     0
economic_rating        0
staff_rating           0
total_rating           0
monthly_price          0
dtype: int64

In [157]:
df.dtypes

unit_name              object
furnish_status         object
num_rooms              object
num_bathroom           object
elec_power             object
location               object
longitude              object
latitude               object
size                   object
room_type              object
property_type          object
building               object
tower                  object
floor_num              object
room_facility          object
kitchen_facility       object
building_facility      object
clean_rating           object
location_rating        object
satisfaction_rating    object
checkin/out_rating     object
economic_rating        object
staff_rating           object
total_rating           object
monthly_price          object
dtype: object

In [181]:
try_df = df[['num_bathroom','elec_power']]
try_df

Unnamed: 0,num_bathroom,elec_power
0,1,3600 Watt
1,1,1300 Watt
2,1,1300 Watt
3,1,1300 Watt
4,1,1200 Watt
...,...,...
3138,3500 Watt,
3139,5500 Watt,
3140,1300 Watt,
3141,3500 Watt,


In [182]:
try_df['elec_power'] = try_df.apply(lambda row: row['num_bathroom'] if row['elec_power'] == 'N/A' else row['elec_power'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df['elec_power'] = try_df.apply(lambda row: row['num_bathroom'] if row['elec_power'] == 'N/A' else row['elec_power'], axis=1)


In [184]:
df['elec_power'] = try_df['elec_power']

In [186]:
try_df = df[['num_rooms','num_bathroom']]
import re

# Define a regular expression pattern to match numeric values
pattern = r'\d+'

# Define a function to extract the numeric portion of a string and convert it to an integer
def extract_number(s):
    match = re.search(pattern, s)
    if match:
        return int(match.group())
    else:
        return None

# Apply the function to the 'num_bathroom' column to extract the numeric values
try_df['num_bathroom'] = try_df['num_bathroom'].apply(extract_number)

try_df['num_bathroom'] = try_df.apply(lambda x: x['num_rooms'] if x['num_bathroom'] not in [1, 2, 3] else x['num_bathroom'], axis=1)

try_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df['num_bathroom'] = try_df['num_bathroom'].apply(extract_number)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df['num_bathroom'] = try_df.apply(lambda x: x['num_rooms'] if x['num_bathroom'] not in [1, 2, 3] else x['num_bathroom'], axis=1)


Unnamed: 0,num_rooms,num_bathroom
0,2,1.0
1,1,1.0
2,1,1.0
3,2,1.0
4,1,1.0
...,...,...
3138,1,1
3139,1,1
3140,1,1
3141,1,1


In [187]:
try_df['num_bathroom'].value_counts()

1.0    2528
2.0     314
1       277
2        12
3.0      10
3         2
Name: num_bathroom, dtype: int64

In [188]:
df['num_bathroom'] = try_df['num_bathroom']
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600 Watt,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.84415861964226,-6.263363578451597,51.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300 Watt,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.0420121,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300 Watt,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.89268112182617,-6.175350308134791,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300 Watt,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200 Watt,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.70166358351707,-6.146017734755382,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1,3500 Watt,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.72831267118454,-6.138466658464273,38,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1,5500 Watt,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.85455620288849,-6.148443176931242,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1,1300 Watt,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.85046046972275,-6.256900672828798,35.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1,3500 Watt,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.84432625770569,-6.248592628891698,70,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [189]:
df['elec_power'] = df['elec_power'].str.replace('Watt', '')
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.84415861964226,-6.263363578451597,51.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.0420121,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.89268112182617,-6.175350308134791,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.70166358351707,-6.146017734755382,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1,3500,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.72831267118454,-6.138466658464273,38,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1,5500,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.85455620288849,-6.148443176931242,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1,1300,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.85046046972275,-6.256900672828798,35.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1,3500,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.84432625770569,-6.248592628891698,70,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [191]:
df = df.replace('N/A', np.nan)
df.isnull().sum()

unit_name                0
furnish_status           0
num_rooms                0
num_bathroom             0
elec_power               5
location                 0
longitude                0
latitude                 0
size                    16
room_type               18
property_type           13
building                89
tower                  126
floor_num              149
room_facility          119
kitchen_facility       131
building_facility      105
clean_rating           694
location_rating        694
satisfaction_rating    694
checkin/out_rating     694
economic_rating        694
staff_rating           694
total_rating           693
monthly_price            0
dtype: int64

In [192]:
# convert to numeric
df['num_rooms'] = pd.to_numeric(df['num_rooms'], errors='coerce')
df['num_bathroom'] = pd.to_numeric(df['num_bathroom'], errors='coerce')
df['elec_power'] = pd.to_numeric(df['elec_power'], errors='coerce')

# fill null values
df['num_rooms'] = df['num_rooms'].fillna(value=np.nan)
df['num_bathroom'] = df['num_bathroom'].fillna(value=np.nan)
df['elec_power'] = df['elec_power'].fillna(value=np.nan)

In [195]:
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600.0,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.84415861964226,-6.263363578451597,51.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300.0,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.0420121,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300.0,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.89268112182617,-6.175350308134791,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200.0,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.70166358351707,-6.146017734755382,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1.0,3500.0,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.72831267118454,-6.138466658464273,38,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1.0,5500.0,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.85455620288849,-6.148443176931242,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1.0,1300.0,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.85046046972275,-6.256900672828798,35.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1.0,3500.0,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.84432625770569,-6.248592628891698,70,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [196]:
df.fillna(value=np.nan, inplace=True)
df.isnull().sum()

unit_name                0
furnish_status           0
num_rooms                0
num_bathroom             0
elec_power               5
location                 0
longitude                0
latitude                 0
size                    16
room_type               18
property_type           13
building                89
tower                  126
floor_num              149
room_facility          119
kitchen_facility       131
building_facility      105
clean_rating           694
location_rating        694
satisfaction_rating    694
checkin/out_rating     694
economic_rating        694
staff_rating           694
total_rating           693
monthly_price            0
dtype: int64

In [213]:
df['longitude'] = df['longitude'].astype(float)
df['latitude'] = df['latitude'].astype(float)
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600.0,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.844159,-6.263364,51.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300.0,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.042012,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300.0,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.892681,-6.175350,21,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.874840,-6.188552,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200.0,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.701664,-6.146018,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1.0,3500.0,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.728313,-6.138467,38,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1.0,5500.0,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.854556,-6.148443,33,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1.0,1300.0,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.850460,-6.256901,35.5,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1.0,3500.0,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.844326,-6.248593,70,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [216]:
df['size'] = df['size'].astype(float)
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600.0,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.844159,-6.263364,51.50,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,Access Card\nElevator,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 7,536,412"
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300.0,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.042012,21.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove (Not Portable)\nRefrigerator\nRice C...,24-Hour Security\nJogging Track\nPlayground\nS...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,"IDR 3,320,000"
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300.0,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.892681,-6.175350,21.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Electric Stove\nRefrigerator\nRice Cooker\nGla...,24-Hour Security\nCCTV In Public Area\nElevato...,4.8,5.0,4.8,5.0,5.0,4.6,4.6,"IDR 3,470,000"
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.874840,-6.188552,33.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20,Queen Bed (160x200)\nMattress Protector 200x20...,Single Bed (90x200)\nMattress Protector 200x20...,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 5,199,315"
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200.0,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.701664,-6.146018,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,03,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Induction Stove (2 Furnace)\nRefrigerator\nRic...,24-Hour Security\nCCTV In Public Area\nFitness...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 4,405,470"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1.0,3500.0,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.728313,-6.138467,38.00,Seluruh Rumah / Apartemen,Apartment,Apartemen Green Sedayu,Pasadena,16,Air Conditioner\nAC Remote\nWardrobe,Kitchen Cabinet\nWashing Machine,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,533,000"
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1.0,5500.0,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.854556,-6.148443,33.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,24-Hour Security\nATM On Site\nAccess Card\nCC...,,,,,,,,"IDR 3,033,999"
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1.0,1300.0,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.850460,-6.256901,35.50,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20,Air Conditioner\nAC Remote,Toilet Bowl\nShower,ATM On Site\nAccess Card\nCafe\nElevator\nLaun...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 3,050,000"
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1.0,3500.0,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.844326,-6.248593,70.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,292912,Air Conditioner\nAC Remote,Air Conditioner\nAC Remote,ATM On Site\nAccess Card\nCafe\nElevator\nFitn...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 8,049,999"


In [243]:
# Select rows that meet the condition
mask = ~df['property_type'].str.contains('Apartemen', na=False)
df.loc[mask, 'property_type'] = 'Apartemen'

In [248]:
def extract_max_number(val):
    if pd.isna(val):
        return None
    elif isinstance(val, int):
        val = str(val)
    if ',' in val:
        num_list = val.split(',')
        try:
            return max(list(map(int, num_list)))
        except ValueError:
            return None
    else:
        try:
            return int(val)
        except ValueError:
            return None

In [249]:
df['floor_num'] = df['floor_num'].apply(extract_max_number)
df['floor_num'] = df['floor_num'].fillna(np.nan)
df['floor_num'].unique()

array([1.100e+01, 1.600e+01, 2.200e+01, 2.000e+01, 3.000e+00, 2.800e+01,
       6.000e+00, 1.900e+01, 1.500e+01, 8.000e+00, 9.000e+00, 3.000e+01,
       2.300e+01, 7.000e+00, 3.300e+01, 4.000e+00,       nan, 2.700e+01,
       1.800e+01, 2.000e+00, 1.700e+01, 1.200e+01, 3.100e+01, 1.000e+01,
       3.700e+01, 2.100e+01, 1.000e+00, 3.500e+01, 2.900e+01, 3.600e+01,
       3.200e+01, 2.500e+01, 5.000e+00, 2.600e+01, 3.800e+01, 3.900e+01,
       1.400e+01, 4.500e+01, 5.000e+01, 5.100e+01, 5.700e+01, 4.200e+01,
       4.100e+01, 2.400e+01, 4.000e+01, 2.102e+03, 4.600e+01])

In [252]:
df.loc[df['floor_num'] == 2102.0] 

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
2263,Strategic 1BR Apartment at Casa Grande Residen...,Full Furnished,1,1.0,2200.0,"Lobby Office 88 Kasablanka, Jl. Raya Casablanc...",106.841813,-6.223437,54.0,Seluruh Rumah / Apartemen,Apartemen,Apartemen Casa Grande Residence,Montana,2102.0,Bowl\nChopping Board\nColander\nCooking Knife\...,Dustbin\nAir Conditioner\nTV\nSofa\nSofa Pillo...,24-Hour Security\n24-Hours Frontdesk\nATM On S...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,"IDR 12,870,526"


In [254]:
df['floor_num'][2263] = 21

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['floor_num'][2263] = 21


In [258]:
def clean_price(price_str):
    if price_str and isinstance(price_str, str):
        price_str = price_str.replace(',', '').replace('IDR', '').strip()
        return int(price_str)
    else:
        return None

df['monthly_price'] = df['monthly_price'].apply(clean_price)

In [265]:
null_monthly_price = df[df['monthly_price'].isnull()]
null_monthly_price

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
304,Comfort and Best Deal 2BR at Gading Icon Apart...,Full Furnished,2,1.0,2200.0,"Jl. Perintis Kemerdekaan, No.Kav. 99, RT.1/RW....",106.904402,-6.180897,,,Apartemen,Apartemen Gading Icon,2B,17.0,Queen Bed (160x200)\nPillow\nMirror\nWardrobe\...,Single Bed (100x200)\nPillow\nExhaust Fan\nBed...,ATM On Site\nAccess Card\nCCTV In Public Area\...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
327,Nice and Cozy Studio at Green Pramuka City Apa...,Full Furnished,1,1.0,1300.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,,,Apartemen,Apartemen Green Pramuka City,C,6.0,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove\nRefrigerator\nRice Cooker\nGlass Wi...,24-Hour Security\nATM On Site\nAccess Card\nCC...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
444,Nice and Elegant Studio at Puri Mansion Apartm...,Full Furnished,1,1.0,1300.0,"Jl. Lkr. Luar Barat, RT.8/RW.2, Kembangan Sela...",106.723487,-6.181153,,,Apartemen,Apartemen Puri Mansion,Amethyst,35.0,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Gas Stove\nRefrigerator\nRice Cooker\nGlass Wi...,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
747,Simply Furnished 2BR Apartment at Puri Park Vi...,Full Furnished,2,1.0,1300.0,"Jl. Pesanggrahan Blok AAA 1 No.6, RT.9/RW.5, M...",106.756736,-6.193067,,,Apartemen,Apartemen Puri Park View,E,25.0,Dustbin\nShower\nToilet Brush\nToilet Bowl\nEx...,Single Bed (100x200)\nPillow\nHanger\nCurtain\...,24-Hour Security\nAccess Card\nCCTV Public Are...,4.7,4.8,4.7,4.7,4.7,4.8,4.7,
845,Serene and Minimalist 2BR at Bassura City Apar...,Full Furnished,3,1.0,1300.0,"Jl. Jend. Basuki Rachmat No.1A, Cipinang Besar...",106.88039,-6.22276,,,Apartemen,Apartemen Bassura City,E,21.0,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Trundle Bed (Single Bed & Single Underbed)\nPi...,24-Hour Security\n24-Hours Frontdesk\nATM On S...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
978,Fancy and Nice 2BR at Puri Park View Apartment...,Full Furnished,2,1.0,1300.0,"Jl. Pesanggrahan Blok AAA 1 No.6, RT.9/RW.5, M...",106.756736,-6.193067,,,Apartemen,Apartemen Puri Park View,AC,20.0,Queen Bed (160x200)\nPillow\nMirror\nWardrobe\...,Super Single Bed (120x200)\nPillow\nWardrobe\n...,24-Hour Security\nAccess Card\nCCTV Public Are...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
1274,Homey and Elegant 2BR at Bintaro Park View Apa...,Full Furnished,2,1.0,2200.0,"Jl. Bintaro Permai No.5, RW.3, Pesanggrahan, P...",106.758716,-6.25802,,,Apartemen,Apartemen Bintaro Park View,B,25.0,Queen Bed (160x200)\nPillow\nWardrobe\nAir Con...,Single Bed (100x200)\nPillow\nWardrobe\nAir Co...,24-Hour Security\nAccess Card\nCCTV In Public ...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
1310,Cozy Nice Studio at Signature Park Grande Apar...,Full Furnished,1,1.0,1300.0,"Jl. Letjen Mt. Haryono No.Kav. 20, RT.4/RW.1, ...",106.866084,-6.244828,,,Apartemen,Apartemen Signature Park Grande,Green TA,11.0,Queen Bed (160x200)\nMattress Protector 200x20...,Gas Stove\nRefrigerator\nRice Cooker\nGlass Wi...,24-Hour Security\nATM On Site\nAccess Card\nCC...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
1602,Vibrant 2BR with Sofa Bed Apartment at Green P...,Full Furnished,3,1.0,2200.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.87484,-6.188552,,,Apartemen,Apartemen Green Pramuka City,Penelope,29.0,Queen Bed (160x200)\nPillow\nMirror\nWardrobe\...,Trundle Bed (Single Bed & Single Underbed)\nPi...,24-Hour Security\nATM On Site\nAccess Card\nCC...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,
1715,Well Appointed 1BR at The Wave Kuningan Apartm...,Full Furnished,1,1.0,4400.0,"Jl. Epicentrum Boulevard Timur, RT.5/RW.1, Kun...",106.837781,-6.214122,,,Apartemen,Apartemen The Wave Kuningan,C,28.0,Queen Bed (160x200)\nWardrobe\nAir Conditioner...,Sofa\nTV\nBlackout Curtain\nAir Conditioner\nA...,24-Hour Security\nAccess Card\nCCTV In Public ...,4.9,4.9,4.9,4.9,4.9,4.9,4.9,


In [296]:
df['monthly_price'][304] = 4209787
df['monthly_price'][327] = 3620000
df['monthly_price'][444] = 3820000
df['monthly_price'][747] = 4121713
df['monthly_price'][845] = 4950659
df['monthly_price'][978] = 4665000
df['monthly_price'][1274] = 5256285
df['monthly_price'][1310] = 5321706
df['monthly_price'][1602] = np.nan
df['monthly_price'][1715] = 6620000
df['monthly_price'][2176] = np.nan
df['monthly_price'][2319] = 14478600
df['monthly_price'][2377] = 15510000
df['monthly_price'][2391] = 16105270
df['monthly_price'][2421] = np.nan
df['monthly_price'][3047] = 3620000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['monthly_price'][304] = 4209787
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['monthly_price'][327] = 3620000
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['monthly_price'][444] = 3820000
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['monthly_price'][747] = 4121713
A value is trying to be set 

In [300]:
df['clean_rating'] = df['clean_rating'].astype(float)
df['location_rating'] = df['clean_rating'].astype(float)
df['satisfaction_rating'] = df['clean_rating'].astype(float)
df['checkin/out_rating'] = df['clean_rating'].astype(float)
df['economic_rating'] = df['clean_rating'].astype(float)
df['staff_rating'] = df['clean_rating'].astype(float)
df['total_rating'] = df['clean_rating'].astype(float)

In [304]:
df = df.replace('\n', ',', regex=True)

In [305]:
df

Unnamed: 0,unit_name,furnish_status,num_rooms,num_bathroom,elec_power,location,longitude,latitude,size,room_type,property_type,building,tower,floor_num,room_facility,kitchen_facility,building_facility,clean_rating,location_rating,satisfaction_rating,checkin/out_rating,economic_rating,staff_rating,total_rating,monthly_price
0,Comfort 2BR at Nifarro Park Apartment By Trave...,Full Furnished,2,1.0,3600.0,"Jl. Raya Pasar Minggu, RT.1/RW.1, Pejaten Timu...",106.844159,-6.263364,51.50,Seluruh Rumah / Apartemen,Apartemen,Apartemen Nifarro Park,A,11.0,"Queen Bed (160x200),Pillow,Wardrobe,Air Condit...","Single Bed (100x200),Pillow,Wardrobe,Air Condi...","Access Card,Elevator",5.0,5.0,5.0,5.0,5.0,5.0,5.0,7536412.0
1,Cozy Stay Studio Tokyo Riverside PIK 2 Apartme...,Full Furnished,1,1.0,1300.0,"Lemo, Kabupaten Tangerang, Banten, Indonesia",106.692587,-6.042012,21.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tokyo Riverside PIK 2,AKHIBARA,16.0,"Queen Bed (160x200),Wardrobe,Air Conditioner,C...","Gas Stove (Not Portable),Refrigerator,Rice Coo...","24-Hour Security,Jogging Track,Playground,Supe...",4.9,4.9,4.9,4.9,4.9,4.9,4.9,3320000.0
2,Comfort Studio Room at Tifolia Apartment By Tr...,Full Furnished,1,1.0,1300.0,"Jl. Perintis Kemerdekaan No.23, RW.16, Kayu Pu...",106.892681,-6.175350,21.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Tifolia,TF,22.0,"Queen Bed (160x200),Wardrobe,Air Conditioner,B...","Electric Stove,Refrigerator,Rice Cooker,Glass ...","24-Hour Security,CCTV In Public Area,Elevator,...",4.8,4.8,4.8,4.8,4.8,4.8,4.8,3470000.0
3,Comfort 2BR at 20th Floor Green Pramuka City A...,Full Furnished,2,1.0,1300.0,"Jl. A. Yani No.32, RT.16/RW.9, Rawasari, Cempa...",106.874840,-6.188552,33.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Pramuka City,BOUGENVILLE,20.0,"Queen Bed (160x200),Mattress Protector 200x200...","Single Bed (90x200),Mattress Protector 200x200...","24-Hour Security,ATM On Site,Access Card,CCTV ...",,,,,,,,5199315.0
4,Comfortable and Simply Studio at Citra Living ...,Full Furnished,1,1.0,1200.0,"Jl. Citra 7 No.7, RW.11, Kalideres, Kalideres,...",106.701664,-6.146018,22.61,Seluruh Rumah / Apartemen,Apartemen,Apartemen Citra Living,Lotus,3.0,"Queen Bed (160x200),Wardrobe,Air Conditioner,C...","Induction Stove (2 Furnace),Refrigerator,Rice ...","24-Hour Security,CCTV In Public Area,Fitness C...",5.0,5.0,5.0,5.0,5.0,5.0,5.0,4405470.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Unfurnished Studio with AC 16th Floor Green Se...,Unfurnished,1,1.0,3500.0,"Jl. Kamal Raya Outer Ring Road, Gerbang Utama ...",106.728313,-6.138467,38.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Sedayu,Pasadena,16.0,"Air Conditioner,AC Remote,Wardrobe","Kitchen Cabinet,Washing Machine","24-Hour Security,Access Card,CCTV In Public Ar...",5.0,5.0,5.0,5.0,5.0,5.0,5.0,3533000.0
3139,1BR Unfurnished with AC at 18th Floor The Mans...,Unfurnished,1,1.0,5500.0,"Jl. Trembesi No.D, Pademangan Tim., Pademangan...",106.854556,-6.148443,33.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen The Mansion Kemayoran,BG,18.0,"Air Conditioner,AC Remote","Air Conditioner,AC Remote","24-Hour Security,ATM On Site,Access Card,CCTV ...",,,,,,,,3033999.0
3140,Unfurnished 2BR Apartment Green Palace Kalibat...,Unfurnished,1,1.0,1300.0,"Jl. Raya Kalibata No.09/04, RT.1 Rt/RW.4, Rawa...",106.850460,-6.256901,35.50,Seluruh Rumah / Apartemen,Apartemen,Apartemen Green Palace Kalibata,MAWAR,20.0,"Air Conditioner,AC Remote","Toilet Bowl,Shower","ATM On Site,Access Card,Cafe,Elevator,Laundry ...",5.0,5.0,5.0,5.0,5.0,5.0,5.0,3050000.0
3141,Unfurnished 2BR with AC at L'Avenue Apartment ...,Unfurnished,1,1.0,3500.0,"Jl. Raya Pasar Minggu No.Kav.16, RT.7/RW.9, Pa...",106.844326,-6.248593,70.00,Seluruh Rumah / Apartemen,Apartemen,Apartemen L'Avenue Pancoran,South,29.0,"Air Conditioner,AC Remote","Air Conditioner,AC Remote","ATM On Site,Access Card,Cafe,Elevator,Fitness ...",5.0,5.0,5.0,5.0,5.0,5.0,5.0,8049999.0


In [306]:
df.to_csv('unit_df.csv')