# Vieques Airbnbs Browser Automation

In [1]:
import os
import random
import time

from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

import chromedriver_binary

import requests
from bs4 import BeautifulSoup
import pandas as pd
from tabulate import tabulate
import re

In [2]:
os.makedirs('data/', exist_ok=True)

#### 1) Open the browser, hide automation signs, visit Airbnb.com

In [4]:
def open_browser():
    """
    Opens a new automated browser window with all tell-tales of automated browser disabled
    """
    options = webdriver.ChromeOptions()
    options.add_argument("start-maximized")
    
    ## NOTE WHAT DO YOU DO TO HIDE BROWSER?
    # remove all signs of this being an automated browser
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    # open the browser with the new options
    driver = webdriver.Chrome(options=options)
    return driver

In [5]:
driver = open_browser()
# visit the page
url = 'https://www.airbnb.com/'
driver.get(url)

#### 2) Find Search Box

The Airbnb page have a search bar where you can input place, dates and guest numbers. I only want to search by place, so I lookd for the place button. Before any input, it says "Anywhere".

In [6]:
search_box = driver.find_element(By.XPATH, './/div[@aria-labelledby="littleSearchLabel"]')
search_place_box = search_box.find_element(By.CSS_SELECTOR, 'button')
search_place_box

<selenium.webdriver.remote.webelement.WebElement (session="199987111267de7c7339ab4046e3a395", element="56613B0708D30264B467AC78990F9B84_element_152")>

In [7]:
search_place_box.click()

#### 3) Find Destinations Search Box

After you click on the search_place_box, it will open a new search bar 
to search for destinations. I need to locate it, write "Vieques" on it and hit enter
so I can have all the listings in Vieques.

In [9]:
search_destination = driver.find_element(
    By.XPATH, 
    './/input[@placeholder="Search destinations"]'
)

search_destination

<selenium.webdriver.remote.webelement.WebElement (session="199987111267de7c7339ab4046e3a395", element="56613B0708D30264B467AC78990F9B84_element_155")>

In [10]:
search_place = 'Vieques'
search_destination.send_keys(search_place)

### 4) Find Search Box

Once you have "Vieques" written in the search place, if you hit enter, the calendars will open. We do not want to choose a date because we want all the options available in Vieques. We have to look for the red Search button and click it. 

In [12]:
search_button = driver.find_element(By.XPATH, './/span[@class="t1dqvypu dir dir-ltr"]')
search_button

<selenium.webdriver.remote.webelement.WebElement (session="199987111267de7c7339ab4046e3a395", element="56613B0708D30264B467AC78990F9B84_element_156")>

In [13]:
search_button.click()

In [17]:
from selenium.common.exceptions import NoSuchElementException

#### 5) Make a list with the url for every listing in every page

In [23]:
listings = []

# Loop through each page
for i in range(15):  # Assuming 15 pages
    # Extract href values from listings on the current page
    a_elements = driver.find_elements(By.XPATH, '//a[@class="l1ovpqvx bn2bl2p dir dir-ltr"]')
    for a_element in a_elements:
        href_value = a_element.get_attribute('href')
        print(href_value)
        listings.append(href_value)

    time.sleep(3)

    # Go to the next page if available
    try:
        next_page = driver.find_element(By.XPATH, './/a[@aria-label="Next"]')
        next_page.click()

        time.sleep(7)
    
        # newURl = driver.window_handles[0]
        # driver.switch_to.window(newURl)

    except NoSuchElementException:
        print("No next page found.")

https://www.airbnb.com/rooms/908586706566232069?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-08-27&check_out=2023-09-01&source_impression_id=p3_1691593244_kyRrZS%2FtMbz61OD5&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4
https://www.airbnb.com/rooms/37959635?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-09-08&check_out=2023-09-13&source_impression_id=p3_1691593244_1nvPDOz%2FuepYm3re&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4
https://www.airbnb.com/rooms/20093500?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-09-18&check_out=2023-09-23&source_impression_id=p3_1691593244_hNLyiMCqqJe835PE&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4
https://www.airbnb.com/rooms/32163392?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-09-06&check_ou

In [24]:
len(listings)

270

#### 5) For each listing, look for the link of the host and save it into a list

In [25]:
host_urls = []

for listing in listings:
    driver.get(listing)  # Open the URL in the browser
    time.sleep(20) 

    # Now you can interact with elements on the current page
    # For example:
    try:
        hosts = driver.find_elements(By.XPATH, './/div[@class="h1144bf3 dir dir-ltr"]')
        for host in hosts:
            link_element = host.find_element(By.TAG_NAME, 'a')
            href = link_element.get_attribute('href')
            print(href)
            host_urls.append(href)
             
    
    except NoSuchElementException:
        print("Element not found on", url) 

https://www.airbnb.com/users/show/198714409
https://www.airbnb.com/users/show/57043571
https://www.airbnb.com/users/show/142889283
https://www.airbnb.com/users/show/59636726
https://www.airbnb.com/users/show/23432125
https://www.airbnb.com/users/show/246880719
https://www.airbnb.com/users/show/39856060
https://www.airbnb.com/users/show/71681129
https://www.airbnb.com/users/show/4790521
https://www.airbnb.com/users/show/28271296
https://www.airbnb.com/users/show/4125485
https://www.airbnb.com/users/show/144416640
https://www.airbnb.com/users/show/218587392
https://www.airbnb.com/users/show/248717081
https://www.airbnb.com/users/show/75247930
https://www.airbnb.com/users/show/435404857
https://www.airbnb.com/users/show/118363144
https://www.airbnb.com/users/show/51705188
https://www.airbnb.com/users/show/487551565
https://www.airbnb.com/users/show/118363144
https://www.airbnb.com/users/show/435404857
https://www.airbnb.com/users/show/23432125
https://www.airbnb.com/users/show/75247930
ht

In [34]:
host_urls = [
"https://www.airbnb.com/users/show/198714409",
"https://www.airbnb.com/users/show/57043571",
"https://www.airbnb.com/users/show/142889283",
"https://www.airbnb.com/users/show/59636726",
"https://www.airbnb.com/users/show/23432125",
"https://www.airbnb.com/users/show/246880719",
"https://www.airbnb.com/users/show/39856060",
"https://www.airbnb.com/users/show/71681129",
"https://www.airbnb.com/users/show/4790521",
"https://www.airbnb.com/users/show/28271296",
"https://www.airbnb.com/users/show/4125485",
"https://www.airbnb.com/users/show/144416640",
"https://www.airbnb.com/users/show/218587392",
"https://www.airbnb.com/users/show/248717081",
"https://www.airbnb.com/users/show/75247930",
"https://www.airbnb.com/users/show/435404857",
"https://www.airbnb.com/users/show/118363144",
"https://www.airbnb.com/users/show/51705188",
"https://www.airbnb.com/users/show/487551565",
"https://www.airbnb.com/users/show/118363144",
"https://www.airbnb.com/users/show/435404857",
"https://www.airbnb.com/users/show/23432125",
"https://www.airbnb.com/users/show/75247930",
"https://www.airbnb.com/users/show/198714409",
"https://www.airbnb.com/users/show/297501743",
"https://www.airbnb.com/users/show/139830463",
"https://www.airbnb.com/users/show/71681129",
"https://www.airbnb.com/users/show/138932340",
"https://www.airbnb.com/users/show/43031585",
"https://www.airbnb.com/users/show/116849943",
"https://www.airbnb.com/users/show/39856060",
"https://www.airbnb.com/users/show/71507747",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/179117858",
"https://www.airbnb.com/users/show/93917095",
"https://www.airbnb.com/users/show/93917095",
"https://www.airbnb.com/users/show/172995066",
"https://www.airbnb.com/users/show/86622",
"https://www.airbnb.com/users/show/142889283",
"https://www.airbnb.com/users/show/71507747",
"https://www.airbnb.com/users/show/297501743",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/297501743",
"https://www.airbnb.com/users/show/435404857",
"https://www.airbnb.com/users/show/38282684",
"https://www.airbnb.com/users/show/71507747",
"https://www.airbnb.com/users/show/11513353",
"https://www.airbnb.com/users/show/4125485",
"https://www.airbnb.com/users/show/119815102",
"https://www.airbnb.com/users/show/10305143",
"https://www.airbnb.com/users/show/56426101",
"https://www.airbnb.com/users/show/391696466",
"https://www.airbnb.com/users/show/93874749",
"https://www.airbnb.com/users/show/39856060",
"https://www.airbnb.com/users/show/392855496",
"https://www.airbnb.com/users/show/247771808",
"https://www.airbnb.com/users/show/91406086",
"https://www.airbnb.com/users/show/11513353",
"https://www.airbnb.com/users/show/5879585",
"https://www.airbnb.com/users/show/50820867",
"https://www.airbnb.com/users/show/84149613",
"https://www.airbnb.com/users/show/125186276",
"https://www.airbnb.com/users/show/41303858",
"https://www.airbnb.com/users/show/179117858",
"https://www.airbnb.com/users/show/456052316",
"https://www.airbnb.com/users/show/23432125",
"https://www.airbnb.com/users/show/45157280",
"https://www.airbnb.com/users/show/755188",
"https://www.airbnb.com/users/show/391696466",
"https://www.airbnb.com/users/show/4406745",
"https://www.airbnb.com/users/show/124634734",
"https://www.airbnb.com/users/show/86589202",
"https://www.airbnb.com/users/show/331376790",
"https://www.airbnb.com/users/show/166582775",
"https://www.airbnb.com/users/show/1107127",
"https://www.airbnb.com/users/show/7931678",
"https://www.airbnb.com/users/show/244571744",
"https://www.airbnb.com/users/show/130173741",
"https://www.airbnb.com/users/show/415700360",
"https://www.airbnb.com/users/show/755188",
"https://www.airbnb.com/users/show/30656568",
"https://www.airbnb.com/users/show/443105514",
"https://www.airbnb.com/users/show/198714409",
"https://www.airbnb.com/users/show/14130139",
"https://www.airbnb.com/users/show/479249243",
"https://www.airbnb.com/users/show/16621952",
"https://www.airbnb.com/users/show/3417424",
"https://www.airbnb.com/users/show/15438837",
"https://www.airbnb.com/users/show/117261852",
"https://www.airbnb.com/users/show/58761640",
"https://www.airbnb.com/users/show/220729919",
"https://www.airbnb.com/users/show/21769602",
"https://www.airbnb.com/users/show/11075962",
"https://www.airbnb.com/users/show/10535102",
"https://www.airbnb.com/users/show/10305143",
"https://www.airbnb.com/users/show/115193497",
"https://www.airbnb.com/users/show/4406745",
"https://www.airbnb.com/users/show/6043636",
"https://www.airbnb.com/users/show/5200093",
"https://www.airbnb.com/users/show/66805188",
"https://www.airbnb.com/users/show/204553584",
"https://www.airbnb.com/users/show/18257074",
"https://www.airbnb.com/users/show/12936288",
"https://www.airbnb.com/users/show/45157280",
"https://www.airbnb.com/users/show/219622553",
"https://www.airbnb.com/users/show/20332763",
"https://www.airbnb.com/users/show/188631",
"https://www.airbnb.com/users/show/10778722",
"https://www.airbnb.com/users/show/71669940",
"https://www.airbnb.com/users/show/179117858",
"https://www.airbnb.com/users/show/17013027",
"https://www.airbnb.com/users/show/11513353",
"https://www.airbnb.com/users/show/138932340",
"https://www.airbnb.com/users/show/5879585",
"https://www.airbnb.com/users/show/82520187",
"https://www.airbnb.com/users/show/48062771",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/173580689",
"https://www.airbnb.com/users/show/473333578",
"https://www.airbnb.com/users/show/89379583",
"https://www.airbnb.com/users/show/370350392",
"https://www.airbnb.com/users/show/21938604",
"https://www.airbnb.com/users/show/45157280",
"https://www.airbnb.com/users/show/39856060",
"https://www.airbnb.com/users/show/69220140",
"https://www.airbnb.com/users/show/7931678",
"https://www.airbnb.com/users/show/268451655",
"https://www.airbnb.com/users/show/125364826",
"https://www.airbnb.com/users/show/141731046",
"https://www.airbnb.com/users/show/383210889",
"https://www.airbnb.com/users/show/415700360",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/2701066",
"https://www.airbnb.com/users/show/459515786",
"https://www.airbnb.com/users/show/24712359",
"https://www.airbnb.com/users/show/24521088",
"https://www.airbnb.com/users/show/4406709",
"https://www.airbnb.com/users/show/7931678",
"https://www.airbnb.com/users/show/249723371",
"https://www.airbnb.com/users/show/473813176",
"https://www.airbnb.com/users/show/2546385",
"https://www.airbnb.com/users/show/489734958",
"https://www.airbnb.com/users/show/138932340",
"https://www.airbnb.com/users/show/250149000",
"https://www.airbnb.com/users/show/3704575",
"https://www.airbnb.com/users/show/11309559",
"https://www.airbnb.com/users/show/35419900",
"https://www.airbnb.com/users/show/138932340",
"https://www.airbnb.com/users/show/14609376",
"https://www.airbnb.com/users/show/58037471",
"https://www.airbnb.com/users/show/10760322",
"https://www.airbnb.com/users/show/415700360",
"https://www.airbnb.com/users/show/27323212",
"https://www.airbnb.com/users/show/54604676",
"https://www.airbnb.com/users/show/478815989",
"https://www.airbnb.com/users/show/11513353",
"https://www.airbnb.com/users/show/35419900",
"https://www.airbnb.com/users/show/109527639",
"https://www.airbnb.com/users/show/188631",
"https://www.airbnb.com/users/show/170110483",
"https://www.airbnb.com/users/show/61649661",
"https://www.airbnb.com/users/show/4393021",
"https://www.airbnb.com/users/show/38483346",
"https://www.airbnb.com/users/show/104561173",
"https://www.airbnb.com/users/show/117261852",
"https://www.airbnb.com/users/show/28329257",
"https://www.airbnb.com/users/show/383693209",
"https://www.airbnb.com/users/show/253537146",
"https://www.airbnb.com/users/show/28598578",
"https://www.airbnb.com/users/show/11309559",
"https://www.airbnb.com/users/show/7931678",
"https://www.airbnb.com/users/show/5879585",
"https://www.airbnb.com/users/show/54469986",
"https://www.airbnb.com/users/show/45157280",
"https://www.airbnb.com/users/show/202337970",
"https://www.airbnb.com/users/show/24050027",
"https://www.airbnb.com/users/show/8324667",
"https://www.airbnb.com/users/show/851042",
"https://www.airbnb.com/users/show/7931678",
"https://www.airbnb.com/users/show/4967413",
"https://www.airbnb.com/users/show/87715467",
"https://www.airbnb.com/users/show/432447787",
"https://www.airbnb.com/users/show/28329257",
"https://www.airbnb.com/users/show/138932340",
"https://www.airbnb.com/users/show/384257678",
"https://www.airbnb.com/users/show/5200093",
"https://www.airbnb.com/users/show/383210889",
"https://www.airbnb.com/users/show/47631394",
"https://www.airbnb.com/users/show/85078080",
"https://www.airbnb.com/users/show/4393029",
"https://www.airbnb.com/users/show/202498250",
"https://www.airbnb.com/users/show/39856060",
"https://www.airbnb.com/users/show/28981205",
"https://www.airbnb.com/users/show/281342131",
"https://www.airbnb.com/users/show/103449700",
"https://www.airbnb.com/users/show/10928461",
"https://www.airbnb.com/users/show/59159286",
"https://www.airbnb.com/users/show/73940117",
"https://www.airbnb.com/users/show/17832644",
"https://www.airbnb.com/users/show/249723371",
"https://www.airbnb.com/users/show/22880456",
"https://www.airbnb.com/users/show/139132",
"https://www.airbnb.com/users/show/148232623",
"https://www.airbnb.com/users/show/103449700",
"https://www.airbnb.com/users/show/488503363",
"https://www.airbnb.com/users/show/6162696",
"https://www.airbnb.com/users/show/31313498",
"https://www.airbnb.com/users/show/12577534",
"https://www.airbnb.com/users/show/130173741",
"https://www.airbnb.com/users/show/61369",
"https://www.airbnb.com/users/show/249723371",
"https://www.airbnb.com/users/show/23432125",
"https://www.airbnb.com/users/show/10305143",
"https://www.airbnb.com/users/show/5200093",
"https://www.airbnb.com/users/show/15438837",
"https://www.airbnb.com/users/show/489734958",
"https://www.airbnb.com/users/show/35419900",
"https://www.airbnb.com/users/show/49299370"
]

In [35]:
len(host_urls)

219

In [29]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [31]:
host_urls = []

for listing in listings:
    print("Loading:", listing)
    try:
        driver.get(listing)  # Open the URL in the browser

        # Wait for the host elements to be present
        hosts = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, './/div[@class="h1144bf3 dir dir-ltr"]'))
        )

        for host in hosts:
            link_element = host.find_element(By.TAG_NAME, 'a')
            href = link_element.get_attribute('href')
            print(href)
            host_urls.append(href)

    except Exception as e:
        print(f"An error occurred while loading {listing}: {e}")

Loading: https://www.airbnb.com/rooms/908586706566232069?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-08-27&check_out=2023-09-01&source_impression_id=p3_1691593244_kyRrZS%2FtMbz61OD5&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4
An error occurred while loading https://www.airbnb.com/rooms/908586706566232069?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-08-27&check_out=2023-09-01&source_impression_id=p3_1691593244_kyRrZS%2FtMbz61OD5&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4: Message: 

Loading: https://www.airbnb.com/rooms/37959635?adults=1&children=0&enable_m3_private_room=true&infants=0&pets=0&check_in=2023-09-08&check_out=2023-09-13&source_impression_id=p3_1691593244_1nvPDOz%2FuepYm3re&previous_page_section_name=1000&federated_search_id=c9cbb29c-4c6d-4e65-b717-04bd9bfdd2d4
An error occurred while loading https://www.airbnb.com/ro

KeyboardInterrupt: 

#### 6) For each host, look for the information.

In [45]:
from collections import Counter

hosts_unique_urls = []

# Use Counter to count occurrences of elements in the list
element_counts = Counter(host_urls)

# Print the element counts
for element, count in element_counts.items():
    print(element, count)
    hosts_unique_urls.append(element)

https://www.airbnb.com/users/show/198714409 3
https://www.airbnb.com/users/show/57043571 1
https://www.airbnb.com/users/show/142889283 2
https://www.airbnb.com/users/show/59636726 1
https://www.airbnb.com/users/show/23432125 4
https://www.airbnb.com/users/show/246880719 1
https://www.airbnb.com/users/show/39856060 5
https://www.airbnb.com/users/show/71681129 2
https://www.airbnb.com/users/show/4790521 1
https://www.airbnb.com/users/show/28271296 1
https://www.airbnb.com/users/show/4125485 2
https://www.airbnb.com/users/show/144416640 1
https://www.airbnb.com/users/show/218587392 1
https://www.airbnb.com/users/show/248717081 1
https://www.airbnb.com/users/show/75247930 2
https://www.airbnb.com/users/show/435404857 3
https://www.airbnb.com/users/show/118363144 2
https://www.airbnb.com/users/show/51705188 1
https://www.airbnb.com/users/show/487551565 1
https://www.airbnb.com/users/show/297501743 3
https://www.airbnb.com/users/show/139830463 1
https://www.airbnb.com/users/show/138932340 5


In [46]:
len(hosts_unique_urls)


153

In [40]:
# Create a DataFrame with columns 'Element' and 'Count'
df = pd.DataFrame(element_counts.items(), columns=['Element', 'Count'])
print(df)

                                         Element  Count
0    https://www.airbnb.com/users/show/198714409      3
1     https://www.airbnb.com/users/show/57043571      1
2    https://www.airbnb.com/users/show/142889283      2
3     https://www.airbnb.com/users/show/59636726      1
4     https://www.airbnb.com/users/show/23432125      4
..                                           ...    ...
148  https://www.airbnb.com/users/show/488503363      1
149   https://www.airbnb.com/users/show/31313498      1
150   https://www.airbnb.com/users/show/12577534      1
151      https://www.airbnb.com/users/show/61369      1
152   https://www.airbnb.com/users/show/49299370      1

[153 rows x 2 columns]


In [56]:
df.to_csv('airbnb_scraped.csv')

In [42]:
df_sorted = df.sort_values(by='Count', ascending=False)
df_sorted

Unnamed: 0,Element,Count
25,https://www.airbnb.com/users/show/6162696,6
54,https://www.airbnb.com/users/show/7931678,5
6,https://www.airbnb.com/users/show/39856060,5
21,https://www.airbnb.com/users/show/138932340,5
46,https://www.airbnb.com/users/show/45157280,4
...,...,...
61,https://www.airbnb.com/users/show/479249243,1
60,https://www.airbnb.com/users/show/14130139,1
59,https://www.airbnb.com/users/show/443105514,1
58,https://www.airbnb.com/users/show/30656568,1


In [55]:
df_sorted.to_csv('airbn_scraped.csv', index=False)

In [47]:
host_descriptions = []

count = 0
for host_url in hosts_unique_urls:
    count = count + 1
    driver.get(host_url)  # Open the URL in the browser
    print("------")  
    print(str(count))
    time.sleep(10) 

    # Now you can interact with elements on the current page
    # For example:
    try:
        descriptions = driver.find_elements(By.XPATH, './/span[@class="t1chcso6 dir dir-ltr"]')
        for description in descriptions:
            print(description.text)
            host_descriptions.append(description.text)
            
    
    except NoSuchElementException:
        print("Element not found on", url) 

------
1
Where I went to school: Whittier Law School
I'm obsessed with: The beach, running and traveling.
Lives in Vieques, Puerto Rico
Pets: Princess (Yorkie) and Baby (Cat).
------
2
Lives in Lilburn, GA
------
3
Speaks English, Spanish
Lives in Puerto Rico
------
4
------
5
My work: Doug - commercial Realtor, builder. Olga - teaches at University of Michigan
Speaks English, Spanish
Lives in Ann Arbor, MI
------
6
My work: Microbiologist
Speaks English, Spanish
Lives in Pullman, WA
Pets: My Golden Retrievers - Sophia and Cindy
------
7
My work: Creator of spaces….
Speaks English, Spanish
Lives in San Juan, Puerto Rico
------
8
My work: Rutgers University
Lives in Newark, NJ
------
9
My work: Christopher Robert Matson Interiors
Lives in Vieques, Puerto Rico
------
10
------
11
My work: Attorney
Speaks English, Spanish
Lives in Dallas, TX
------
12
Lives in Boston, MA
------
13
Speaks English, Spanish
Lives in Vieques, Puerto Rico
------
14
I'm obsessed with: Traveling with a family
Pe

In [53]:
host_descriptions = []

count = 0
for host_url in hosts_unique_urls:
    count = count + 1
    driver.get(host_url)  # Open the URL in the browser
    print("------")  
    print(str(count))
    time.sleep(10) 

    # Now you can interact with elements on the current page
    # For example:
    try:
        descriptions = driver.find_elements(By.XPATH, './/span[@class="t1chcso6 dir dir-ltr"]')
        for description in descriptions:
            print(description.text)
            host_descriptions.append(description.text)
            
    
    except NoSuchElementException:
        print("Element not found on", url) 

------
1
Where I went to school: Whittier Law School
I'm obsessed with: The beach, running and traveling.
Lives in Vieques, Puerto Rico
Pets: Princess (Yorkie) and Baby (Cat).
------
2
Lives in Lilburn, GA
------
3
Speaks English, Spanish
Lives in Puerto Rico
------
4
------
5
My work: Doug - commercial Realtor, builder. Olga - teaches at University of Michigan
Speaks English, Spanish
Lives in Ann Arbor, MI
------
6
My work: Microbiologist
Speaks English, Spanish
Lives in Pullman, WA
Pets: My Golden Retrievers - Sophia and Cindy
------
7
My work: Creator of spaces….
Speaks English, Spanish
Lives in San Juan, Puerto Rico
------
8
My work: Rutgers University
Lives in Newark, NJ
------
9
My work: Christopher Robert Matson Interiors
Lives in Vieques, Puerto Rico
------
10
------
11
My work: Attorney
Speaks English, Spanish
Lives in Dallas, TX
------
12
Lives in Boston, MA
------
13
Speaks English, Spanish
Lives in Vieques, Puerto Rico
------
14
I'm obsessed with: Traveling with a family
Pe

TimeoutException: Message: timeout: Timed out receiving message from renderer: 3.417
  (Session info: chrome=115.0.5790.170)
Stacktrace:
0   chromedriver                        0x000000010088bf48 chromedriver + 4226888
1   chromedriver                        0x00000001008844f4 chromedriver + 4195572
2   chromedriver                        0x00000001004c8d68 chromedriver + 281960
3   chromedriver                        0x00000001004b29b8 chromedriver + 190904
4   chromedriver                        0x00000001004b26e0 chromedriver + 190176
5   chromedriver                        0x00000001004b1304 chromedriver + 185092
6   chromedriver                        0x00000001004b178c chromedriver + 186252
7   chromedriver                        0x00000001004bf514 chromedriver + 242964
8   chromedriver                        0x00000001004d303c chromedriver + 323644
9   chromedriver                        0x00000001004b1c8c chromedriver + 187532
10  chromedriver                        0x00000001004d2e88 chromedriver + 323208
11  chromedriver                        0x000000010053ae30 chromedriver + 749104
12  chromedriver                        0x00000001004f7f1c chromedriver + 474908
13  chromedriver                        0x00000001004f8ef4 chromedriver + 478964
14  chromedriver                        0x000000010084d59c chromedriver + 3970460
15  chromedriver                        0x00000001008516f0 chromedriver + 3987184
16  chromedriver                        0x00000001008575b4 chromedriver + 4011444
17  chromedriver                        0x00000001008522fc chromedriver + 3990268
18  chromedriver                        0x000000010082a1c0 chromedriver + 3826112
19  chromedriver                        0x000000010086e088 chromedriver + 4104328
20  chromedriver                        0x000000010086e1e0 chromedriver + 4104672
21  chromedriver                        0x000000010087df28 chromedriver + 4169512
22  libsystem_pthread.dylib             0x000000019c5954ec _pthread_start + 148
23  libsystem_pthread.dylib             0x000000019c5902d0 thread_start + 8


In [49]:
df_sorted['HostDescriptions'] = host_descriptions

print(df_sorted)

ValueError: Length of values (375) does not match length of index (153)

In [14]:
next_page = driver.find_element(By.XPATH, './/a[@aria-label="Next"]')
next_page

<selenium.webdriver.remote.webelement.WebElement (session="199987111267de7c7339ab4046e3a395", element="56613B0708D30264B467AC78990F9B84_element_293")>

#### I've cleaned the output in Google Sheets. I'll do the analysis with pandas.

In [2]:
import pandas as pd

In [4]:
df = pd.read_csv('Cleaned_Airbnb_Vieques_Data.csv')
df.head()

Unnamed: 0,HostURL,Count,LivesIn
0,https://www.airbnb.com/users/show/6162696,6,
1,https://www.airbnb.com/users/show/39856060,5,"San Juan, Puerto Rico"
2,https://www.airbnb.com/users/show/138932340,5,"Vieques, Puerto Rico"
3,https://www.airbnb.com/users/show/7931678,5,"Pennsylvania, United States"
4,https://www.airbnb.com/users/show/23432125,4,"Ann Arbor, MI"


In [8]:
value_counts = df['LivesIn'].value_counts()

print("Value counts in 'LivesIn' column:\n", value_counts)

Value counts in 'LivesIn' column:
 LivesIn
Puerto Rico                    24
Vieques, Puerto Rico           20
San Juan, Puerto Rico          12
Boston, MA                      2
Milwaukee, WI                   2
Washington, DC                  2
Pittsburgh, PA                  2
Atlanta, GA                     2
Newark, NJ                      2
Pennsylvania, United States     2
Luquillo, Puerto Rico           2
Dallas, TX                      1
Lino Lakes, MN                  1
Prince Edward, Canada           1
Somerville, MA                  1
Guaynabo, Puerto Rico           1
Ocala, FL                       1
Mexico City, Mexico             1
Charleston, SC                  1
Red Lion, PA                    1
Portland, OR                    1
Fajardo, Puerto Rico            1
Accokeek, MD                    1
Austin, TX                      1
Columbia, IL                    1
Wrightsville, PA                1
Mechanicsburg, PA               1
Plymouth, MA                    1
Tyler

In [6]:
count_puerto_rico = df['LivesIn'].str.contains('Puerto Rico').sum()

print("Number of rows where 'LivesIn' contains 'Puerto Rico':", count_puerto_rico)

Number of rows where 'LivesIn' contains 'Puerto Rico': 67


In [7]:
count_nan = df['LivesIn'].isna().sum()

print("Number of rows with NaN values in 'LivesIn' column:", count_nan)

Number of rows with NaN values in 'LivesIn' column: 34


In [14]:
# Filter out rows with "Puerto Rico" in the "LivesIn" column
filtered_df = df[~df['LivesIn'].str.contains('Puerto Rico', case=False, na=False)]

# Count the occurrences of each unique value in the filtered "LivesIn" column
value_counts = filtered_df['LivesIn'].value_counts()

print("Value counts in 'LivesIn' column (excluding 'Puerto Rico'):\n", value_counts)

value_counts.to_csv('value_counts_by_livesin.csv', header=True)

Value counts in 'LivesIn' column (excluding 'Puerto Rico'):
 LivesIn
Pennsylvania, United States    2
Atlanta, GA                    2
Newark, NJ                     2
Washington, DC                 2
Boston, MA                     2
Pittsburgh, PA                 2
Milwaukee, WI                  2
Portland, OR                   1
Prince Edward, Canada          1
Somerville, MA                 1
Ocala, FL                      1
Mexico City, Mexico            1
Charleston, SC                 1
Red Lion, PA                   1
Austin, TX                     1
Lino Lakes, MN                 1
Accokeek, MD                   1
Columbia, IL                   1
Wrightsville, PA               1
Mechanicsburg, PA              1
Plymouth, MA                   1
Tyler, TX                      1
Boston, VA                     1
Telluride, CO                  1
State College, PA              1
New Orleans, LA                1
Moultonborough, NH             1
Kissimmee, FL                  1
Richmon

In [11]:
filtered_df

Unnamed: 0,HostURL,Count,LivesIn
0,https://www.airbnb.com/users/show/6162696,6,
3,https://www.airbnb.com/users/show/7931678,5,"Pennsylvania, United States"
4,https://www.airbnb.com/users/show/23432125,4,"Ann Arbor, MI"
5,https://www.airbnb.com/users/show/11513353,4,
9,https://www.airbnb.com/users/show/297501743,3,
...,...,...,...
146,https://www.airbnb.com/users/show/139132,1,"Telluride, CO"
147,https://www.airbnb.com/users/show/148232623,1,
149,https://www.airbnb.com/users/show/31313498,1,"New York, NY"
151,https://www.airbnb.com/users/show/61369,1,


In [13]:
sum_counts = filtered_df.groupby('LivesIn')['Count'].sum()

print("Sum of 'Count' values for each 'LivesIn':\n", sum_counts)

sum_counts.to_csv('sum_counts_by_livesin.csv', header=True)

Sum of 'Count' values for each 'LivesIn':
 LivesIn
Accokeek, MD                   1
Ann Arbor, MI                  4
Atlanta, GA                    2
Austin, TX                     1
Boston, MA                     2
Boston, VA                     1
Charleston, SC                 1
Columbia, IL                   1
Council Bluffs, IA             1
Dallas, TX                     2
East Brunswick, NJ             1
Frankfort, MI                  1
Indialantic, FL                1
Kissimmee, FL                  1
Leawood, KS                    1
Lilburn, GA                    1
Lino Lakes, MN                 1
London, United Kingdom         2
Mechanicsburg, PA              1
Mexico City, Mexico            1
Miami, FL                      1
Milwaukee, WI                  2
Moultonborough, NH             1
New Orleans, LA                1
New York, NY                   1
Newark, NJ                     3
Ocala, FL                      1
Orlando, FL                    1
Pennsylvania, United Stat

#### The total of listings Airbnb showed at the time the scraping was made was 270.
#### The total of listings for which I could get the host was 219.
#### There are 153 hosts. Some of them have more than one listing.
#### At least, 67 hosts say they live in Puerto Rico.
#### 34 hosts do not have information on where do they live.
#### At least 52 hosts do not live in Puerto Rico.
#### At least 64 listings have a host that says they don't live in Puerto Rico.
