In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

In [2]:
def get_page(url):
    """
    returns a soup object that contains all the information of a given webpage
    """
    result = requests.get(url)
    content = result.content
    return BeautifulSoup(content, features='html.parser')


def get_room_classes(soup_page):
    """
    returns all the listings that can be found on the page (soup object) in a list
    """
    rooms = soup_page.findAll('div', {'class':'_8ssblpx'})
    result = []
    for room in rooms:
        result.append(room)
    return result


def get_listing_link(listing):
    """
    returns the URL link of given listing
    """
    listing_link = 'https://airbnb.com' + listing.find('a')['href']
    listing_link = listing_link.split('?')[0]
    return listing_link


def get_listing_title(listing):
    """
    returns the title of given listing
    """
    return listing.find('meta')['content']


def get_top_row(listing):
    """
    returns the top row of given listing's info
    """
    return listing.find('div', {'class':'_1tanv1h'}).text  # _167gordg


def get_room_info(listing):
    """
    returns room info of listing 
    """
    return listing.find('div', {'class', '_kqh46o'}).text


def get_room_price(listing):
    """
    returns the nightly rate (price) of given listing
    """
    price_text = listing.find('div', {'class':'_ls0e43'}).text
    price = price_text.split('Price:')
    return price[1]


def get_basic_facilities(listing):
    ''' Returns the basic facilities'''
    try:
        output = listing.findAll("div", {"class":"_kqh46o"})[1].text.replace(" ","") #Speeds up cleaning
    except:
        output = []
    return output


def get_room_rating(listing):
    """
    returns star rating of given listing
    """
    try:
        return listing.find('div', {'class':'_vaj62s'}).text
    except:
        return listing.find('div', {'class':'_vaj62s'})

    
def get_n_reviews(listing):
    '''
    Returns the number of reviews
    '''
    try:  # Not all listings have reviews // extraction failed
        output = listing.findAll("span", {"class":"_krjbj"})[1].text
    except:
        output = None   # Indicate that the extraction failed -> can indicate no reviews or a mistake in scraping
    return output


def record_dataset(listings, file_path='output.csv', first_page=False):
    """
    take scraped room classes and record their information to csv
    """
    data = []
    for l in listings:
        a = get_listing_link(l)
        b = get_listing_title(l)
        c = get_top_row(l)
        d = get_room_info(l)
        e = get_room_price(l)
        f = get_basic_facilities(l)
        g = get_room_rating(l)
        h = get_n_reviews(l)
        out = [a, b, c, d, e, f, g, h]
        data.append(out)
    if first_page:
        names = [l for l in 'abcdefgh']
        df = pd.DataFrame(data, columns=names)
    else:
        df = pd.read_csv(file_path)
        names = df.columns
        new_df = pd.DataFrame(data, columns=names)
        df = pd.concat([df, new_df], axis=0)
    df.to_csv(file_path, index=False)
    return len(df)

In [3]:
las_vegas_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes'
seattle_link = 'https://www.airbnb.com/s/Seattle--WA--United-States/homes'

record_dataset(get_room_classes(get_page(las_vegas_link)), first_page=True)

20

# Notes
### Base Link
- `https://www.airbnb.com/` + `s/` + `City` + `--` + `STATE` (can be abbr.) + `--` + `COUNTRY` (can be abbr.)
  - `https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes`
  - `https://www.airbnb.com/s/Las-Vegas--Nevada--USA/homes`
  - `https://www.airbnb.com/s/Las-Vegas--NV--US/homes`

### Pages
- Each page holds 20 listings by default
- `base_link` + `?items_offset=20` starts page at 21st listing result

### Search Results
- Max results returned is 300, even if more than 300
- Last page (15) will say `281 – 300 of 300+ places to stay`


### make_page_urls()
- Missing Guests filtering 
- And other filters that aren't reflected in URL?

In [4]:
"""

https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&place_id=ChIJ0X31pIK3voARo3mz1ebVzDo&federated_search_session_id=5f8d449b-73b3-4dc9-a1ac-ab166a8eded9&search_type=pagination&items_offset=20&section_offset=3

https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=20


https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&search_type=pagination&place_id=ChIJ0X31pIK3voARo3mz1ebVzDo&federated_search_session_id=5f8d449b-73b3-4dc9-a1ac-ab166a8eded9&items_offset=40&section_offset=3

https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=40

"""
pass

In [5]:
def find_n_results(soup_page):
    """
    returns total number of search results from page 1 (of search results)
    """
    return soup_page.find('div', {'class':'_1h559tl'}).text

find_n_results(get_page(las_vegas_link))

'1 – 20 of 300+ places to stay'

In [6]:
import math 

def find_n_pages(soup_page, listings_per_page=20):
    n_results_string = soup_page.find('div', {'class':'_1h559tl'}).text
    # check if 300+ club
    if '300+' in n_results_string:
        n_pages = 15
    else:
        split_results_string = n_results_string.split(' of ')
        n_total_results_string = split_results_string[1]
        # check for unknown + edge case
        if '+' in n_total_results_string:
            raise Exception(f'+ in n_total_results_string but 300+ is not\nn_total_results_string == {n_total_results_string}')
        else:
            # find number of results
            split_total_results_string = n_total_results_string.split(' ')
            n_total_results = int(split_total_results_string[0])
            n_pages = n_total_results / listings_per_page 
            n_pages = math.ceil(n_pages)
    # tell me how many pages there are
    return n_pages


def make_page_urls(base_page, n_pages):
    """
    makes pages for search results (sets of 20)
    """
    if '?' not in base_page:
        c = '?'
    else:
        c = '&'
    page_urls = []
    for i in range(n_pages):
        if i != 0:
            url = f'{base_page}{c}items_offset={i * 20}'
            page_urls.append(url)
        else:
            pass
    return page_urls


make_page_urls(las_vegas_link, find_n_pages(get_page(las_vegas_link)))

['https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=20',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=40',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=60',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=80',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=100',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=120',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=140',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=160',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=180',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=200',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=220',
 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?items_offset=240',
 'https://www.airbnb.com/s/Las-V

In [7]:
# r_104 = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&checkin=2020-12-25&checkout=2021-04-30&source=structured_search_input_header&search_type=filter_change&place_id=ChIJ0X31pIK3voARo3mz1ebVzDo&adults=7'
# make_page_urls(r_104, find_n_pages(get_page(r_104)))

In [8]:
f = find_n_results(get_page(las_vegas_link))
print(f)

1 – 20 of 300+ places to stay


In [9]:
f.split(' of ')

['1 – 20', '300+ places to stay']

In [10]:
f.split(' of ')[0].split(' – ')

['1', '20']

In [11]:
f.split(' of ')[1]

'300+ places to stay'

In [12]:
f.split(' of ')[1].split(' ')

['300+', 'places', 'to', 'stay']

In [13]:
int(f.split(' of ')[1].split(' ')[0].replace('+', ''))

300

### Multi Page

In [14]:
las_vegas_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes'

# get 1st page
las_vegas_page_1 = get_page(las_vegas_link)

# record the 1st page
r = record_dataset(get_room_classes(las_vegas_page_1), first_page=True)
print(r)

# get urls for other pages 
las_vegas_urls = make_page_urls(las_vegas_link, find_n_pages(las_vegas_page_1))

for url in las_vegas_urls:
    r = record_dataset(get_room_classes(get_page(url)), first_page=False)
    print(r)

20
40
60
80
100
120
140
160
180
200
220
240
260
280
300


In [15]:
pd.read_csv('output.csv')

Unnamed: 0,a,b,c,d,e,f,g,h
0,https://airbnb.com/rooms/44416011,Eggeman 8 - null - Las Vegas,Entire condominium in Las Vegas,4 guests · 2 bedrooms · 3 beds · 1.5 baths,$50 / night,Wifi·Pool,Rating 4.92 out of 5;4.9239 reviews (39),39 reviews
1,https://airbnb.com/rooms/45827691,Great Room in a House close to the strip - nul...,Private room in Las Vegas,2 guests · 1 bedroom · 1 bed · 1 shared bath,$21 / night,Wifi·Pool,Rating 4.80 out of 5;4.805 reviews (5),5 reviews
2,https://airbnb.com/rooms/44162190,"Wyndham Grand Desert 2 BR Suite, FRIDAY Check-...",Entire condominium in Las Vegas,8 guests · 2 bedrooms · 4 beds · 2 baths,$67 / night,Hottub·Wifi·Pool,Rating 4.80 out of 5;4.805 reviews (5),5 reviews
3,https://airbnb.com/rooms/45840505,Tahiti Village Moorea 1 BR Suite FRIDAY Check-...,Entire condominium in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$57 / night,Hottub·Wifi·Pool,Rating 5.0 out of 5;5.04 reviews (4),4 reviews
4,https://airbnb.com/rooms/32525262,Tahiti Resort on Tropicana Studio - null - Las...,Entire condominium in Las Vegas,2 guests · Studio · 1 bed · 1 bath,$49 / night,Hottub·Wifi·Pool,,
...,...,...,...,...,...,...,...,...
295,https://airbnb.com/rooms/45387034,Beautiful 2 bedroom Condominium - null - Las V...,Entire condominium in Las Vegas,3 guests · Studio · 1 bed · 2 baths,$85 / night,Wifi·Airconditioning·Pool·Kitchen,,
296,https://airbnb.com/rooms/33579893,"Luxurious, Clean and Spacious Condo - null - L...",Entire serviced apartment in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$71 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 4.92 out of 5;4.9212 reviews (12),12 reviews
297,https://airbnb.com/rooms/38387884,Master Bedroom w/ Private Bath and entire floo...,Private room in Las Vegas,1 guest · 1 bedroom · 1 bed · 1 private bath,$45 / night,Wifi·Airconditioning·Kitchen·Washer,Rating 4.87 out of 5;4.8723 reviews (23),23 reviews
298,https://airbnb.com/rooms/44559382,Amenity Packed House in North Las Vegas - null...,Private room in North Las Vegas,2 guests · 1 bedroom · 1 bed · 1 shared bath,$41 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 3.82 out of 5;3.8211 reviews (11),11 reviews


### Multi Filter Multi Page
In parallel with Dask Delayed.

In [16]:
import dask.delayed
from dask import compute


@dask.delayed
def scrape_search(base_link, search_alias, n_pages='auto', printout=False):
    """
    record results of a given search link
    """
    # get 1st page
    base_link_page_1 = get_page(base_link)
    
    today = datetime.today()
    today = str(today).split(' ')[0]
    output_path = f'{search_alias}_{today}.csv'
    
    # record the 1st page
    r = record_dataset(get_room_classes(base_link_page_1), file_path=output_path, first_page=True)
    if printout:
        print(r)
    else:
        r

    # get urls for other pages 
    if n_pages=='auto':
        page_urls = make_page_urls(base_link, find_n_pages(base_link_page_1))
    else:
        page_urls = make_page_urls(base_link, n_pages)        

    for url in page_urls:
        r = record_dataset(get_room_classes(get_page(url)), file_path=output_path, first_page=False)
        if printout:
            print(r)
        else:
            r
        

        
def scrape_types(location='Las-Vegas--NV--United-States', location_alias='las_vegas', printout=False):
    base_link = f'https://www.airbnb.com/s/{location}/homes'
    print(f'starting {location.split("--")[0]} @ {base_link}')    

    entire_homes_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Entire home'
    entire_home_super_hosts_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Entire home&superhost=true'

    hotel_rooms_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Hotel room'
    hotel_room_super_hosts_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Hotel room&superhost=true'

    private_rooms_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Private room'
    private_room_super_hosts_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Shared room&superhost=true'

    shared_rooms_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Private room'
    shared_room_super_hosts_link = f'https://www.airbnb.com/s/{location}/homes?room_types[]=Shared room&superhost=true'

    super_hosts_link = f'https://www.airbnb.com/s/{location}/homes?superhost=true'

    a = dask.delayed(scrape_search)(base_link, f'{location_alias}', printout=printout)

    b = dask.delayed(scrape_search)(entire_homes_link, f'{location_alias}_entire_homes', printout=printout) 
    c = dask.delayed(scrape_search)(entire_home_super_hosts_link, f'{location_alias}_entire_home_super_hosts', printout=printout)

    d = dask.delayed(scrape_search)(hotel_rooms_link, f'{location_alias}_hotel_rooms', printout=printout)
    e = dask.delayed(scrape_search)(hotel_room_super_hosts_link, f'{location_alias}_hotel_room_super_hosts', printout=printout)

    f = dask.delayed(scrape_search)(private_rooms_link, f'{location_alias}_private_rooms', printout=printout)
    g = dask.delayed(scrape_search)(private_room_super_hosts_link, f'{location_alias}_private_room_super_hosts', printout=printout)

    h = dask.delayed(scrape_search)(shared_rooms_link, f'{location_alias}_shared_rooms', printout=printout)
    i = dask.delayed(scrape_search)(shared_room_super_hosts_link, f'{location_alias}_shared_room_super_hosts', printout=printout)

    j = dask.delayed(scrape_search)(super_hosts_link, f'{location_alias}_super_hosts', printout=printout)

    compute(*[a, b, c, d, e, f, g, h, i, j])

    
# scrape_types(printout=True)

In [17]:
%%time
locations = ['Oakland--California--United-States',
             'San-Diego--California--United-States',
             'San-Francisco--California--United-States',
             'California--United-States',
             
             'Bentonville--Arkansas--United-States',
             'Bella-Vista--Arkansas--United-States',
             'Little-Rock--Arkansas--United-States',
             'Arkansas--United-States',
             
             'Austin--Texas--United-States',
             'Dallas--Texas--United-States',
             'Houston--Texas--United-States',
             'Texas--United-States',
             
             'Paradise--Nevada--United-States',
             'Henderson--Nevada--United-States',
             'Reno--Nevada--United-States',
             'Nevada--United-States',
             
             'Anchorage--Alaska--United-States',
             'North-Pole--Alaska--United-States',
             'Alaska--United-States']

location_aliases = ['oakland',
                    'san_diego',
                    'san_francisco',
                    'california',
                    
                    'bentonville',
                    'bella_vista',
                    'little_rock',
                    'arkansas',
                    
                    'austin',
                    'dallas',
                    'houston',
                    'texas',
                    
                    'paradise',
                    'henderson',
                    'reno',
                    'nevada',
                    
                    'anchorage',
                    'north_pole',
                    'alaska']

collection = []
for _ in range(len(locations)):
    scrape_types(locations[_], location_aliases[_])

starting Oakland @ https://www.airbnb.com/s/Oakland--California--United-States/homes
starting San-Diego @ https://www.airbnb.com/s/San-Diego--California--United-States/homes
starting San-Francisco @ https://www.airbnb.com/s/San-Francisco--California--United-States/homes
starting California @ https://www.airbnb.com/s/California--United-States/homes
starting Bentonville @ https://www.airbnb.com/s/Bentonville--Arkansas--United-States/homes
starting Bella-Vista @ https://www.airbnb.com/s/Bella-Vista--Arkansas--United-States/homes
starting Little-Rock @ https://www.airbnb.com/s/Little-Rock--Arkansas--United-States/homes
starting Arkansas @ https://www.airbnb.com/s/Arkansas--United-States/homes
starting Austin @ https://www.airbnb.com/s/Austin--Texas--United-States/homes
starting Dallas @ https://www.airbnb.com/s/Dallas--Texas--United-States/homes
starting Houston @ https://www.airbnb.com/s/Houston--Texas--United-States/homes
starting Texas @ https://www.airbnb.com/s/Texas--United-States/hom

In [21]:
%%time
scrape_types()

Wall time: 0 ns


Delayed('scrape_types-4b286f05-bc1c-414d-8470-0168fc500673')

In [22]:
%%time
scrape_types(location='San-Francisco--CA--United-States', location_alias='san_francisco')

Wall time: 1.01 ms


Delayed('scrape_types-eafb2fff-f957-4c30-ab54-6e8b45ca3dee')

In [23]:
pd.read_csv('las_vegas_2020-12-26.csv').a[0]

'https://airbnb.com/rooms/45827691'

In [24]:
pd.read_csv('las_vegas_2020-12-26.csv').a[0] == pd.read_csv('las_vegas_2020-12-27.csv').a[0]

True

In [25]:
import numpy as np

np.sum(pd.read_csv('las_vegas_2020-12-26.csv') == pd.read_csv('las_vegas_2020-12-27.csv'))

a     5
b     6
c    62
d    15
e     7
f    11
g     4
h     6
dtype: int64

In [26]:
pd.read_csv('las_vegas_hotel_rooms_2020-12-26.csv')

Unnamed: 0,a,b,c,d,e,f,g,h
0,https://airbnb.com/rooms/46596793,Artistic Boutique Hotel - King Bed - null - La...,Room in boutique hotel in Las Vegas,2 guests · 1 bedroom · 1 bed · 1 private bath,$61 / night,Wifi·Airconditioning·Pool·Freeparking,Rating 4.67 out of 5;4.676 reviews (6),6 reviews
1,https://airbnb.com/rooms/46815482,Unique Boutique Hotel 2 Bed - null - Las Vegas,Room in boutique hotel in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 private bath,$58 / night,Wifi·Airconditioning·Pool·Freeparking,,
2,https://airbnb.com/rooms/28408857,Las Vegas Palms Luxury suite 25th Floor. - nul...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$130 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 4.48 out of 5;4.4896 reviews (96),96 reviews
3,https://airbnb.com/rooms/40922200,Vegas Gateway - null - Las Vegas,Hotel room in Las Vegas,4 guests · Studio · 1 bed · 1 bath,$75 / night,Wifi·Pool·Kitchen·Freeparking,,
4,https://airbnb.com/rooms/45303440,Las Vegas Studio for Convention or Pleasure - ...,Resort room in Las Vegas,2 guests · Studio · 0 beds · 1 shared bath,$90 / night,Wifi·Airconditioning·Pool·Kitchen,,
...,...,...,...,...,...,...,...,...
295,https://airbnb.com/rooms/22001091,Las Vegas 1BR Suite at Desert Rose LAST MINUTE...,Resort room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$148 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 4.73 out of 5;4.7311 reviews (11),11 reviews
296,https://airbnb.com/rooms/21517499,Las Vegas 1BR Suite at Desert Rose LAST MINUTE...,Resort room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$148 / night,Wifi·Airconditioning·Kitchen·Freeparking,Rating 4.87 out of 5;4.8745 reviews (45),45 reviews
297,https://airbnb.com/rooms/23826771,1BR Unit w/Pool Hot Tub by MGM/Tropicana & Cas...,Hotel room in Las Vegas,2 guests · 1 bedroom · 2 beds · 1 bath,$179 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 4.48 out of 5;4.4821 reviews (21),21 reviews
298,https://airbnb.com/rooms/37991360,Las Vegas Resort #3 - null - Las Vegas,Hotel room in Las Vegas,6 guests · 2 bedrooms · 3 beds · 2 baths,$250 / night,Wifi·Airconditioning·Pool·Kitchen,,


In [27]:
pd.read_csv('las_vegas_hotel_room_super_hosts_2020-12-26.csv')

Unnamed: 0,a,b,c,d,e,f,g,h
0,https://airbnb.com/rooms/44266892,MGM Signature 32nd floor Penthouse With Balcon...,Hotel room in Las Vegas,4 guests · Studio · 2 beds · 1 private bath,$113 / night,Gym·Elevator·Wifi·Airconditioning,Rating 4.82 out of 5;4.8244 reviews (44),44 reviews
1,https://airbnb.com/rooms/6328454,Single bed inside 8 Bed Mixed Dormitory - null...,Hotel room in Las Vegas,1 guest · 1 bedroom · 8 beds · 1 bath,$35 / night,Wifi·Airconditioning·Kitchen·Dryer,Rating 4.76 out of 5;4.76299 reviews (299),299 reviews
2,https://airbnb.com/rooms/39000531,Awesome! VDARA FOUNTAIN VIEW SUITE 100% Smoke ...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$141 / night,Gym·Elevator·Wifi·Airconditioning,Rating 4.87 out of 5;4.8771 reviews (71),71 reviews
3,https://airbnb.com/rooms/32196105,"Luxury Suite at MGM Signature $75, No Resort F...",Entire apartment in Las Vegas,4 guests · Studio · 2 beds · 1.5 baths,$117 / night,Gym·Elevator·Wifi·Selfcheck-in,Rating 4.76 out of 5;4.7671 reviews (71),71 reviews
4,https://airbnb.com/rooms/13380271,Re-Opened! PalmsPlace Strip View Room w/ Balco...,Room in aparthotel in Las Vegas,4 guests · Studio · 0 beds · 1 private bath,$99 / night,Gym·Elevator·Wifi·Selfcheck-in,Rating 4.88 out of 5;4.88286 reviews (286),286 reviews
...,...,...,...,...,...,...,...,...
286,https://airbnb.com/rooms/7323491,Wyndham Grand Desert Resort * 2BR - null - Las...,Entire apartment in Las Vegas,8 guests · 2 bedrooms · 3 beds · 2 baths,$284 / night,Hottub·Pool·Wifi·Freeparking,Rating 4.86 out of 5;4.867 reviews (7),7 reviews
287,https://airbnb.com/rooms/31166153,Beautiful 2 Queen Room in the Heart of Freemon...,Hotel room in Las Vegas,4 guests · Studio · 2 beds · 1 private bath,$200 / night,Wifi·Freeparking,,
288,https://airbnb.com/rooms/33572395,1 BR suite on the strip right beside the Bella...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$248 / night,Hottub·Pool·Wifi·Freeparking,Rating 5.0 out of 5;5.05 reviews (5),5 reviews
289,https://airbnb.com/rooms/33777960,Jockey Club on the strip right beside The Bell...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 private bath,$214 / night,Hottub·Pool·Wifi·Freeparking,,


In [28]:
pd.read_csv('las_vegas_hotel_room_super_hosts_2020-12-27.csv')

Unnamed: 0,a,b,c,d,e,f,g,h
0,https://airbnb.com/rooms/44266892,MGM Signature 32nd floor Penthouse With Balcon...,Hotel room in Las Vegas,4 guests · Studio · 2 beds · 1 private bath,$114 / night,Hottub·Wifi·Pool,Rating 4.82 out of 5;4.8244 reviews (44),44 reviews
1,https://airbnb.com/rooms/6328454,Single bed inside 8 Bed Mixed Dormitory - null...,Hotel room in Las Vegas,1 guest · 1 bedroom · 8 beds · 1 bath,$37 / night,Wifi,Rating 4.76 out of 5;4.76299 reviews (299),299 reviews
2,https://airbnb.com/rooms/38288513,*Amazing* VDARA FOUNTAIN VIEW SUITE Smoke Free...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$132 / night,Hottub·Wifi·Pool,Rating 4.97 out of 5;4.9758 reviews (58),58 reviews
3,https://airbnb.com/rooms/39449362,Mgm Signature Balcony Penthouse. Epic strip vi...,Hotel room in Las Vegas,4 guests · Studio · 2 beds · 1 private bath,$143 / night,Hottub·Wifi·Pool,Rating 4.99 out of 5;4.9968 reviews (68),68 reviews
4,https://airbnb.com/rooms/17318669,Vdara Suite - 23rd Floor - null - Las Vegas,Hotel room in Las Vegas,4 guests · Studio · 2 beds · 1 bath,$157 / night,Hottub·Wifi·Pool,Rating 4.94 out of 5;4.94400 reviews (400),400 reviews
...,...,...,...,...,...,...,...,...
280,https://airbnb.com/rooms/7323491,Wyndham Grand Desert Resort * 2BR - null - Las...,Entire apartment in Las Vegas,8 guests · 2 bedrooms · 3 beds · 2 baths,$282 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 4.86 out of 5;4.867 reviews (7),7 reviews
281,https://airbnb.com/rooms/40318814,Wyndham Tropicana at Las Vegas * 2 Bedroom - n...,Entire apartment in Las Vegas,6 guests · 2 bedrooms · 3 beds · 2 baths,$285 / night,Wifi·Airconditioning·Pool·Kitchen,,
282,https://airbnb.com/rooms/33572395,1 BR suite on the strip right beside the Bella...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 bath,$223 / night,Wifi·Airconditioning·Pool·Kitchen,Rating 5.0 out of 5;5.05 reviews (5),5 reviews
283,https://airbnb.com/rooms/33777960,Jockey Club on the strip right beside The Bell...,Hotel room in Las Vegas,4 guests · 1 bedroom · 2 beds · 1 private bath,$194 / night,Wifi·Airconditioning·Pool·Kitchen,,


In [29]:
pd.read_csv('las_vegas_shared_rooms_2020-12-26.csv')

Unnamed: 0,a,b,c,d,e,f,g,h
0,https://airbnb.com/rooms/45827691,Great Room in a House close to the strip - nul...,Private room in Las Vegas,2 guests · 1 bedroom · 1 bed · 1 shared bath,$21 / night,Pool·Wifi·Freeparking,Rating 4.80 out of 5;4.805 reviews (5),5 reviews
1,https://airbnb.com/rooms/21055421,Private suite with private bathroom. - null - ...,Private room in Las Vegas,1 guest · 1 bedroom · 1 bed · 1 private bath,$23 / night,Hottub·Pool·Wifi·Freeparking,,
2,https://airbnb.com/rooms/47237465,9min from the strip. 1min from the shopping ce...,Private room in Las Vegas,1 guest · 1 bedroom · 1 bed · 1.5 shared baths,$24 / night,Wifi,,
3,https://airbnb.com/rooms/44137284,Mini-Suite - null - Las Vegas,Private room in Las Vegas,4 guests · 2 bedrooms · 2 beds · 1 private bath,$30 / night,Hottub·Pool·Wifi·Freeparking,Rating 4.94 out of 5;4.9416 reviews (16),16 reviews
4,https://airbnb.com/rooms/41602679,Hospitality House 3 - null - Las Vegas,Private room in Las Vegas,1 guest · 1 bedroom · 1 bed · 1 shared bath,$24 / night,Wifi·Freeparking,Rating 4.54 out of 5;4.5413 reviews (13),13 reviews
...,...,...,...,...,...,...,...,...
295,https://airbnb.com/rooms/39567001,"Private bed & bath, Next 2 Strip, Parking, Cle...",Private room in Las Vegas,2 guests · 1 bedroom · 1 bed · 1 private bath,$57 / night,Hottub·Pool·Wifi·Freeparking,Rating 4.99 out of 5;4.9972 reviews (72),72 reviews
296,https://airbnb.com/rooms/38433961,Affordable room in Las Vegas - null - Las Vegas,Private room in Las Vegas,1 guest · 1 bedroom · 1 bed · 1 shared bath,$17 / night,Wifi,,
297,https://airbnb.com/rooms/47174700,Tuscany Casino & Hotel Strip Suite Stay - null...,Hotel room in Las Vegas,4 guests · 1 bedroom · 1 bed · 1 private bath,$69 / night,Pool·Wifi·Freeparking,,
298,https://airbnb.com/rooms/45773464,1-BR 10mins from Downtown Las Vegas - null - L...,Private room in Las Vegas,2 guests · Studio · 1 bed · 1 private bath,$55 / night,Wifi·Freeparking,Rating 5.0 out of 5;5.012 reviews (12),12 reviews


In [30]:
len(pd.read_csv('las_vegas_hotel_room_super_hosts_2020-12-26.csv')), len(pd.read_csv('las_vegas_hotel_room_super_hosts_2020-12-27.csv'))

(291, 285)

#### Time Comp

In [31]:
%%time 
las_vegas_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes'

# get 1st page
las_vegas_page_1 = get_page(las_vegas_link)

# record the 1st page
r = record_dataset(get_room_classes(las_vegas_page_1), first_page=True)
# print(r)
r

# get urls for other pages 
las_vegas_urls = make_page_urls(las_vegas_link, find_n_pages(las_vegas_page_1))

for url in las_vegas_urls:
    r = record_dataset(get_room_classes(get_page(url)), first_page=False)
#     print(r)
    r

Wall time: 17.5 s


In [32]:
%%time
las_vegas_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes'
seattle_link = 'https://www.airbnb.com/s/Seattle--WA--United-States/homes'

a = dask.delayed(scrape_search)(las_vegas_link, 'las_vegas')
b = dask.delayed(scrape_search)(seattle_link, 'seattle') 

z = compute(*[a, b])
z

Wall time: 21.9 s


(None, None)

In [33]:
%%time
las_vegas_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes'
seattle_link = 'https://www.airbnb.com/s/Seattle--WA--United-States/homes'
las_vegas_entire_homes_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?room_types[]=Entire home'
las_vegas_hotel_rooms_link = 'https://www.airbnb.com/s/Las-Vegas--NV--United-States/homes?room_types[]=Hotel room'

a = dask.delayed(scrape_search)(las_vegas_link, 'las_vegas')
b = dask.delayed(scrape_search)(seattle_link, 'seattle') 
c = dask.delayed(scrape_search)(las_vegas_entire_homes_link, 'las_vegas_entire_homes') 
d = dask.delayed(scrape_search)(las_vegas_hotel_rooms_link, 'las_vegas_hotel_rooms')

z = compute(*[a, b, c, d])
z

Wall time: 20.8 s


(None, None, None, None)