In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import numpy as np
import pandas as pd
from time import sleep
from random import randint

In [2]:
# Launch a visible (non-headless) Chrome session through Splinter, using the
# chromedriver binary that webdriver_manager installs and returns the path to
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

[WDM] - Downloading: 100%|████████████████████████████████████████████████████████| 6.58M/6.58M [00:00<00:00, 40.8MB/s]


In [29]:
# Scrape the Boston Craigslist apartment search page, then visit every listing
# found on it to collect bathroom and amenity details.
# Result: `datatoget`, a list holding one dict per listing.

# Set url
url = 'https://boston.craigslist.org/search/apa#search=1~thumb~0~0'

# Create empty list to store scraped data
datatoget = []

# try/finally guarantees the automated browser is closed even when a listing
# raises part-way through the scrape (otherwise the Chrome instance leaks)
try:
    browser.visit(url)

    # Parse the HTML of the search results page
    html = browser.html
    soup = bs(html, 'html.parser')

    # find and extract relevant data
    search = soup.find_all('div', class_='result-info')

    # Loop through data found and extract relevant information
    for (counter, link) in enumerate(search):

        # scrape dates
        # NOTE(review): [:3] keeps only the first three characters of the first
        # token, so a date such as '1/10' would presumably be cut to '1/1' --
        # confirm against the live markup before relying on this column
        dates = link.find_all('div', class_='meta')[0].text.split()[0][:3]

        # scrape titles
        titles = link.find_all('a', class_='titlestring')[0].text

        # scrape links
        links = link.find_all('a', class_='titlestring', href=True)[0]['href']

        # scrape prices
        prices = link.find_all('span', class_='priceinfo')[0].text

        # scrape neighborhoods
        neighborhoods = link.find('div', class_='supertitle').text

        # error handling in case there are no bedrooms displayed:
        # find() returns None when the tag is absent, so .text raises AttributeError
        try:
            bedrooms = link.find('span', class_='post-bedrooms').text
        except AttributeError:
            bedrooms = np.nan

        # error handling in case there are no sqft displayed
        # (missing tag -> AttributeError, empty tag text -> IndexError)
        try:
            sqft = link.find('span', class_='post-sqft').text.split()[0][:-3]
        except (AttributeError, IndexError):
            sqft = np.nan

        # ensures that the server does not get overloaded with requests
        sleep(randint(1, 5))

        # visit each link separately
        browser.visit(links)

        # after visiting link, extract bath and amenities data; a separate name
        # is used so the search-page `soup` is not overwritten inside the loop
        listing_soup = bs(browser.html, 'html.parser')
        attr_groups = listing_soup.find_all('p', class_='attrgroup')

        # baths: third whitespace-separated token on the second line of the
        # first attrgroup, minus its last two characters; a listing without
        # that structure is recorded as NaN instead of aborting the whole run
        try:
            baths = attr_groups[0].text.split('\n')[1].split()[2][:-2]
        except IndexError:
            baths = np.nan

        # amenities live in the second attrgroup; fall back to an empty list
        # when the listing page does not have one
        amenities = attr_groups[1].text.split('\n') if len(attr_groups) > 1 else []
        # list comprehension used to make sure no empty strings get added to the list
        all_amenities = [am for am in amenities if len(am) >= 1]

        # print statements to ensure that the correct data is scraped
        print(f'Listing Number: {counter+1}')
        print(f'Square Footage: {sqft}')
        print(f'price {prices}')
        print(f'Title {titles}')
        print(f'Date: {dates}')
        print(f'Number of Bedrooms {bedrooms}')
        print(f'Neighborhood: {neighborhoods}')
        print(f'Number of Bathrooms: {baths}')
        print(f'Link: {links}')
        print(f'Amenities: {all_amenities}')

        # create dictionary for our DataFrame
        data = {'date': dates,
                'title': titles,
                'link': links,
                'price': prices,
                'bedroom': bedrooms,
                'sqft': sqft,
                'neighborhood': neighborhoods,
                'bathroom': baths,
                'amenities': all_amenities}

        # append the data
        datatoget.append(data)
finally:
    # Quit automated browser instance
    browser.quit()

Listing Number: 0
900
$2,300
Five minute walk to Quincy Center
1/9
2br
Quincy
1
https://boston.craigslist.org/sob/apa/d/quincy-five-minute-walk-to-quincy-center/7576745505.html
['air conditioning', 'apartment', 'laundry in bldg', 'no smoking', 'off-street parking', 'rent period: monthly']
Listing Number: 1
nan
$4,895
~Stunning Student Friendly 3 Bed 1.5 Bath~  w/ In Unit Laundry!
1/9
3br
Brighton
1.5
https://boston.craigslist.org/gbs/apa/d/brighton-stunning-student-friendly-bed/7576744301.html
['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'street parking', 'rent period: monthly']
Listing Number: 2
nan
$1,850
SEPTEMBER - real live kitchen and private bathroom!
1/9
nan
boston/cambridge/brookline
1
https://boston.craigslist.org/gbs/apa/d/boston-september-real-live-kitchen-and/7576737233.html
['apartment', 'laundry in bldg', 'no parking', 'rent period: monthly']
Listing Number: 3
325
$1,275
1 bedroom cottage
1/9
1br
metro west
1
https://boston.craigslist.org/bm

Listing Number: 28
450
$1,150
1 Bedroom-Cheerful apartment on 2nd floor in 4 unit building
1/9
1br
NORWOOD, MA
1
https://boston.craigslist.org/sob/apa/d/norwood-bedroom-cheerful-apartment-on/7576667868.html
['apartment', 'no laundry on site', 'no smoking', 'off-street parking', 'rent period: monthly']
Listing Number: 29
681
$2,152
ENCHANTING 1BED, W/D, GYM, POOL, WALK TO TRAIN
1/9
1br
North of Boston Walk to Subway
1
https://boston.craigslist.org/gbs/apa/d/malden-enchanting-1bed-d-gym-pool-walk/7576666522.html
['broker fee details: $500', 'cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'no smoking', 'off-street parking', 'rent period: monthly']
Listing Number: 30
700
$1,824
REVERE DEAD END ROAD Near shopping/pub transp/hghwys/Logan
1/9
1br
Revere
1
https://boston.craigslist.org/nos/apa/d/revere-revere-dead-end-road-near/7576665417.html
['apartment', 'laundry in bldg', 'off-street parking', 'rent period: monthly']
Listing Number: 31
nan
$2,250
2 Bedroom Apartme

Listing Number: 53
nan
$1,395
January move-in - room in apt building
1/9
1br
Cambridge
shared
https://boston.craigslist.org/gbs/apa/d/cambridge-january-move-in-room-in-apt/7576602540.html
['apartment', 'no laundry on site', 'no smoking', 'street parking', 'rent period: monthly']
Listing Number: 54
600
$2,650
North End Gem - VERY NICE OVERSIZED 1 BEDROOM - Central Air, Laundry
1/9
1br
NORTH END ~ March 1st
1
https://boston.craigslist.org/gbs/apa/d/charlestown-north-end-gem-very-nice/7576598637.html
['apartment', 'laundry in bldg', 'no parking', 'rent period: monthly']
Listing Number: 55
450
$2,400
North End Junior 1 Bed - Roof Deck, Central Air, Laundry - HAS IT ALL!
1/9
nan
NORTH END ~ March 1st
1
https://boston.craigslist.org/gbs/apa/d/boston-north-end-junior-bed-roof-deck/7576597152.html
['apartment', 'laundry in bldg', 'no parking', 'rent period: monthly']
Listing Number: 56
nan
$1,000
NEWLY COMPLETED LUXURY PRIVATE ROOM AVAILABLE- NO BROKERS FEE
1/9
nan
Lynn, MA
9+
https://boston.c

Listing Number: 79
nan
$1,550
Hull Quiet Side-Street One Block To Beach And Bus
1/9
1br
south shore
1
https://boston.craigslist.org/sob/apa/d/hull-hull-quiet-side-street-one-block/7576546605.html
['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'laundry on site', 'off-street parking', 'rent period: monthly']
Listing Number: 80
nan
$6,500
Amazing 3br in Fenway!!
1/9
3br
Fenway
2
https://boston.craigslist.org/gbs/apa/d/boston-amazing-3br-in-fenway/7576545697.html
['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'off-street parking', 'rent period: monthly']
Listing Number: 81
nan
$4,700
LUXURY Renovated Beacon Street 3Bed/2Bath, GARAGE PKNG, A/C, May 15th!
1/9
3br
Beacon Street/Brookline/Washington Square
2
https://boston.craigslist.org/gbs/apa/d/brookline-village-luxury-renovated/7576540925.html
["broker fee details: One Month's Rent", 'condo', 'laundry in bldg', 'listed by: Heidi Shenker Real Estate', 'attached garage', 'rent period: monthly']
Listin

Listing Number: 105
860
$3,971
2BR UNIT CLOSE TO SULLIVAN STATION! WITH W&D IN UNIT! GREAT DEAL!
1/9
2br
Charlestown
2
https://boston.craigslist.org/gbs/apa/d/charlestown-2br-unit-close-to-sullivan/7576483654.html
['air conditioning', 'broker fee details: 0', 'cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'listed by: Hartley Realty Group', 'no smoking', 'attached garage', 'rent period: monthly', 'wheelchair accessible']
Listing Number: 106
585
$3,199
1BR UNIT CLOSE TO SULLIVAN STATION! WITH W&D IN UNIT! GREAT DEAL!
1/9
1br
Charlestown
1
https://boston.craigslist.org/gbs/apa/d/charlestown-1br-unit-close-to-sullivan/7576483507.html
['air conditioning', 'broker fee details: 0', 'cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'listed by: Hartley Realty Group', 'no smoking', 'attached garage', 'rent period: monthly', 'wheelchair accessible']
Listing Number: 107
756
$3,174
1BR UNIT CLOSE TO SULLIVAN STATION! WITH W&D IN UNIT! GREAT DEAL!
1

In [30]:
# Inspect the raw scraped records: a list of dicts, one dict per listing
datatoget

[{'date': '1/9',
  'title': 'Five minute walk to Quincy Center',
  'link': 'https://boston.craigslist.org/sob/apa/d/quincy-five-minute-walk-to-quincy-center/7576745505.html',
  'price': '$2,300',
  'bedroom': '2br',
  'sqft': '900',
  'neighborhood': 'Quincy',
  'bathroom': '1',
  'amenities': ['air conditioning',
   'apartment',
   'laundry in bldg',
   'no smoking',
   'off-street parking',
   'rent period: monthly']},
 {'date': '1/9',
  'title': '~Stunning Student Friendly 3 Bed 1.5 Bath~  w/ In Unit Laundry!',
  'link': 'https://boston.craigslist.org/gbs/apa/d/brighton-stunning-student-friendly-bed/7576744301.html',
  'price': '$4,895',
  'bedroom': '3br',
  'sqft': nan,
  'neighborhood': 'Brighton',
  'bathroom': '1.5',
  'amenities': ['cats are OK - purrr',
   'dogs are OK - wooof',
   'apartment',
   'w/d in unit',
   'street parking',
   'rent period: monthly']},
 {'date': '1/9',
  'title': 'SEPTEMBER - real live kitchen and private bathroom!',
  'link': 'https://boston.craigsl

In [31]:
# Build a DataFrame from the scraped records (one row per listing dict)
# and show it as the cell output
df = pd.DataFrame(data=datatoget)
df

Unnamed: 0,date,title,link,price,bedroom,sqft,neighborhood,bathroom,amenities
0,1/9,Five minute walk to Quincy Center,https://boston.craigslist.org/sob/apa/d/quincy...,"$2,300",2br,900,Quincy,1,"[air conditioning, apartment, laundry in bldg,..."
1,1/9,~Stunning Student Friendly 3 Bed 1.5 Bath~ w/...,https://boston.craigslist.org/gbs/apa/d/bright...,"$4,895",3br,,Brighton,1.5,"[cats are OK - purrr, dogs are OK - wooof, apa..."
2,1/9,SEPTEMBER - real live kitchen and private bath...,https://boston.craigslist.org/gbs/apa/d/boston...,"$1,850",,,boston/cambridge/brookline,1,"[apartment, laundry in bldg, no parking, rent ..."
3,1/9,1 bedroom cottage,https://boston.craigslist.org/bmw/apa/d/natick...,"$1,275",1br,325,metro west,1,"[cottage/cabin, no laundry on site, off-street..."
4,1/9,filled with bright natural light - SEPTEMBER,https://boston.craigslist.org/gbs/apa/d/boston...,"$1,850",,,boston/cambridge/brookline,1,"[apartment, laundry in bldg, no parking, rent ..."
...,...,...,...,...,...,...,...,...,...
115,1/8,Apt for rent,https://boston.craigslist.org/gbs/apa/d/roxbur...,"$3,500",4br,,"Roxbury, Mass",1.5,"[cats are OK - purrr, apartment, w/d hookups, ..."
116,1/8,2 BED 2 BATH || HEAT/HW/PRKG || LARGE CLOSETS ...,https://boston.craigslist.org/gbs/apa/d/newton...,"$2,595",2br,,NEWTON,2,"[broker fee details: 2595, apartment, laundry ..."
117,1/8,SUN DRENCHED 2 BED *** HEAT/HW/PRKG *** WALK ...,https://boston.craigslist.org/gbs/apa/d/newton...,"$2,495",2br,,NEWTON,1,"[broker fee details: 2495, apartment, laundry ..."
118,1/8,SUNNY 1 BED ** HEAT/HW/PRKG ** NO LAST + NO SE...,https://boston.craigslist.org/gbs/apa/d/auburn...,"$2,095",1br,,NEWTON,1,"[broker fee details: 2095, apartment, laundry ..."


In [33]:
# Persist the raw scraped table as CSV, leaving out the DataFrame index column
df.to_csv(path_or_buf='boston_data_raw.csv', index=False)