In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import numpy as np
import pandas as pd
from time import sleep
from random import randint

In [2]:
# Launch a visible (non-headless) Chrome session through Splinter.
# webdriver_manager's install() supplies the chromedriver path that is
# passed to Browser as `executable_path`.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# City label for this scrape; it is used as the prefix of the exported CSV filename.
city = "Chicago"

In [7]:
# Scrape the Craigslist apartment results page, then visit every listing to
# collect its bedroom / bathroom / square-footage / amenity details.
# Result: `datatoget`, a list with one dict per listing.

# Results page to scrape
url = 'https://chicago.craigslist.org/search/apa'

# Optional cap on how many listings to visit. None visits every result found
# on the page; set a small number (e.g. 15) for a quick trial run.
max_listings = None

# Create empty list to store scraped data
datatoget = []

# try/finally guarantees the browser window is closed even when a page fails
# to load or a listing has unexpected markup.
try:
    browser.visit(url)
    sleep(3)  # give the results page time to render

    # Parse the HTML of the results page
    html = browser.html
    soup = bs(html, 'html.parser')

    # Every search result lives in a <div class="result-info">
    search = soup.find_all('div', class_='result-info')

    # Loop through data found and extract relevant information
    for (counter, link) in enumerate(search[:max_listings]):

        # scrape dates
        dates = link.find_all('time', class_='result-date')[0].text.split('·')[0]

        # scrape titles
        titles = link.find_all('a', class_='result-title hdrlnk')[0].text

        # scrape links
        links = link.find_all('a', class_='result-title hdrlnk', href=True)[0]['href']

        # scrape prices (find_all returns an empty list when no price is shown)
        try:
            prices = link.find_all('span', class_='result-price')[0].text
        except IndexError:
            prices = np.nan

        # scrape neighborhoods (find returns None when no neighborhood is shown);
        # [1:-1] drops the first and last character of the stripped text
        try:
            neighborhoods = link.find('span', class_='result-hood').text.strip()[1:-1]
        except AttributeError:
            neighborhoods = np.nan

        # visit each link separately
        browser.visit(links)

        # ensures that the server does not get overloaded with requests
        sleep(randint(2, 5))

        # after visiting link, extract bed/bath/sqft and amenities data.
        # A separate name is used so the results-page `soup` is not overwritten.
        listing_soup = bs(browser.html, 'html.parser')
        attr_groups = listing_soup.find_all('p', class_='attrgroup')

        # Lines of the first attribute group.
        # NOTE(review): the indexing below assumes line 1 looks like
        # "1BR / 1Ba" and line 2 like "600ft2" - confirm against the live markup.
        first_group_lines = attr_groups[0].text.split('\n') if attr_groups else []

        # error handling in case there are no bathrooms displayed
        try:
            baths = first_group_lines[1].split()[2][:-2]
        except IndexError:
            baths = np.nan

        # square footage is only present when the line ends with "ft2"
        sqft_line = first_group_lines[2] if len(first_group_lines) > 2 else ''
        sqft = sqft_line[:-3] if sqft_line.endswith('ft2') else np.nan

        # error handling in case there are no bedrooms displayed
        if len(first_group_lines) > 1:
            bedrooms = first_group_lines[1].split('/')[0].strip().lower()
        else:
            bedrooms = np.nan

        # The second attribute group holds the amenities; it may be missing
        amenities = attr_groups[1].text.split('\n') if len(attr_groups) > 1 else []
        # list comprehension used to make sure no empty strings get added to the list
        all_amenities = [am for am in amenities if am != '']

        # print statements to ensure that the correct data is scraped
        # (`x == x` is False only for NaN, i.e. when the value was not found)
        print(f'\nListing Number: {counter+1}')
        print(f'Square Footage: {sqft if sqft == sqft else "not available"}')
        print(f'Price: {prices if prices == prices else "not available"}')
        print(f'Title: {titles}')
        print(f'Date: {dates}')
        print(f'Number of Bedrooms: {bedrooms if bedrooms == bedrooms else "not available"}')
        print(f'Neighborhood: {neighborhoods if neighborhoods == neighborhoods else "not available"}')
        print(f'Number of Bathrooms: {baths if baths == baths else "not available"}')
        print(f'Link: {links}')
        print(f'Amenities: {all_amenities}')

        # create dictionary for our DataFrame
        data = {'date': dates,
                'title': titles,
                'link': links,
                'price': prices,
                'bedroom': bedrooms,
                'sqft': sqft,
                'neighborhood': neighborhoods,
                'bathroom': baths,
                'amenities': all_amenities}

        # append the data
        datatoget.append(data)
finally:
    browser.quit()


Listing Number: 1
Square Footage: 600
Price: $675
Title: Brighton Park Studio Apartment for Rent
Date: Jan 10
Number of Bedrooms: 0br
Neighborhood: Brighton Park Chicago
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-brighton-park-studio-apartment/7577197405.html
Amenities: ['application fee details: Application fee for background & credit check', 'apartment', 'no laundry on site', 'no smoking', 'street parking', 'rent period: monthly', 'wheelchair accessible']

Listing Number: 2
Square Footage: 1500
Price: $1,800
Title: South Oak Park Apartment
Date: Jan 10
Number of Bedrooms: 3br
Neighborhood: Oak Park
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/nwc/apa/d/oak-park-south-oak-park-apartment/7577194028.html
Amenities: ['apartment', 'laundry in bldg', 'detached garage', 'rent period: monthly']

Listing Number: 3
Square Footage: 750
Price: $925
Title: 1 Bedroom House Near Griffith
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: Griffith/C


Listing Number: 20
Square Footage: not available
Price: $3,490
Title: 3 Bed/2Bath Duplex~ Central Heat/AC~ In Unit Washer/Dryer! Deck!
Date: Jan 10
Number of Bedrooms: 3br
Neighborhood: East Lakeview/Wrigleyville
Number of Bathrooms: 2
Link: https://chicago.craigslist.org/chc/apa/d/chicago-bed-2bath-duplex-central-heat/7577120108.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'carport', 'rent period: monthly']

Listing Number: 21
Square Footage: not available
Price: $1,845
Title: Gorgeous Convertible 1-Bed Logan Square! Great Location, New Building
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: Logan Square
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-gorgeous-convertible-bed-logan/7577119715.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'attached garage', 'rent period: monthly']

Listing Number: 22
Square Footage: not available
Price: $2,750
Title: 3BD 1BA Apa


Listing Number: 39
Square Footage: not available
Price: $3,600
Title: 3 Bed 2 Bath Duplex In Southport Corridor
Date: Jan 10
Number of Bedrooms: 3br
Neighborhood: Lakeview
Number of Bathrooms: 2
Link: https://chicago.craigslist.org/chc/apa/d/chicago-bed-bath-duplex-in-southport/7577080238.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'off-street parking', 'rent period: monthly']

Listing Number: 40
Square Footage: not available
Price: $1,670
Title: Unique update Lakeview 1 bedroom
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: Lakeview
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-unique-update-lakeview-bedroom/7577076983.html
Amenities: ['air conditioning', 'cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'street parking', 'rent period: monthly']

Listing Number: 41
Square Footage: not available
Price: $2,597
Title: Simply Beautiful! Central Lakeview Location!
Date: Jan 10
Number


Listing Number: 58
Square Footage: not available
Price: $2,188
Title: River North 1 bedroom with balcony
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: River North
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-river-north-bedroom-with-balcony/7577049835.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'attached garage', 'rent period: monthly']

Listing Number: 59
Square Footage: not available
Price: $1,800
Title: River North Studio, washer/dryer, 24hr security, excellent walkability
Date: Jan 10
Number of Bedrooms: 0br
Neighborhood: River North
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-river-north-studio-washer-dryer/7577049606.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'attached garage', 'rent period: monthly']

Listing Number: 60
Square Footage: not available
Price: $1,900
Title: 4 beds available in a nice place
Date: Jan 1


Listing Number: 77
Square Footage: not available
Price: $2,025
Title: Medical District - UIC ! Brand New 1-Bedroom. Gym, Pool and Rooftop.
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: Medical District
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-medical-district-uic-brand-new/7577029220.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'street parking', 'rent period: monthly']

Listing Number: 78
Square Footage: not available
Price: $1,550
Title: Beautiful  Two Bedroom Apartments   @ Garden House Apartments
Date: Jan 10
Number of Bedrooms: 2br
Neighborhood: Skokie
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/nch/apa/d/skokie-beautiful-two-bedroom-apartments/7577028320.html
Amenities: ['apartment', 'laundry on site', 'off-street parking', 'rent period: monthly']

Listing Number: 79
Square Footage: not available
Price: $1,350
Title: Beautiful One   Bedroom Apts @10737 S Keating
Date: Jan 10
Nu


Listing Number: 97
Square Footage: not available
Price: $1,100
Title: Location! Heat Included! Large Floorplan! Outdoor Space Ready Now!
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: South Old Irving Park/Kilbourn Park
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-location-heat-included-large/7576988287.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'laundry on site', 'street parking', 'rent period: monthly']

Listing Number: 98
Square Footage: not available
Price: $1,400
Title: Pilsen Bright and Spacious 2bed on Top Floor! - In Unit Laundry
Date: Jan 10
Number of Bedrooms: 2br
Neighborhood: Pilsen
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-pilsen-bright-and-spacious-2bed/7576984487.html
Amenities: ['cats are OK - purrr', 'apartment', 'laundry in bldg', 'no smoking', 'street parking', 'rent period: monthly']

Listing Number: 99
Square Footage: not available
Price: $1,275
Title: LOGAN S


Listing Number: 117
Square Footage: not available
Price: $3,300
Title: LOYOLA BEACH  4 BED 2 BATH  TOP FLOOR
Date: Jan 10
Number of Bedrooms: 4br
Neighborhood: Rogers Park
Number of Bathrooms: 2
Link: https://chicago.craigslist.org/chc/apa/d/chicago-loyola-beach-bed-bath-top-floor/7576967700.html
Amenities: ['cats are OK - purrr', 'dogs are OK - wooof', 'apartment', 'w/d in unit', 'attached garage', 'rent period: monthly']

Listing Number: 118
Square Footage: 550
Price: $1,395
Title: Rare Lincoln Square Jr One bedroom!
Date: Jan 10
Number of Bedrooms: 1br
Neighborhood: Lincoln Square
Number of Bathrooms: 1
Link: https://chicago.craigslist.org/chc/apa/d/chicago-rare-lincoln-square-jr-one/7576966561.html
Amenities: ['cats are OK - purrr', 'apartment', 'laundry in bldg', 'street parking', 'rent period: monthly']

Listing Number: 119
Square Footage: not available
Price: $1,460
Title: Spacious Apartments -One, Two, Three -Itasca, Il
Date: Jan 10
Number of Bedrooms: 2br
Neighborhood: Coloni

In [8]:
# Preview only the first few scraped records; echoing the entire list
# floods the notebook with output.
datatoget[:3]

[{'date': 'Jan 10',
  'title': 'Brighton Park Studio Apartment for Rent',
  'link': 'https://chicago.craigslist.org/chc/apa/d/chicago-brighton-park-studio-apartment/7577197405.html',
  'price': '$675',
  'bedroom': '0br',
  'sqft': '600',
  'neighborhood': 'Brighton Park Chicago',
  'bathroom': '1',
  'amenities': ['application fee details: Application fee for background & credit check',
   'apartment',
   'no laundry on site',
   'no smoking',
   'street parking',
   'rent period: monthly',
   'wheelchair accessible']},
 {'date': 'Jan 10',
  'title': 'South Oak Park Apartment',
  'link': 'https://chicago.craigslist.org/nwc/apa/d/oak-park-south-oak-park-apartment/7577194028.html',
  'price': '$1,800',
  'bedroom': '3br',
  'sqft': '1500',
  'neighborhood': 'Oak Park',
  'bathroom': '1',
  'amenities': ['apartment',
   'laundry in bldg',
   'detached garage',
   'rent period: monthly']},
 {'date': 'Jan 10',
  'title': '1 Bedroom House Near Griffith',
  'link': 'https://chicago.craigslis

In [9]:
# Build a DataFrame from the scraped listing dicts (one row per listing,
# one column per dict key) and display it.
df = pd.DataFrame(data=datatoget)
df

Unnamed: 0,date,title,link,price,bedroom,sqft,neighborhood,bathroom,amenities
0,Jan 10,Brighton Park Studio Apartment for Rent,https://chicago.craigslist.org/chc/apa/d/chica...,$675,0br,600,Brighton Park Chicago,1,[application fee details: Application fee for ...
1,Jan 10,South Oak Park Apartment,https://chicago.craigslist.org/nwc/apa/d/oak-p...,"$1,800",3br,1500,Oak Park,1,"[apartment, laundry in bldg, detached garage, ..."
2,Jan 10,1 Bedroom House Near Griffith,https://chicago.craigslist.org/nwi/apa/d/gary-...,$925,1br,750,Griffith/Calumet Township,1,"[air conditioning, house, w/d hookups, off-str..."
3,Jan 10,"GORGEOUS 1 Bedroom in West Loop! W/D in Unit, ...",https://chicago.craigslist.org/chc/apa/d/chica...,"$2,260",1br,,West Loop,1,"[cats are OK - purrr, dogs are OK - wooof, apa..."
4,Jan 10,All new renovation! Ample natural light!,https://chicago.craigslist.org/chc/apa/d/chica...,"$1,050",0br,500,Lakeview,1,"[air conditioning, cats are OK - purrr, apartm..."
...,...,...,...,...,...,...,...,...,...
115,Jan 10,Location! Heat Included! Hardwood Floors! Smal...,https://chicago.craigslist.org/chc/apa/d/chica...,"$1,000",1br,,Mayfair,1,"[cats are OK - purrr, dogs are OK - wooof, apa..."
116,Jan 10,LOYOLA BEACH 4 BED 2 BATH TOP FLOOR,https://chicago.craigslist.org/chc/apa/d/chica...,"$3,300",4br,,Rogers Park,2,"[cats are OK - purrr, dogs are OK - wooof, apa..."
117,Jan 10,Rare Lincoln Square Jr One bedroom!,https://chicago.craigslist.org/chc/apa/d/chica...,"$1,395",1br,550,Lincoln Square,1,"[cats are OK - purrr, apartment, laundry in bl..."
118,Jan 10,"Spacious Apartments -One, Two, Three -Itasca, Il",https://chicago.craigslist.org/nwc/apa/d/medin...,"$1,460",2br,,Colonial Village Apartments -1640 Norwood Ave,1,"[cats are OK - purrr, dogs are OK - wooof, apa..."


In [11]:
# Write the raw scrape to "<city>_data_raw.csv" in the working directory,
# leaving out the DataFrame index column.
output_path = f"{city}_data_raw.csv"
df.to_csv(output_path, index=False)