In [16]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [17]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [18]:
# Define database and collection
db = client.craigslist_db
collection = db.items

In [19]:
# URL of page to be scraped
url = 'https://newjersey.craigslist.org/search/sss?sort=rel&query=guitar'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [20]:
# Examine the results, then determine element that contains sought info
# results are returned as an iterable list
results = soup.find_all('li', class_='result-row')

# Loop through returned results
for result in results:
    # Error handling
    try:
        # Identify and return title of listing
        title = result.find('a', class_='result-title').text
        # Identify and return price of listing
        price = result.a.span.text
        # Identify and return link to listing
        link = result.a['href']

        # Run only if title, price, and link are available
        if (title and price and link):
            # Print results
            print('-------------')
            print(title)
            print(price)
            print(link)

            # Dictionary to be inserted as a MongoDB document
            post = {
                'title': title,
                'price': price,
                'url': link
            }

            collection.insert_one(post)

    except Exception as e:
        print(e)

-------------
GUITAR HERO /w GUITAR Game Boy ADVANCE
$15
https://newjersey.craigslist.org/vgm/d/paterson-guitar-hero-guitar-game-boy/6862722046.html
-------------
ELVIRA GUITAR & Stereo component glass door rack  & Components
$13
https://newjersey.craigslist.org/for/d/totowa-elvira-guitar-stereo-component/6851992599.html
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
-------------
***** ELVIS GUITAR CD RACK BY ATLANTIC *****
$20
https://newjersey.craigslist.org/for/d/elvis-guitar-cd-rack-by-atlantic/6869865744.html
-------------
Vintage GIBSON S-1 Electric Guitar
$1200
https://newjersey.craigslist.org/msg/d/wayne-vintage-gibson-1-electric-guitar/6850196232.html
-------------
Vintage GIBSON S-1 Electric Guitar
$1200
https://newjersey.craigslist.org/msg/d/wayne-vintage-gibson-1-electric-guitar/6870826209.html
-------------
WASHBURN -GRAND AUDITORIUM ACOUSTIC/ELECTRIC GUITAR
$500
https://newjersey.craigslist.org/msg/d/little-falls-washburn-grand-audito

In [15]:
# Display items in MongoDB collection
listings = db.items.find()

for listing in listings:
    print(listing)

{'_id': ObjectId('5cbe7f7062c9f4eef926ca07'), 'title': 'GUITAR HERO /w GUITAR Game Boy ADVANCE', 'price': '$15', 'url': 'https://newjersey.craigslist.org/vgm/d/paterson-guitar-hero-guitar-game-boy/6862722046.html'}
{'_id': ObjectId('5cbe7f7062c9f4eef926ca08'), 'title': 'ELVIRA GUITAR & Stereo component glass door rack  & Components', 'price': '$13', 'url': 'https://newjersey.craigslist.org/for/d/totowa-elvira-guitar-stereo-component/6851992599.html'}
{'_id': ObjectId('5cbe7f7062c9f4eef926ca09'), 'title': '***** ELVIS GUITAR CD RACK BY ATLANTIC *****', 'price': '$20', 'url': 'https://newjersey.craigslist.org/for/d/elvis-guitar-cd-rack-by-atlantic/6869865744.html'}
{'_id': ObjectId('5cbe7f7062c9f4eef926ca0a'), 'title': 'Vintage GIBSON S-1 Electric Guitar', 'price': '$1200', 'url': 'https://newjersey.craigslist.org/msg/d/wayne-vintage-gibson-1-electric-guitar/6850196232.html'}
{'_id': ObjectId('5cbe7f7062c9f4eef926ca0b'), 'title': 'Vintage GIBSON S-1 Electric Guitar', 'price': '$1200', 'u