In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Connect to the local MongoDB server through PyMongo
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection that will hold the scraped listings
db = client['commerce_db']
collection = db['items']

In [7]:
# URL of page to be scraped
url = 'https://webscraper.io/test-sites/e-commerce/allinone/computers/laptops'

# Retrieve page with the requests module.
# A timeout (in seconds) keeps the cell from hanging forever if the server
# never answers; requests applies no timeout by default.
response = requests.get(url, timeout=10)
# Stop here on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')
soup

<!DOCTYPE html>
<html lang="en">
<head>
<!-- Anti-flicker snippet (recommended)  -->
<style>.async-hide {
		opacity: 0 !important
	} </style>
<script>(function (a, s, y, n, c, h, i, d, e) {
		s.className += ' ' + y;
		h.start = 1 * new Date;
		h.end = i = function () {
			s.className = s.className.replace(RegExp(' ?' + y), '')
		};
		(a[n] = a[n] || []).hide = h;
		setTimeout(function () {
			i();
			h.end = null
		}, c);
		h.timeout = c;
	})(window, document.documentElement, 'async-hide', 'dataLayer', 4000,
		{'GTM-NVFPDWB': true});</script>
<!-- Google Tag Manager -->
<script>(function (w, d, s, l, i) {
		w[l] = w[l] || [];
		w[l].push({
			'gtm.start':
				new Date().getTime(), event: 'gtm.js'
		});
		var f = d.getElementsByTagName(s)[0],
			j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : '';
		j.async = true;
		j.src =
			'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
		f.parentNode.insertBefore(j, f);
	})(window, document, 'script', 'dataLayer', 'GTM-NVFPDWB')

In [5]:
# Examine the results, then determine element that contains sought info
# find_all returns an iterable of every <div class="caption"> on the page
results = soup.find_all('div', class_='caption')

# Loop through returned results
for result in results:
    # Locate the elements first so a listing with missing markup is skipped
    # with a clear message instead of failing on `None.text`
    title_tag = result.find('a', class_='title')
    price_tag = result.find('h4', class_='price')
    if title_tag is None or price_tag is None:
        print('Skipping listing: title or price element not found')
        continue

    # Identify title and price of listing
    # NOTE(review): the anchor text is abbreviated with "..." in the recorded
    # output; the full product name may be available elsewhere in the markup
    # -- confirm before relying on `title` as a unique or complete name.
    title = title_tag.text
    price = price_tag.text
    # Identify link to listing; .get() yields None rather than raising
    # KeyError when the anchor has no href
    link = result.a.get('href')

    # Run only if title, price, and link are available
    if not (title and price and link):
        continue

    # Print results
    print('-------------')
    print(title)
    print(price)
    print(link)

    # Dictionary to be stored as a MongoDB document
    post = {
        'title': title,
        'price': price,
        'url': link
    }

    try:
        # Upsert keyed on the listing URL so re-running this cell refreshes
        # the existing document instead of inserting a duplicate.
        # Assumes each listing has a distinct URL, as in the output shown.
        collection.update_one({'url': link}, {'$set': post}, upsert=True)
    except pymongo.errors.PyMongoError as e:
        # Best effort: report the database failure and carry on with the
        # remaining listings
        print(e)

-------------
Asus VivoBook X4...
$295.99
/test-sites/e-commerce/allinone/product/545
-------------
Prestigio SmartB...
$299.00
/test-sites/e-commerce/allinone/product/546
-------------
Prestigio SmartB...
$299.00
/test-sites/e-commerce/allinone/product/547
-------------
Aspire E1-510
$306.99
/test-sites/e-commerce/allinone/product/517
-------------
Lenovo V110-15IA...
$321.94
/test-sites/e-commerce/allinone/product/548
-------------
Lenovo V110-15IA...
$356.49
/test-sites/e-commerce/allinone/product/549
-------------
Hewlett Packard...
$364.46
/test-sites/e-commerce/allinone/product/550
-------------
Acer Aspire 3 A3...
$372.70
/test-sites/e-commerce/allinone/product/551
-------------
Acer Aspire A315...
$379.94
/test-sites/e-commerce/allinone/product/552
-------------
Acer Aspire ES1-...
$379.95
/test-sites/e-commerce/allinone/product/553
-------------
Acer Aspire 3 A3...
$391.48
/test-sites/e-commerce/allinone/product/554
-------------
Acer Aspire 3 A3...
$393.88
/test-sites/e-comme

In [6]:
# Query every document stored in the `items` collection
listings = db['items'].find()

# Echo each stored document, one per line
for listing in listings:
    print(listing)

{'_id': ObjectId('6090846d1f7e0efe5bdf01ff'), 'title': 'Asus VivoBook X4...', 'price': '$295.99', 'url': '/test-sites/e-commerce/allinone/product/545'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0200'), 'title': 'Prestigio SmartB...', 'price': '$299.00', 'url': '/test-sites/e-commerce/allinone/product/546'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0201'), 'title': 'Prestigio SmartB...', 'price': '$299.00', 'url': '/test-sites/e-commerce/allinone/product/547'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0202'), 'title': 'Aspire E1-510', 'price': '$306.99', 'url': '/test-sites/e-commerce/allinone/product/517'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0203'), 'title': 'Lenovo V110-15IA...', 'price': '$321.94', 'url': '/test-sites/e-commerce/allinone/product/548'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0204'), 'title': 'Lenovo V110-15IA...', 'price': '$356.49', 'url': '/test-sites/e-commerce/allinone/product/549'}
{'_id': ObjectId('6090846d1f7e0efe5bdf0205'), 'title': 'Hewlett Packard...', 'price': '$364.46