In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server at the connection URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Handles for the target database and the collection that will hold scraped articles
# (subscript access is equivalent to attribute access on PyMongo clients/databases)
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module.
# A timeout is required: without one, requests will wait forever on a stalled
# connection and the cell never finishes.
response = requests.get(url, timeout=10)

# Fail loudly on 4xx/5xx instead of silently parsing an error page,
# which would simply yield zero articles further down.
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to extract, print, and store each article's data
for result in results:
    # Locate the three elements of interest. find() returns None when the tag
    # is absent, so look the tags up first and validate before dereferencing.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # Skip items that lack any expected element rather than raising
    # AttributeError/TypeError part-way through the loop (which would leave
    # the collection partially populated).
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # The timestamp lives in the span's data-date attribute; skip if missing
    published_at = date_tag.get('data-date')
    if published_at is None:
        continue

    # scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # get only the date from the timestamp (the part before the 'T' separator).
    # The timestamp is deliberately not named `datetime`, to avoid shadowing
    # the standard-library module of that name.
    date = published_at.split('T')[0]

    # print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert keyed on (header, date) so that re-running this cell refreshes
    # existing documents instead of inserting duplicates of every article.
    collection.replace_one(
        {'header': header, 'date': date},
        post,
        upsert=True,
    )

-----------------
Draisaitl scores four goals in Oilers win against Predators
Forward, McDavid each has five points to help Edmonton gain in Pacific
2020-03-02
-----------------
Avalanche defeat Red Wings for seventh straight win
Move within one point of first in Central Division; Makar misses game with upper-body injury
2020-03-02
-----------------
Emergency goalie procedure doesn't need change, GMs say
Topic discussed at annual meetings after Ayres plays for Hurricanes
2020-03-02
-----------------
Rielly returns to practice with Maple Leafs, could play next week
Defenseman has missed 20 games with broken foot
2020-03-02
-----------------
Fantasy forward top 100 rankings for 2019-20
Fiala biggest riser from surprising Wild; Foligno, Buchnevich, Thomas join list
2020-03-02
-----------------
Penguins search for answers, look for ways to end six-game losing streak
Try to have proper mindset heading into game against Senators
2020-03-02
-----------------
Panthers must fix issues before se

In [6]:
# Read back every document in the articles collection and print each one,
# confirming what the scraping cell stored
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5e5ddda1b1b0e0523f1b194c'), 'header': 'Draisaitl scores four goals in Oilers win against Predators', 'subheader': 'Forward, McDavid each has five points to help Edmonton gain in Pacific', 'date': '2020-03-02'}
{'_id': ObjectId('5e5ddda1b1b0e0523f1b194d'), 'header': 'Avalanche defeat Red Wings for seventh straight win', 'subheader': 'Move within one point of first in Central Division; Makar misses game with upper-body injury', 'date': '2020-03-02'}
{'_id': ObjectId('5e5ddda1b1b0e0523f1b194e'), 'header': "Emergency goalie procedure doesn't need change, GMs say", 'subheader': 'Topic discussed at annual meetings after Ayres plays for Hurricanes', 'date': '2020-03-02'}
{'_id': ObjectId('5e5ddda1b1b0e0523f1b194f'), 'header': 'Rielly returns to practice with Maple Leafs, could play next week', 'subheader': 'Defenseman has missed 20 games with broken foot', 'date': '2020-03-02'}
{'_id': ObjectId('5e5ddda1b1b0e0523f1b1950'), 'header': 'Fantasy forward top 100 rankings for 2019