In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the local MongoDB server (port 27017)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the 'nhl_db' database and its 'articles' collection
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module; the timeout (seconds) keeps the
# cell from hanging indefinitely if the server never answers
response = requests.get(url, timeout=10)
# Raise on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data and store each article in MongoDB
for result in results:
    # Locate the elements holding the header, subheader and publication date
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # find() returns None when an element is absent, and the date lives in
    # the span's "data-date" attribute; skip cards missing any of these
    # rather than aborting the whole loop on one malformed card
    published_at = date_tag.get('data-date') if date_tag is not None else None
    if header_tag is None or subheader_tag is None or not published_at:
        continue

    header = header_tag.text
    subheader = subheader_tag.text

    # Get only the date from the datetime (the part before the 'T')
    date = published_at.split('T')[0]

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    print(post)

    # Upsert keyed on header + date so re-running this cell updates the
    # existing document instead of inserting a duplicate
    collection.update_one(
        {'header': header, 'date': date},
        {'$set': post},
        upsert=True,
    )

{'header': 'Kane suspended 21 games for Sharks, violated NHL COVID-19 protocol', 'subheader': 'Forward did not attend training camp, play in season opener, previously cleared in gambling investigation', 'date': '2021-10-18'}
{'header': 'Kucherov out indefinitely for Lightning with undisclosed injury', 'subheader': "'Not a day-to-day thing' coach Cooper says about forward who left win against Capitals", 'date': '2021-10-18'}
{'header': 'NHL Buzz: Malkin skates prior to Penguins practice', 'subheader': 'Center had offseason knee surgery; Hoffman, Boeser each expected to play Tuesday; Wheeler in COVID-19 protocol for Jets', 'date': '2021-10-18'}
{'header': 'NHL On Tap: Coleman to make Flames debut after serving suspension', 'subheader': "Matthews expected to play first game of season for Maple Leafs; Hakstol's Kraken visit Flyers", 'date': '2021-10-18'}
{'header': 'Kopitar leads 3 Stars of the Week', 'subheader': 'Kings center, Capitals forward Ovechkin, Lightning center Stamkos earn hono

In [6]:
# Query every document in the 'articles' collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('616e267ab75ac3f618e0e2f4'), 'header': 'Kane suspended 21 games for Sharks, violated NHL COVID-19 protocol', 'subheader': 'Forward did not attend training camp, play in season opener, previously cleared in gambling investigation', 'date': '2021-10-18'}
{'_id': ObjectId('616e267ab75ac3f618e0e2f5'), 'header': 'Kucherov out indefinitely for Lightning with undisclosed injury', 'subheader': "'Not a day-to-day thing' coach Cooper says about forward who left win against Capitals", 'date': '2021-10-18'}
{'_id': ObjectId('616e267ab75ac3f618e0e2f6'), 'header': 'NHL Buzz: Malkin skates prior to Penguins practice', 'subheader': 'Center had offseason knee surgery; Hoffman, Boeser each expected to play Tuesday; Wheeler in COVID-19 protocol for Jets', 'date': '2021-10-18'}
{'_id': ObjectId('616e267ab75ac3f618e0e2f7'), 'header': 'NHL On Tap: Coleman to make Flames debut after serving suspension', 'subheader': "Matthews expected to play first game of season for Maple Leafs; Hakstol's K