In [6]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [7]:
# Connection URI for the MongoDB server this notebook talks to
conn = 'mongodb://localhost:27017'

# Create the PyMongo client, passing the URI via the explicit `host` keyword
client = pymongo.MongoClient(host=conn)

In [8]:
# Handles for the 'nhl_db' database and its 'articles' collection
# (dictionary-style access is equivalent to attribute-style access in PyMongo)
db = client['nhl_db']
collection = db['articles']

In [9]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module. requests applies no timeout by
# default, so an unresponsive server would hang this cell indefinitely;
# bound the wait (value is in seconds).
response = requests.get(url, timeout=10)

# Fail fast on an HTTP error status (4xx/5xx) instead of silently parsing
# an error page and finding zero articles further down.
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [11]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in results:
    # Locate the three elements that make up one article entry
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # find() returns None when an element is absent; skip incomplete entries
    # rather than aborting the whole scrape with an AttributeError/TypeError
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # The timestamp is read from the span's data-date attribute; .get()
    # yields None instead of raising KeyError when the attribute is missing
    timestamp = date_tag.get('data-date')
    if not timestamp:
        continue

    # Scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # Keep only the part of the timestamp before the 'T' separator
    date = timestamp.split('T')[0]

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert instead of a plain insert: an identical document is matched and
    # left as-is, so re-running this cell does not pile up duplicates
    collection.update_one(post, {'$set': post}, upsert=True)

-----------------
Penguins shut out Bruins, move into first in MassMutual East Division
Jarry makes 30 saves, Guentzel scores only goal in Pittsburgh's fourth straight win
2021-04-25
-----------------
Stanley Cup Playoffs Buzz: Rangers gain on Bruins in East
Penguins move into first; Lightning pick up ground in Central
2021-04-26
-----------------
Shaw ends playing career, won Stanley Cup twice with Blackhawks
Forward was on long-term injured reserve with concussion
2021-04-26
-----------------
Hedman scores 10 seconds into OT, Lightning defeat Blue Jackets
Tampa Bay gains ground in Central; Korpisalo leaves with lower-body injury
2021-04-25
-----------------
Ovechkin has lower-body injury, may not play for Capitals vs. Islanders
Forward might miss second straight game Tuesday, is one goal from tying Dionne for fifth all-time
2021-04-25
-----------------
Lafreniere's confidence growing for Rangers
Rookie forward's effort, understanding of offense has 'elevated to an NHL level,' coach s

In [6]:
# Query every document in the 'articles' collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5f85c6141473bf1962e97120'), 'header': 'Pietrangelo signs seven-year, $61.6 million contract with Golden Knights', 'subheader': 'Defenseman gets $8.8 million per season, won Stanley Cup in 2019 as Blues captain', 'date': '2020-10-12'}
{'_id': ObjectId('5f85c6151473bf1962e97121'), 'header': 'Pietrangelo 7-year, $61.6 million contract latest splash for Vegas', 'subheader': 'Golden Knights again show commitment to winning Cup by signing free agent defenseman', 'date': '2020-10-13'}
{'_id': ObjectId('5f85c6151473bf1962e97122'), 'header': 'Toffoli agrees to four-year, $17 million contract with Canadiens', 'subheader': 'Forward scored 24 goals for Kings, Canucks last season, won Stanley Cup in 2014', 'date': '2020-10-12'}
{'_id': ObjectId('5f85c6151473bf1962e97123'), 'header': 'Schmidt traded to Canucks by Golden Knights for draft pick', 'subheader': 'Defenseman has five seasons remaining on six-year contract; Vegas gets 2022 third-round choice', 'date': '2020-10-12'}
{'_id'