In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Pick the database, then the collection inside it, by name
db = client['nhl_database']
collection = db['articles']

In [4]:
# Address of the page that the following cells download and parse
url = "https://www.nhl.com/news"

In [5]:
# Retrieve page with the requests module.
# The timeout (seconds) keeps the cell from hanging indefinitely if the server
# stalls, and raise_for_status() stops the run on an HTTP 4xx/5xx response
# instead of letting an error page be parsed as if it were the news listing.
nhl_retrieved = requests.get(url, timeout=30)
nhl_retrieved.raise_for_status()

In [6]:
# Build the BeautifulSoup tree from the response body using 'html.parser'
nhl_soup = BeautifulSoup(markup=nhl_retrieved.text, features='html.parser')
# Last expression of the cell: show the indented markup for inspection
nhl_soup.prettify()

www.nhl.com/info/frequently-asked-questions">\n      FAQ\n     </a>\n     <a class="site-footer__legal-link" data-phone-href="https://www.nhl.com/info/contact-us" href="https://www.nhl.com/info/contact-us">\n      Contact Us\n     </a>\n     <a class="site-footer__legal-link" data-phone-href="https://www.nhl.com/info/copyright-policy" href="https://www.nhl.com/info/copyright-policy">\n      Copyright Policy\n     </a>\n     <a class="site-footer__legal-link" data-phone-href="/info/do-not-sell" href="/info/do-not-sell">\n      Do Not Sell My Info\n     </a>\n    </nav>\n    <div class="site-footer__colophon-container">\n     <a class="site-footer__shield-link" href="https://www.nhl.com" title="NHL.com">\n      <img alt="" class="logo site-footer__shield-logo" onerror="this.src =\n                                                                \'//www-league.nhlstatic.com/images/logos/league-light/133.svg\'" role="img" src="//www-league.nhlstatic.com/images/logos/league-dark/133.svg"/>\n

In [7]:
# Retrieve the parent divs for all articles
nhl_results = nhl_soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in nhl_results:
    try:
        # find() returns None when a tag is absent, so a missing headline or
        # subheader raises AttributeError on `.text`, a missing date span
        # raises TypeError on the subscript, and a span without a `data-date`
        # attribute raises KeyError. All three mean "incomplete article".
        header = result.find('h1', class_='article-item__headline').text
        subheader = result.find('h2', class_='article-item__subheader').text
        published_at = result.find('span', class_='article-item__date')['data-date']

        # Keep only the part of the timestamp before the 'T' separator
        nhl_date = published_at.split('T')[0]
    except (AttributeError, TypeError, KeyError) as e:
        # Report the problem and move on to the next article
        print(e)
    else:
        # Printing and storing sit outside the try so that an error raised
        # there is not mistaken for a scraping failure and silently swallowed.
        print('-----------------')
        print(header)
        print(subheader)
        print(nhl_date)

        # Dictionary to be stored in MongoDB
        nhl_dict = {'header': header,
                    'subheader': subheader,
                    'date': nhl_date,
                    }

        # Upsert keyed on the full document: a new article is inserted, while
        # re-running this cell does not store the same article a second time.
        collection.replace_one(nhl_dict, nhl_dict, upsert=True)


-----------------
Kucherov, Lightning top Canadiens in Game 1 of Stanley Cup Final
Has two goals, assist to boost defending champions
2021-06-28
-----------------
NHL participation at 2022 Beijing Olympics 'work in progress'
Commissioner Bettman says time running short, but willingness remains
2021-06-28
-----------------
Vegas to host 2022 NHL All-Star Weekend; 2021-22 outdoor games announced
Commissioner Bettman reveals Winter Classic, Stadium Series locations, teams, dates
2021-06-28
-----------------
President Biden, Prime Minister Trudeau wager on Stanley Cup Final
With Lightning taking on Canadiens, North American nations plan a bet on Twitter
2021-06-28
-----------------
Stanley Cup Final Game 1 Live Blog: Lightning vs. Canadiens
Sights, sounds from Tampa Bay's victory against Montreal
2021-06-28
-----------------
Stanley Cup Final schedule
Best-of-7 series between Canadiens, Lightning starts June 28 at Tampa Bay
2021-06-28
-----------------
Expansion, NHL Draft television infor

In [8]:
# Read every document back from the articles collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('60da9eda5efd0dfaec8aff15'), 'header': 'Kucherov, Lightning top Canadiens in Game 1 of Stanley Cup Final', 'subheader': 'Has two goals, assist to boost defending champions', 'date': '2021-06-28'}
{'_id': ObjectId('60da9eda5efd0dfaec8aff16'), 'header': "NHL participation at 2022 Beijing Olympics 'work in progress'", 'subheader': 'Commissioner Bettman says time running short, but willingness remains', 'date': '2021-06-28'}
{'_id': ObjectId('60da9eda5efd0dfaec8aff17'), 'header': 'Vegas to host 2022 NHL All-Star Weekend; 2021-22 outdoor games announced', 'subheader': 'Commissioner Bettman reveals Winter Classic, Stadium Series locations, teams, dates', 'date': '2021-06-28'}
{'_id': ObjectId('60da9eda5efd0dfaec8aff18'), 'header': 'President Biden, Prime Minister Trudeau wager on Stanley Cup Final', 'subheader': 'With Lightning taking on Canadiens, North American nations plan a bet on Twitter', 'date': '2021-06-28'}
{'_id': ObjectId('60da9eda5efd0dfaec8aff19'), 'header': 'St