In [17]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [18]:
# Open a PyMongo client against the MongoDB server at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [19]:
# Handles for the `nhl_db` database and its `articles` collection
db = client['nhl_db']
collection = db['articles']

In [20]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module.
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [21]:
# Echo the parsed BeautifulSoup document as the cell output (full page HTML)
soup

<!DOCTYPE html>

<html lang="en_US">
<head>
<title>NHL Hockey News | NHL.com </title>
<!-- meta meta tag -->
<meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<meta content="no-cache" http-equiv="Cache-Control"/>
<meta content="no-cache" http-equiv="Pragma"/>
<meta content="-1" http-equiv="Expires"/>
<meta content="en" http-equiv="content-language"/>
<meta content="nhl, nhl.com, www.nhl.com, playoffs, scores, video, photos, standings, news, features, players, shop, auctions, tickets, mobile, game center live, stanley cup, winter classic, draft, free agency" name="keywords"/>
<meta content="US" name="countryCode"/>
<meta content="NHL Hockey News" property="og:title"/>
<meta content="NHL Hockey News NHL.com" itemprop="name"/>
<meta content="NHL.com" property="og:site_name"/>
<meta content="website" property="og:type"/>
<meta content="https://cms.nhl.bamgrid.com/images/photos/324293932/1024x576/cut.jpg" property="og

In [26]:
# Retrieve the parent div of every article on the page
results = soup.find_all('div', class_='article-item__top')

# Loop over the results: print each article's data and store it in MongoDB
for result in results:
    # Locate the elements holding the header, subheader and timestamp.
    # find() returns None when the tag is absent from this card.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # Skip incomplete cards instead of aborting the whole scrape part-way
    # through (after some documents have already been written)
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # .get() yields None rather than raising if the attribute is missing
    published_at = date_tag.get('data-date')
    if not published_at:
        continue

    header = header_tag.text
    subheader = subheader_tag.text

    # get only the date from the timestamp (the text before the 'T')
    date = published_at.split('T')[0]

    # print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert keyed on header + date so re-running this cell updates the
    # existing document instead of inserting a duplicate every time
    collection.update_one(
        {'header': header, 'date': date},
        {'$set': post},
        upsert=True,
    )
    

-----------------
Houser, ECHL goalie, to make NHL debut for Sabres
28-year-old will be sixth to start for Buffalo this season; Luukkonen, Hutton, Ullmark each remain out
2021-05-03
-----------------
NHL Buzz: Malkin returns to lineup for Penguins against Flyers
Seguin makes season debut; Ovechkin, Carlson back for Capitals; Golden Knights could be without Pacioretty
2021-05-03
-----------------
McDavid's race to 100 points for Oilers amplified in shortened season
Center needs 13 more in final seven games but says team goals most important before playoffs
2021-05-02
-----------------
Stanley Cup Playoffs clinching scenarios for May 3
Bruins, Oilers can each earn berth
2021-05-03
-----------------
Stars forward Seguin shares video of grueling rehab from two surgeries
Dallas forward was activated on Monday from injured reserve after missing six months
2021-05-03
-----------------
Bracket Challenge for 2021 Stanley Cup Playoffs opens Monday
Fan who finishes atop leader board will attend 2

In [23]:
# Read back every document stored in the articles collection and print it
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('609086a93bec87bb01344637'), 'header': 'Houser, ECHL goalie, to make NHL debut for Sabres', 'subheader': '28-year-old will be sixth to start for Buffalo this season; Luukkonen, Hutton, Ullmark each remain out', 'date': '2021-05-03'}
{'_id': ObjectId('609086a93bec87bb01344638'), 'header': 'NHL Buzz: Malkin returns to lineup for Penguins against Flyers', 'subheader': 'Seguin makes season debut; Ovechkin, Carlson back for Capitals; Golden Knights could be without Pacioretty', 'date': '2021-05-03'}
{'_id': ObjectId('609086a93bec87bb01344639'), 'header': "McDavid's race to 100 points for Oilers amplified in shortened season", 'subheader': 'Center needs 13 more in final seven games but says team goals most important before playoffs', 'date': '2021-05-02'}
{'_id': ObjectId('609086a93bec87bb0134463a'), 'header': 'Stanley Cup Playoffs clinching scenarios for May 3', 'subheader': 'Bruins, Oilers can each earn berth', 'date': '2021-05-03'}
{'_id': ObjectId('609086a93bec87bb013446

In [24]:
# Cleanup: uncomment the line below to drop the articles collection (removes all stored documents)
# db.articles.drop()