In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.nhl_db
collection = db.articles

In [10]:
# URL of page to be scraped
url = 'https://www.nhl.com/'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')
soup
results = soup.find_all('li', class_='mixed-feed__item--article')
results

[<li class="mixed-feed__item mixed-feed__item--article" data-content-id="300299752">
 <div class="mixed-feed__item-header">
 <div class="mixed-feed__logos">
 <div class="mixed-feed__team-logo mixed-feed__team-logo-nhl logo-round-team logo-bg-dark--league-nhl primary-bg--league-nhl"></div>
 </div>
 <div class="mixed-feed__item-header-text">
 <a href="/news/nhl-training-camp-news-and-notes-september-21/c-300299752?tid=282066676">
 <h4 class="mixed-feed__header headline-link">Training Camp Buzz: Tavares set for first home game with Maple Leafs</h4>
 <h5 class="mixed-feed__subheader">Tarasenko will be in lineup for Blues; Green might miss start of Red Wings season</h5>
 </a>
 </div>
 </div>
 <div class="mixed-feed__item-content">
 <div class="mixed-feed__meta">
 <div class="mixed-feed__tags">
 <a class="mixed-feed__tag-link" disabled="true">The Buzz</a>
 </div>
 <a href="/news/nhl-training-camp-news-and-notes-september-21/c-300299752?tid=282066676">
 <h4 class="mixed-feed__header-meta">
  

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('li', class_='mixed-feed__item--article')

# Loop through results to retrieve article title, header, and timestamp of article
for result in results:
    title = result.find('h4', class_='mixed-feed__header').text

    lede = result.find('h5', class_='mixed-feed__subheader').text

    # The time and date of article publication
    date = result.find('time')['datetime']
    # Slice the datetime string for the date
    article_date = date[:10]
    # Slice the datetime string for the time
    time = date[11:16]
    # Determine whether article was published in AM or PM
    if (int(time[:2]) >= 13):
        meridiem = 'pm'
    else:
        meridiem = 'am'

    # Concatenate time string
    time = time + meridiem
    print('-----------------')
    print(title)
    print(lede)
    print(article_date)
    print(time)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': title,
        'lede': lede,
        'date': article_date,
        'time published': time
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Training Camp Buzz: Tavares set for first home game with Maple Leafs
Tarsenko will be in lineup for Blues; Green might miss start of Red Wings season
2018-09-21
14:10pm
-----------------
Thornton healthy, has high hopes for Sharks with Karlsson
After second knee surgery, center can't wait to start season with new defenseman
2018-09-20
17:44pm
-----------------
Fantasy hits league draft guide for 2018-19
75 players with proven production in new standard category
2018-09-21
00:00am
-----------------
2019 Draft Diary: Jack Hughes
Forward projected to be No. 1 pick discusses All-American Prospect Game
2018-09-21
00:00am
-----------------
Sullivan returns to Penguins following death of father
Coach happy 'to be back and to get back into a routine'
2018-09-21
13:16pm
-----------------
Mailbag: Karlsson's future, Rangers' center options
NHL.com's Dan Rosen answers weekly questions
2018-09-19
19:03pm
-----------------
Hall of Famers embrace being removed from Stanley Cup
Lind

In [8]:
# Display the MongoDB records created above
articles = db.articles.find()
for article in articles:
    print(article)

{'_id': ObjectId('5b9b1bf022cd0f0398132f4e'), 'title': 'Seguin signs eight-year contract extension with Stars', 'lede': 'Deal for center is worth $9.85 million per season through 2026-27', 'date': '2018-09-13', 'time published': '16:40pm'}
{'_id': ObjectId('5b9b1bf022cd0f0398132f4f'), 'title': "Karlsson trade completes Sharks' quest to add difference-maker", 'lede': 'Get defenseman from Senators after trying to land Tavares', 'date': '2018-09-13', 'time published': '20:42pm'}
{'_id': ObjectId('5b9b1bf022cd0f0398132f50'), 'title': 'Tavares named one of three Maple Leafs alternate captains', 'lede': 'Center joins Marleau, Rielly as part of Toronto leadership group', 'date': '2018-09-13', 'time published': '12:26am'}
{'_id': ObjectId('5b9b1bf022cd0f0398132f51'), 'title': 'Fantasy spin: Karlsson trade to Sharks', 'lede': 'Defenseman worth targeting in first round, surpasses new teammate Burns in value', 'date': '2018-09-13', 'time published': '15:56pm'}
{'_id': ObjectId('5b9b1bf022cd0f0398