In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Have webdriver_manager install ChromeDriver, then hand the driver path to Splinter
# and start a visible (non-headless) Chrome session.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [5]:
# Visit the Mars NASA news site
url = 'https://redplanetscience.com'
browser.visit(url)

# Optional wait for the page to load: look for a <div> tag with the 'list_text' class.
#    `wait_time=1` is how long Splinter keeps searching (about 1 second at most); it is
#    not a fixed pause before the search starts.  The call returns True/False and the
#    result is only displayed here -- it is not checked before the next cell runs.
browser.is_element_present_by_css('div.list_text', wait_time=1)   # CSS selectors: '.' = class, '#' = id

True

In [8]:
# SKILL DRILL:  Does a 'button' element with an id of 'more' exist on the page?
#    (Splinter keeps searching for up to 2 sec. before answering False.)
browser.is_element_present_by_css('button#more', wait_time=2)

True

In [10]:
# Set up the HTML parser for bs4, using the page source currently loaded in the browser
html = browser.html
news_soup = soup(html, 'html.parser')

# Search for the first <div /> tag that has a class attribute of 'list_text', and assign the result
slide_elem = news_soup.select_one('div.list_text')   # 'select_one' and 'find' get same result

# 'select_one' returns None when nothing matches (e.g. the page had not finished rendering).
#    Stop here with a clear message instead of an AttributeError in a later cell.
assert slide_elem is not None, "No 'div.list_text' element found; re-run the visit cell and try again"

# This result is the parent element of the first article, which means that it holds all the
#    other elements within it.  Later, we'll use it to filter our search results further.
slide_elem

<div class="list_text">
<div class="list_date">December 18, 2022</div>
<div class="content_title">NASA's Mars Perseverance Rover Gets Its Sample Handling System</div>
<div class="article_teaser_body">The system will be collecting and storing Martian rock and soil. Its installation marks another milestone in the march toward the July launch period.</div>
</div>

In [18]:
#######################    Scrape an Article Title and Its Summary Text    #######################

# Within the parent element, grab the first <div> whose class is 'content_title'
title_elem = slide_elem.select_one('div.content_title')
print(title_elem)

<div class="content_title">NASA's Mars Perseverance Rover Gets Its Sample Handling System</div>


In [19]:
# Pull out only the text of the title (get_text() drops the surrounding <div> markup)
title = title_elem.get_text()
print(title)

NASA's Mars Perseverance Rover Gets Its Sample Handling System


In [20]:
# Within the parent element, locate the teaser <div> and keep only its text (the article summary)
news_p = slide_elem.find('div', class_='article_teaser_body').get_text()
news_p

'The system will be collecting and storing Martian rock and soil. Its installation marks another milestone in the march toward the July launch period.'

In [21]:
# Close the browser session that was opened with Browser(...) at the top of the notebook
browser.quit()