In [1]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pandas as pd

In [2]:
# URL of page to be scraped
url1 = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url1)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Navigate through the page to find the latest news title and paragraph
news_title = soup.find('div', class_='content_title').find('a').text
news_p = soup.find('div', class_='rollover_description_inner').text

print(news_title)
print(news_p)


NASA Readies Perseverance Mars Rover's Earthly Twin 


Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.



In [3]:
# Activate the chromedriver
executable_path = {'executable_path': 'chromedriver_win32/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Open the url in chrome
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)

# Activate BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Locate the featured image url
find_featured_image_url = soup.find('footer').find('a').get('data-fancybox-href')
featured_image_url = print(f'https://www.jpl.nasa.gov{find_featured_image_url}')

# Close the browser
browser.quit()

featured_image_url

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19808_ip.jpg


In [4]:
# Use pandas to read the tables in the url
url3 = 'https://space-facts.com/mars/'
tables = pd.read_html(url3)

# Find the relevant table 
first_table = tables[0]

# Convert the table to html
table_html = first_table.to_html(index=False, header=False)

table_html

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [5]:
# Activate the chromedriver
executable_path = {'executable_path': 'chromedriver_win32/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Open the url in chrome
url4 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url4)

# Activate BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Locate the Mars hemisphere urls
items = soup.find_all('div', class_='item')

# Create a list of the hemisphere urls using a for loop
url_list = []

for item in items:
    url = item.find('a').get('href')
    url_list.append(url)

# Format the urls correctly
hemisphere_url_list = ['https://astrogeology.usgs.gov' + url for url in url_list]

# Close the browser
browser.quit()

hemisphere_url_list

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [6]:
# Activate the chromedriver
executable_path = {'executable_path': 'chromedriver_win32/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Create a list of dictionaries for the hemisphere titles and image urls using a for loop 
hemisphere_image_urls = []

for url in hemisphere_url_list:

    browser.visit(url)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    hemisphere_title = soup.find('div', class_='content').find('h2').text
    hemisphere_image = soup.find('div', class_='downloads').find('ul').find('li').find('a').get('href')

    hemisphere_image_urls.append({"title": hemisphere_title, "image": hemisphere_image})

# Close the browser
browser.quit()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [7]:
# Create a dictionary of the items that were scraped
mars_dictionary = {
        "title": news_title,
        "paragraph": news_p,
        "image": featured_image_url,
        "table": table_html,
        "hemispheres": hemisphere_image_urls
    }

mars_dictionary

{'title': "\nNASA Readies Perseverance Mars Rover's Earthly Twin \n",
 'paragraph': "\nDid you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.\n",
 'image': None,
 'table': '<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd