In [1]:
# Import dependencies
# Complete your initial scraping using Jupyter Notebook, BeautifulSoup, Pandas, and Requests/Splinter.
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests

In [2]:
# Chromedriver for Mac Users
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Set up path and browser 
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False) 

# NASA Mars News 

In [4]:
# Use Splinter to visit the NASA news website to be scraped
# Scrape https://mars.nasa.gov/news/ and collect the latest News Title and Paragraph Text
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [5]:
# Iterate through pages and pull articles
# Create HTML object   
html = browser.html

# Create BeautifulSoup object to parse using html.parser
soup_news = bs(html, 'html.parser')

# Use classes to extract information
# Assign the text to variables that you can reference later
news_title = soup_news.find('div', class_='list_text').find('div', class_='content_title').text
print(f'The most current article on NASA.gov is "{news_title}."')

# Extract the teaser paragraph for the first article
news_p = soup_news.find('div', class_='article_teaser_body').text
print(f'The article, "{news_title}," discusses how {news_p}')


The most current article on NASA.gov is "AI Is Helping Scientists Discover Fresh Craters on Mars."
The article, "AI Is Helping Scientists Discover Fresh Craters on Mars," discusses how It's the first time machine learning has been used to find previously unknown craters on the Red Planet.


# JPL Mars Space Images

In [6]:
# Visit the url for JPL Featured Space Image
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image
full_image = browser.find_by_id('full_image')
full_image.click()

In [8]:
# Find the more info button and click it
browser.is_element_present_by_text('more info', wait_time=1)
more_info = browser.links.find_by_partial_text('more info')
more_info.click()

In [9]:
# Parse the resulting html with soup
html = browser.html
soup_img = bs(html, 'html.parser')

In [10]:
# Assign the url string to a variable called `featured_image_url`
featured_image_url = soup_img.select_one('figure.lede a img').get("src")
featured_image_url

'/spaceimages/images/largesize/PIA15883_hires.jpg'

# Mars Facts 

In [11]:
# Bring in link for Mars Facts scrape
# Visit the Mars Facts webpage https://space-facts.com/mars/
url = 'https://space-facts.com/mars/'

In [12]:
# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
table_df = pd.read_html(url)
table_df

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [13]:
# Save table to files and organize in dataframe
tables = table_df[0]

# Use Pandas to convert the data to a HTML table string
# Define column headers
tables.columns = ['Mars Data', 'Value']

# Save table to files
mars_facts = tables.to_html('mars_facts.html')

!open mars_facts.html

tables

Unnamed: 0,Mars Data,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [14]:
# Bring in Mars website to be scraped and visit URL
# USGS Astrogeology site https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars 
# Obtain high resolution images for each of Mar's hemispheres
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [15]:
# Create a list to hold the images and titles.
hemisphere_image_urls = []

# Write code to retrieve the image urls and titles for each hemisphere.
# First, get a list of all of the hemispheres
links = browser.find_by_css("a.product-item h3")

# Next, loop through those links, click the link, find the sample anchor, and return the href
for i in range(len(links)):
    hemisphere = {}
    
# Find the elements on each loop to avoid a stale element exception
    browser.find_by_css("a.product-item h3")[i].click()
    
# Find the Sample image anchor tag and extract the href
    sample_elem = browser.links.find_by_text('Sample').first
    sample_elem = browser.links.find_by_partial_text('Sample')
    sample_elem = browser.find_by_text('Sample').first
    hemisphere['img_url'] = sample_elem['href']
    
# Get Hemisphere title
    hemisphere['title'] = browser.find_by_css("h2.title").text
    
# Append hemisphere object to list
    hemisphere_image_urls.append(hemisphere)
    
#  Navigate backwards to iterate all 4 images
    browser.back()

In [16]:
print(len(links))

4


In [17]:
# Save Hemisphere information to a dataframe
# Iterate image list
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [18]:
# Quit the browser
browser.quit()

In [19]:
# Save hemisphere urls to a dataframe
hemisphere_image_urls_df = pd.DataFrame(hemisphere_image_urls)
hemisphere_image_urls_df

Unnamed: 0,img_url,title
0,https://astropedia.astrogeology.usgs.gov/downl...,Cerberus Hemisphere Enhanced
1,https://astropedia.astrogeology.usgs.gov/downl...,Schiaparelli Hemisphere Enhanced
2,https://astropedia.astrogeology.usgs.gov/downl...,Syrtis Major Hemisphere Enhanced
3,https://astropedia.astrogeology.usgs.gov/downl...,Valles Marineris Hemisphere Enhanced
