In [4]:
# Import dependencies
import splinter
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import scraping

In [5]:
# Set the executable path and initialize the chrome browser in splinter
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)

In [10]:
# Visit the mars nasa news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Optional delay for loading the page
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [12]:
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')
slide_elem = news_soup.select_one('ul.item_list li.slide')

In [13]:
slide_elem.find("div", class_='content_title')

<div class="content_title"><a href="/news/8678/the-detective-aboard-nasas-perseverance-rover/" target="_self">The Detective Aboard NASA's Perseverance Rover</a></div>

In [14]:
# Use the parent element to find the first `a` tag and save it as `news_title`
news_title = slide_elem.find("div", class_='content_title').get_text()
news_title

"The Detective Aboard NASA's Perseverance Rover"

In [15]:
# Use the parent element to find the paragraph text
news_p = slide_elem.find('div', class_="article_teaser_body").get_text()
news_p

'An instrument called SHERLOC will, with the help of its partner WATSON, hunt for signs of ancient life by detecting organic molecules and minerals.'

### Featured Images

In [3]:
# Visit URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [13]:
# Find and click the full image button
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [14]:
# Find the more info button and click that
browser.is_element_present_by_text('more info', wait_time=1)
more_info_elem = browser.links.find_by_partial_text('more info')
more_info_elem.click()

In [15]:
# Parse the resulting html with soup
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [None]:
# Find the relative image url
img_url_rel = img_soup.select_one('figure.lede a img').get("src")
img_url_rel

In [None]:
# Use the base URL to create an absolute URL
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

In [None]:
df = pd.read_html('http://space-facts.com/mars/')[0]
df.columns=['description', 'value']
df.set_index('description', inplace=True)
df

In [None]:
df.to_html()

In [39]:
browser.quit()

## Challenge Code Development

In [3]:
# Mars’ hemispheres webpage
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
hemi_soup = BeautifulSoup(browser.html, 'html.parser')

NameError: name 'browser' is not defined

In [37]:
image_hemi_URL_List = []

div_descript_list = hemi_soup.find_all("div", class_="description")

for div_description in div_descript_list:
    image_hemi_URL_List.append("https://astrogeology.usgs.gov" + div_description.find("a")["href"])

In [38]:
image_hemi_URL_List

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [39]:
image_hemi_data_list = []

for url in image_hemi_URL_List:

    browser.visit(url)

    img_soup = BeautifulSoup(browser.html, 'html.parser')

    try:

        # Get hemisphere full size image URL
        img_url = img_soup.select_one("div.downloads ul li a").get("href")

        image_title = img_soup.select_one("div.content h2.title").text

        image_description = img_soup.select_one("div.content p").text

    except AttributeError:
        continue

    # Append the hemisphere image data dictionary to the list
    image_hemi_data_list.append({"title" : image_title, "description": image_description, "img_url" : img_url})

In [40]:
image_hemi_data_list

[{'title': 'Cerberus Hemisphere Enhanced',
  'description': 'Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired on February 11, 1980. At that time, it was early northern summer on Mars. The center of the image is at latitude 3 degrees, longitude 185 degrees.',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'description': 'Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern summer on Mars. The center of this image is near the impact crater Schiaparelli (latitude -3, longitude 343) The limits of this mosaic are approximately latitude -60 to 60 and longitude 260 to 30. The color variations 

In [6]:
scraping.mars_hemispheres(browser)

[{'title': 'Cerberus Hemisphere Enhanced',
  'description': 'Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired on February 11, 1980. At that time, it was early northern summer on Mars. The center of the image is at latitude 3 degrees, longitude 185 degrees.',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'description': 'Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern summer on Mars. The center of this image is near the impact crater Schiaparelli (latitude -3, longitude 343) The limits of this mosaic are approximately latitude -60 to 60 and longitude 260 to 30. The color variations 

In [43]:
try:
    # Read the Mars data html table into a DataFrame
    # using the Mars' fact webpage URL
    df = pd.read_html('http://space-facts.com/mars/')[0]
except BaseException:
    pass

# Add DataFrame column names
df.columns=['Description', 'Value']

# Convert the Mars fact DataFrame to a list
# of dictionaries with one fact per row
df.to_dict("records")

[{'Description': 'Equatorial Diameter:', 'Value': '6,792 km'},
 {'Description': 'Polar Diameter:', 'Value': '6,752 km'},
 {'Description': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'Description': 'Moons:', 'Value': '2 (Phobos & Deimos)'},
 {'Description': 'Orbit Distance:', 'Value': '227,943,824 km (1.38 AU)'},
 {'Description': 'Orbit Period:', 'Value': '687 days (1.9 years)'},
 {'Description': 'Surface Temperature:', 'Value': '-87 to -5 °C'},
 {'Description': 'First Record:', 'Value': '2nd millennium BC'},
 {'Description': 'Recorded By:', 'Value': 'Egyptian astronomers'}]

In [7]:
scraping.scrape_all()

{'news_title': "The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover",
 'news_paragraph': 'Two astronauts collected Moon rocks on Apollo 11. It will take three robotic systems working together to gather up the first Mars rock samples for return to Earth.',
 'featured_image': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16227_hires.jpg',
 'facts': [{'Description': 'Equatorial Diameter:', 'Value': '6,792 km'},
  {'Description': 'Polar Diameter:', 'Value': '6,752 km'},
  {'Description': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
  {'Description': 'Moons:', 'Value': '2 (Phobos & Deimos)'},
  {'Description': 'Orbit Distance:', 'Value': '227,943,824 km (1.38 AU)'},
  {'Description': 'Orbit Period:', 'Value': '687 days (1.9 years)'},
  {'Description': 'Surface Temperature:', 'Value': '-87 to -5 °C'},
  {'Description': 'First Record:', 'Value': '2nd millennium BC'},
  {'Description': 'Recorded By:', 'Value': 'Egyptian astronomers'}],
 'hemispheres':