In [1]:
# Dependencies
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
from time import sleep
import pandas as pd
import requests
import pprint

### NASA Mars News
#### Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 

In [2]:
# Launch a separate Chrome window via chromedriver; the following cells drive this window.
# (Because it runs through chromedriver rather than regular Chrome, an extra Chrome icon
# appears on the taskbar.)
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Point the driven Chrome window at the NASA Mars News page, then parse what it rendered.
nasa_url = "https://mars.nasa.gov/news/"
browser.visit(nasa_url)
# Fixed 3-second pause before reading the page source, intended to give the page's
# JavaScript time to finish before the HTML is captured.
sleep(3)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [4]:
# Latest headline: plain text of the "content_title" div at index 1.
# NOTE(review): index 1 (not 0) is used here, presumably because the first match on the
# page is not a news headline - confirm against the live page.
title_divs = soup.find_all("div", class_="content_title")
nasa_headline = title_divs[1].get_text()
nasa_headline

'NASA Moves Forward With Campaign to Return Mars Samples to Earth'

In [5]:
# Latest teaser paragraph: plain text of the first "article_teaser_body" div on the page.
teaser_divs = soup.find_all("div", class_="article_teaser_body")
nasa_teaser = teaser_divs[0].get_text()
nasa_teaser

'During this next phase, the program will mature critical technologies and make critical design decisions as well as assess industry partnerships.'

In [None]:
# Close the chromedriver-driven Chrome window now that the NASA Mars News values
# (nasa_headline, nasa_teaser) have been extracted from the parsed page.
browser.quit()

In [None]:
# NOTE (Adam): the commented-out lines below are scratch code from the Dec 11 tutoring session; they can probably be deleted before the final homework is submitted.
# sidebar = soup.find('div', class_='content_title')
# categories = sidebar.find_all('li')

### JPL Mars Space Images
#### Use splinter to find the current Featured Mars Image 

In [None]:
# Launch a separate Chrome window via chromedriver; the following cells drive this window.
# (Because it runs through chromedriver rather than regular Chrome, an extra Chrome icon
# appears on the taskbar.)
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# Point the driven Chrome window at the JPL Mars space-images page, then parse what it rendered.
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)
# Fixed 3-second pause before reading the page source, intended to give the page's
# JavaScript time to finish before the HTML is captured.
sleep(3)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Click the link whose visible text contains "FULL IMAGE" in the driven Chrome window.
browser.click_link_by_partial_text("FULL IMAGE")
# NOTE(review): a possible alternative via the `browser.links` API - confirm it exists in
# the installed splinter version, and note that the found link would still need .click():
# browser.links.find_by_partial_text("FULL IMAGE")

In [None]:
# Click the link whose visible text contains "more info" in the driven Chrome window.
# The fixed 3-second pause comes first; it is intended to let the page finish updating
# after the previous click before the link is looked up.
sleep(3)
browser.click_link_by_partial_text("more info")
# NOTE(review): a possible alternative via the `browser.links` API - confirm it exists in
# the installed splinter version, and note that the found link would still need .click():
# browser.links.find_by_partial_text("more info")

In [None]:
# Click the link whose visible text contains ".jpg" (the JPEG image link) in the
# driven Chrome window.
browser.click_link_by_partial_text(".jpg")

In [None]:
# Store the browser's current address as the featured image URL, then display it.
# After the ".jpg" link click in the previous cell, this is expected to be the address
# of the JPEG image file itself.
featured_image_url = browser.url
featured_image_url

In [None]:
# Close the chromedriver-driven Chrome window now that featured_image_url has been captured.
browser.quit()

### Mars Facts
#### Use Pandas to scrape the Mars facts table, which lists facts about the planet such as its diameter and mass.

In [None]:
# Source page for the Mars facts table
facts_url = "https://space-facts.com/mars/"
# pd.read_html returns a list of DataFrames, one per HTML table found at the URL.
# Keep only the first one (index 0), which is taken to be the Mars-only facts table.
facts_tables = pd.read_html(facts_url)
facts_df = facts_tables[0]

In [None]:
# Tidy the Mars facts table: give the two columns readable headers, then strip the
# colon character from every entry in the "Metric" column.
column_labels = ["Metric", "Value (Planet Mars)"]
facts_df.columns = column_labels
metric_names = facts_df["Metric"]
facts_df["Metric"] = metric_names.str.replace(":","")
facts_df

In [None]:
# Convert the facts table to an HTML table string, with "Metric" as the row index.
# set_index() is used without inplace=True so facts_df itself is not modified: with the
# in-place form, running this cell a second time raised a KeyError because "Metric" had
# already been moved out of the columns by the first run.
facts_html = facts_df.set_index("Metric").to_html()
# print(facts_html)

### Mars Hemispheres
#### Scrape hi-res images of each of Mars' hemispheres from the USGS Astrogeology site, including the URL of each image.

In [None]:
# Launch a separate Chrome window via chromedriver; the following cells drive this window.
# (Because it runs through chromedriver rather than regular Chrome, an extra Chrome icon
# appears on the taskbar.)
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# Point the driven Chrome window at the USGS Astrogeology search results for Mars
# hemispheres, then parse what it rendered.
mars_hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(mars_hemisphere_url)
# Fixed 3-second pause before reading the page source, intended to give the page's
# JavaScript time to finish before the HTML is captured.
sleep(3)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Set up the inputs and containers for the hemisphere scrape:
#   mars_hemispheres    - one partial link text per hemisphere, used to find its link
#   hemisphere_pic_urls - starts empty; collects one entry per hemisphere
#   hemi_dict           - starts empty; holds a hemisphere's name and image URL
mars_hemispheres = list(("Cerberus", "Schiaparelli", "Syrtis", "Valles"))
hemisphere_pic_urls = list()
hemi_dict = dict()

In [None]:
# Visit each hemisphere's page on the USGS site and record its title and image URL.
# Start from an empty list so that re-running this cell does not append duplicate entries.
hemisphere_pic_urls = []
for hemi in mars_hemispheres:
    # Follow the link whose text contains this hemisphere's name, then parse that page
    browser.click_link_by_partial_text(hemi)
    hemi_html = browser.html
    soup = BeautifulSoup(hemi_html, 'html.parser')
    # Build a NEW dict on every pass. Mutating and re-appending one shared dict would
    # leave the list holding several references to the same object, so every entry
    # would show the last hemisphere scraped.
    hemi_dict = {
        # Text of the first <h2>, with the word "Enhanced" removed and whitespace trimmed
        "title": soup.find("h2").get_text().replace("Enhanced","").strip(),
        # href of the first link inside the first "downloads" div
        "img_url": soup.find_all("div", class_="downloads")[0].find_all("a")[0]["href"],
    }
    hemisphere_pic_urls.append(hemi_dict)
    browser.back() # go back to original page with all the hemispheres

In [None]:
# Display the list of hemisphere dictionaries (each has a "title" and an "img_url" key)
hemisphere_pic_urls

In [None]:
# Close the chromedriver-driven Chrome window now that the hemisphere scrape is finished.
browser.quit()