In [18]:
# import dependencies

from splinter import Browser
from bs4 import BeautifulSoup
import mars_urls as mars

def init_browser(driver_path=r"C:\chromedrv\chromedriver.exe", headless=True):
    r"""
    Create a splinter Browser driving Chrome.

    Args:
        driver_path: Filesystem path to the chromedriver executable.  The
            default is machine-specific; pass your own path (or edit the
            default) when running elsewhere.
        headless: When True (the default) Chrome is started without a
            visible window.

    Returns:
        The Browser instance.  The caller owns it and is responsible for
        calling ``quit()`` on it when finished.
    """
    # @NOTE: Replace the default path with your actual path to the chromedriver.
    # The default is a raw string so the backslashes in the Windows path are
    # never interpreted as escape sequences.
    return Browser("chrome", executable_path=driver_path, headless=headless)

In [19]:
# Scrape the latest news headline from the NASA Mars Mission
# Path to results working as of May 15, 2019

def scrape_news_article(nasa_url=mars.NASA_URL, nasa_news_url=mars.NASA_NEWS_URL):
    """
    Scrape the most recent article from the NASA Mars news listing.

    Args:
        nasa_url: Base URL that the article's href (taken from the listing
            page) is appended to in order to reach the full story.
        nasa_news_url: URL of the news listing page.

    Returns:
        A 3-tuple ``(title, teaser, first_paragraph)``:
        the text of the first ``div.content_title`` on the listing page,
        the text of the first ``div.article_teaser_body`` on the listing page,
        and the text of the first ``<p>`` element on the full story page.

    Raises:
        IndexError: if the listing page has no title/teaser div, or the
            story page has no ``<p>`` element.
        AttributeError: if the first title div has no ``<a target="_self">``.
    """
    browser = init_browser()
    try:
        # Load the listing page and hand the rendered HTML to Beautiful Soup.
        browser.visit(nasa_news_url)
        soup = BeautifulSoup(browser.html, "html.parser")

        # The listing page carries the teaser title (with the story link)...
        first_story = soup.find_all('div', class_="content_title")[0]
        first_story_title = first_story.get_text()
        first_story_href = first_story.find('a', target="_self").get('href')

        # ...and the one-line teaser body.
        first_story_oneliner = soup.find_all('div', class_='article_teaser_body')[0].get_text()

        # Now move the browser to the full story page and grab the first paragraph.
        browser.visit(nasa_url + first_story_href)
        story_soup = BeautifulSoup(browser.html, "html.parser")
        first_paragraph = story_soup.find_all('p')[0].get_text()
    finally:
        # Always shut the browser down, even when a lookup above fails;
        # otherwise each call leaves a browser/driver process running.
        browser.quit()

    return first_story_title, first_story_oneliner, first_paragraph

In [20]:
# Run the news scraper with its default URLs (taken from mars_urls); this
# starts a browser session and loads the pages, so it needs network access.
scrape_news_article()

("NASA's MRO Completes 60,000 Trips Around Mars",
 'The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.',
 "MRO Soaring Over Mars: This still from an animation shows NASA's Mars Reconnaissance Orbiter soaring over Mars. The spacecraft has been in Mars orbit for 13 years, and just completed 60,000 trips around the planet. Image credit: NASA/JPL-Caltech")

In [8]:
# Scrape the latest featured image from the JPL Mars page
# Path to results working as of May 15, 2019

def scrape_jpl_featured_image(jpl_url=mars.JPL_URL, jpl_mars_url=mars.JPL_MARS_URL):
    """
    Return the URL of the full-size featured Mars image from the JPL site.

    Args:
        jpl_url: Base JPL URL; both the story link and the image ``src``
            found on the pages are appended to it.
        jpl_mars_url: URL of the JPL featured Mars image page.

    Returns:
        ``jpl_url`` concatenated with the ``src`` of the first
        ``img.main_image`` on the featured image's story page.

    Raises:
        IndexError: if the featured page has no ``a`` with class
            "button fancybox", or the story page has no ``img.main_image``.
    """
    browser = init_browser()
    try:
        # Visit the main Mars featured image site and parse the rendered HTML.
        browser.visit(jpl_mars_url)
        jpl_soup = BeautifulSoup(browser.html, "html.parser")

        # As of May 14, 2019, the featured image button carries a `data-link`
        # attribute pointing at the story page for the background image.
        story_anchor = jpl_soup.find_all('a', class_="button fancybox")[0]
        story_url = jpl_url + story_anchor.get('data-link')

        # Move the browser to the story page.
        browser.visit(story_url)
        story_soup = BeautifulSoup(browser.html, "html.parser")

        # On the story page, the main image element holds the full-size image path.
        full_image_tag = story_soup.find_all('img', class_="main_image")[0]
        full_image_url = full_image_tag.get('src')
    finally:
        # Always shut the browser down, even when a lookup above fails;
        # otherwise each call leaves a browser/driver process running.
        browser.quit()

    return jpl_url + full_image_url

In [9]:
# Run the featured-image scraper with its default URLs (taken from mars_urls);
# this starts a browser session and loads the pages, so it needs network access.
scrape_jpl_featured_image()

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14762_hires.jpg'

In [10]:
# Set the facts URL as an attribute on the imported mars_urls module (replacing
# any value it already had) so the default argument below can read it.
mars.MARS_FACTS_URL = "https://space-facts.com/mars/"
import pandas as pd

def scrape_mars_facts(mars_facts_url = mars.MARS_FACTS_URL):
    """
    Load the first HTML table found at ``mars_facts_url`` as a DataFrame.

    The table's columns are renamed to "description" and "value" (so the
    table is expected to have exactly two columns), and "description"
    becomes the index.

    Args:
        mars_facts_url: Location handed to ``pandas.read_html``.

    Returns:
        A DataFrame indexed by "description" with a single "value" column.
    """
    facts = pd.read_html(mars_facts_url)[0]
    facts.columns = ["description", "value"]

    # set_index returns the re-indexed frame, so no in-place mutation is needed.
    return facts.set_index("description")

In [11]:
# Fetch and display the facts table from the default URL (requires network access).
scrape_mars_facts()

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [4]:
# Set the USGS base URL as an attribute on the imported mars_urls module
# (replacing any value it already had) so the default argument below can read it.
mars.MARS_USGS = "https://astrogeology.usgs.gov"

def scrape_mars_hemispheres(usgs_url=mars.MARS_USGS, hemispheres_url=mars.MARS_HEMISPHERES,
                            hemispheres=("Cerberus", "Schiaparelli", "Syrtis Major", "Valles Marineris")):
    """
    Collect the title and full-size image URL for each Mars hemisphere.

    For every name in ``hemispheres`` the browser finds the first link on the
    hemispheres page whose text contains that name, records the link text as
    the title, follows the link, reads the ``src`` of the first
    ``img.wide-image`` on the resulting page, and navigates back.

    Args:
        usgs_url: Base URL that each image ``src`` is appended to.
        hemispheres_url: URL of the page listing the hemisphere links.
        hemispheres: Iterable of (partial) link texts to look for.  Defaults
            to the four hemisphere names this scraper was written for.

    Returns:
        A list of dicts, one per hemisphere in input order, each with the
        keys ``'title'`` and ``'img_url'``.

    Raises:
        IndexError: if a hemisphere page has no ``img.wide-image``.
        Errors raised by the browser when a link cannot be found also
        propagate unchanged.
    """
    browser = init_browser()
    hemisphere_image_urls = []
    try:
        # Visit the Mars hemispheres site.
        browser.visit(hemispheres_url)

        for hemisphere in hemispheres:
            # Read the link text before clicking: after the click the browser
            # is on the detail page.
            title = browser.find_link_by_partial_text(hemisphere).first.text
            browser.click_link_by_partial_text(hemisphere)

            soup = BeautifulSoup(browser.html, "html.parser")
            suburl = soup.find_all('img', class_="wide-image")[0].get('src')

            hemisphere_image_urls.append({'title': title, 'img_url': usgs_url + suburl})

            # Return to the listing so the next hemisphere link can be found.
            browser.back()
    finally:
        # Always shut the browser down, even when a lookup above fails;
        # otherwise each call leaves a browser/driver process running.
        browser.quit()

    return hemisphere_image_urls

In [5]:
# Run the hemispheres scraper with its default URLs; this starts a browser
# session and loads the pages, so it needs network access.
scrape_mars_hemispheres()

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [16]:
# Opens a standalone browser session bound to the notebook-level name `browser`.
# NOTE(review): nothing visible in this notebook uses or closes this session —
# confirm whether this scratch cell is still needed, or close it when done.
browser=init_browser()