# Mission to Mars: Step 1 - Scraping

In [1]:
# Import Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs

In [2]:
def init_browser():
    """Initializes a splinter Browser object"""
    
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    return Browser("chrome", **executable_path, headless=True)

## NASA Mars News

In [3]:
def news_scraper():

    """Scrapes the NASA Mars News Site and collect the latest news title and paragraph text.
    Returns a dictionary with the news title and paragraph text as strings."""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        
        url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Get Latest News Title
        news_title_html = soup.find("div", class_='image_and_description_container').find("a")["href"]
        news_title = " ".join(news_title_html.split("/")[3].split("-")).title()

        # Get Text for latest news 
        news_p = soup.find("div", class_="rollover_description_inner").text
        
    # Create dictionary of results to return
    news_dict = {"news_title": news_title, "news_p": news_p}

    return(news_dict)

In [4]:
print(news_scraper())

{'news_title': 'Opportunity Hunkers Down During Dust Storm', 'news_p': "\nIt's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home. \n"}


## JPL Mars Space Images - Featured Image

In [5]:
def image_scraper():
      
    """Scrapes the NASA Mars News Site and collects the image url for the current Featured Mars Image.
    Returns a dictionary with the news url as a string."""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Get image url for current Featured Mars Image
        JPL_image = soup.find("a", class_ = "button fancybox")["data-fancybox-href"]
        featured_image_url = f"https://www.jpl.nasa.gov{JPL_image}"

    # Create dictionary of results to return
    image_dict = {"featured_image_url": featured_image_url}

    return(image_dict)

In [6]:
print(image_scraper())

{'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19177_ip.jpg'}


## Mars Weather

In [7]:
def weather_scraper():
      
    """Scrapes the weather information from the most recent Mars weather Tweet.
    Returns a dictionary with the weather as a string"""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        url = "https://twitter.com/marswxreport?lang=en"
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Pull out text from tweet and format
        tweet_text = soup.find("div", class_="js-tweet-text-container").find("p").text
        mars_weather = " ".join(tweet_text.split("pic")[0].split("InSight ")[1].split("\n"))
    
    # Create dictionary of results to return
    weather_dict = {"mars_weather": mars_weather}

    return(weather_dict)

In [8]:
print(weather_scraper())

{'mars_weather': 'sol 146 (2019-04-25) low -98.6ºC (-145.4ºF) high -17.7ºC (0.1ºF) winds from the W at 4.2 m/s (9.4 mph) gusting to 11.6 m/s (25.9 mph) pressure at 7.40 hPa'}


## Mars Facts

In [9]:
def facts_scraper():
      
    """Scrapes Facts about Mars from space-facts.com.
    Returns a dictionary with a string of html for a table of these facts"""
    
    # Convert table of facts from url into pandas dataframe
    url = "https://space-facts.com/mars/"
    facts_df = pd.read_html(url)[0]
    
    # Reformat dataframe
    facts_df = facts_df.rename(columns={0: "Parameter" , 1: "Values"})
    facts_df = facts_df.set_index("Parameter")

    # Convert dataframe to htlp and clean up newlines
    html_facts = facts_df.to_html().replace("\n", "").replace(' border="1"', '')
        
    # Create dictionary of results to return
    facts_dict = {"html_facts": html_facts}

    return(facts_dict)

In [10]:
print(facts_scraper())

{'html_facts': '<table class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Values</th>    </tr>    <tr>      <th>Parameter</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'}


## Mars Hemispheres

In [11]:
def hemi_scraper():
      
    """Scrapes the name and url of high resolution images for each hemisphere of Mars.
    Returns a dictionary with a list of dictionaries of the image title and url for each hemisphere"""
    
    # Create list of hemisphere name and urls for each hemisphere in dictionary form
    hemisphere_image_urls = []
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    
    for x in range(4):
        with init_browser() as browser:
            
            browser.visit(url)

            # Create Beautiful soup object
            soup = bs(browser.html, "html.parser")
            
            hemi = soup.find_all("div", class_="description")[x]
            
            # Get name of hemisphere
            hemi_name = hemi.find("h3").text
            
            # Get link to page with full resolution image
            hemi_image = hemi.find("a")["href"]
            hemi_image_url = f"https://astrogeology.usgs.gov/{hemi_image}"
            
            with init_browser() as browser:
                # Open browser to selected hemisphere's page
                browser.visit(hemi_image_url)
                soup = bs(browser.html, "html.parser")

                # Get link to full resolution image
                hemi_full_url = soup.find("div", class_="downloads").find_all("li")[1].find("a")["href"]
                
            hemi_image_dict = {"title": hemi_name, "img_url": hemi_full_url}      
            hemisphere_image_urls.append(hemi_image_dict)
    
    # Create dictionary of results to return
    hemi_dict = {"hemisphere_image_urls": hemisphere_image_urls}
    
    return(hemi_dict)

In [12]:
print(hemi_scraper())

{'hemisphere_image_urls': [{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]}


In [13]:
def scrape():
      
    """Scrapes various websites for information on Mars.
    Returns a dictionary of this information."""
    
    news_dict = news_scraper()
    image_dict = image_scraper()
    weather_dict = weather_scraper()
    facts_dict = facts_scraper()
    hemi_dict = hemi_scraper()

    mars_dict = {**news_dict, **image_dict, **weather_dict, **facts_dict, **hemi_dict}
    return(mars_dict)

In [14]:
print(scrape())

{'news_title': 'Nasa Social Media And Websites Win Webby Awards', 'news_p': 'NASA\'s social media presence, the InSight mission social media accounts, NASA.gov and SolarSystem.NASA.gov will be honored at the 2019 Webby Awards - "the Oscars of the Internet."', 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19177_ip.jpg', 'mars_weather': 'sol 146 (2019-04-25) low -98.6ºC (-145.4ºF) high -17.7ºC (0.1ºF) winds from the W at 4.2 m/s (9.4 mph) gusting to 11.6 m/s (25.9 mph) pressure at 7.40 hPa', 'html_facts': '<table class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Values</th>    </tr>    <tr>      <th>Parameter</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2