# Mission to Mars: Step 1 - Scraping

In [1]:
# Import Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs

In [2]:
def init_browser():
    """Initializes a splinter Browser object"""
    
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    return Browser("chrome", **executable_path, headless=True)

## NASA Mars News

In [3]:
def news_scraper():

    """Scrapes the NASA Mars News Site and collect the latest news title and paragraph text.
    Returns a dictionary with the news title and paragraph text as strings."""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        
        url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Get Latest News Title
        news_title= soup.find("div", class_="content_title").text.replace("\n", "")

        # Get Text for latest news 
        news_p = soup.find("div", class_= "rollover_description_inner").text
        
    # Create dictionary of results to return
    news_dict = {"news_title": news_title, "news_p": news_p}

    return(news_dict)

In [4]:
print(news_scraper())

{'news_title': 'InSight Captures Sunrise and Sunset on Mars', 'news_p': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day."}


## JPL Mars Space Images - Featured Image

In [5]:
def image_scraper():
      
    """Scrapes the NASA Mars News Site and collects the image url for the current Featured Mars Image.
    Returns a dictionary with the news url as a string."""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Get image url for current Featured Mars Image
        JPL_image = soup.find("a", class_ = "button fancybox")["data-fancybox-href"]
        featured_image_url = f"https://www.jpl.nasa.gov{JPL_image}"

    # Create dictionary of results to return
    image_dict = {"featured_image_url": featured_image_url}

    return(image_dict)

In [6]:
print(image_scraper())

{'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18292_ip.jpg'}


## Mars Weather

In [10]:
def weather_scraper():
      
    """Scrapes the weather information from the most recent Mars weather Tweet.
    Returns a dictionary with the weather as a string"""
    
    # Open NASA Mars News Site using Splinter
    with init_browser() as browser:
        url = "https://twitter.com/marswxreport?lang=en"
        browser.visit(url)

        # Create Beautiful soup object
        soup = bs(browser.html, "html.parser")

        # Pull out text from tweet and format
        tweet_text = soup.find("div", class_="js-tweet-text-container").find("p").text
        mars_weather = " ".join(tweet_text.split("pic")[0].split("InSight ")[1].split("\n"))
        
    # Create dictionary of results to return
    weather_dict = {"mars_weather": mars_weather}

    return(weather_dict)

In [11]:
print(weather_scraper())

{'mars_weather': 'sol 152 (2019-05-01) low -98.1ºC (-144.5ºF) high -17.2ºC (1.0ºF) winds from the SW at 4.8 m/s (10.7 mph) gusting to 13.2 m/s (29.5 mph) pressure at 7.40 hPa'}


## Mars Facts

In [12]:
def facts_scraper():
      
    """Scrapes Facts about Mars from space-facts.com.
    Returns a dictionary with a string of html for a table of these facts"""
    
    # Convert table of facts from url into pandas dataframe
    url = "https://space-facts.com/mars/"
    facts_df = pd.read_html(url)[0]
    
    # Reformat dataframe
    facts_df = facts_df.rename(columns={1: "Value"})
    facts_df = facts_df.set_index(0)

    # Convert dataframe to htlp and clean up newlines
    html_facts = facts_df.to_html().replace("\n", "").replace('<tr style="text-align: right;">', '').\
    replace("    </tr>    <tr>      <th>0</th>      <th></th>    </tr>  ", "")
        
    # Create dictionary of results to return
    facts_dict = {"html_facts": html_facts}

    return(facts_dict)

In [13]:
print(facts_scraper())

{'html_facts': '<table border="1" class="dataframe">  <thead>          <th></th>      <th>Value</th></thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'}


## Mars Hemispheres

In [14]:
def hemi_scraper():
      
    """Scrapes the name and url of high resolution images for each hemisphere of Mars.
    Returns a dictionary with a list of dictionaries of the image title and url for each hemisphere"""
    
    # Create list of hemisphere name and urls for each hemisphere in dictionary form
    hemisphere_image_urls = []
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    
    for x in range(4):
        with init_browser() as browser:
            
            browser.visit(url)

            # Create Beautiful soup object
            soup = bs(browser.html, "html.parser")
            
            hemi = soup.find_all("div", class_="description")[x]
            
            # Get name of hemisphere
            hemi_name = hemi.find("h3").text
            
            # Get link to page with full resolution image
            hemi_image = hemi.find("a")["href"]
            hemi_image_url = f"https://astrogeology.usgs.gov/{hemi_image}"
            
            with init_browser() as browser:
                # Open browser to selected hemisphere's page
                browser.visit(hemi_image_url)
                soup = bs(browser.html, "html.parser")

                # Get link to full resolution image
                hemi_full = soup.find("img", class_ = "wide-image")["src"]
                hemi_full_url = f"https://astrogeology.usgs.gov/{hemi_full}"

                
            hemi_image_dict = {"title": hemi_name, "img_url": hemi_full_url}      
            hemisphere_image_urls.append(hemi_image_dict)
    
    # Create dictionary of results to return
    hemi_dict = {"hemisphere_image_urls": hemisphere_image_urls}
    
    return(hemi_dict)

In [15]:
print(hemi_scraper())

{'hemisphere_image_urls': [{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov//cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov//cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov//cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov//cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]}


In [16]:
def scrape():
      
    """Scrapes various websites for information on Mars.
    Returns a dictionary of this information."""
    
    news_dict = news_scraper()
    image_dict = image_scraper()
    weather_dict = weather_scraper()
    facts_dict = facts_scraper()
    hemi_dict = hemi_scraper()

    mars_dict = {**news_dict, **image_dict, **weather_dict, **facts_dict, **hemi_dict}
    return(mars_dict)