# Mission to Mars: Step 1 - Scraping

## Preparation

Import modules

In [1]:
import io
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import pandas as pd
import splinter

Create a Chrome driver

In [2]:
# Start a Chrome browser through splinter. headless=False keeps the window
# visible; 'chromedriver' is the driver executable handed to splinter
# (presumably resolved via PATH -- confirm for your environment).
# Every scraping cell below reuses this single `driver` object.
driver = splinter.Browser('chrome', executable_path='chromedriver', headless=False)

## NASA Mars News

In [3]:
# Load the news listing first: fetch_news() reads whatever page the driver
# is currently showing and does not navigate by itself.
driver.visit('https://mars.nasa.gov/news/')

def fetch_news(driver):
    """Return the title and teaser of the first article on the current news page.

    The driver must already be showing the news listing. The function gives
    the list items up to five seconds to be present, parses the rendered
    HTML and reads the first ``ul.item_list li.slide`` entry.

    Args:
        driver: splinter browser currently on the news listing page.

    Returns:
        tuple: ``(news_title, news_p)`` -- the text of the entry's
        ``content_title`` div and of its ``article_teaser_body`` div.

    Raises:
        RuntimeError: if no list item is found in the rendered page, or the
            item lacks its title or teaser element.
    """
    item_selector = 'ul.item_list li.slide'
    # Wait for presentation of list items
    driver.is_element_present_by_css(item_selector, wait_time=5.0)
    # select the latest news (the first item matching the selector)
    news = BeautifulSoup(driver.html, 'html.parser').select_one(item_selector)
    # select_one() yields None when nothing matches; fail with a message that
    # names the selector instead of an AttributeError on NoneType.
    if news is None:
        raise RuntimeError(
            'No news item matching %r was found on the current page' % item_selector
        )
    title_div = news.find('div', class_='content_title')
    teaser_div = news.find('div', class_='article_teaser_body')
    if title_div is None or teaser_div is None:
        raise RuntimeError(
            'The first news item has no content_title / article_teaser_body element'
        )
    return title_div.get_text(), teaser_div.get_text()

# Scrape the page loaded above and keep both parts as notebook variables.
news_title, news_p = fetch_news(driver)

In [4]:
# Bare expression: the cell output shows the scraped headline.
news_title

'NASA Launches a New Podcast to Mars'

In [5]:
# Bare expression: the cell output shows the scraped teaser paragraph.
news_p

"NASA's new eight-episode series 'On a Mission' follows the InSight spacecraft on its journey to Mars and details the extraordinary challenges of landing on the Red Planet."

## JPL Mars Space Images - Featured Image

In [6]:
# Open the JPL space-images page with the category filter set to Mars (see
# the query string); fetch_featured_image() starts clicking from this page.
driver.visit('https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')

def fetch_featured_image(driver):
    """Return the absolute URL of the featured image's full-size file.

    The driver must already be showing the space-images page. The function
    clicks the element with id ``full_image``, follows the 'more info' link,
    and reads the ``src`` of the ``<img>`` matched by ``figure.lede a img``
    on the page that results.

    Args:
        driver: splinter browser currently on the space-images page.

    Returns:
        str: the image URL, resolved against ``https://www.jpl.nasa.gov``.

    Raises:
        RuntimeError: if the image element, or its ``src`` attribute, is
            missing from the rendered page.
    """
    image_selector = 'figure.lede a img'
    driver.find_by_id('full_image').click()
    # Wait for presentation of 'more info' button
    driver.is_element_present_by_text('more info', wait_time=5.0)
    driver.find_link_by_partial_text('more info').click()
    # The click is expected to load another page; give its image the same
    # grace period before taking the HTML snapshot, so the snapshot is not
    # taken from a half-loaded page.
    driver.is_element_present_by_css(image_selector, wait_time=5.0)
    image = BeautifulSoup(driver.html, 'html.parser').select_one(image_selector)
    src = image.get('src') if image is not None else None
    if not src:
        raise RuntimeError(
            'No image with a src attribute matching %r was found' % image_selector
        )
    # urljoin resolves site-relative paths and leaves absolute URLs intact,
    # which plain string concatenation would corrupt.
    featured_image_url = urljoin('https://www.jpl.nasa.gov', src)
    return featured_image_url

# Run the scraper against the page loaded above; the bare name on the last
# line makes the cell display the resulting URL.
featured_image_url = fetch_featured_image(driver)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19036_hires.jpg'

## Mars Weather

In [10]:
# Load the marswxreport Twitter page; fetch_weather() parses the page the
# driver is currently showing.
driver.visit('https://twitter.com/marswxreport?lang=en')

def fetch_weather(driver):
    """Return the text of the first 'Mars Weather' tweet on the current page.

    Parses the HTML currently held by ``driver``, takes the first ``<div>``
    carrying class ``tweet`` and ``data-name="Mars Weather"``, and returns
    the text of the ``tweet-text`` paragraph found inside it.

    Args:
        driver: browser object exposing the rendered page as ``driver.html``.

    Returns:
        str: the paragraph text exactly as ``get_text()`` produces it.
    """
    page = BeautifulSoup(driver.html, 'html.parser')
    tweet_filter = {'class': 'tweet', 'data-name': 'Mars Weather'}
    first_tweet = page.find('div', attrs=tweet_filter)
    # A bare string in the second position of find() filters on CSS class.
    tweet_body = first_tweet.find('p', 'tweet-text')
    return tweet_body.get_text()

# Scrape the tweet text from the page loaded above; the bare name on the
# last line makes the cell display it.
mars_weather = fetch_weather(driver)
mars_weather

'Sol 2213 (2018-10-27), high -12C/10F, low -70C/-93F, pressure at 8.74 hPa, daylight 06:11-18:29'

## Mars Facts

In [11]:
# Load the Mars page of space-facts.com; fetch_facts() reads the first
# table of whatever page the driver is currently showing.
driver.visit('http://space-facts.com/mars/')

def fetch_facts(driver):
    """Render the first table of the current page as an HTML ``<table>`` string.

    The first table pandas finds in ``driver.html`` is relabelled with the
    columns ``description`` and ``value``, indexed by ``description`` and
    serialised with the CSS classes ``table table-striped``.

    Args:
        driver: browser object exposing the rendered page as ``driver.html``.

    Returns:
        str: the table markup produced by ``DataFrame.to_html``.

    Raises:
        ValueError: raised by pandas when the first table does not have
            exactly two columns (and, per the pandas docs, when the page
            holds no table at all).
    """
    # Recent pandas releases deprecate passing literal HTML text to
    # read_html(); a file-like wrapper is the documented replacement.
    facts_table = pd.read_html(io.StringIO(driver.html))[0]
    facts_table.columns = ['description', 'value']
    # Re-bind instead of mutating in place so the step reads as a pipeline.
    facts_table = facts_table.set_index('description')
    return facts_table.to_html(classes='table table-striped')

# Convert the facts table of the page loaded above to HTML; the bare name
# on the last line makes the cell display the markup string.
facts = fetch_facts(driver)
facts

'<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astrono

## Mars Hemispheres

In [12]:
# Load the USGS Astrogeology search results for "hemisphere enhanced" with
# target Mars (see the query string); fetch_hemispheres() walks these results.
driver.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

def fetch_hemispheres(driver):
    """Collect the title and sample-image link of every search result.

    The driver must already be showing the search-results page. Each result
    heading (``a.product-item h3``) is clicked in turn; on the page that
    opens, the ``h2.title`` text and the ``href`` of the link labelled
    ``Sample`` are recorded, then the browser goes back to the results.

    Args:
        driver: splinter browser currently on the search-results page.

    Returns:
        list[dict]: one ``{'title': ..., 'img_url': ...}`` entry per result,
        in page order; empty when the page lists no results.
    """
    link_selector = 'a.product-item h3'
    hemisphere_image_urls = []
    # Count with the same selector that is indexed below, so the loop bound
    # can never disagree with the number of clickable headings.
    num_items = len(driver.find_by_css(link_selector))
    for i in range(num_items):
        # Look the headings up again on every pass -- presumably handles
        # obtained before navigating away are no longer usable after back().
        driver.find_by_css(link_selector)[i].click()
        soup = BeautifulSoup(driver.html, 'html.parser')
        title = soup.find('h2', class_='title').get_text()
        # `string=` is the current name of Beautiful Soup's older `text=` filter.
        img_url = soup.find('a', string='Sample').get('href')
        hemisphere_image_urls.append({'title': title, 'img_url': img_url})
        driver.back()
    return hemisphere_image_urls

# Walk the search results loaded above; the bare name on the last line
# makes the cell display the collected list of dicts.
hemisphere_image_urls = fetch_hemispheres(driver)
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

## Release resources

In [14]:
# Shut down the browser created at the top of the notebook. Run this last:
# every scraping cell above needs the live `driver`.
driver.quit()