# Scrape the NASA Mars News Site
* collect the latest News Title and Paragraph Text

In [1]:
# Setup dependencies
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [2]:
# Start a visible (non-headless) Chrome session through splinter,
# driven by the chromedriver binary named below
from splinter import Browser

executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Source pages, one per scraping stage

# NASA Mars News listing
nasa_url = 'https://mars.nasa.gov/news/'

# JPL Mars Space Images: site root, plus the Mars-filtered gallery page
jpl_base_url = 'https://www.jpl.nasa.gov'
jpl_url = jpl_base_url + '/spaceimages/?search=&category=Mars'

# Mars weather report feed on Twitter
weather_url = 'https://twitter.com/marswxreport?lang=en'

# Mars facts page
mars_facts_url = 'https://space-facts.com/mars/'

# USGS Astrogeology search results with high resolution images
# for each of Mars' hemispheres
mars_hemi_url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)

### NASA Mars News

In [5]:
# Retrieve page with splinter; the requests module appears to return older news
browser.visit(nasa_url)

# browser.html is a snapshot taken at the moment it is read. Give the page a
# few seconds for a news item to show up before parsing.
# NOTE(review): presumably the list is filled in by JavaScript, which would
# explain why requests sees older items - confirm.
browser.is_element_present_by_css('li.slide', wait_time=5)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(browser.html, 'html.parser')

# The first "slide" list item is taken as the latest news entry
result = soup.body.find("li", class_="slide")
if result is None:
    # Fail with a readable message instead of an AttributeError on None below
    raise RuntimeError(f"No news item (li.slide) found at {nasa_url}")

# Title is the link text inside 'content_title'; the teaser paragraph
# is the text of 'article_teaser_body'
news_title = result.find("div", class_="content_title").a.text
news_p = result.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"Text: {news_p}")

Title: Mars InSight Lander to Push on Top of the 'Mole'
Text: Engineers have a plan for pushing down on the heat probe, which has been stuck at the Martian surface for a year.


In [6]:
# [Not used] Retrieve the page with the requests module instead of splinter.
# This appears to return older news: an attempt on 2/21/2020 only returned
# news from before 2/11/2020.
# response = requests.get(nasa_url)
# soup = BeautifulSoup(response.text, 'html.parser')

### JPL Mars Space Images - Featured Image

In [7]:
# Load the JPL Mars image gallery in the browser
browser.visit(jpl_url)

# Parse the current page source with the built-in 'html.parser'
soup = BeautifulSoup(browser.html, 'html.parser')

# The featured-image button lives inside the carousel container; its
# 'data-link' attribute holds the site-relative path of the detail page
carousel = soup.body.find('div', class_='carousel_container')
result = carousel.find('a', class_='button fancybox')
image_detail_url = jpl_base_url + result['data-link']
image_detail_url

'https://www.jpl.nasa.gov/spaceimages/details.php?id=PIA17793'

In [8]:
# Open the detail page of the featured image
browser.visit(image_detail_url)

# Parse the current page source with the built-in 'html.parser'
soup = BeautifulSoup(browser.html, 'html.parser')

# The 'lede' figure wraps a link whose href is the site-relative
# path of the image file
result = soup.body.find("figure", class_="lede")
image_path = result.a['href']

featured_image_url = jpl_base_url + image_path
print(f"Featured image link is {featured_image_url}")

Featured image link is https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17793_hires.jpg


### Mars Weather

In [9]:
# Retrieve page with the requests module
response = requests.get(weather_url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# The first tweet text container is taken as the latest tweet
result = soup.body.find('div', class_='js-tweet-text-container')
tweet_text = result.p.text.replace('\n', ' ')

# Drop the trailing link text from the message, if the tweet has a link.
# str.rstrip(chars) is NOT used here: it strips any trailing characters drawn
# from `chars` (a set), so it can also eat the end of the weather report.
tweet_link = result.p.a
link_text = tweet_link.text if tweet_link is not None else ''
if link_text and tweet_text.endswith(link_text):
    tweet_text = tweet_text[:-len(link_text)]

mars_weather = tweet_text
mars_weather

'InSight sol 439 (2020-02-20) low -94.7ºC (-138.4ºF) high -9.3ºC (15.2ºF) winds from the SSE at 6.5 m/s (14.6 mph) gusting to 23.2 m/s (51.9 mph) pressure at 6.30 hPa'

### Mars Facts

In [10]:
# Let pandas parse the tables on the facts page and keep the first one,
# then label its two columns and index the frame by the description
mars_facts = pd.read_html(mars_facts_url)[0]
mars_facts.columns = ['description', 'value']
mars_facts = mars_facts.set_index('description')
mars_facts

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [11]:
# Write the facts table to 'mars_facts.html' with left-justified
# column labels and the given CSS classes on the <table> element
mars_facts.to_html(
    buf='mars_facts.html',
    classes='table table-bordered',
    justify='left',
)

### Mars Hemispheres

In [12]:
# List of {'title', 'image_url'} records, one per hemisphere result.
hemisphere_image_urls = []

# Retrieve the hemisphere search results page with splinter
browser.visit(mars_hemi_url)

# Create BeautifulSoup object; parse with 'html.parser'.
# This is a snapshot of the results page, so the loop below can keep
# iterating over it while the browser navigates away and back.
soup = BeautifulSoup(browser.html, 'html.parser')

results = soup.body.find_all('div', class_='item')
for result in results:
    # title is the link text inside the item's 'description' div
    title = result.find('div', class_='description').a.text

    # click title link to open the page that holds the image url
    browser.links.find_by_partial_text(title).click()

    # new BeautifulSoup object for clicked page
    soup_img = BeautifulSoup(browser.html, 'html.parser')

    # full image link is the <a> whose text is 'Sample', under the div with
    # class 'downloads'. `string=` is the current name of BeautifulSoup's
    # deprecated `text=` filter and matches the same way.
    image_url = soup_img.body\
        .find('div', class_='downloads')\
        .find('a', string='Sample')['href']

    # append data to list
    hemisphere_image_urls.append({
        'title': title,
        'image_url': image_url
    })

    # back to the results page for the next result
    browser.back()

# display list
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]