#  Step-1: Scraping: Mission to Mars

In [53]:
# Set Dependencies 
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests

In [54]:
# Launch a visible (non-headless) Chrome session through splinter.
# NOTE(review): the chromedriver location below is a machine-specific absolute
# path; adjust it if chromedriver is installed elsewhere.
executable_path = dict(executable_path="/usr/local/bin/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)

# NASA Mars News

In [55]:
# Load the NASA Mars news listing in the automated browser
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [56]:
##############################################
# Web scraping
##############################################

# Give the page a few seconds to populate the article list before taking the
# HTML snapshot. visit() can return before the first 'list_text' container
# exists in the DOM, in which case the lookups below would find nothing.
browser.is_element_present_by_css('div.list_text', wait_time=5)

# Snapshot the page as currently rendered by the browser
html = browser.html

# Parse the snapshot with Beautiful Soup
news_soup = BeautifulSoup(html, 'html.parser')

# Collect the title and teaser paragraph from the first article container on
# the page (presumably the most recent item -- confirm against the site)
article = news_soup.find("div", class_='list_text')
if article is None:
    # Fail with a clear message instead of an AttributeError on None below
    raise RuntimeError("No 'div.list_text' article found on " + url)

title_tag = article.find("div", class_="content_title")
teaser_tag = article.find("div", class_="article_teaser_body")
if title_tag is None or teaser_tag is None:
    raise RuntimeError("Article is missing its title or teaser on " + url)

news_title = title_tag.text
news_para = teaser_tag.text

# Display the scraped data
print(news_title)
print(news_para)

Independent Review Indicates NASA Prepared for Mars Sample Return Campaign
NASA released an independent review report Tuesday indicating the agency is well positioned for its Mars Sample Return campaign to bring pristine samples from Mars to Earth for scientific study.


# JPL Mars Space Images - Featured Image

In [57]:
# Load the JPL space-images page (Mars category) in the automated browser
image_url_featured = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url_featured)

In [58]:
# Snapshot the page currently loaded in the browser and parse it
html_image = browser.html
jpl_soup = BeautifulSoup(html_image, 'html.parser')

# The image path is read from the inline style of the first <article> element:
# remove the "background-image: url(" prefix and the ");" suffix, then drop one
# character from each end (presumably the quotes wrapping the path).
article_style = jpl_soup.find('article')['style']
without_prefix = article_style.replace('background-image: url(', '')
without_suffix = without_prefix.replace(');', '')
relative_image_path = without_suffix[1:-1]

# Prefix the scraped path with the JPL site root to get a complete URL
index_url = 'https://www.jpl.nasa.gov'
featured_image_url = index_url + relative_image_path

# Title text of the featured image, with surrounding whitespace removed
title_tag = jpl_soup.find('h1', class_="media_feature_title")
featured_image_title = title_tag.text.strip()

# Report the title and the complete image URL
print(f'Featured image title is:{featured_image_title}')
print(f'Featured image url is:{featured_image_url}')


Featured image title is:Triple Crescents
Featured image url is:https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18322-1920x1200.jpg


# Mars Facts

In [59]:
# Load the Mars facts page in the automated browser
mars_facts_url = "http://space-facts.com/mars/"
browser.visit(mars_facts_url)


In [61]:

# Let pandas download the page and parse every <table> on it into a list of
# DataFrames
mars_facts = pd.read_html(mars_facts_url)

# Work on a copy of the first table so the parsed list is left untouched and
# re-running this cell always starts from the same input
mars_facts_df = mars_facts[0].copy()

# Label the two columns, then use 'Description' as the row index
mars_facts_df.columns = ['Description', 'Value']
mars_facts_df = mars_facts_df.set_index('Description')

# Convert the table to HTML. The fact labels now live in the index, so the
# index must be kept in the output: index=False would emit only the values.
# header=False still suppresses the column-name header row.
mars_fact_html = mars_facts_df.to_html(header=False)

# Show the Mars facts table
mars_facts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [39]:
# Load the USGS Astrogeology search results for the enhanced Mars hemisphere
# images in the automated browser
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

In [40]:
# Snapshot the search-results page currently loaded in the browser and parse it
hemispheres_html = browser.html
mh_soup = BeautifulSoup(hemispheres_html, 'html.parser')

# Every 'item' div is treated as one hemisphere search result
items = mh_soup.find_all('div', class_='item')

# Site root, used to turn the scraped href/src values into complete URLs
hemispheres_main_url = 'https://astrogeology.usgs.gov'

# One {"title", "img_url"} record is collected per hemisphere
hemisphere_image_urls = []

for item in items:
    # Heading text and detail-page link of this search result
    title = item.find('h3').text
    detail_path = item.find('a', class_='itemLink product-item')['href']

    # Open the hemisphere's own page and parse what the browser rendered
    browser.visit(hemispheres_main_url + detail_path)
    detail_soup = BeautifulSoup(browser.html, 'html.parser')

    # Source of the 'wide-image' <img> on the detail page, made absolute
    wide_image_src = detail_soup.find('img', class_='wide-image')['src']
    img_url = hemispheres_main_url + wide_image_src

    hemisphere_image_urls.append({"title": title, "img_url": img_url})

# Show the collected titles and image URLs
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]