# Mission To Mars
Jupyter notebook containing the code for all of the web scraping and for the analysis of the scraped data.

## Setup

In [1]:
# Import dependencies
import shutil
from io import StringIO

import pandas as pd
import requests

from bs4 import BeautifulSoup
from splinter import Browser
from selenium import webdriver

In [2]:
# Confirm the location of the chromedriver executable on this machine
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Define executable path and create a 'browser' instance.
# Prefer the chromedriver found on PATH so the notebook is not tied to one machine's
# directory layout; fall back to the previously hard-coded location when the PATH
# lookup finds nothing.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News
Collect the latest news title and teaser paragraph from the NASA Mars News site.

In [4]:
# Define the url, use splinter to visit it, wait for the article list to show up,
# then snapshot the rendered html and create the beautiful soup object.
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

# NOTE(review): the article list looks like it is filled in by script after the initial
# page load - confirm. Waiting (up to 10 seconds) for the first title element avoids
# snapshotting a page that has no articles in it yet. The return value is ignored on
# purpose: if the element never appears the cell proceeds exactly as it did before.
browser.is_element_present_by_css('div.content_title', wait_time=10)

response = browser.html
soup = BeautifulSoup(response, 'html.parser')

In [9]:
# Take the first title <div> and the first teaser <div> on the page (i.e., the top
# article) and keep only their text.
title_div = soup.find('div', attrs={'class': 'content_title'})
teaser_div = soup.find('div', attrs={'class': 'article_teaser_body'})
news_title = title_div.get_text()
news_p = teaser_div.get_text()

In [10]:
# Show the scraped title and description, each with a bold (ANSI escape) label
print(f'\033[1mTitle:\033[0m {news_title}\n\n\033[1mDescription:\033[0m {news_p}')

[1mTitle:[0m Small Satellite Mission of the Year

[1mDescription:[0m The first interplanetary CubeSats were recognized by the engineering community with the 2019 Small Satellite Mission of the Year award.


## JPL Mars Space Images - Featured Image
Use splinter to navigate the site and find the image url for the current Featured Mars Image.

In [9]:
# Define the urls (base_url is the prefix used when building the final image url,
# mars_url is the page that is actually visited), use splinter to open the page and
# click the featured-image element, then snapshot the html and create the beautiful
# soup object.
base_url = 'https://www.jpl.nasa.gov'
mars_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(mars_url)

featured_image_xpath = '//*[@id="full_image"]'
browser.find_by_xpath(featured_image_xpath)[0].click()

# The next cell reads an <img class="fancybox-image"> from this snapshot. Give the page
# up to 10 seconds after the click to add that element before taking the snapshot. The
# return value is ignored: if the element never appears the cell behaves as it did before.
browser.is_element_present_by_css('img.fancybox-image', wait_time=10)

response = browser.html
soup = BeautifulSoup(response, 'html.parser')

In [10]:
# Locate the first <img> carrying the fancybox-image class, read its src attribute,
# and prefix it with the site's base url to get the featured image url.
featured_image_tag = soup.find('img', attrs={'class': 'fancybox-image'})
featured_image_path = featured_image_tag['src']
featured_image_url = ''.join((base_url, featured_image_path))

In [12]:
# Show the featured image url behind a bold (ANSI escape) label
print('\033[1mURL: \033[0m' + featured_image_url)

[1mURL: [0mhttps://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16711_ip.jpg


## Mars Weather
Scrape the latest Mars weather tweet from the Mars Weather Twitter account.

In [47]:
# Point splinter at the Mars Weather twitter feed, then parse the rendered page with beautiful soup.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
response = browser.html
soup = BeautifulSoup(response, 'html.parser')

# The first matching <p> is treated as the most recent tweet. Only its first child node
# is kept (presumably the tweet text without the trailing link - confirm against the
# page markup).
tweet_paragraph = soup.find(
    'p',
    attrs={'class': 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'},
)
mars_weather = tweet_paragraph.contents[0]

# Flatten the tweet onto a single line by swapping every newline for a space.
mars_weather = mars_weather.replace('\n', ' ')
print('\033[1mMost recent tweet: \033[0m' + mars_weather)

[1mMost recent tweet: [0mInSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF) winds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph) pressure at 7.60 hPa


## Mars Facts
Scrape the table from the Mars Facts website for facts about the planet including Diameter, Mass, etc.

In [49]:
# Define the url, use splinter to visit it, get the rendered html, and create the beautiful soup object.
url = 'https://space-facts.com/mars/'
browser.visit(url)
response = browser.html
soup = BeautifulSoup(response, 'html.parser')

# Find the facts table. Fail with a clear message if the page does not contain it;
# otherwise str(None) would be handed to pandas as if it were markup.
scraped_table = soup.find('table', id='tablepress-p-mars')
if scraped_table is None:
    raise ValueError("Could not find a table with id 'tablepress-p-mars' at " + url)

# Use pandas to extract the table into a dataframe. The markup is wrapped in StringIO
# because passing a literal html string to read_html is deprecated in recent pandas
# releases; a file-like object is accepted by old and new versions alike.
mars_facts_dataframe = pd.read_html(StringIO(str(scraped_table)))[0]

# Name the two columns and use the 'label' column as the index
mars_facts_dataframe.columns = ['label', 'information']
mars_facts_dataframe = mars_facts_dataframe.set_index('label')

# Convert the table to an html string
final_table = mars_facts_dataframe.to_html()

In [50]:
# Display the dataframe (as the cell's last expression) to demonstrate the table was pulled correctly
mars_facts_dataframe

Unnamed: 0_level_0,information
label,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemispheres
Scrape multiple images from the USGS Astrogeology site and store each hemisphere's title and image url as a dictionary in a list.

In [51]:
# Define the base url (prefix for the final image urls) and the Mars search-results url,
# define the hemisphere names and the xpaths used to open each one, then visit each
# hemisphere in turn, read the src of its image via beautiful soup, and collect one
# {'title', 'img_url'} dictionary per hemisphere in a list.
base_url = 'https://astrogeology.usgs.gov'
mars_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Define lists of data from which to build our image urls.
# The two lists are parallel: entry n of one belongs with entry n of the other.
hemispheres_list = ['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']
xpath_list = ['//*[@id="product-section"]/div[2]/div[1]/a/img', '//*[@id="product-section"]/div[2]/div[2]/a/img', '//*[@id="product-section"]/div[2]/div[3]/a/img', '//*[@id="product-section"]/div[2]/div[4]/a/img']
hemisphere_image_urls = []

# zip() stops at the shorter list, so make a length mismatch loud instead of silent.
assert len(hemispheres_list) == len(xpath_list), 'each hemisphere needs exactly one xpath'

# Walk the names and xpaths in lockstep rather than indexing both lists by position.
for hemisphere_title, thumbnail_xpath in zip(hemispheres_list, xpath_list):
    # Start from the search results each time, click this hemisphere's image link,
    # then click the element with id "wide-image-toggle" on the page that opens.
    browser.visit(mars_url)
    browser.find_by_xpath(thumbnail_xpath)[0].click()
    browser.find_by_xpath('//*[@id="wide-image-toggle"]')[0].click()

    # Store the response and create the beautiful soup object
    response = browser.html
    soup = BeautifulSoup(response, 'html.parser')

    # Grab the src from the img tag (img_path) and build the correct url
    img_path = soup.find('img', class_='wide-image')['src']
    img_url = base_url + img_path

    # Append this hemisphere's entry to our list
    hemisphere_image_urls.append({'title': hemisphere_title, 'img_url': img_url})

In [52]:
# Display the list of {'title', 'img_url'} dictionaries (as the cell's last expression)
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]