# Mission To Mars
This Jupyter notebook contains the code for scraping Mars data from several websites and for analyzing the scraped data.

## Setup

In [5]:
# Import dependencies
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser

In [6]:
# Confirm the location of the chromedriver executable (shell command run via IPython's `!`)
!which chromedriver

/usr/local/bin/chromedriver


In [7]:
# Build the keyword arguments that point splinter at the chromedriver binary,
# then open a visible (non-headless) Chrome window as 'browser'.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

## NASA Mars News
Collect the latest news title and teaser paragraph from the NASA Mars News site.

In [8]:
# Open the NASA Mars News listing in the splinter browser, pause 2.5 seconds,
# then hand the browser's current HTML to Beautiful Soup.
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)
time.sleep(2.5)
response = browser.html
news_soup = BeautifulSoup(response, features='html.parser')

In [10]:
# Find the most recent article (i.e., the top article).
#
# Up to four attempts are made. Re-searching the same parsed soup can never give a
# different answer, so every retry first waits and re-parses the browser's current HTML
# (this assumes 'browser' is still on the news page visited in the previous cell).
# If the elements never show up, fail loudly here instead of leaving news_title / news_p
# undefined (or stale from an earlier run) for the cells below.
news_title = None
news_p = None

for try_counter in range(4):
    if try_counter:
        time.sleep(2.5)
        news_soup = BeautifulSoup(browser.html, 'html.parser')

    # find() returns None when nothing matches, so test for that explicitly
    # rather than relying on a bare except around '.text'.
    title_tag = news_soup.find('div', class_='content_title')
    teaser_tag = news_soup.find('div', class_='article_teaser_body')
    if title_tag is not None and teaser_tag is not None:
        news_title = title_tag.text
        news_p = teaser_tag.text
        break
else:
    raise RuntimeError('Could not find the latest article title and teaser on the NASA Mars News page.')

In [11]:
# Show the scraped title and description, each with a bold (ANSI escape) label and
# separated by a blank line.
print('\033[1m' + 'Title:' + '\033[0m' + f' {news_title}\n\n', end='')
print('\033[1m' + 'Description:' + '\033[0m' + f' {news_p}')

[1mTitle:[0m Space Samples Link NASA's Apollo 11 and Mars 2020

[1mDescription:[0m While separated by half a century, NASA's Apollo 11 and Mars 2020 missions share the same historic goal: returning samples to Earth.


## JPL Mars Space Images - Featured Image
Use splinter to navigate the site and find the image url for the current Featured Mars Image.

In [12]:
# base_url is prepended to the relative image path in the next cell; mars_url is the
# page that is actually visited here.
base_url = 'https://www.jpl.nasa.gov'
mars_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Elements to click, in order: the featured image button first, then the link that
# leads one page further to the high res image.
featured_image_xpath = '//*[@id="full_image"]'
high_res_xpath = '//*[@id="fancybox-lock"]/div/div[2]/div/div[1]/a[2]'

browser.visit(mars_url)
time.sleep(2.5)

# Click through both elements, pausing 2.5 seconds after each click.
for click_target in (featured_image_xpath, high_res_xpath):
    browser.find_by_xpath(click_target).first.click()
    time.sleep(2.5)

# Parse whatever page the browser has ended up on.
response = browser.html
image_soup = BeautifulSoup(response, features='html.parser')

In [13]:
# Find the featured image url.
#
# The relative path is the href of the first <a> inside <figure class="lede">; it is
# appended to base_url to form the full url. Up to four attempts are made, and every
# retry waits and re-parses the browser's current HTML, because searching the same soup
# again cannot change the result (this assumes 'browser' is still on the image page
# opened in the previous cell). A missing element raises a clear error here instead of
# leaving featured_image_url undefined or stale.
featured_image_url = None

for try_counter in range(4):
    if try_counter:
        time.sleep(2.5)
        image_soup = BeautifulSoup(browser.html, 'html.parser')

    # find() returns None on no match and Tag.get() returns None for a missing attribute.
    figure_tag = image_soup.find('figure', class_='lede')
    link_tag = figure_tag.find('a') if figure_tag is not None else None
    featured_image_path = link_tag.get('href') if link_tag is not None else None
    if featured_image_path:
        featured_image_url = base_url + featured_image_path
        break
else:
    raise RuntimeError('Could not find the featured image link on the JPL Mars images page.')

In [14]:
# Show the featured image url after a bold (ANSI escape) label.
print(''.join(['\033[1m', 'URL: ', '\033[0m', featured_image_url]))

[1mURL: [0mhttps://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17794_hires.jpg


## Mars Weather
Scrape the latest Mars weather tweet from the Mars Weather Twitter account.

In [15]:
# Visit the Mars Weather Twitter page with splinter, pause 2.5 seconds, and parse the
# browser's HTML with Beautiful Soup.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
time.sleep(2.5)
response = browser.html
tweet_soup = BeautifulSoup(response, 'html.parser')

# Find the most recent tweet (i.e., the top most tweet).
#
# Only the first child of the tweet's <p> element is kept, and only if it is text; its
# newlines are replaced with spaces (once). Any URL contained in that text is NOT removed.
# Up to four attempts are made; each retry waits and re-parses the live page, because
# searching the same soup again cannot change the result. If no tweet text is found the
# cell raises instead of leaving mars_weather undefined or stale.
mars_weather = None

for try_counter in range(4):
    if try_counter:
        time.sleep(2.5)
        tweet_soup = BeautifulSoup(browser.html, 'html.parser')

    tweet_tag = tweet_soup.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
    # find() returns None on no match, and .contents may be an empty list.
    first_node = tweet_tag.contents[0] if tweet_tag is not None and tweet_tag.contents else None
    if isinstance(first_node, str):
        mars_weather = first_node.replace('\n', ' ')
        break
else:
    raise RuntimeError('Could not find the latest tweet text on the Mars Weather Twitter page.')

print('\033[1m' + 'Most recent tweet: ' + '\033[0m' + mars_weather)

[1mMost recent tweet: [0mI’d say a plutonium-238 powered RTG qualifies the Curiosity and Mars2020 rovers as alternative fuel vehicles. You can explore these and other missions, rockets and more with JPL’s Spacecraft AR for IOS and Androhttps://www.jpl.nasa.gov/apps/


## Mars Facts
Scrape the table from the Mars Facts website for facts about the planet including Diameter, Mass, etc.

In [20]:
# Visit the Mars facts page with splinter, pause 2.5 seconds, and parse the browser's
# HTML with Beautiful Soup.
url = 'https://space-facts.com/mars/'
browser.visit(url)
time.sleep(2.5)
response = browser.html
table_soup = BeautifulSoup(response, 'html.parser')

# Find the facts table.
#
# find() returns None (it does not raise) when the table is absent, so the result is
# checked explicitly. Up to four attempts are made; each retry waits and re-parses the
# live page. If the table never appears, raise here rather than handing the string
# 'None' to pandas.
scraped_table = None

for try_counter in range(4):
    if try_counter:
        time.sleep(2.5)
        table_soup = BeautifulSoup(browser.html, 'html.parser')

    scraped_table = table_soup.find('table', id='tablepress-p-mars')
    if scraped_table is not None:
        break
else:
    raise RuntimeError('Could not find the table with id "tablepress-p-mars" on the Mars facts page.')

# Extract the table into a dataframe. The HTML is wrapped in StringIO because passing a
# literal HTML string to read_html is deprecated in recent pandas releases.
mars_facts_dataframe = pd.read_html(StringIO(str(scraped_table)))[0]

# Use column 0 as the index
mars_facts_dataframe = mars_facts_dataframe.set_index(0)

# Convert the table to an html string, leaving out the column header row
final_table = mars_facts_dataframe.to_html(header=False)

In [22]:
# Display the dataframe (as the cell's last expression) to demonstrate it was pulled correctly
mars_facts_dataframe

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemispheres
Scrape the full-size hemisphere images from the USGS Astrogeology site and store each title and image url as a dictionary in a list.

In [23]:
# Define the base url and the Mars search url, then, for each hemisphere, click through
# to its page, read the src of the wide image via Beautiful Soup, and append a
# {'title', 'img_url'} dictionary to hemisphere_image_urls.
base_url = 'https://astrogeology.usgs.gov'
mars_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Parallel lists: hemisphere names and the xpath of the thumbnail to click for each one.
hemispheres_list = ['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']
xpath_list = ['//*[@id="product-section"]/div[2]/div[1]/a/img', '//*[@id="product-section"]/div[2]/div[2]/a/img', '//*[@id="product-section"]/div[2]/div[3]/a/img', '//*[@id="product-section"]/div[2]/div[4]/a/img']
hemisphere_image_urls = []

# Iteration
for hemisphere_name, thumbnail_xpath in zip(hemispheres_list, xpath_list):
    # Visit the search page and navigate using splinter: click the thumbnail, then the
    # element with id "wide-image-toggle", pausing 2.5 seconds after each step.
    browser.visit(mars_url)
    time.sleep(2.5)
    browser.find_by_xpath(thumbnail_xpath)[0].click()
    time.sleep(2.5)
    browser.find_by_xpath('//*[@id="wide-image-toggle"]')[0].click()
    time.sleep(2.5)

    # Reset per hemisphere so a failed lookup can never reuse the previous
    # hemisphere's url.
    img_url = None

    # Up to four attempts; each retry waits and re-reads the live page, because
    # searching the same soup again cannot change the result.
    for try_counter in range(4):
        if try_counter:
            time.sleep(2.5)

        # Store the response and create the beautiful soup object
        response = browser.html
        hemisphere_soup = BeautifulSoup(response, 'html.parser')

        # Grab the src from the img tag (img_path) and build the full url.
        # find() returns None on no match and Tag.get() returns None for a missing attribute.
        image_tag = hemisphere_soup.find('img', class_='wide-image')
        img_path = image_tag.get('src') if image_tag is not None else None
        if img_path:
            img_url = base_url + img_path
            break
    else:
        raise RuntimeError(f'Could not find the wide image for {hemisphere_name}.')

    # Append this hemisphere's entry to the list
    hemisphere_image_urls.append({'title': hemisphere_name, 'img_url': img_url})

In [24]:
# Display the list of {'title', 'img_url'} dictionaries (as the cell's last expression)
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]