In [6]:
#-----------------------------
# Colby Alexander Hoke
# UNC Data Analytics Bootcamp
# August, 2020
#-----------------------------

In [7]:
# Setup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import pandas as pd
import time

In [8]:
# Keyword arguments telling Splinter where the chromedriver binary lives.
# macOS location:
executable_path = dict(executable_path='/usr/local/bin/chromedriver')

# On Windows, comment out the line above and uncomment this one instead:
#executable_path = {"executable_path": "chromedriver.exe"}

# Launch a visible (non-headless) Chrome session that every later cell reuses
browser = Browser('chrome', headless=False, **executable_path)

In [9]:
# Scrape the NASA Mars News Site:
# https://mars.nasa.gov/news
#
# Collect the latest News Title and Paragraph Text and store them in
# `news_title` and `news_p`.

nasa_news_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_news_url)

# Wait (up to 10 seconds) for the first article container instead of sleeping
# for a fixed second: the wait returns as soon as the element exists, and a
# slow page load no longer leaves us parsing HTML without the article list.
if not browser.is_element_present_by_css('div.list_text', wait_time=10):
    raise RuntimeError(f'No news article container found at {nasa_news_url}')

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the container of what we need (the first article in the list)
article = soup.find('div', class_='list_text')
title_link = article.find('a') if article is not None else None
teaser = (
    article.find('div', class_='article_teaser_body')
    if article is not None
    else None
)

# Fail with a readable message rather than an AttributeError on None
if title_link is None or teaser is None:
    raise RuntimeError(
        f'Could not locate the latest news title/teaser at {nasa_news_url}; '
        'the page layout may have changed.'
    )

# Get the news title and the paragraph text, without surrounding whitespace
news_title = title_link.text.strip()
news_p = teaser.text.strip()

In [10]:
# Show the scraped headline and teaser to confirm the scrape worked
news_summary = f'News title: {news_title} \n\nNews Paragraph: {news_p}'
print(news_summary)

News title: NASA Engineers Checking InSight's Weather Sensors 

News Paragraph: An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.


## JPL Mars Space Images - Featured Image

In [None]:
# Scrape the JPL Featured Space Image
# https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
#
# Find the image url for the current Featured Mars Image
# Assign the url string to a variable called featured_image_url.
#
# Make sure to find the image url to the full size .jpg image.
# Make sure to save a complete url string for this image.
#
# Example:
# featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'
#
# This cell only resolves the URL of the featured image's detail page
# (img_page_url); the full-size image URL is extracted from that page in the
# next cell.

space_img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Everything before '/spaceimages' is the site root; the link read from the
# page below is appended to it.
url_split = space_img_url.split('/spaceimages',1)

browser.visit(space_img_url)

# Wait (up to 10 seconds) for the featured-image section rather than sleeping
# for a fixed second.
if not browser.is_element_present_by_css('section.main_feature', wait_time=10):
    raise RuntimeError(f'Featured image section not found at {space_img_url}')

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Zero in on the button for the image, checking each step so a layout change
# produces a clear error instead of a TypeError/KeyError on a missing element.
feature_section = soup.find('section', class_='main_feature')
image_button = (
    feature_section.find('a', class_='button fancybox')
    if feature_section is not None
    else None
)
img_link = image_button.get('data-link') if image_button is not None else None
if not img_link:
    raise RuntimeError(
        f'Could not find the featured image button link at {space_img_url}; '
        'the page layout may have changed.'
    )

img_page_url = url_split[0] + img_link

# Return the URL for the page that holds our image
print(img_page_url)

In [None]:
# Visit the featured image's detail page and pull the full-resolution image
# URL into featured_image_url.
browser.visit(img_page_url)

# Wait (up to 10 seconds) for the download links rather than sleeping for a
# fixed second.
if not browser.is_element_present_by_css('#secondary_column div.download_tiff', wait_time=10):
    raise RuntimeError(f'No download links found at {img_page_url}')

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Collect the download entries from the secondary column
secondary_column = soup.find('div', id='secondary_column')
img_details = (
    secondary_column.find_all('div', class_='download_tiff')
    if secondary_column is not None
    else []
)

# The second download entry is the one used for the full-res image; make sure
# it exists before indexing into the list.
if len(img_details) < 2:
    raise RuntimeError(
        f'Expected at least two download entries at {img_page_url}, '
        f'found {len(img_details)}.'
    )

hires_anchor = img_details[1].find('a')
if hires_anchor is None or not hires_anchor.get('href'):
    raise RuntimeError(f'Full-resolution download link missing at {img_page_url}')
hires_image_url = hires_anchor['href']

# Prepend the scheme only when the href is protocol-relative ('//host/path');
# an href that is already absolute is kept as-is instead of being corrupted.
if hires_image_url.startswith('//'):
    featured_image_url = 'https:' + hires_image_url
else:
    featured_image_url = hires_image_url

# Return the URL
print(featured_image_url)

## Mars Facts


Visit the Mars Facts webpage:
https://space-facts.com/mars/
    
Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.


Use Pandas to convert the data to an HTML table string.

In [None]:
# Scrape the Mars facts table and convert it to a single-line HTML string
# stored in mars_html_table.

# Set URL to pull from
mars_facts_url = 'https://space-facts.com/mars/'

# Read and store all of the tables found on the page (a list of DataFrames)
tables = pd.read_html(mars_facts_url)

# Get the table we want (the first one on the page)
mars_df = tables[0]

# Name the two columns and index by the parameter name. The label is spelled
# 'Parameter' because it is rendered as the header of the HTML table.
mars_df.columns = ['Parameter', 'Value']
mars_df = mars_df.set_index('Parameter')

# Convert the table to HTML and strip the new line returns
mars_html_table = mars_df.to_html().replace('\n', '')

# Return the HTML
print(mars_html_table)

## Mars Hemispheres


Visit the USGS Astrogeology site:
https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars

Obtain high resolution images for each of Mars's hemispheres.

You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

Example:
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": "..."},
    {"title": "Cerberus Hemisphere", "img_url": "..."},
    {"title": "Schiaparelli Hemisphere", "img_url": "..."},
    {"title": "Syrtis Major Hemisphere", "img_url": "..."},
]

In [None]:
# USGS Astrogeology search results for the enhanced Mars hemisphere products
usgs_astro_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Keep the part of the URL before '/search' (element 0 of the split) so that
# links taken from the page can be prefixed with it in the next cell.
usgs_astro_url_split = usgs_astro_url.split(sep='/search', maxsplit=1)

# Load the results page in the browser and parse the HTML it currently holds
browser.visit(usgs_astro_url)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Narrow the parsed results page down to the block that lists the products
usgs_astro_products = soup.find('div', class_='collapsible results')

# One 'item' div per hemisphere page
hemisphere_pages = usgs_astro_products.find_all('div', class_='item')

# Collects one {'title': ..., 'img_url': ...} dictionary per hemisphere
hemisphere_image_urls = []

for page in hemisphere_pages:
    # The anchor inside the description supplies both the title text and the
    # link to the hemisphere's detail page.
    description_anchor = page.find('div', class_='description').find('a')
    title = description_anchor.text
    hemisphere_link = description_anchor['href']

    # Open the detail page (site root + link) and parse it
    browser.visit(usgs_astro_url_split[0] + hemisphere_link)
    hemisphere_html = browser.html
    hemisphere_soup = BeautifulSoup(hemisphere_html, 'html.parser')

    # The first anchor inside the 'content' div is taken as the image link
    img_link = hemisphere_soup.find('div', class_='content').find('a')['href']

    # Record this hemisphere's title and image link
    hemisphere_dict = {'title': title, 'img_url': img_link}
    hemisphere_image_urls.append(hemisphere_dict)

    # Step the browser back before moving on to the next hemisphere
    browser.back()

# Display the collected list as the cell's output
hemisphere_image_urls