# Web Scraping - Mission to Mars

In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd

In [2]:
# Location of the local ChromeDriver binary. This path is machine-specific and
# must be changed to wherever chromedriver is installed on the current machine.
executable_path = {'executable_path': 'C:/temp/chromedriver'}
# Start a Splinter-controlled Chrome session, passing the driver path as a keyword argument.
browser = Browser('chrome', **executable_path)

## NASA Mars News

In [3]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Wait up to 1 second for the news list items to appear before scraping.
# This is a bounded wait on the element, not a fixed delay.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)
# NOTE(review): the boolean result is not checked, so the following cells run
# even if the list did not load in time; a fixed time.sleep(1) was the
# alternative considered here.

True

In [None]:
# Create a Beautiful Soup object from the HTML currently loaded in the browser,
# so the page can be searched without further browser calls
html = browser.html
soup = bs(html, 'html.parser')

In [None]:
# Collect the latest News Title
# find_all returns every <div class="content_title"> element on the page
news_titles = soup.find_all('div', class_="content_title")

# Create list to hold news titles
titles = []

# Loop through returned results
for news in news_titles:
    # A content_title div may contain no link; find() returns None in that
    # case, so skip it explicitly instead of hiding every error behind a
    # bare except.
    link = news.find('a')
    if link is None:
        continue

    title = link.text
    titles.append(title)

# Fail with a clear message when nothing was scraped (for example, the page
# had not finished loading) instead of a bare "list index out of range".
if not titles:
    raise IndexError("No news titles found; the news list may not have loaded")

# Get the first title in the titles list
latest_title = titles[0]
print(latest_title)

In [None]:
# Collect the latest News Paragraph Text
# find() returns only the first matching teaser element on the page.
# NOTE(review): presumably this teaser belongs to the same article as
# latest_title -- confirm against the page layout.
news_p = soup.find('div', class_="article_teaser_body").text
print(news_p)

## JPL Mars Space Images

In [None]:
# URL of page to be scraped: the JPL space images search, filtered to the Mars category
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Load the page in the Splinter-controlled browser; `url` is reused from the previous section
browser.visit(url)

In [None]:
# Alternative (not used): click the 'full image' button by its element id
# full_image = browser.find_by_id('full_image')
# full_image.click()

In [None]:
# Splinter interacting with browser - click 'full image' 'button'
# Uses the browser.links finder (the same API the hemisphere scrape uses)
# because click_link_by_partial_text is deprecated in Splinter.
# The click returns nothing, so full_image_click is bound to None as before.
full_image_click = browser.links.find_by_partial_text('FULL IMAGE').first.click()

In [None]:
# Splinter interacting with browser - click 'more info' 'button'
# Give the image overlay up to 1 second to render the link before clicking it.
browser.is_element_present_by_text('more info', wait_time=1)
# Uses the browser.links finder (the same API the hemisphere scrape uses)
# because click_link_by_partial_text is deprecated in Splinter.
# The click returns nothing, so more_info_click is bound to None as before.
more_info_click = browser.links.find_by_partial_text('more info').first.click()

In [None]:
# Create a Beautiful Soup object (Scrape page into Soup)
# This re-reads the browser HTML after the clicks above, replacing the
# `html` and `soup` objects built for the news page.
html = browser.html
soup = bs(html, 'html.parser')

In [None]:
# Find the image url
# Collect every <div class="download_tiff"> block; the bare `image` expression
# below displays the matches so the right one can be picked by index.
image = soup.find_all('div', class_="download_tiff")
image

In [None]:
# Use square brackets to grab href
# Take the second download block and read the href of its first link.
# NOTE(review): presumably index 1 is the full-size image rather than the
# first listed format -- confirm against the `image` output above.
thread = image[1]
featured_image_url = thread.find('a')['href']
featured_image_url

## Mars Facts

In [None]:
# Use read_html function in Pandas to scrape tabular data from page
# read_html fetches the URL itself (no browser needed) and returns a list of
# DataFrames, one per HTML table it finds.
url = 'https://space-facts.com/mars/'
tables = pd.read_html(url)
tables

In [None]:
# Use indexing to pick the first table returned by read_html.
# NOTE(review): assumes the first table on the page is the Mars facts table
# (Diameter, Mass, etc.) -- confirm against the `tables` output.
mars_df = tables[0]

In [None]:
# Rename columns
# rename returns a new DataFrame, so mars_df itself is left untouched.
# NOTE(review): assumes read_html produced integer column labels 0 and 1.
df = mars_df.rename(columns={0: "Facts", 1: "Mars"})

In [None]:
# Use the "Facts" column as the row index, then preview the first rows
df = df.set_index("Facts")
df.head()

In [None]:
# Use Pandas to_html method to generate html table from df
# Called without a target, to_html returns the markup as a string.
html_table = df.to_html()
html_table

In [None]:
# Strip unwanted new lines to clean up the table.
# str.replace returns a new string, so the result must be assigned back;
# otherwise html_table keeps its newlines.
html_table = html_table.replace('\n', '')
html_table

In [None]:
# Save the table directly to a file for displaying on webpage later.
# This writes df's markup to 'table.html' in the current working directory;
# it is generated from df, not from the html_table string.
df.to_html('table.html')

## Mars Hemispheres

In [63]:
# URL of pages to be scraped: USGS Astrogeology search results for
# "hemisphere enhanced" with target Mars
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# Load the results page; the loop in the next cell clicks through from here
browser.visit(url)

In [66]:
# Accumulates one {'img_url': ..., 'title': ...} dict per hemisphere
hemisphere_image_urls = []

# The result links are only counted here; each one is looked up again inside
# the loop because navigating away invalidates previously found elements.
links = browser.find_by_css("a.product-item h3")
hemisphere_count = len(links)

for position in range(hemisphere_count):
    # Open the detail page for the hemisphere at this position
    result_headings = browser.find_by_css("a.product-item h3")
    result_headings[position].click()

    # The 'Sample' anchor on the detail page links to the image
    sample_link = browser.links.find_by_text('Sample').first

    # Record the image URL first, then the page title
    hemisphere_image_urls.append({
        'img_url': sample_link['href'],
        'title': browser.find_by_css("h2.title").text,
    })

    # Return to the results page before handling the next hemisphere
    browser.back()

In [67]:
# Show the collected hemisphere records (image URL and title for each)
print(hemisphere_image_urls)

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg', 'title': 'Cerberus Hemisphere Enhanced'}, {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg', 'title': 'Schiaparelli Hemisphere Enhanced'}, {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg', 'title': 'Syrtis Major Hemisphere Enhanced'}, {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg', 'title': 'Valles Marineris Hemisphere Enhanced'}]
