# Mission To Mars

In [15]:
# Import Dependencies
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [16]:
# Start a visible (headless=False) Chrome session controlled by Splinter.
# The value returned by ChromeDriverManager().install() is handed to Splinter
# as its `executable_path` keyword argument.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324


[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Trying to download new driver from https://chromedriver.storage.googleapis.com/88.0.4324.96/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\AA\.wdm\drivers\chromedriver\win32\88.0.4324.96]


In [17]:
def scrapeURL(url):
    """Visit a page with the shared browser and parse what it rendered.

    Args:
        url: Address of the page to load, given as a string.

    Returns:
        A BeautifulSoup object built from the browser's current HTML,
        parsed with the "html.parser" backend.
    """
    # Navigation happens on the module-level `browser`, so after this call
    # the browser stays on `url`.
    browser.visit(url)
    page_html = browser.html
    return bs(page_html, "html.parser")

## NASA Mars Website

In [18]:
# Scrape NASA Mars website
nasa_url = "https://mars.nasa.gov/news/"
nasa_soup = scrapeURL(nasa_url)

# NOTE(review): the article list looks like it may be rendered after the
# initial page load, so the HTML captured right after visit() might not
# contain it yet -- confirm against the live site.
# The next cell dereferences the first "div.list_text" without a None check,
# so if it is missing here, wait for it to appear (up to 10 s) and re-parse
# the browser's current HTML instead of failing later with AttributeError.
if nasa_soup.find("div", class_="list_text") is None:
    browser.is_element_present_by_css("div.list_text", wait_time=10)
    nasa_soup = bs(browser.html, "html.parser")

In [19]:
# Take the first entry of the news list (the original author treats it as the
# latest article) and keep the text of its title and teaser elements.
first_article = nasa_soup.find("div", attrs={"class": "list_text"})
title_div = first_article.find("div", attrs={"class": "content_title"})
teaser_div = first_article.find("div", attrs={"class": "article_teaser_body"})
news_title = title_div.get_text()
news_p = teaser_div.get_text()

## JPL Mars Space Image

In [20]:
# Load the JPL space-images page and keep its parsed HTML
jpl_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
jpl_soup = scrapeURL(url=jpl_url)

In [21]:
# Capture the featured image on the JPL site.
# The <img class="headerimage"> tag carries a path relative to the page, so it
# has to be resolved against the page URL. jpl_url ends in "index.html";
# plain string concatenation would yield ".../index.html/<rel_path>", which is
# not where the image lives. urljoin drops the last path segment of the base
# before appending the relative path.
rel_path = jpl_soup.find("img", class_="headerimage")["src"]
featured_image_url = urljoin(jpl_url, rel_path)

## Mars Facts

In [23]:
# Let pandas fetch the Mars facts page and parse every HTML table it finds
# into a DataFrame (read_html returns them as a list).
facts_url = "https://space-facts.com/mars/"
facts_tables = pd.read_html(io=facts_url)

In [24]:
# Render every scraped facts table as an HTML string, in the same order
facts_tables_html = [frame.to_html() for frame in facts_tables]

## USGS Astrogeology

In [80]:
# Load the USGS Astrogeology search results for the enhanced Mars hemispheres.
# usgs_url is the results page; usgs_root is the site's scheme + host.
usgs_root = "https://astrogeology.usgs.gov"
usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
usgs_soup = scrapeURL(url=usgs_url)

In [84]:
# Collect the distinct hemisphere titles from the result-page item links.
# Anchors whose text is at most one character long are ignored, and each
# title is kept once, in order of first appearance.
item_list = usgs_soup.find_all("a", class_="itemLink")
title_list = list(dict.fromkeys(
    anchor.text for anchor in item_list if len(anchor.text) > 1
))
title_list

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [85]:
# Navigate to each hemisphere page and store its full-size image URL.
# The loop runs over title_list (the hemisphere titles collected from the
# results page): each title is used as the partial link text to click.
# The name previously used here, link_list, is not defined anywhere in this
# notebook, so the cell failed with NameError on a fresh kernel.
img_list = []
for title in title_list:
    browser.links.find_by_partial_text(title).click()
    # String indexing on the match list reads the attribute of the first match
    img = browser.find_by_css(".wide-image")["src"]
    img_list.append(img)
    browser.visit(usgs_url)  # return to the results page before the next click
img_list

['https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']

In [87]:
# Pair each hemisphere title with its image URL (by position) and build one
# {"title", "img_url"} dictionary per pair.
hemisphere_image_urls = [
    {"title": title, "img_url": img_url}
    for title, img_url in zip(title_list, img_list)
]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]