# Mission To Mars

In [1]:
# Import Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Initialize browser
# ChromeDriverManager().install() hands back the location of a chromedriver
# binary, which splinter receives as its `executable_path` keyword argument.
# headless=False keeps the Chrome window visible while the notebook runs.
executable_path = {"executable_path": ChromeDriverManager().install()}
browser = Browser("chrome", headless=False, **executable_path)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [C:\Users\AA\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache




In [3]:
def scrapeHTML(url):
    """Visit a page with the shared browser and parse its current HTML.

    Uses the notebook-level ``browser`` session, so calling this function
    navigates that browser window to ``url`` as a side effect.

    Args:
        url: Address of the page to load, as a string.

    Returns:
        A BeautifulSoup object built with the "html.parser" backend from
        ``browser.html`` as it is right after ``browser.visit(url)`` returns.
    """
    browser.visit(url)
    rendered_html = browser.html
    return bs(rendered_html, "html.parser")

## NASA Mars Website

In [4]:
# Scrape NASA Mars website
nasa_url = "https://mars.nasa.gov/news/"
nasa_soup = scrapeHTML(nasa_url)

# The next cell needs at least one "div.list_text" entry. If the snapshot taken
# right after the page load does not contain one yet (the list may be inserted
# by script after load -- NOTE(review): confirm against the live page), wait up
# to 10 seconds for it to appear and re-parse the browser's current HTML.
if nasa_soup.find("div", class_="list_text") is None:
    browser.is_element_present_by_css("div.list_text", wait_time=10)
    nasa_soup = bs(browser.html, "html.parser")

In [5]:
# Take the first "list_text" entry on the page (presumably the latest article --
# verify against the site's ordering) and store its title and teaser paragraph
first_article = nasa_soup.find("div", class_="list_text")
if first_article is None:
    # find() returns None when nothing matches; fail with an actionable message
    # instead of "'NoneType' object has no attribute 'find'" on the next line.
    raise RuntimeError(
        "No <div class='list_text'> found in the NASA news page; the page may "
        "not have finished rendering or its markup may have changed."
    )

title_div = first_article.find("div", class_="content_title")
teaser_div = first_article.find("div", class_="article_teaser_body")
if title_div is None or teaser_div is None:
    raise RuntimeError(
        "The first NASA news entry is missing its 'content_title' or "
        "'article_teaser_body' element; the page markup may have changed."
    )

news_title = title_div.text
news_p = teaser_div.text

## JPL Mars Space Image

In [6]:
# Scrape JPL
# jpl_root is the directory prefix later joined with the image's relative path;
# jpl_url is the index page inside that directory that actually gets loaded.
jpl_root = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/"
jpl_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
jpl_soup = scrapeHTML(jpl_url)

In [7]:
# Capture the featured image on the JPL site
# The first <img class="headerimage"> tag supplies a `src` value, which is
# appended to jpl_root to form the full image URL.
header_img = jpl_soup.find("img", class_="headerimage")
rel_path = header_img["src"]
featured_image_url = jpl_root + rel_path

## Mars Facts

In [8]:
# Scrape Mars Fact site table
facts_url = "https://space-facts.com/mars/"
# read_html yields a list of tables parsed from the page; keep only the first
facts_tables = pd.read_html(facts_url)
facts_df = facts_tables[0]

In [9]:
# Format Table
# Column 0 becomes the (unnamed) index and column 1 is relabelled "Mars".
facts_table = facts_df.set_index(0).rename(columns={1: "Mars"})
facts_table.index.name = None

# Left-align every <th> cell. NOTE: from here on `facts_df` refers to a pandas
# Styler wrapping the table, not a DataFrame.
header_alignment = [{"selector": "th", "props": [("text-align", "left")]}]
facts_df = facts_table.style.set_table_styles(header_alignment)

In [10]:
# Render the styled facts table to a single HTML string
# Styler.render() was deprecated in pandas 1.4 and removed in 2.0 in favour of
# Styler.to_html() (added in 1.3). Prefer to_html() and fall back to render()
# only on older pandas installs that do not provide it.
if hasattr(facts_df, "to_html"):
    facts_html = facts_df.to_html()
else:
    facts_html = facts_df.render()

## USGS Astrogeology

In [11]:
# Scrape USGS Astrogeology
# Search-results URL; the literal is wrapped over two lines for readability
# (adjacent string literals are joined into one string).
usgs_url = (
    "https://astrogeology.usgs.gov/search/results"
    "?q=hemisphere+enhanced&k1=target&v1=Mars"
)
usgs_soup = scrapeHTML(usgs_url)

In [12]:
# Capture the link text (title) of each hemisphere entry
item_list = usgs_soup.find_all("a", class_="itemLink")
# Keep only link texts longer than one character, dropping repeats while
# preserving first-seen order (dict keys are unique and insertion-ordered).
title_list = list(
    dict.fromkeys(anchor.text for anchor in item_list if len(anchor.text) > 1)
)

In [13]:
# Navigate to each hemisphere page and store the `src` of its ".wide-image" element
img_list = []
for hemisphere_title in title_list:
    # Follow the link whose text contains this title
    hemisphere_link = browser.links.find_by_partial_text(hemisphere_title)
    hemisphere_link.click()

    # Record the image URL found on the page that opened
    wide_image = browser.find_by_css(".wide-image")
    img_list.append(wide_image["src"])

    # Go back to the search results so the next title's link can be found
    browser.visit(usgs_url)

In [14]:
# Pair each title with the image url at the same position and build one
# {"title", "img_url"} dictionary per pair
hemisphere_image_urls = [
    {"title": hemisphere_title, "img_url": image_url}
    for hemisphere_title, image_url in zip(title_list, img_list)
]

In [15]:
# Shut down the browser session that was opened at the top of the notebook
browser.quit()