# Mission to Mars

Build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page.

In [1]:
#import dependencies
import json
import time
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from IPython.display import HTML
from splinter import Browser

In [2]:
# Initial scraping is done in this notebook with BeautifulSoup, Pandas, and Requests/Splinter.

# Connect PyMongo to the MongoDB server at the address below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Select the database and the collection by name
db = client['mars_db']
collection = db['marsData']


## NASA Mars News

Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# URL of page to be scraped.
# The originally given URL ('https://mars.nasa.gov/news/') returned odd HTML,
# so the Mars 2020 news landing page is used instead.
news_url = 'https://mars.nasa.gov/mars2020/news/'

# Retrieve the page with the requests module. The timeout keeps the notebook
# from hanging indefinitely on an unresponsive server, and raise_for_status()
# stops here on an HTTP error instead of letting a later cell parse an error page.
news_response = requests.get(news_url, timeout=30)
news_response.raise_for_status()

# Create BeautifulSoup object; parse with Python's built-in 'html.parser'
news_soup = bs(news_response.text, "html.parser")


In [4]:
# Scrape the latest News Title and Paragraph Text.
# The first 'listTextLabel' div is looked up once and reused for both fields.
latest_news = news_soup.find("div", class_='listTextLabel')
if latest_news is None:
    # Fail with a clear message rather than an AttributeError on None
    raise ValueError("No 'listTextLabel' div found; the news page layout may have changed")
news_title = latest_news.find('h2', class_='alt01').text.strip()
news_p = latest_news.find('p').text.strip()
print(news_title)
print(news_p)

NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet
The agency's Mars 2020 mission is on its way. It will land at Jezero Crater in about seven months, on Feb. 18, 2021.


## JPL Mars Space Images - Featured Image

Visit the url for JPL Featured Space Image.

Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

In [5]:
#Splinter browser setup for MacOS: start a visible (non-headless) Chrome
#session driven by the chromedriver binary at the path below
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [6]:
#JPL Mars Space Images URL (search filtered to the Mars category)
img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
#load the page in the splinter-controlled browser
browser.visit(img_url)

In [7]:
#take the HTML of the page the browser is on and parse it with BeautifulSoup;
#img_soup is what the featured-image lookup searches
img_html = browser.html
img_soup = bs(img_html, 'html.parser')


In [8]:
#featured image path sits inside the carousel item's inline style attribute;
#split on single quotes and take the second piece to get the quoted url
imgs = img_soup.find(class_ = "carousel_item")['style']
featured_image = imgs.split("'")[1]
#resolve against the site root to get the full URL; urljoin (rather than plain
#concatenation) still yields a valid URL if the scraped value is already
#absolute or is missing its leading slash
img_base_url = 'https://www.jpl.nasa.gov'
featured_image_url = urljoin(img_base_url, featured_image)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA08003-1920x1200.jpg


## Mars Facts

Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [9]:
#set the url and let pandas fetch the page and parse the HTML tables on it
#(read_html returns a list of DataFrames, one per table found)
facts_url = 'https://space-facts.com/mars/'
facts_url_tables = pd.read_html(facts_url)

In [10]:
#pull the first table on the page into a data frame; work on a copy so that
#facts_url_tables stays untouched and this cell gives the same result when re-run
mars_facts_df = facts_url_tables[0].copy()
mars_facts_df.columns = ['Label', 'Value']
#set_index without inplace returns a new frame indexed by 'Label'
mars_facts_df = mars_facts_df.set_index('Label')

In [11]:
#use pandas to convert the table to a HTML table string, then render it inline
html_table = mars_facts_df.to_html()
display(HTML(html_table))

Unnamed: 0_level_0,Value
Label,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemispheres

Visit the USGS Astrogeology site to obtain high-resolution images for each of Mars' hemispheres.

In [12]:
#the featured-image section leaves its Chrome session open; close it before
#starting a new one so browser/chromedriver processes do not accumulate
try:
    browser.quit()
except NameError:
    pass  #this section is being run on its own; there is no earlier session

#Splinter browser setup for MacOS
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

#open the USGS Astrogeology search results for the enhanced Mars hemisphere images
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)

In [13]:
#parse the landing page, isolate the image links in the item div
hemi_html = browser.html
hemi_soup = bs(hemi_html, "html.parser")
hemi_links = hemi_soup.find_all("div", class_ = "item")

#site root used to resolve the links scraped below (same for every item)
link_base = "https://astrogeology.usgs.gov"

#create a list for the names and image links
hemi_img_urls = []

#loop thru each link and grab the name and full size image
for link in hemi_links:
    #name is in the h3 text
    img_name = link.find("h3").text
    #link is in each description, a href
    img_link = link.find("div", class_ = "description").a["href"]
    #resolve against the site root; urljoin copes with both site-relative
    #and already-absolute hrefs, unlike plain string concatenation
    visit_link = urljoin(link_base, img_link)

    #visit the link
    browser.visit(visit_link)
    #don't go too fast
    time.sleep(5)

    #parse the html
    indv_hemi_html = browser.html
    indv_hemi_soup = bs(indv_hemi_html, 'html.parser')

    #grab the full image link from wide-image src
    indv_hemi_url = indv_hemi_soup.find("img", class_ = "wide-image")["src"]

    #store the pair as hemi_img_name / hemi_img_url and add it to hemi_img_urls
    hemi_dict = {
        'hemi_img_name': img_name,
        'hemi_img_url': urljoin(link_base, indv_hemi_url),
    }
    hemi_img_urls.append(hemi_dict)
    #step back in the browser history before handling the next item
    browser.back()


In [14]:
#pretty-print the collected name/url pairs as JSON for a quick visual check
print(json.dumps(hemi_img_urls, indent=2))

[
  {
    "hemi_img_name": "Cerberus Hemisphere Enhanced",
    "hemi_img_url": "https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg"
  },
  {
    "hemi_img_name": "Schiaparelli Hemisphere Enhanced",
    "hemi_img_url": "https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg"
  },
  {
    "hemi_img_name": "Syrtis Major Hemisphere Enhanced",
    "hemi_img_url": "https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg"
  },
  {
    "hemi_img_name": "Valles Marineris Hemisphere Enhanced",
    "hemi_img_url": "https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg"
  }
]
