# Mission to Mars Web Scraping

![](Images/tammiebrown.gif)

----


## Importing Dependencies

In [2]:
# Library
from bs4 import BeautifulSoup as bs
from splinter import Browser
import os
import pandas as pd
import time

In [None]:
# Start a visible (non-headless) Chrome session through splinter,
# pointing it at the chromedriver binary one directory up.
path = dict(executable_path='../chromedriver')
browser = Browser('chrome', headless=False, **path)

----

## Step 1 - Scraping

### NASA Mars News 

In [6]:
# Load the NASA Mars news page in the automated browser
browser.visit("https://mars.nasa.gov/news/")

# Parse the HTML as currently rendered by the browser
parsed = bs(browser.html, 'html.parser')

# Headline text: the link text inside every "content_title" div
article_headings = [
    title_div.find('a').text
    for title_div in parsed.find_all('div', class_="content_title")
]

# Teaser text: the text of every "article_teaser_body" div
article_bodies = [
    teaser_div.text
    for teaser_div in parsed.find_all('div', class_="article_teaser_body")
]

# The first entry of each list is treated as the latest news item;
# both are stored under their own names so they can be referenced later.
latest_news_heading, latest_news_teaser = article_headings[0], article_bodies[0]

### JPL Mars Space Images - Featured Image

In [15]:
# Load the JPL space-images page filtered to the Mars category
browser.visit("https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")

# Parse the HTML as currently rendered by the browser
parsed = bs(browser.html, 'html.parser')

# Collect the src attribute of the <img> inside every div with class "img"
images = [
    wrapper.find('img').get('src')
    for wrapper in parsed.find_all('div', class_="img")
]

# The first src is treated as the most recent image.
# NOTE: going by the retrieved list, this appears to be the largest
# resolution on offer (640x350).
latest_image = images[0]

# The src is prefixed with the site root to form the full image URL
featured_image_url = "https://www.jpl.nasa.gov" + latest_image

### Mars Weather

In [28]:
# Load the Mars weather report Twitter timeline
browser.visit("https://twitter.com/marswxreport?lang=en")

# Parse the HTML as currently rendered by the browser
parsed = bs(browser.html, 'html.parser')

# Text of every <p> carrying the tweet-text class set
tweets = [
    tweet_tag.text
    for tweet_tag in parsed.find_all(
        'p',
        class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text",
    )
]

# The first tweet on the page is treated as the most recent weather report
mars_weather = tweets[0]

### Mars Facts

In [34]:
# Read the tables on the Mars facts page and keep the first one
facts_df = pd.read_html("http://space-facts.com/mars/")[0]

# Relabel the two positional columns. DataFrame.rename(columns=...) is the
# supported way to change column labels; rename_axis() is meant for naming
# the axis itself and its dict-mapper form is no longer available in pandas.
facts_df.rename(columns={0: "Variable", 1: "Value"}, inplace=True)

# Write the table to disk as before ...
facts_df.to_html("facts_df.html", index=False)

# ... and also keep it as an HTML table string. to_html() returns None when
# it is given a file target, so the string needs its own call without one.
facts_df_html = facts_df.to_html(index=False)

# Double-checking the DF
facts_df

Unnamed: 0,Variable,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [66]:
# Load the USGS Astrogeology search results for the enhanced Mars hemispheres
browser.visit("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")

# Parse the HTML as currently rendered by the browser
parsed = bs(browser.html, 'html.parser')

# Hemisphere titles: the <h3> text inside every "description" div
hemisphere_titles = [
    description.find('h3').text
    for description in parsed.find_all('div', class_="description")
]

# Hemisphere names in the form used by the full-size download links
hemispheres = ["cerberus", "schiaparelli", "syrtis_major", "valles_marineris"]

# Build the full-size image URL for each hemisphere name
hemisphere_images = [
    f"https://astrogeology.usgs.gov/download/Mars/Viking/{slug}_enhanced.tiff"
    for slug in hemispheres
]

# It was unclear whether the task wanted a list holding one dictionary per
# hemisphere, so two dictionaries are built instead:
#   - hemisphere_image_urls_key: "title" and "img_url" keys, each holding the
#     full list of values
#   - hemisphere_image_urls: each title mapped to its image URL (pairs are
#     matched by position)
hemisphere_image_urls_key = dict(title=hemisphere_titles, img_url=hemisphere_images)
hemisphere_image_urls = {
    heading: url for heading, url in zip(hemisphere_titles, hemisphere_images)
}