# Mission to Mars
A web application that scrapes various websites for data related to planet Mars and displays the information in a single HTML page.

In [40]:
# Dependencies
import shutil
import time
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
from selenium import webdriver
from splinter import Browser

In [41]:

# Shell escape (`!`): print the path of the chromedriver binary found on PATH.

!which chromedriver

/usr/local/bin/chromedriver


In [42]:
# Create a browser instance using splinter.
# Look chromedriver up on PATH so the notebook is not tied to one machine's
# layout; fall back to the original macOS location when PATH has no match.
chromedriver_path = shutil.which("chromedriver") or "/usr/local/bin/chromedriver"

executable_path = {'executable_path': chromedriver_path}
# headless=False: drive Chrome with a visible window rather than headless.
browser = Browser('chrome', **executable_path, headless=False)

# Step 1 - Scraping

# NASA Mars News

   . save the latest News Title
   
   . save the latest News Paragraph

In [43]:
# Visit url for NASA Mars News -- Latest News
news_url = "https://redplanetscience.com/"
browser.visit(news_url)

# Give the page up to a second to show the article list before grabbing the
# HTML, in case the list is filled in after the initial load (otherwise
# soup.find below returns None and the .find calls on it fail).
browser.is_element_present_by_css("div.list_text", wait_time=1)
html = browser.html

# Parse HTML with Beautiful Soup
soup = bs(html, "html.parser")

# Extract title and teaser paragraph from the first article on the page
article = soup.find("div", class_="list_text")
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_="article_teaser_body").text
print("News Title: ", news_title)
print("News Paragragh: ", news_p)

News Title:  NASA's MAVEN Explores Mars to Understand Radio Interference at Earth
News Paragragh:  NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.


# JPL Mars Space Images - Featured Image

. Visit the url for the Featured Space Image site.

. Use splinter to navigate the site, find the image url for the current Featured Mars Image, and assign the url string to a variable called `featured_image_url`.


. Make sure to find the image url to the full size .jpg image.

. Make sure to save a complete url string for this image.

In [44]:
# Load the JPL Featured Space Image page in the splinter browser.
# `url` is kept because a later cell joins it with the scraped image path.

url = "https://spaceimages-mars.com/"
browser.visit(url)

In [45]:
# Snapshot the browser's current HTML and parse it with Beautiful Soup,
# using Python's built-in "html.parser" backend.

html = browser.html
soup = bs(html, "html.parser")


In [46]:
# Collect the `src` attribute of every <img class="headerimage fade-in">,
# then show the first one (the featured image's relative path).
header_images = soup.find_all("img", class_="headerimage fade-in")
img = [tag.get("src") for tag in header_images]
img[0]

'image/featured/mars2.jpg'

In [47]:
# Resolve the scraped relative path against the site url and display it.
# urljoin copes with a missing or duplicated "/" between the two parts,
# which plain string concatenation does not.
featured_image_url = urljoin(url, img[0])
featured_image_url

'https://spaceimages-mars.com/image/featured/mars2.jpg'

# Mars Facts

. Visit the Mars Facts webpage.

. Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

. Use Pandas to convert the data to a HTML table string.





In [54]:
# Open the Mars facts page in the splinter browser, then parse its HTML with
# Beautiful Soup. Note: this rebinds `url`, `html` and `soup`, which earlier
# cells also assigned.

url = "https://galaxyfacts-mars.com/"
browser.visit(url)
html = browser.html
soup = bs(html, "html.parser")

In [55]:
# Take the first <table class="table"> element from the parsed page with
# Beautiful Soup (pandas is only used later, to assemble the DataFrame).
table = soup.find_all("table", class_="table")[0]
table


<table class="table">
<tbody>
<tr>
<th scope="row"><b> Mars - Earth Comparison</b></th>
<td><span class="orange"><b> Mars</b></span></td>
<td><span class="purple"> <b>Earth </b></span> </td>
</tr>
<tr>
<th scope="row">Diameter:</th>
<td><span class="orange">6,779 km</span></td>
<td><span class="purple">12,742 km</span> </td>
</tr>
<tr>
<th scope="row">Mass:</th>
<td><span class="orange">6.39 × 10^23 kg </span></td>
<td><span class="purple">5.97 × 10^24 kg</span> </td>
</tr>
<tr>
<th scope="row">Moons:</th>
<td><span class="orange">2</span></td>
<td><span class="purple">1</span> </td>
</tr>
<tr>
<th scope="row">Distance from Sun:</th>
<td><span class="orange">227,943,824 km</span></td>
<td><span class="purple">149,598,262 km</span> </td>
</tr>
<tr>
<th scope="row">Length of Year:</th>
<td><span class="orange">687 Earth days</span></td>
<td><span class="purple">365.24 days</span> </td>
</tr>
<tr>
<th scope="row">Temperature:</th>
<td><span class="orange">-87 to -5 °C</span></td>
<td><spa

In [56]:
# Text of every <th> cell in the table: the row labels, preceded by the
# table's own heading.
table_header = [th.text for th in table.find_all("th")]
table_header

[' Mars - Earth Comparison',
 'Diameter:',
 'Mass:',
 'Moons:',
 'Distance from Sun:',
 'Length of Year:',
 'Temperature:']

In [57]:
# Text of every <span class="orange"> in the table, i.e. the Mars values.
mars_column = [span.text for span in table.find_all("span", class_="orange")]
mars_column

[' Mars',
 '6,779 km',
 '6.39 × 10^23 kg ',
 '2',
 '227,943,824 km',
 '687 Earth days',
 '-87 to -5 °C']

In [58]:
# Text of every <span class="purple"> in the table, i.e. the Earth values.
earth_column = [span.text for span in table.find_all("span", class_="purple")]
earth_column

[' Earth ',
 '12,742 km',
 '5.97 × 10^24 kg',
 '1',
 '149,598,262 km',
 '365.24 days',
 '\t-88 to 58°C']

In [60]:
# Assemble the three scraped lists into column data: the header cells become
# the "Description" column, alongside the "Mars" and "Earth" value columns.
table_df = {"Description": table_header, "Mars": mars_column, "Earth": earth_column}

In [61]:
# Build the facts DataFrame and index it by the "Description" labels in one
# chained expression, so re-running the cell always starts from table_df.
df = pd.DataFrame(table_df).set_index("Description")

In [62]:
# Strip the stray tab characters from the Earth values.
# regex=False makes the literal (non-regex) match explicit, since the default
# for `regex` is not the same across pandas versions.
df["Earth"] = df["Earth"].str.replace("\t", "", regex=False)
df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


# Mars Hemispheres

. Visit the astrogeology site (https://marshemispheres.com/) to obtain high-resolution images for each of Mars's hemispheres.

. You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

. Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. 

. Use a Python dictionary to store the data using the keys img_url and title.

. Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [25]:
# Fetch the astrogeology index page that lists Mars's hemisphere images
url = "https://marshemispheres.com/"
url_path = requests.get(url)
# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# happens to be installed (and warns), so results could vary between machines.
soup = bs(url_path.content, "html.parser")

In [26]:
# Collect every <div class="item"> on the hemispheres index page; the next
# cell pulls the detail-page links out of these.
div_item = soup.find_all("div", class_="item")

In [27]:
# Build the list of detail-page hrefs: for each item div, keep the href of
# every anchor that has non-empty text (text-less anchors are skipped).
links = [
    anchor["href"]
    for item in div_item
    for anchor in item.find_all("a", href=True)
    if anchor.text
]

In [28]:
full_links = [url + i for i in links]
full_links

['https://marshemispheres.com/cerberus.html',
 'https://marshemispheres.com/schiaparelli.html',
 'https://marshemispheres.com/syrtis.html',
 'https://marshemispheres.com/valles.html']

In [29]:
def extract_image(url):
    """Return the absolute URL of the wide image on a hemisphere detail page.

    Parameters
    ----------
    url : str
        Address of a single hemisphere detail page.

    Returns
    -------
    str
        The ``src`` of the first ``<img class="wide-image">`` on the page,
        resolved against ``url``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page contains no ``wide-image`` element.
    """
    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = bs(response.text, "html.parser")
    image = soup.find("img", class_="wide-image")
    if image is None:
        # Same exception type the old `img[0]` lookup produced, with context.
        raise IndexError(f"no <img class='wide-image'> found on {url}")
    # Resolve relative to the page itself rather than a hard-coded host.
    return urljoin(url, image.get("src"))

In [30]:
img_url = [extract_image(i) for i in full_links]
img_url

['https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']

In [31]:
def image_title(url):
    """Return the hemisphere title shown on a hemisphere detail page.

    Parameters
    ----------
    url : str
        Address of a single hemisphere detail page.

    Returns
    -------
    str
        Text of the first ``<h2 class="title">`` element on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page contains no ``<h2 class="title">`` element.
    """
    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = bs(response.text, "html.parser")
    heading = soup.find("h2", class_="title")
    if heading is None:
        # Same exception type the old `title_text[0]` lookup produced.
        raise IndexError(f"no <h2 class='title'> found on {url}")
    return heading.text

In [32]:
# Hemisphere title for each detail page, in full_links order.
title = list(map(image_title, full_links))
title

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [33]:
list_tups = list(zip(title, img_url))
list_tups

[('Cerberus Hemisphere Enhanced',
  'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'),
 ('Schiaparelli Hemisphere Enhanced',
  'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'),
 ('Syrtis Major Hemisphere Enhanced',
  'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'),
 ('Valles Marineris Hemisphere Enhanced',
  'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg')]

In [34]:
def get_dict_list(keys, list_of_tups):
    """Convert a list of value tuples into a list of dicts sharing `keys`.

    Each tuple in `list_of_tups` is zipped positionally with `keys`, so the
    first key labels the first value and so on; as with `zip`, surplus keys
    or values are dropped. Returns one dict per tuple, in input order.
    """
    records = []
    for values in list_of_tups:
        records.append(dict(zip(keys, values)))
    return records

In [50]:

# Build the final list: one {"title": ..., "img_url": ...} dict per hemisphere.
# The key order must match the (title, img_url) order of the tuples in list_tups.
keys = ("title", "img_url")
hemisphere_image_urls = get_dict_list(keys, list_tups)
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]