# Mission to Mars

In this assignment, you will build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page. The following outlines what you need to do.

## Step 1 - Scraping

Complete your initial scraping using Jupyter Notebook, BeautifulSoup, Pandas, and Requests/Splinter.

In [1]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import pymongo
import requests
import os


### NASA Mars News


In [2]:
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [5]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# Get latest news title and content
titles = soup.find_all('div',class_='content_title')
news_title = titles[0].text.strip()
news_title

'Martian Skies Clearing over Opportunity Rover'

In [7]:
# get the latest news content
paragraphs = soup.find_all('div',class_='rollover_description_inner')
news_p = paragraphs[0].text.strip()

In [8]:
news_p

'As the skies above Opportunity continue to clear, engineers at JPL are increasing the frequency of commands asking the solar-powered rover to communicate with Earth.'


### JPL Mars Space Images - Featured Image


In [9]:
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

In [10]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [11]:
#click on the url link of the Featured
browser.find_option_by_text('Featured').first.click() 

In [12]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [13]:
# get the featured article
art = soup.find_all('h1',class_='media_feature_title')
featured_article = art[0].text.strip()
featured_article

"Pluto's Colorful Composition"

In [14]:
# get the list of article titles and respective images for all featured article
title_img = []
for x in range(1,5):
    listelement = soup.find_all('a', class_='fancybox')
    for li in listelement:
        title_img.append ({'title':li['data-title'],
                           'img': li['data-fancybox-href']})
    if browser.find_by_name('More'):
        browser.find_by_name('More').first.click()

In [15]:
base_featured_url = "https://www.jpl.nasa.gov"
# get the image for the featured article
for n in title_img:
    if n['title'] == featured_article:
        featured_image_url = base_featured_url + n['img']
        break

In [16]:
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA09113_ip.jpg'


### Mars Weather


In [17]:
twitter_url = 'https://twitter.com/marswxreport?lang=e'
browser.visit(twitter_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [18]:
tweets = soup.find_all('p',class_='TweetTextSize')

In [19]:
mars_weather = tweets[0].text

In [20]:
mars_weather

"Congrats to NASA/JPL for the Emmy Award for Outstanding Original Interactive Program for its coverage of the Cassini mission's Grand Finale at Saturn. https://www.jpl.nasa.gov/news/news.php?feature=7232\xa0…https://twitter.com/veronicamcg/status/1039221529005813762\xa0…"


### Mars Facts


In [21]:
url = 'http://space-facts.com/mars/'
tables = pd.read_html(url)

In [22]:
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [23]:
facts = tables[0]
facts.columns = (['fact','value'])
facts.set_index('fact', inplace=True)
facts

Unnamed: 0_level_0,value
fact,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
mars_facts = facts.to_dict()
mars_facts

{'value': {'Equatorial Diameter:': '6,792 km',
  'First Record:': '2nd millennium BC',
  'Mass:': '6.42 x 10^23 kg (10.7% Earth)',
  'Moons:': '2 (Phobos & Deimos)',
  'Orbit Distance:': '227,943,824 km (1.52 AU)',
  'Orbit Period:': '687 days (1.9 years)',
  'Polar Diameter:': '6,752 km',
  'Recorded By:': 'Egyptian astronomers',
  'Surface Temperature:': '-153 to 20 °C'}}



### Mars Hemispheres



In [25]:
hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [26]:
hemisphere_image_urls =[]

In [27]:
browser.visit(hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [28]:
title_results = soup.find_all('h3')
title_results

[<h3>Cerberus Hemisphere Enhanced</h3>,
 <h3>Schiaparelli Hemisphere Enhanced</h3>,
 <h3>Syrtis Major Hemisphere Enhanced</h3>,
 <h3>Valles Marineris Hemisphere Enhanced</h3>]

In [29]:
for t in title_results:
    print(t.text.strip())

Cerberus Hemisphere Enhanced
Schiaparelli Hemisphere Enhanced
Syrtis Major Hemisphere Enhanced
Valles Marineris Hemisphere Enhanced


In [30]:
hemispheres = [t.text.strip() for t in title_results]

In [31]:
hemispheres

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [32]:
all_img_results = soup.find_all('a',class_='itemLink product-item')

In [33]:
img_results = []
for i in all_img_results:
    if img_results == []:
        img_results.append(i['href'])
    elif img_results[-1] != i['href']:
        img_results.append(i['href'])
img_results

['/search/map/Mars/Viking/cerberus_enhanced',
 '/search/map/Mars/Viking/schiaparelli_enhanced',
 '/search/map/Mars/Viking/syrtis_major_enhanced',
 '/search/map/Mars/Viking/valles_marineris_enhanced']

In [34]:
# get the image after clicking each ref
img_src = []
count = 1
base_hem_url = "https://astrogeology.usgs.gov"
for i in img_results:
    ref = base_hem_url + i
    browser.visit(ref)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    browser.click_link_by_text('Sample')
    img_url = browser.windows[count].url
    count=count+1
    img_src.append(img_url)

img_src    

['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [35]:
hemisphere_image_urls = []
hemiz = zip(hemispheres, img_src)
for z in hemiz:
    print(z[1])
    hemisphere_image_urls.append({'title':z[0],'img_url':z[1]})


https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [36]:
hemisphere_image_urls    

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]


### MongoDB

In [37]:
# store all collected data into Mongo DB database for later consumption
import pymongo

# The default port used by MongoDB is 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
# Define the 'classDB' database in Mongo

db = client.marsDB
db.marsnews.drop()
db.marsnews.insert_one(
    {
        'news_title': news_title,
        'news_p': news_p,
        'featured_article': featured_article,
        'featured_image': featured_image_url,
        'mars_weather':mars_weather,
        'mars_facts':mars_facts,
        'hemisphere':hemisphere_image_urls
    }
)


<pymongo.results.InsertOneResult at 0x1119cf3c8>