# Mission to Mars

## 1) Scraping

### 1.1) Import libraries, initialize variables, scrape the 3 "static" pages, and save them in a list of Soup objects

In [1]:
# import libraries
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import time

In [2]:
# "Static" URLs: pages that can be scraped without any browser interaction
# (no clicks or navigation needed once the page has been visited).
lstURL = [
    'https://mars.nasa.gov/news',
    'https://twitter.com/marswxreport?lang=en',
    'https://space-facts.com/mars',
]

In [3]:
# Soup objects for the "static" pages, filled in the same order as lstURL
lstSoup = list()

In [4]:
# define the function to initialize the Splinter/ChromeDriver browser.
# by default chromedriver.exe is assumed to be in the same dir as the code
def init_browser(executable_path="chromedriver.exe", headless=False):
    """Start and return a Splinter browser driving Chrome.

    Parameters
    ----------
    executable_path : str, optional
        Path to the ChromeDriver executable. Defaults to "chromedriver.exe",
        i.e. a driver sitting next to the notebook.
    headless : bool, optional
        Passed through to Splinter; the default (False) opens a visible
        browser window.

    Returns
    -------
    The Splinter browser object. The caller is responsible for calling
    ``.quit()`` on it when done.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

In [5]:
# initialize browser (closed again with brsr.quit() once the static pages are scraped)
brsr = init_browser()

In [6]:
# iterate thru list of URLs and populate list of Soup objects
# (lstSoup[i] ends up holding the parsed page for lstURL[i])
# NOTE(review): the results shown in section 1.2 are empty strings; presumably
# the 1-second pause is too short for the NASA news page to finish rendering
# its content -- confirm and lengthen the wait if so.
# NOTE(review): this cell appends to lstSoup, so re-running it without first
# re-running the cell that resets lstSoup leaves stale entries at the front.
for strURL in lstURL:
    # visit the current "static" page
    brsr.visit(strURL)
    # pause 1 second
    time.sleep(1)
    # scrape page into Soup
    html = brsr.html
    lstSoup.append(bs(html, 'html.parser'))

In [7]:
# quit browser
brsr.quit()

### 1.2) _NASA Mars News_: grab latest Title and Paragraph Text

In [8]:
# capture News Title:
# 1) define vars to capture News Title and count containers with no title div
news_title = ''
intErr = 0
# 2) get all divs with the class "image_and_description_container"
soupIDC = lstSoup[0].find_all('div', class_='image_and_description_container')
# 3) walk the containers in page order and keep the first non-empty Title.
#    Each container is inspected exactly once, so a missing or empty title
#    moves on to the next container instead of retrying the same one forever.
for tagOuter in soupIDC:
    tagInner = tagOuter.find('div', class_='content_title')
    if tagInner is None:
        # no "content_title" div in this container: count it and keep looking
        intErr += 1
        continue
    news_title = tagInner.text
    if news_title != '':
        break

In [9]:
# capture Paragraph Text:
# 1) define var to capture Paragraph Text
news_p = ''
# 2) get all divs with the class "article_teaser_body"
soupATB = lstSoup[0].find_all('div', class_='article_teaser_body')
# 3) walk the divs in page order and keep the first non-empty Paragraph Text.
#    Every item returned by find_all is a tag, so reading .text cannot raise
#    AttributeError here; an empty teaser simply falls through to the next div
#    rather than being retried in an endless loop.
for tagOuter in soupATB:
    news_p = tagOuter.text
    if news_p != '':
        break

In [10]:
# show results: (title, paragraph text, error count).
# Empty strings mean no matching divs with text were found in lstSoup[0].
news_title, news_p, intErr

('', '', 0)

### 1.3) _Mars Weather_: grab tweet text

In [11]:
# capture latest weather Tweet:
# take the first <p> carrying the "TweetTextSize" class on the Twitter page,
# then keep only its first child node (the tweet text itself)
tagTweet = lstSoup[1].find('p', class_='TweetTextSize')
mars_weather = tagTweet.contents[0]
mars_weather

'InSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF)\nwinds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph)\npressure at 7.60 hPa'

### 1.4) _Mars Facts_: grab facts table

In [12]:
# the facts table is the second <table> element on the space-facts page
tagFactsTable = lstSoup[2].find_all('table')[1]
# pd.read_html returns a *list* of DataFrames (here holding a single frame),
# so dfMarsFacts is a list despite its "df" prefix
dfMarsFacts = pd.read_html(tagFactsTable.prettify())
dfMarsFacts

[                      0                               1
 0  Equatorial Diameter:                        6,792 km
 1       Polar Diameter:                        6,752 km
 2                 Mass:  6.39 × 10^23 kg  (0.11 Earths)
 3                Moons:       2 (  Phobos  &  Deimos  )
 4       Orbit Distance:       227,943,824 km  (1.38 AU)
 5         Orbit Period:            687 days (1.9 years)
 6  Surface Temperature:                    -87 to -5 °C
 7         First Record:               2nd millennium BC
 8          Recorded By:            Egyptian astronomers]

### 1.5) _JPL Featured Space Image_: grab full-size image URL

In [13]:
# define URL of the JPL space images page, filtered to the Mars category
strURL = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [14]:
# get URL of large image in several steps:
# (1) open a fresh browser, visit the page, and pause 2 seconds for it to load
brsr = init_browser()
brsr.visit(strURL)
time.sleep(2)

In [15]:
# (2) click "Full Image" button (matched by partial link text), then pause 2 seconds
brsr.click_link_by_partial_text('FULL IMAGE')
time.sleep(2)

In [16]:
# (3) click "more info" button (matched by partial link text), then pause 2 seconds
brsr.click_link_by_partial_text('more info')
time.sleep(2)

In [17]:
# (4) grab the src of the img tag with the "main_image" class.
# The page HTML is captured and parsed first so the browser can be closed
# before the Soup is searched; the src is appended to the JPL host to form
# the full image URL.
html = brsr.html
soupImg = bs(html, 'html.parser')
brsr.quit()
featured_image_url = 'https://www.jpl.nasa.gov' + soupImg.find('img', class_='main_image')['src']
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA09178_hires.jpg'

### 1.6) _Mars Hemispheres_: grab hemispheres' images and names

In [18]:
# define URL of the USGS Astrogeology search results for Mars "hemisphere enhanced" products
strURL = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [19]:
# get URLs of four hemispheres' images in several steps:
# (1) visit page
brsr = init_browser()
brsr.visit(strURL)
time.sleep(2)

In [20]:
# (2) define Hemispheres list, capture links
lstHemi = []
html = brsr.html
soupHemi = bs(html, 'html.parser')
# Collect the absolute URL of every product link, keeping first-seen order.
# The same href can appear on more than one anchor, so each URL is added only
# once no matter where the repeat occurs (not just when it is adjacent).
for tag in soupHemi.find_all('a', class_='itemLink product-item'):
    strURL = 'https://astrogeology.usgs.gov' + tag['href']
    if strURL not in lstHemi:
        lstHemi.append(strURL)
lstHemi

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [21]:
# (3) define hemisphere_image_urls, navigate to each page and populate hemisphere_image_urls
# Each entry is a dict with:
#   'title': the page <title> with the trailing site suffix removed
#   'img'  : absolute URL built from the src of the img with the "wide-image" class
hemisphere_image_urls = []
try:
    for strURL in lstHemi:
        brsr.visit(strURL)
        # pause 2 seconds before reading the page HTML
        time.sleep(2)
        html = brsr.html
        soupHemiPg = bs(html, 'html.parser')
        hemisphere_image_urls.append(
            {'title': soupHemiPg.find('title').text.replace(' Enhanced | USGS Astrogeology Science Center', ''),
             'img': 'https://astrogeology.usgs.gov' + soupHemiPg.find('img', class_='wide-image')['src']})
finally:
    # always close the browser, even if a page fails to load or parse,
    # so no browser/driver process is left running
    brsr.quit()
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

# 2) MongoDB and Flask Application