# Mission to Mars

### Web-Scraping for Mars Info
---

#### Initial Setup

In [1]:
# Dependencies
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Function that initializes browser
def init_browser(executable_path="chromedriver.exe", headless=False):
    """Start a Chrome browser session driven by chromedriver.

    Args:
        executable_path: Path to the chromedriver binary. Defaults to
            "chromedriver.exe", which is what this function always used
            before the parameter existed.
        headless: Passed through to ``Browser``; ``False`` (the default)
            matches the previous hard-coded value.

    Returns:
        The object returned by splinter's ``Browser("chrome", ...)``.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

In [3]:
# Load a page in the shared browser and parse it with Beautiful Soup
def get_html(url):
    """Visit `url` with the notebook-level `browser` and return the parsed page.

    Args:
        url: Address of the page to load.

    Returns:
        A BeautifulSoup object built from the browser's HTML after the
        visit, parsed with the "lxml" parser.
    """
    # relies on the global `browser`; it must exist before this is called
    browser.visit(url)
    return bs(browser.html, "lxml")

---
Initialize browser using chromedriver

In [4]:
# Start the browser session; get_html() and the cells below all use this `browser` global
browser = init_browser()

---
#### NASA Mars News

* Get title and teaser paragraph for latest Mars news story

In [5]:
# Mars news listing on the NASA site; the long query string is split
# across two adjacent literals that Python joins into a single string
NASA_url = (
    'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+'
    'desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
)

# load the page in the browser and parse it into soup
soup = get_html(NASA_url)

In [6]:
# the first <li class="slide"> on the page is treated as the latest story
latest_story = soup.find('li', class_='slide')

# headline text of that story
news_title = latest_story.find('div', class_='content_title').text
print(f"Title: {news_title}")

# teaser paragraph text of that story
news_p = latest_story.find('div', class_='article_teaser_body').text
print(f"P: {news_p}")

Title: NASA Invites Students to Name Mars 2020 Rover
P: Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.


---
#### JPL Mars Space Images

- Get url for featured Mars image

In [7]:
# JPL space-images search page with the category query parameter set to Mars
JPL_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# visit the page with the shared browser and parse it into soup
soup = get_html(JPL_url)

In [9]:
# the anchor with id "full_image" carries the image path in its
# data-fancybox-href attribute
relative_url = soup.find('a', id='full_image')['data-fancybox-href']

JPL_base_url = 'https://www.jpl.nasa.gov'

# urljoin resolves the href against the site root, so the result stays correct
# even if the attribute is already an absolute URL or lacks a leading slash
# (plain string concatenation would produce a broken URL in both cases)
featured_image_url = urljoin(JPL_base_url, relative_url)
print(f"featured_image_url:\n{featured_image_url}")

featured_image_url:
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17200_ip.jpg


---
#### Mars Weather

- Get latest tweet from Mars Weather Twitter account

In [10]:
# Twitter page of the marswxreport account, requested with lang=en
weather_url = 'https://twitter.com/marswxreport?lang=en'

# visit the page with the shared browser and parse it into soup
soup = get_html(weather_url)

In [11]:
# the first <p> with the TweetTextSize class is taken as the latest tweet
latest_tweet = soup.find('p', class_='TweetTextSize')

# flatten line breaks inside the tweet text into ", " separators
mars_weather = latest_tweet.text.replace('\n', ', ')
print(mars_weather)

We won’t be hearing from @MarsCuriosity or @NASAInSight for the next 2 weeks during Mars solar conjunction. Read more about why Mars missions go silent every 2 years: https://www.wral.com/mars-spacecraft-go-quiet-during-solar-conjunction/18595551/ …pic.twitter.com/fWruE2v151


---
#### Mars Facts

- Visit the Mars Facts webpage (https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet, including Diameter, Mass, etc.

- Use Pandas to convert the data to an HTML table string.

In [19]:
# page whose tables are read for the Mars facts
facts_url = 'https://space-facts.com/mars/'

# pandas returns one DataFrame per table it finds on the page
tables = pd.read_html(facts_url)

# the facts table is taken from index 1 of that list
df = tables[1]

# render as an HTML string: no index column, no header row,
# with a CSS id and CSS classes set on the <table> element
html_table = df.to_html(
    index=False,
    header=False,
    table_id='mars-table',
    classes=['table', 'table-striped'],
)

html_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

---
#### Mars Hemispheres

* Visit the USGS Astrogeology site (https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high-resolution images for each of Mars's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [13]:
# link text of each hemisphere; the loop in the next cell clicks these in order
hemi_names = [
    'Cerberus Hemisphere', 'Schiaparelli Hemisphere',
    'Valles Marineris Hemisphere', 'Syrtis Major Hemisphere',
]

# USGS Astrogeology search-results page for the enhanced Mars hemisphere images
USGS_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# open the results page in the shared browser
browser.visit(USGS_url)

In [14]:
# collects one {'title': ..., 'img_url': ...} dict per hemisphere
hemisphere_image_urls = []

# for every hemisphere: open its detail page, read the image link, go back
for hemi in hemi_names:

    # follow the link whose text contains the hemisphere name
    browser.click_link_by_partial_text(hemi)

    # parse the detail page currently shown in the browser
    soup = bs(browser.html, "lxml")

    # the image url is the href of the first anchor inside the first <li>
    first_item = soup.find('li')
    img_url = first_item.find('a')['href']

    # record title and url together
    hemi_dict = {'title': hemi, 'img_url': img_url}
    hemisphere_image_urls.append(hemi_dict)

    # return to the results page before the next iteration
    browser.back()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}]

---
Close browser

In [15]:
# shut down the browser session that was started via init_browser()
browser.quit()