In [14]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import time

In [2]:
chrome_driver = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **chrome_driver, headless=False)

## Mars Nasa News

In [3]:
# Setup Nasa Scraper
nasa = 'https://mars.nasa.gov/news'
browser.visit(nasa)
nasa_link = browser.html
nasa_scraper = bs(nasa_link, 'html.parser')

# Extract Title & Teaser
nasa_title = nasa_scraper.find("div", class_="content_title").get_text()
nasa_teaser = nasa_scraper.find("div", class_="article_teaser_body").get_text()
print(nasa_title + " - " + nasa_teaser)

Meet the People Behind NASA's InSight Mars Lander - A series of NASA videos highlight scientists and engineers leading the next mission to Mars.


## JPL Mars Space Images - Featured Image


In [16]:
# Setup Image Clicker
mars_image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(mars_image_url)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(3)
browser.click_link_by_partial_text('more info')
image_html = browser.html

# grab the path of the featured image and grab the full url
image_scraper = bs(image_html, "html.parser")
image_path = image_scraper.find('figure', class_='lede')
image_url = "https://www.jpl.nasa.gov/" + image_path.a['href']
image_url

'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA14762_hires.jpg'

## Mars Weather

In [32]:
mars_twitter = "https://twitter.com/marswxreport?lang=en"
browser.visit(mars_twitter)

mars_weather = browser.html
weather_scraper = bs(mars_weather, 'html.parser')
mars_twitter = weather_scraper.find_all('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")

current_weather = ""
for tweet in mars_twitter:
    if tweet.text.startswith("Sol"):
        current_weather = tweet.text
        break
current_weather

'Sol 2108 (2018-07-12), Sunny, high -24C/-11F, low -65C/-84F, pressure at 8.06 hPa, daylight 05:19-17:27'

## Mars Facts

In [24]:
# read the html and transfer into a pandas dataframe
mars_facts = pd.read_html("https://space-facts.com/mars/")[0]
mars_facts.columns=['Property','Value']
mars_facts.set_index('Property', inplace=True)
mars_facts

Unnamed: 0_level_0,Value
Property,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [26]:
# turn the dict into an html
mars_facts.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Property</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n 

## Mars Hemispheres

In [52]:
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

# scrape initial html to grab list of hemispheres
mars_hemispheres = browser.html
hemispheres_scraper = bs(mars_hemispheres, 'html.parser')
hemispheres = hemispheres_scraper.find_all("h3")

# initialize list of dicts
hemisphere_dicts = []

for hem in hemispheres:
    #grab text only and click on link including that page
    hemisphere_text = hem.get_text()
    time.sleep(1)
    browser.click_link_by_partial_text(hemisphere_text)
    time.sleep(1)
    
    # scrape data from each individual web page and grab image link
    new_page = browser.html
    hem_page = bs(new_page, 'html.parser')
    img_link = hem_page.find('div', class_='downloads').find('li').a['href']
    
    # Create Dictionary to append to lists
    hem_dict = {}
    hem_dict['title'] = hemisphere_text
    hem_dict['img_url'] = img_link
    hemisphere_dicts.append(hem_dict)
    browser.back()
    
hemisphere_dicts

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]