In [7]:
# Dependencies
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd

In [8]:
# Include Chrome Web Driver path 
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [91]:
### Scrape NASA Mars News

newsurl = 'https://mars.nasa.gov/news/'
browser.visit(newsurl)

# HTML object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve the first <li> which contain the latest news
MarsArticles = soup.find_all('li', {"class":"slide"})[0]

# title is in the <div class="content_title"> tag
news_title = MarsArticles.find('div', class_='content_title').text
# news paragraph is in <div class="article_teaser_body"> tag
news_p = MarsArticles.find('div', class_='article_teaser_body').text

print (news_title)
print (news_p)

MarCO Makes Space for Small Explorers
A pair of NASA CubeSats flying to Mars are opening a new frontier for small spacecraft.


In [82]:
### Scrape JPL Mars Space Images

image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

# click on "FULL IMAGE" button - this take you to a page with just the medium size jpg
browser.click_link_by_partial_text('FULL IMAGE')

# doesn't work without sleep in between
time.sleep(20)

# Click on the "more info" button to get to a page with large jpg
browser.click_link_by_partial_text('more info')

# HTML object
image_html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(image_html, 'html.parser')

# image path is in <figure class="lede">
# <a href="/spaceimages/images/largesize/PIA11777_hires.jpg"><img alt="This impact crater, as seen by NASA's Mars Reconnaissance Orbiter, appears relatively recent as it has a sharp rim and well-preserved ejecta." title="This impact crater, as seen by NASA's Mars Reconnaissance Orbiter, appears relatively recent as it has a sharp rim and well-preserved ejecta." class="main_image" src="/spaceimages/images/largesize/PIA11777_hires.jpg"></a>
# </figure>
image_path = soup.find('figure', class_='lede').a['href']
featured_image_url = "https://www.jpl.nasa.gov/" + image_path

print (featured_image_url)

https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA18182_hires.jpg


In [92]:
###  Scrape Twitter for Mars Weather 

weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

# HTML object
weather_html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(weather_html, 'html.parser')

# the latest weather is in 
#   <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" lang="en" data-aria-label-part="0">
#       Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59</p>

mars_weather = soup.find_all('p', {"class":"TweetTextSize"})[0].text

print (mars_weather)


Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59


In [106]:
### Scrape Mars Facts 

fact_url = "http://space-facts.com/mars/"

# read table into a list
fact_list = pd.read_html(fact_url)
#fact_list
#type(fact_list)

# convert list to a DataFrame
fact_df = fact_list[0]
#type(fact_df)
#fact_df

fact_df.columns = ["Description", "Value"]
#fact_df

# Set the index to the State column
fact_df.set_index('Description', inplace=True)
fact_df


Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [108]:
# Convert DataFrame to a html table
fact_html_table = fact_df.to_html()
fact_html_table



'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

In [12]:
##  Scrape USGS for Mars Hemispheres images

usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(usgs_url)
usgs_html = browser.html
soup = BeautifulSoup(usgs_html, "html.parser")

# the link to each picture is in 
# <a href="/search/map/Mars/Viking/cerberus_enhanced" class="itemLink product-item"><h3>Cerberus Hemisphere Enhanced</h3></a>
# and there are 4 links on the page
# this does not work - sgsImgLinks = soup.find_all('a', class_="itemLink") - because there are two sets of these for each image
usgsImgLinks = soup.find_all('div', class_="description")

hemisphere_image_urls = []

for usgsImgLink in usgsImgLinks:
    title = usgsImgLink.h3.text
    img_url = "https://astrogeology.usgs.gov" + usgsImgLink.a['href']
    #print (title)
    #print (img_url)
    
    ####  new
    browser.visit(img_url)
    fullUsgs_html = browser.html
    Imgsoup = BeautifulSoup(fullUsgs_html, "html.parser") 
    
    #full image link is in <img class="wide-image" src="/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg">
    fullImgLink = "https://astrogeology.usgs.gov" +  Imgsoup.find('img',class_ ="wide-image" )['src']
    #print (fullImgLink)
    
    #### end new
    
    # add title and image url to a dictionary, then add the dictionary to the list
    image_dict = {}
    image_dict['title'] = title
    image_dict['img_url'] = fullImgLink
        
    hemisphere_image_urls.append(image_dict)
        
hemisphere_image_urls



https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [119]:
# put all results into a dictionary
marsInfo_dict = {
        "news_title": news_title,
        "news_p": news_p,
        "featured_image_url": featured_image_url,
        "mars_weather": mars_weather,
        "fact_table": fact_html_table,
        "hemisphere_image_urls": hemisphere_image_urls
    }

marsInfo_dict

{'news_title': 'MarCO Makes Space for Small Explorers',
 'news_p': 'A pair of NASA CubeSats flying to Mars are opening a new frontier for small spacecraft.',
 'featured_image_url': 'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA18182_hires.jpg',
 'mars_weather': 'Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59',
 'fact_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td