In [1]:
# Scraping dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd


## NASA Mars News

In [33]:
# Splinter browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Scrape the NASA Mars News Site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

html = browser.html
soup = bs(html, 'html.parser')


In [4]:
# collect the latest News Title and Paragraph Text
latest_news = soup.find('li','slide')
latest_news

<li class="slide"><div class="image_and_description_container"><a href="/news/8416/after-a-reset-curiosity-is-operating-normally/" target="_self"><div class="rollover_description"><div class="rollover_description_inner">NASA's Mars rover Curiosity is in good health but takes a short break while engineers diagnose why it reset its computer. </div><div class="overlay_arrow"><img alt="More" src="/assets/overlay-arrow.png"/></div></div><div class="list_image"><img alt="NASA's Curiosity Mars took this image with its Mastcam on Feb. 10, 2019 (Sol 2316). The rover is currently exploring a region of Mount Sharp nicknamed &quot;Glen Torridon&quot; that has lots of clay minerals." src="/system/news_items/list_view_images/8416_PIA23047-th.jpg"/></div><div class="bottom_gradient"><div><h3>After a Reset, Curiosity Is Operating Normally</h3></div></div></a><div class="list_text"><div class="list_date">February 22, 2019</div><div class="content_title"><a href="/news/8416/after-a-reset-curiosity-is-op

In [5]:
# Assign the text to variables that you can reference later.
headline = latest_news.find(class_='content_title').text
print(headline)
article = latest_news.find(class_='article_teaser_body').text
print(article)

After a Reset, Curiosity Is Operating Normally
NASA's Mars rover Curiosity is in good health but takes a short break while engineers diagnose why it reset its computer. 


## JPL Mars Space Images

In [59]:
# Url for JPL Featured Space Image here.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# Click on full-image link
browser.find_by_id('full_image').click()

# Create html of webpage
html = browser.html


In [69]:
# Find the url for the current Featured Mars Image
# Beautifulsoup
soup = bs(html, 'html.parser')

# Find all buttons
button = soup.find(id='full_image')
part_url = button['data-link']

In [73]:
# Flask does not click this button
button_url = 'https://www.jpl.nasa.gov' + part_url
browser.visit(button_url)

html = browser.html
soup = bs(html, 'html.parser')

In [92]:
# Find download link
link = soup.find_all(class_='download_tiff')
featured_image_url = link[1].find('a')['href']

In [93]:
# Assign the url string to a variable called featured_image_url.
# Make sure to save a complete url string for this image.
featured_image_url

'//photojournal.jpl.nasa.gov/jpeg/PIA19180.jpg'

## Mars Weather

In [34]:
# Visit the Mars Weather twitter account
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

html = browser.html
soup = bs(html, 'html.parser')


In [35]:
# scrape the latest Mars weather tweet from the page
# attrs {data-name: Mars Weather} gets rid of any re-tweets
tweets = soup.find_all('div', class_='tweet', attrs={"data-name": "Mars Weather"})

In [38]:
# Pull paragraph elements from first tweet
# tweet_text = tweets[0].find_all('p')
# weather_dirty = tweet_text[0].contents
# weather = weather_dirty[0].replace('\n',' ')


# Sometimes tweets are not weather related
pressure = 'pressure'

for x in range(len(tweets)):
    tweet_text = tweets[x].find_all('p')
    weather_dirty = tweet_text[0].contents
    mars_weather = weather_dirty[0].replace('\n',' ')
    if pressure in mars_weather:
        break


## Mars Facts

In [40]:
# Visit the Mars Facts webpage here
url = 'https://space-facts.com/mars/'


In [41]:
# use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
table = pd.read_html(url)
table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [54]:
# Clean up table
df = table[0]
df_new = df.set_index([0])
df_new.index.names = [None]
df_rename = df_new.rename(columns={0:'Description',1:'Data'})

In [55]:
df_rename

Unnamed: 0,Data
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [42]:
# Use Pandas to convert the data to a HTML table string.
html_table = df_rename.to_html()

## Mars Hemispheres

In [140]:
# Visit the USGS Astrogeology site here 
url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'


In [144]:
# Set list
hemispheres_list = []

In [142]:
# Image url string for the full resolution hemisphere image
    # & Hemisphere title containing the hemisphere name.
cerberus_dict = {'title':'Cerberus Hemisphere','img_url':'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}
valles_dict = {'title':'Valles Marineris Hemisphere', 'img_url':'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}
syrtis_dict = {'title':'Syrtis Major Hemisphere', 'img_url':'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}
schiaparelli_dict = {'title':'Schiaparelli Hemisphere', 'img_url':'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}

In [145]:
# Append the dictionary with the image url string and the hemisphere title to a list. 
hemispheres_list.append(cerberus_dict)
hemispheres_list.append(valles_dict)
hemispheres_list.append(syrtis_dict)
hemispheres_list.append(schiaparelli_dict)


In [146]:
hemispheres_list

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}]