In [58]:
# import dependencies
from splinter import Browser
from bs4 import BeautifulSoup

# set up and initialize browser in splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [59]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [60]:
# Create BeautifulSoup object
soup = BeautifulSoup(browser.html, 'html.parser')

In [6]:
# find the 1st "slide" list element & 1st title "content title" div to retrieve article title
article_list = soup.find('li', class_='slide')
news_title = article_list.find('div', class_='content_title').get_text()
# news_title

"The Mast Is Raised for NASA's Mars 2020 Rover"

In [7]:
# grab "article_teaser_body" as the paragraph text for the article
news_p = article_list.find('div', class_='article_teaser_body').get_text()
# news_p

'Engineers at JPL take a group selfie after attaching the remote sensing mast to the Mars 2020 rover.'

### JPL Mars Images

In [64]:
# url of images page to be scraped
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [65]:
# Create BeautifulSoup object
soup = BeautifulSoup(browser.html, 'html.parser')

In [66]:
# go to section with featured image
img_section = soup.find('div',class_ = "default floating_text_area ms-layer")\
    .footer.a
# print(img_section)

<a class="button fancybox" data-description="This image of NASAs Hubble Space Telescope shows Astronaut Jeffrey Hoffman and Story Musgrave installing the Wide Field and Planetary Camera 2 (WFPC2) on the Hubble Space Telescope, during SM1 in December, 1993." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA22911_ip.jpg" data-link="/spaceimages/details.php?id=PIA22911" data-title="Installing Hubble's New Camera" id="full_image">
					FULL IMAGE
				  </a>


In [67]:
# grab relative url for "more info" data-link
detail_link = img_section["data-link"]

# create url to get to image detail page
img_url = f"https://www.jpl.nasa.gov{detail_link}"

In [68]:
# scrape image detail page
browser.visit(img_url)
img_soup = BeautifulSoup(browser.html, 'html.parser')

In [69]:
# get relative path of full size image
full_res_p = img_soup.find('figure', class_='lede').a
img_path = full_res_p.img["src"]

In [70]:
# add relative path to jpl base url for full size image url
featured_img_url = f"https://www.jpl.nasa.gov{img_path}"
# print(featured_img_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22911_hires.jpg


### Mars Weather

In [75]:
# go to URL to be scraped
w_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(w_url)

In [76]:
# Create BeautifulSoup object
w_soup = BeautifulSoup(browser.html, 'html.parser')

In [77]:
# find first tweet and get text
mars_weather = w_soup.find('div', class_="js-tweet-text-container").p.text
# print(mars_weather)

InSight sol 198 (2019-06-17) low -104.8ºC (-156.6ºF) high -24.7ºC (-12.4ºF)
winds from the SSE at 4.0 m/s (8.9 mph) gusting to 15.5 m/s (34.6 mph)
pressure at 7.60 hPapic.twitter.com/YQcWLlcAy3


### Mars Facts

In [82]:
import pandas as pd

# Use pandas to read html and create a dataframe, then clean up df
facts_df = pd.read_html('https://space-facts.com/mars/')[0]
facts_df.columns = ['description', 'value']
facts_df.set_index('description', inplace=True)
# facts_df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [83]:
# convert df to html table string
facts_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

### Mars Hemispheres

In [51]:
# URL to be scraped using splinter
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [56]:
# set up list that will include each hemisphere
hemisphere_img_urls = []

# Get list of elements containing links for hemispheres
links = browser.find_by_css("a.product-item h3")

In [57]:
# Loop through the links and click into them for img element info
for link in range(len(links)):
    hem = {}
    
    # click into a href element
    browser.find_by_css("a.product-item h3")[link].click()
    
    # from new page, find url & title associated with text "Sample"
    img_elem = browser.find_link_by_text('Sample')
    hem['img_url'] = img_elem['href']
    
    hem['title'] = browser.find_by_css("h2.title").text
    
    # add 'hem' dictionary to list
    hemisphere_img_urls.append(hem)
    
    # back to previous page
    browser.back()

# print(hemisphere_img_urls)

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg', 'title': 'Cerberus Hemisphere Enhanced'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg', 'title': 'Schiaparelli Hemisphere Enhanced'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg', 'title': 'Syrtis Major Hemisphere Enhanced'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg', 'title': 'Valles Marineris Hemisphere Enhanced'}]
